buildingdata 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- buildingdata/__init__.py +16 -0
- buildingdata/_cli.py +29 -0
- buildingdata/cache.py +92 -0
- buildingdata/config.py +119 -0
- buildingdata/exceptions.py +17 -0
- buildingdata/gcs.py +82 -0
- buildingdata/reference/__init__.py +5 -0
- buildingdata/reference/census.py +48 -0
- buildingdata/reference/diagnosis.py +58 -0
- buildingdata/reference/districts.py +41 -0
- buildingdata/reference/gas_network.py +40 -0
- buildingdata/simulation/__init__.py +3 -0
- buildingdata/simulation/bdtopo.py +87 -0
- buildingdata/simulation/era5.py +264 -0
- buildingdata/tests/__init__.py +1 -0
- buildingdata/tests/conftest.py +60 -0
- buildingdata/tests/test_cache.py +78 -0
- buildingdata/tests/test_config.py +80 -0
- buildingdata/tests/test_public_api.py +49 -0
- buildingdata/tests/test_reference.py +231 -0
- buildingdata/tests/test_simulation.py +184 -0
- buildingdata-0.1.0.dist-info/METADATA +26 -0
- buildingdata-0.1.0.dist-info/RECORD +26 -0
- buildingdata-0.1.0.dist-info/WHEEL +5 -0
- buildingdata-0.1.0.dist-info/entry_points.txt +2 -0
- buildingdata-0.1.0.dist-info/top_level.txt +1 -0
buildingdata/__init__.py
ADDED
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
from .config import write_config
|
|
3
|
+
from .reference import get_census, get_diagnosis, get_districts, get_gas_network
|
|
4
|
+
from .simulation import get_bdtopo, get_era5_climate
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def configure(bucket=None, cache_dir=None, credentials=None):
    """Persist package settings to ~/.config/buildingdata/config.ini.

    Thin public wrapper: every argument is forwarded by keyword to
    ``write_config``, which performs the actual file update.

    Args:
        bucket (str, optional): GCS bucket name.
        cache_dir (str or pathlib.Path, optional): local cache directory.
        credentials (str or pathlib.Path, optional): path to a GCS service
            account JSON file.
    """
    settings = {
        "bucket": bucket,
        "cache_dir": cache_dir,
        "credentials": credentials,
    }
    write_config(**settings)
|
buildingdata/_cli.py
ADDED
|
@@ -0,0 +1,29 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import argparse
|
|
3
|
+
|
|
4
|
+
from .config import write_config
|
|
5
|
+
|
|
6
|
+
|
|
7
|
+
def main():
    """Run the ``buildingdata`` command-line interface.

    Recognises a single ``configure`` sub-command whose options are handed to
    ``write_config``. Any other invocation (including no sub-command at all)
    prints the top-level help text.
    """
    cli = argparse.ArgumentParser(
        prog="buildingdata",
        description="Configure the buildingdata package.",
    )
    commands = cli.add_subparsers(dest="command")

    configure_cmd = commands.add_parser(
        "configure", help="Write ~/.config/buildingdata/config.ini"
    )
    configure_cmd.add_argument("--bucket", default=None, help="GCS bucket name")
    configure_cmd.add_argument(
        "--cache-dir", default=None, dest="cache_dir", help="Local cache directory"
    )
    configure_cmd.add_argument(
        "--credentials", default=None, help="Path to GCS service account JSON"
    )

    options = cli.parse_args()

    # Guard clause: nothing to do unless "configure" was requested.
    if options.command != "configure":
        cli.print_help()
        return

    write_config(
        bucket=options.bucket,
        cache_dir=options.cache_dir,
        credentials=options.credentials,
    )
    print("Configuration saved.")
|
buildingdata/cache.py
ADDED
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import json
|
|
3
|
+
from datetime import datetime, timezone
|
|
4
|
+
from pathlib import Path
|
|
5
|
+
|
|
6
|
+
from .config import get_cache_dir
|
|
7
|
+
from .exceptions import CacheError
|
|
8
|
+
|
|
9
|
+
|
|
10
|
+
def cache_path(name):
    """Build the local path at which a named artifact is cached.

    Args:
        name (str): logical artifact name, e.g. "census_latest.parquet".

    Returns:
        pathlib.Path: ``name`` joined onto the directory returned by
        ``get_cache_dir()``.
    """
    cache_root = get_cache_dir()
    return cache_root / name
|
|
20
|
+
|
|
21
|
+
|
|
22
|
+
def _sidecar_path(name):
    """Return the path of the ``.meta.json`` sidecar that accompanies *name*."""
    cache_root = get_cache_dir()
    return cache_root / (name + ".meta.json")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
def is_cached(name):
    """Tell whether both the artifact file and its sidecar exist locally.

    Only file existence is checked; the sidecar contents are not parsed and
    GCS is not contacted. Use needs_refresh() for a freshness check.

    Args:
        name (str): logical artifact name.

    Returns:
        bool: True when the artifact and its ``.meta.json`` sidecar are both
        present in the cache directory.
    """
    if not cache_path(name).exists():
        return False
    return _sidecar_path(name).exists()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def needs_refresh(name, blob):
    """Decide whether the cached copy is behind the given GCS blob.

    The generation recorded in the sidecar is compared with
    ``blob.generation``. A missing sidecar, or any error while reading or
    interpreting it, is treated as "refresh needed".

    Args:
        name (str): logical artifact name.
        blob (google.cloud.storage.Blob): live blob object from GCS.

    Returns:
        bool: True if the recorded generation is lower than the blob's, or if
        the sidecar is absent or unusable.
    """
    meta_file = _sidecar_path(name)
    if not meta_file.exists():
        return True
    try:
        with open(meta_file) as handle:
            recorded = json.load(handle)
        # A sidecar without a "generation" key counts as older than any blob.
        cached_generation = int(recorded.get("generation", -1))
        return cached_generation < blob.generation
    except Exception:
        # Best effort: an unreadable sidecar simply triggers a re-download.
        return True
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def write_sidecar(name, blob):
    """Record download metadata next to a freshly cached artifact.

    The sidecar is a JSON document holding the blob name, its generation and
    the UTC timestamp at which this function ran.

    Args:
        name (str): logical artifact name.
        blob (google.cloud.storage.Blob): the blob that was downloaded.

    Raises:
        CacheError: if the sidecar file cannot be written (wraps OSError).
    """
    target = _sidecar_path(name)
    payload = {
        "blob_name": blob.name,
        "generation": blob.generation,
        "downloaded_at": datetime.now(timezone.utc).isoformat(),
    }
    try:
        with open(target, "w") as handle:
            json.dump(payload, handle, indent=2)
    except OSError as exc:
        raise CacheError(f"Failed to write cache sidecar {target}: {exc}") from exc
|
|
79
|
+
|
|
80
|
+
|
|
81
|
+
def ensure_subdir(subdir):
    """Return a directory below the cache dir, creating it when missing.

    Args:
        subdir (str): subdirectory name, e.g. "bdtopo" or "era5".

    Returns:
        pathlib.Path: ``get_cache_dir() / subdir``; the directory exists on
        return.
    """
    directory = get_cache_dir() / subdir
    directory.mkdir(parents=True, exist_ok=True)
    return directory
|
buildingdata/config.py
ADDED
|
@@ -0,0 +1,119 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import configparser
|
|
3
|
+
import hashlib
|
|
4
|
+
import os
|
|
5
|
+
from pathlib import Path
|
|
6
|
+
|
|
7
|
+
from platformdirs import user_data_dir
|
|
8
|
+
|
|
9
|
+
_DEFAULT_BUCKET = "building-inference-data"
|
|
10
|
+
_CONFIG_FILE = Path.home() / ".config" / "buildingdata" / "config.ini"
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
def _install_id():
    """Compute a short identifier for this installation of the package.

    The id is the first 8 hex characters of the SHA-256 digest of the
    resolved directory that contains this module, so it stays the same for as
    long as the package lives at the same location on disk.

    Returns:
        str: 8-character hexadecimal identifier.
    """
    package_dir = Path(__file__).resolve().parent
    digest = hashlib.sha256(str(package_dir).encode("utf-8"))
    return digest.hexdigest()[:8]
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def _default_cache_dir():
    """Build the default cache location for this installation.

    The path is ``<user data dir for "buildingdata">/cache/<install id>``,
    where the last component comes from ``_install_id()`` so that separate
    installations get separate cache directories.

    Returns:
        pathlib.Path: default cache directory (not created here).
    """
    data_root = Path(user_data_dir("buildingdata"))
    return data_root / "cache" / _install_id()
|
|
38
|
+
|
|
39
|
+
|
|
40
|
+
def _read_config():
    """Load the user config file into a ConfigParser.

    Returns:
        configparser.ConfigParser: parser populated from ``_CONFIG_FILE`` when
        that file exists, otherwise an empty parser.
    """
    parser = configparser.ConfigParser()
    if not _CONFIG_FILE.exists():
        return parser
    parser.read(_CONFIG_FILE)
    return parser
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def get_bucket():
    """Resolve the name of the GCS bucket to read from.

    Lookup order: the BUILDINGDATA_BUCKET environment variable, then the
    ``bucket`` option of the ``[gcs]`` config section, then the built-in
    default.

    Returns:
        str: bucket name exactly as found in the winning source.
    """
    override = os.environ.get("BUILDINGDATA_BUCKET")
    if override is not None:
        return override
    return _read_config().get("gcs", "bucket", fallback=_DEFAULT_BUCKET)
|
|
59
|
+
|
|
60
|
+
|
|
61
|
+
def get_cache_dir():
    """Return the local cache directory, creating it if absent.

    Precedence: BUILDINGDATA_CACHE_DIR env var → ``dir`` option of the
    ``[cache]`` config section → per-installation default.

    A leading ``~`` in the configured value is expanded to the user's home
    directory, so values such as ``~/bd-cache`` written by hand in the config
    file (or passed unexpanded by a shell) do not create a literal ``~``
    directory under the current working directory.

    Returns:
        pathlib.Path: path to the cache directory; it exists on return.
    """
    env_value = os.environ.get("BUILDINGDATA_CACHE_DIR")
    if env_value is not None:
        raw = env_value
    else:
        raw = _read_config().get("cache", "dir", fallback=str(_default_cache_dir()))
    path = Path(raw).expanduser()
    path.mkdir(parents=True, exist_ok=True)
    return path
|
|
77
|
+
|
|
78
|
+
|
|
79
|
+
def get_credentials_file():
    """Return path to a GCS service account JSON, or None when none is set.

    Precedence: GOOGLE_APPLICATION_CREDENTIALS env var → ``credentials``
    option of the ``[gcs]`` config section → None.

    An empty environment variable is treated the same as an unset one, so it
    falls through to the config file instead of producing ``Path("")`` (which
    is the current directory). A leading ``~`` is expanded to the user's home
    directory.

    Returns:
        pathlib.Path or None: path to credentials JSON, or None when neither
        source provides a non-empty value.
    """
    raw = os.environ.get("GOOGLE_APPLICATION_CREDENTIALS")
    if not raw:
        raw = _read_config().get("gcs", "credentials", fallback=None)
    return Path(raw).expanduser() if raw else None
|
|
95
|
+
|
|
96
|
+
|
|
97
|
+
def write_config(bucket=None, cache_dir=None, credentials=None):
    """Create or update ~/.config/buildingdata/config.ini.

    Existing settings are loaded first; only the arguments that are not None
    overwrite their option, everything else is written back unchanged.

    Args:
        bucket (str, optional): GCS bucket name, stored as ``[gcs] bucket``.
        cache_dir (str or pathlib.Path, optional): local cache directory,
            stored as ``[cache] dir``.
        credentials (str or pathlib.Path, optional): path to GCS service
            account JSON, stored as ``[gcs] credentials``. Pass None to leave
            unchanged.
    """
    _CONFIG_FILE.parent.mkdir(parents=True, exist_ok=True)
    parser = _read_config()

    # Both sections are always present in the written file.
    for section in ("gcs", "cache"):
        if not parser.has_section(section):
            parser.add_section(section)

    if bucket is not None:
        parser.set("gcs", "bucket", bucket)

    # Path-like values are stringified before being stored.
    path_options = (
        ("gcs", "credentials", credentials),
        ("cache", "dir", cache_dir),
    )
    for section, option, value in path_options:
        if value is not None:
            parser.set(section, option, str(value))

    with open(_CONFIG_FILE, "w") as handle:
        parser.write(handle)
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
|
|
3
|
+
|
|
4
|
+
class BuildingDataError(Exception):
    """Common base class for the exception types defined by buildingdata."""
|
|
6
|
+
|
|
7
|
+
|
|
8
|
+
class CacheError(BuildingDataError):
    """Signals that reading from or writing to the local cache failed."""
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
class RemoteNotAvailableError(BuildingDataError):
    """Signals that a remote resource is missing or answered unsuccessfully."""
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ConfigurationError(BuildingDataError):
    """Signals that required configuration (credentials, API key) is absent."""
|
buildingdata/gcs.py
ADDED
|
@@ -0,0 +1,82 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
from pathlib import Path
|
|
3
|
+
|
|
4
|
+
from tqdm import tqdm
|
|
5
|
+
|
|
6
|
+
from .config import get_bucket, get_credentials_file
|
|
7
|
+
|
|
8
|
+
|
|
9
|
+
def get_client():
    """Build the google.cloud.storage.Client used for all bucket access.

    When ``get_credentials_file()`` yields a path, the client is created from
    that service account JSON file. Otherwise an anonymous client is
    returned.

    Returns:
        google.cloud.storage.Client: storage client.
    """
    # Imported lazily so the package can be imported without google-cloud.
    from google.cloud import storage

    key_file = get_credentials_file()
    if key_file is None:
        return storage.Client.create_anonymous_client()
    return storage.Client.from_service_account_json(str(key_file))
|
|
25
|
+
|
|
26
|
+
|
|
27
|
+
def get_blob(name):
    """Return the Blob object for a given name, or None if not found.

    Existence is checked *before* the blob metadata is loaded: calling
    ``reload()`` on a missing object raises instead of returning, which would
    make the documented ``None`` result unreachable.

    Args:
        name (str): blob name inside the configured bucket.

    Returns:
        google.cloud.storage.Blob or None: the blob with its metadata
        (generation, size, ...) loaded, or None when the bucket has no object
        with that name.
    """
    bucket = get_client().bucket(get_bucket())
    blob = bucket.blob(name)
    if not blob.exists():
        return None
    # Populate server-side metadata such as ``generation`` for the caller.
    blob.reload()
    return blob
|
|
41
|
+
|
|
42
|
+
|
|
43
|
+
class _ProgressWriter:
    """File-like wrapper that advances a progress bar on every write."""

    def __init__(self, raw, bar):
        self._raw = raw
        self._bar = bar

    def write(self, data):
        written = self._raw.write(data)
        self._bar.update(len(data))
        return written

    def __getattr__(self, attr):
        # Anything other than write() (tell, flush, ...) goes to the real file.
        return getattr(self._raw, attr)


def download_blob(name, dest_path, show_progress=True):
    """Download a blob from the configured bucket to dest_path.

    The data is first written to ``<dest_path>.tmp`` and moved over
    ``dest_path`` only after the download finished, so an interrupted
    download never leaves a truncated file at the final location. If the
    download fails, the temporary file is removed and the error re-raised.

    Args:
        name (str): blob name inside the bucket, e.g. "census_latest.parquet".
        dest_path (pathlib.Path): local destination file path.
        show_progress (bool): display a tqdm progress bar that advances as
            data is written. Defaults to True.

    Returns:
        google.cloud.storage.Blob: the blob that was downloaded, with its
        metadata loaded.
    """
    dest_path = Path(dest_path)
    dest_path.parent.mkdir(parents=True, exist_ok=True)

    client = get_client()
    bucket = client.bucket(get_bucket())
    blob = bucket.blob(name)
    blob.reload()

    total = blob.size or 0
    tmp_path = dest_path.with_suffix(dest_path.suffix + ".tmp")

    try:
        with open(tmp_path, "wb") as f:
            if show_progress:
                with tqdm(
                    total=total,
                    unit="B",
                    unit_scale=True,
                    desc=name,
                    leave=True,
                ) as bar:
                    # NOTE(review): this branch passes checksum/raw_download
                    # while the silent branch uses the library defaults —
                    # confirm whether both should use the same options.
                    blob.download_to_file(
                        _ProgressWriter(f, bar), checksum="md5", raw_download=True
                    )
                    # Top the bar up in case fewer bytes were reported.
                    if bar.n < total:
                        bar.update(total - bar.n)
            else:
                blob.download_to_file(f)
        tmp_path.replace(dest_path)
    except BaseException:
        # Do not leave a partial download behind; the file is closed here.
        if tmp_path.exists():
            tmp_path.unlink()
        raise
    return blob
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import polars as pl
|
|
3
|
+
|
|
4
|
+
from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
|
|
5
|
+
from ..exceptions import RemoteNotAvailableError
|
|
6
|
+
from ..gcs import download_blob, get_blob
|
|
7
|
+
|
|
8
|
+
_BLOB_NAME = "census_latest.parquet"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_census(city_group_list=None, refresh=False):
    """Return the processed INSEE census DataFrame, optionally pre-filtered.

    Downloads census_latest.parquet from GCS to the local cache on first
    call. With ``refresh=True`` GCS is queried and the file is downloaded
    again when ``needs_refresh`` reports the cached copy as out of date —
    the same policy the other reference loaders apply. Filtering by
    city_group_list uses a Polars lazy scan of the cached parquet file.

    Args:
        city_group_list (list of str, optional): values of the ``city_group``
            column to keep. If None, no filter is applied. Defaults to None.
        refresh (bool): check GCS for a newer version even if the cache is
            warm. Defaults to False.

    Returns:
        polars.DataFrame: the cached census table, restricted to the
        requested city groups when a filter is given.

    Raises:
        RemoteNotAvailableError: if GCS has to be contacted and the blob does
            not exist in the bucket.
    """
    dest = cache_path(_BLOB_NAME)

    if not is_cached(_BLOB_NAME) or refresh:
        blob = get_blob(_BLOB_NAME)
        if blob is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        # Cold cache always downloads; a warm cache only when it is stale.
        if not is_cached(_BLOB_NAME) or needs_refresh(_BLOB_NAME, blob):
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, blob)

    lf = pl.scan_parquet(dest)
    if city_group_list is not None:
        lf = lf.filter(pl.col("city_group").is_in(city_group_list))
    return lf.collect()
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import polars as pl
|
|
3
|
+
|
|
4
|
+
from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
|
|
5
|
+
from ..exceptions import RemoteNotAvailableError
|
|
6
|
+
from ..gcs import download_blob, get_blob
|
|
7
|
+
|
|
8
|
+
_BLOB_NAME = "energy_performance_diagnosis_latest.parquet"
|
|
9
|
+
|
|
10
|
+
# Heating/DHW energies excluded from inference (no meaningful DPE data for coal)
|
|
11
|
+
_EXCLUDED_ENERGIES = ["Charbon"]
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def get_diagnosis(refresh=False):
    """Load the DPE energy performance diagnosis table and clean it up.

    energy_performance_diagnosis_latest.parquet is fetched from GCS when it
    is not cached yet. With ``refresh=True`` GCS is queried and the file is
    downloaded again if ``needs_refresh`` reports the cached copy as stale.

    After loading, records whose backup heating or DHW energy is listed in
    ``_EXCLUDED_ENERGIES`` are removed, ``heating_system`` is cast to
    Categorical and ``region`` to Int64, and the ``living_area`` /
    ``living_area_class`` columns are dropped when present.

    Args:
        refresh (bool): check GCS for a newer version even if the cache is
            warm. Defaults to False.

    Returns:
        polars.DataFrame: the cleaned DPE records.

    Raises:
        RemoteNotAvailableError: if GCS has to be contacted and the blob does
            not exist in the bucket.
    """
    dest = cache_path(_BLOB_NAME)

    must_contact_gcs = not is_cached(_BLOB_NAME) or refresh
    if must_contact_gcs:
        remote = get_blob(_BLOB_NAME)
        if remote is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        stale = not is_cached(_BLOB_NAME) or needs_refresh(_BLOB_NAME, remote)
        if stale:
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, remote)

    frame = pl.read_parquet(dest)

    # Keep only rows where neither energy column holds an excluded value.
    # NOTE(review): rows with a null in either column may be dropped as well,
    # since a null predicate does not pass the filter — confirm intent.
    backup_ok = ~pl.col("backup_heating_energy").is_in(_EXCLUDED_ENERGIES)
    dhw_ok = ~pl.col("dhw_energy").is_in(_EXCLUDED_ENERGIES)
    frame = frame.filter(backup_ok & dhw_ok)

    frame = frame.with_columns([
        pl.col("heating_system").cast(pl.Categorical),
        pl.col("region").cast(pl.Int64),
    ])

    for column in ("living_area", "living_area_class"):
        if column in frame.columns:
            frame = frame.drop([column])

    return frame
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import geopandas as gpd
|
|
3
|
+
|
|
4
|
+
from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
|
|
5
|
+
from ..exceptions import RemoteNotAvailableError
|
|
6
|
+
from ..gcs import download_blob, get_blob
|
|
7
|
+
|
|
8
|
+
_BLOB_NAME = "districts_latest.parquet"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_districts(bbox=None, refresh=False):
    """Load the IRIS district boundaries as a GeoDataFrame.

    districts_latest.parquet is fetched from GCS when it is not cached yet.
    With ``refresh=True`` GCS is queried and the file is downloaded again if
    ``needs_refresh`` reports the cached copy as stale.

    Args:
        bbox (tuple of float, optional): (minx, miny, maxx, maxy) bounding
            box forwarded to geopandas.read_parquet. When None, the keyword
            is not passed at all. Defaults to None.
        refresh (bool): check GCS for a newer version even if the cache is
            warm. Defaults to False.

    Returns:
        geopandas.GeoDataFrame: the district layer read from the cached
        parquet file (EPSG:2154 per the package documentation).

    Raises:
        RemoteNotAvailableError: if GCS has to be contacted and the blob does
            not exist in the bucket.
    """
    dest = cache_path(_BLOB_NAME)

    must_contact_gcs = not is_cached(_BLOB_NAME) or refresh
    if must_contact_gcs:
        remote = get_blob(_BLOB_NAME)
        if remote is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        stale = not is_cached(_BLOB_NAME) or needs_refresh(_BLOB_NAME, remote)
        if stale:
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, remote)

    read_options = {} if bbox is None else {"bbox": bbox}
    return gpd.read_parquet(dest, **read_options)
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import geopandas as gpd
|
|
3
|
+
|
|
4
|
+
from ..cache import cache_path, is_cached, needs_refresh, write_sidecar
|
|
5
|
+
from ..exceptions import RemoteNotAvailableError
|
|
6
|
+
from ..gcs import download_blob, get_blob
|
|
7
|
+
|
|
8
|
+
_BLOB_NAME = "gas_network_route_latest.gpkg"
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
def get_gas_network(bbox=None, refresh=False):
    """Load the GRDF gas pipeline network as a GeoDataFrame.

    gas_network_route_latest.gpkg is fetched from GCS when it is not cached
    yet (a large download). With ``refresh=True`` GCS is queried and the file
    is downloaded again if ``needs_refresh`` reports the cached copy as
    stale.

    Args:
        bbox (tuple of float, optional): (minx, miny, maxx, maxy) bounding
            box forwarded to geopandas.read_file so that only that area is
            read. When None, the keyword is not passed at all. Defaults to
            None.
        refresh (bool): check GCS for a newer version even if the cache is
            warm. Defaults to False.

    Returns:
        geopandas.GeoDataFrame: gas pipeline routes read from the cached
        GeoPackage (EPSG:2154 per the package documentation).

    Raises:
        RemoteNotAvailableError: if GCS has to be contacted and the blob does
            not exist in the bucket.
    """
    dest = cache_path(_BLOB_NAME)

    must_contact_gcs = not is_cached(_BLOB_NAME) or refresh
    if must_contact_gcs:
        remote = get_blob(_BLOB_NAME)
        if remote is None:
            raise RemoteNotAvailableError(f"Blob {_BLOB_NAME!r} not found in GCS bucket.")
        stale = not is_cached(_BLOB_NAME) or needs_refresh(_BLOB_NAME, remote)
        if stale:
            download_blob(_BLOB_NAME, dest)
            write_sidecar(_BLOB_NAME, remote)

    read_options = {} if bbox is None else {"bbox": bbox}
    return gpd.read_file(dest, **read_options)
|
|
@@ -0,0 +1,87 @@
|
|
|
1
|
+
# -*- coding: utf-8 -*-
|
|
2
|
+
import geopandas as gpd
|
|
3
|
+
import requests
|
|
4
|
+
|
|
5
|
+
from ..cache import ensure_subdir
|
|
6
|
+
from ..exceptions import RemoteNotAvailableError
|
|
7
|
+
from ..reference.districts import get_districts
|
|
8
|
+
|
|
9
|
+
# IGN Géoplateforme WFS endpoint for BDTOPO buildings
|
|
10
|
+
_IGN_BASE_URL = (
|
|
11
|
+
"https://data.geopf.fr/wfs/ows"
|
|
12
|
+
"?SERVICE=WFS"
|
|
13
|
+
"&VERSION=2.0.0"
|
|
14
|
+
"&REQUEST=GetFeature"
|
|
15
|
+
"&TYPENAMES=BDTOPO_V3:batiment"
|
|
16
|
+
"&OUTPUTFORMAT=application/json"
|
|
17
|
+
"&SRSNAME=EPSG:2154"
|
|
18
|
+
"&BBOX={minx},{miny},{maxx},{maxy},urn:ogc:def:crs:EPSG::2154"
|
|
19
|
+
)
|
|
20
|
+
|
|
21
|
+
_TIMEOUT = 120 # seconds
|
|
22
|
+
|
|
23
|
+
|
|
24
|
+
def _cache_path(iris_code):
    """Return the cached parquet path for one IRIS, creating the folder."""
    return ensure_subdir("bdtopo") / f"bdtopo_iris_{iris_code}.parquet"
|
|
27
|
+
|
|
28
|
+
|
|
29
|
+
def _sidecar_path(iris_code):
    """Return the ``.meta.json`` sidecar path that belongs to one IRIS file."""
    return ensure_subdir("bdtopo") / f"bdtopo_iris_{iris_code}.parquet.meta.json"
|
|
32
|
+
|
|
33
|
+
|
|
34
|
+
def get_bdtopo(iris_code, refresh=False):
    """Download BDTOPO buildings for one IRIS from the IGN Géoplateforme WFS.

    The GeoDataFrame is cached as
    ``bdtopo/bdtopo_iris_<iris_code>.parquet`` below the configured cache
    directory and read back from there on later calls. Column names are the
    attribute names delivered by the WFS service; no renaming is applied.

    The request uses the bounding box of the district geometry; the result is
    then narrowed to the buildings that intersect the district itself.

    Args:
        iris_code (str): 9-digit IRIS code, e.g. "461290000".
        refresh (bool): query the WFS again even if a cached file exists.
            Defaults to False.

    Returns:
        geopandas.GeoDataFrame: BDTOPO buildings in EPSG:2154.

    Raises:
        RemoteNotAvailableError: if the IRIS code is unknown, if the IGN
            endpoint returns a non-200 status, if its response is not a JSON
            feature collection, or if it contains no features.
    """
    dest = _cache_path(iris_code)

    if dest.exists() and not refresh:
        return gpd.read_parquet(dest)

    # Get the district geometry to derive the bounding box and filter buildings
    districts = get_districts()
    iris_geom = districts[districts["district"] == iris_code]
    if iris_geom.empty:
        raise RemoteNotAvailableError(f"IRIS code {iris_code!r} not found in districts.")

    district_geom = iris_geom.iloc[0].geometry
    bbox = district_geom.bounds  # (minx, miny, maxx, maxy)

    url = _IGN_BASE_URL.format(minx=bbox[0], miny=bbox[1], maxx=bbox[2], maxy=bbox[3])
    response = requests.get(url, timeout=_TIMEOUT)
    if response.status_code != 200:
        raise RemoteNotAvailableError(
            f"IGN Géoplateforme returned HTTP {response.status_code} for IRIS {iris_code!r}."
        )

    # A 200 answer is not guaranteed to be a GeoJSON feature collection
    # (e.g. a service exception report); surface that as a remote failure
    # instead of a bare ValueError/KeyError.
    try:
        features = response.json()["features"]
    except (ValueError, KeyError, TypeError) as e:
        raise RemoteNotAvailableError(
            f"IGN Géoplateforme returned an unexpected payload for IRIS {iris_code!r}."
        ) from e
    if not features:
        raise RemoteNotAvailableError(
            f"IGN Géoplateforme returned no buildings for IRIS {iris_code!r}."
        )

    gdf = gpd.GeoDataFrame.from_features(features, crs="EPSG:2154")

    # Filter to keep only buildings that intersect with the district geometry
    # NOTE(review): this can leave an empty frame, which is then cached as-is
    # — confirm that is intended.
    gdf = gdf[gdf.geometry.intersects(district_geom)].copy()

    gdf.to_parquet(dest)
    return gdf
|