buildstock-fetch 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of buildstock-fetch might be problematic.
- buildstock_fetch/data/buildstock_releases.json +166 -77
- buildstock_fetch/data/weather_station_map/weather_station_map.parquet +0 -0
- buildstock_fetch/main.py +458 -99
- buildstock_fetch/main_cli.py +166 -58
- {buildstock_fetch-1.1.0.dist-info → buildstock_fetch-1.2.0.dist-info}/METADATA +1 -1
- {buildstock_fetch-1.1.0.dist-info → buildstock_fetch-1.2.0.dist-info}/RECORD +10 -10
- {buildstock_fetch-1.1.0.dist-info → buildstock_fetch-1.2.0.dist-info}/WHEEL +0 -0
- {buildstock_fetch-1.1.0.dist-info → buildstock_fetch-1.2.0.dist-info}/entry_points.txt +0 -0
- {buildstock_fetch-1.1.0.dist-info → buildstock_fetch-1.2.0.dist-info}/licenses/LICENSE +0 -0
- {buildstock_fetch-1.1.0.dist-info → buildstock_fetch-1.2.0.dist-info}/top_level.txt +0 -0
buildstock_fetch/main.py
CHANGED
@@ -71,6 +71,12 @@ class UnknownAggregationFunctionError(ValueError):
     pass


+class NoWeatherFileError(ValueError):
+    """Raised when weather file is not available for a release."""
+
+    pass
+
+
 METADATA_DIR = Path(
     str(files("buildstock_fetch").joinpath("data").joinpath("building_data").joinpath("combined_metadata.parquet"))
 )
@@ -83,6 +89,7 @@ LOAD_CURVE_COLUMN_AGGREGATION = Path(
         .joinpath("2024_resstock_load_curve_columns.csv")
     )
 )
+WEATHER_FILE_DIR = Path(str(files("buildstock_fetch").joinpath("data").joinpath("weather_station_map")))


 @dataclass
@@ -95,6 +102,7 @@ class RequestedFileTypes:
     load_curve_daily: bool = False
     load_curve_monthly: bool = False
    load_curve_annual: bool = False
+    weather: bool = False


 @dataclass
@@ -274,6 +282,85 @@ class BuildingID:
         else:
             return ""

+    def get_weather_file_url(self) -> str:
+        """Generate the S3 download URL for this building."""
+        if self.get_weather_station_name() == "":
+            return ""
+        return self._build_weather_url()
+
+    def _build_weather_url(self) -> str:
+        """Build the weather file URL based on release year and weather type."""
+        if self.release_year == "2021":
+            return self._build_2021_weather_url()
+        elif self.release_year == "2022":
+            return self._build_2022_weather_url()
+        elif self.release_year == "2023":
+            return self._build_2023_weather_url()
+        elif self.release_year == "2024":
+            return self._build_2024_weather_url()
+        elif self.release_year == "2025":
+            return self._build_2025_weather_url()
+        else:
+            return ""
+
+    def _build_2021_weather_url(self) -> str:
+        """Build weather URL for 2021 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_tmy3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2022_weather_url(self) -> str:
+        """Build weather URL for 2022 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2023_weather_url(self) -> str:
+        """Build weather URL for 2023 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2024_weather_url(self) -> str:
+        """Build weather URL for 2024 release."""
+        if self.res_com == "comstock" and self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        else:
+            if self.weather == "tmy3":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_TMY3.csv"
+            elif self.weather == "amy2018":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2018.csv"
+            elif self.weather == "amy2012":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2012.csv"
+            else:
+                return ""
+
+    def _build_2025_weather_url(self) -> str:
+        """Build weather URL for 2025 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
     def get_annual_load_curve_filename(self) -> str:
         """Generate the filename for the annual load curve."""
         if self.release_year == "2021":
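Note on the URL builders above: the five per-year helpers differ only in path layout and filename casing, which is presumably why they are kept separate rather than parameterized. A minimal sketch of the resulting patterns, using a hypothetical base URL and station name (neither value appears in this diff):

    # Hypothetical values for illustration only
    base_url = "https://example-bucket/end-use-load-profiles/"
    station = "G0100010"

    # 2021 layout keys on the weather type and uses a lowercase suffix:
    url_2021 = f"{base_url}weather/tmy3/{station}_tmy3.csv"
    # 2022 (and 2024 non-ComStock) layouts key on the state and use uppercase TMY3:
    url_2022 = f"{base_url}weather/state=AL/{station}_TMY3.csv"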
@@ -303,6 +390,28 @@ class BuildingID:
         else:
             return ""

+    def get_weather_station_name(self) -> str:
+        """Get the weather station name for this building."""
+        weather_map_df = pl.read_parquet(WEATHER_FILE_DIR)
+
+        # Filter by multiple fields for a more specific match
+        weather_station_map = weather_map_df.filter(
+            (pl.col("product") == self.res_com)
+            & (pl.col("release_year") == self.release_year)
+            & (pl.col("weather_file") == self.weather)
+            & (pl.col("release_version") == self.release_number)
+            & (pl.col("bldg_id") == self.bldg_id)
+        )
+
+        # Check if we found a match
+        if weather_station_map.height > 0:
+            # Return the weather station name from the first (and should be only) match
+            weather_station_name = weather_station_map.select("weather_station_name").item()
+            return str(weather_station_name) if weather_station_name is not None else ""
+        else:
+            # No match found, return empty string
+            return ""
+
     def _build_annual_load_state_url(self) -> str:
         """Build the state-level URL for annual load curve data.

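The lookup above implies that the bundled weather_station_map.parquet holds one row per (product, release_year, weather_file, release_version, bldg_id) combination with a weather_station_name column. A self-contained sketch of the same query against a hypothetical local copy of the file:

    import polars as pl

    # Illustrative path and values; the real code reads the packaged file via WEATHER_FILE_DIR
    df = pl.read_parquet("weather_station_map.parquet")
    match = df.filter(
        (pl.col("product") == "comstock")
        & (pl.col("release_year") == "2024")
        & (pl.col("weather_file") == "tmy3")
        & (pl.col("release_version") == "2")
        & (pl.col("bldg_id") == 67)
    )
    station = match.select("weather_station_name").item() if match.height > 0 else ""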
@@ -430,6 +539,11 @@ def _validate_release_name(release_name: str) -> bool:
     return release_name in valid_release_names


+def _resolve_unique_metadata_urls(bldg_ids: list[BuildingID]) -> list[str]:
+    """Resolve the unique metadata URLs for a list of building IDs."""
+    return list({bldg_id.get_metadata_url() for bldg_id in bldg_ids})
+
+
 def fetch_bldg_ids(
     product: str, release_year: str, weather_file: str, release_version: str, state: str, upgrade_id: str
 ) -> list[BuildingID]:
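The set comprehension is what collapses duplicates here: buildings in the same release, state, and upgrade share a single metadata URL, so one download can serve many BuildingID entries. A toy illustration (URLs are hypothetical):

    urls = ["https://host/a/metadata.parquet", "https://host/a/metadata.parquet", "https://host/b/metadata.parquet"]
    unique_urls = list(set(urls))  # two entries remain; note that set order is arbitrary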
@@ -499,13 +613,13 @@ def fetch_bldg_ids(
 def _download_with_progress(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
     """Download a file with progress tracking."""
     # Get file size first
-    response = requests.head(url, timeout=30)
+    response = requests.head(url, timeout=30, verify=True)
     response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
     progress.update(task_id, total=total_size)

     # Download with streaming
-    response = requests.get(url, stream=True, timeout=30)
+    response = requests.get(url, stream=True, timeout=30, verify=True)
     response.raise_for_status()

     downloaded_size = 0
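A recurring change in this release is the explicit verify=True on every requests call. TLS certificate verification is already the requests default, so this documents intent (and likely satisfies a security linter) rather than changing behavior:

    import requests

    # verify defaults to True; passing it explicitly makes the TLS posture visible
    response = requests.get("https://example.com/file.parquet", timeout=30, verify=True)
    response.raise_for_status()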
@@ -522,6 +636,65 @@ def _download_with_progress(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
     return downloaded_size


+def _download_with_progress_metadata(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
+    """Download a metadata file with progress tracking and append to existing file if it exists."""
+    # Get file size first
+    response = requests.head(url, timeout=30, verify=True)
+    response.raise_for_status()
+    total_size = int(response.headers.get("content-length", 0))
+    progress.update(task_id, total=total_size)
+
+    # Download with streaming
+    response = requests.get(url, stream=True, timeout=30, verify=True)
+    response.raise_for_status()
+
+    downloaded_size = 0
+
+    # Check if output file already exists
+    if output_file.exists():
+        # Read existing parquet file
+        existing_df = pl.read_parquet(output_file)
+
+        # Download new data to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as temp_file:
+            temp_path = Path(temp_file.name)
+
+        try:
+            # Download to temp file
+            with open(temp_path, "wb") as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
+                        downloaded_size += len(chunk)
+                        if total_size > 0:
+                            progress.update(task_id, completed=downloaded_size)
+
+            # Read new data
+            new_df = pl.read_parquet(temp_path)
+
+            # Concatenate existing and new data, removing duplicates
+            combined_df = pl.concat([existing_df, new_df]).unique()
+
+            # Write combined data back to original file
+            combined_df.write_parquet(output_file)
+
+        finally:
+            # Clean up temp file
+            if temp_path.exists():
+                temp_path.unlink()
+    else:
+        # File doesn't exist, download normally
+        with open(str(output_file), "wb") as file:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    file.write(chunk)
+                    downloaded_size += len(chunk)
+                    if total_size > 0:
+                        progress.update(task_id, completed=downloaded_size)
+
+    return downloaded_size
+
+
 def _get_time_step_grouping_key(aggregate_time_step: str) -> tuple[str, str]:
     """Get the grouping key and format string for a given time step.

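The append path above relies on pl.concat(...).unique() to deduplicate whole rows, which makes re-downloading the same metadata idempotent. A small self-contained check of that property (the columns are hypothetical):

    import polars as pl

    existing = pl.DataFrame({"bldg_id": [1, 2], "state": ["NY", "NY"]})
    incoming = pl.DataFrame({"bldg_id": [2, 3], "state": ["NY", "NY"]})
    combined = pl.concat([existing, incoming]).unique()
    assert combined.height == 3  # the duplicated row for bldg_id 2 is dropped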
@@ -618,7 +791,7 @@ def _download_and_process_aggregate(
 ) -> int:
     """Download aggregate time step load curve to temporary file, process with Polars, and save result."""
     # Get file size first for progress tracking
-    response = requests.head(url, timeout=30)
+    response = requests.head(url, timeout=30, verify=True)
     response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
     progress.update(task_id, total=total_size)
@@ -635,7 +808,7 @@ def _download_and_process_aggregate(
     session.mount("https://", retry_strategy)

     # Download with streaming to temp file
-    response = session.get(url, stream=True, timeout=60)
+    response = session.get(url, stream=True, timeout=60, verify=True)
     response.raise_for_status()

     downloaded_size = 0
@@ -706,7 +879,7 @@ def download_bldg_data(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         output_file.write_bytes(response.content)

@@ -763,33 +936,6 @@ def download_bldg_data(
     return downloaded_paths


-def download_metadata(bldg_id: BuildingID, output_dir: Path) -> Path:
-    """Download the metadata for a given building.
-
-    Args:
-        bldg_id: A BuildingID object to download metadata for.
-        output_dir: Directory to save the downloaded metadata.
-    """
-
-    download_url = bldg_id.get_metadata_url()
-    if download_url == "":
-        message = f"Metadata is not available for {bldg_id.get_release_name()}"
-        raise NoMetadataError(message)
-    response = requests.get(download_url, timeout=30)
-    response.raise_for_status()
-    output_file = (
-        output_dir
-        / bldg_id.get_release_name()
-        / "metadata"
-        / f"state={bldg_id.state}"
-        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
-        / "metadata.parquet"
-    )
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-    output_file.write_bytes(response.content)
-    return output_file
-
-
 def download_15min_load_curve(bldg_id: BuildingID, output_dir: Path) -> Path:
     """Download the 15 min load profile timeseries for a given building.

@@ -802,7 +948,7 @@ def download_15min_load_curve(bldg_id: BuildingID, output_dir: Path) -> Path:
     if download_url == "":
         message = f"15 min load profile timeseries is not available for {bldg_id.get_release_name()}"
         raise No15minLoadCurveError(message)
-    response = requests.get(download_url, timeout=30)
+    response = requests.get(download_url, timeout=30, verify=True)
     response.raise_for_status()
     output_file = (
         output_dir
@@ -850,7 +996,7 @@ def download_15min_load_curve_with_progress(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         output_file.write_bytes(response.content)

@@ -900,7 +1046,7 @@ def download_aggregate_time_step_load_curve_with_progress(
     with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as temp_file:
         temp_path = Path(temp_file.name)
     try:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         temp_path.write_bytes(response.content)

@@ -936,9 +1082,38 @@ def _parse_requested_file_type(file_type: tuple[str, ...]) -> RequestedFileTypes:
         file_type_obj.load_curve_monthly = True
     if "load_curve_annual" in file_type:
         file_type_obj.load_curve_annual = True
+    if "weather" in file_type:
+        file_type_obj.weather = True
     return file_type_obj


+def _process_metadata_results(bldg_ids: list[BuildingID], output_dir: Path, downloaded_paths: list[Path]) -> None:
+    """Process the results of a completed metadata download."""
+    metadata_to_bldg_id_mapping: dict[Path, list[int]] = {}
+    for bldg_id in bldg_ids:
+        output_file = (
+            output_dir
+            / bldg_id.get_release_name()
+            / "metadata"
+            / f"state={bldg_id.state}"
+            / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+            / "metadata.parquet"
+        )
+        if output_file in downloaded_paths:
+            if output_file in metadata_to_bldg_id_mapping:
+                metadata_to_bldg_id_mapping[output_file].append(bldg_id.bldg_id)
+            else:
+                metadata_to_bldg_id_mapping[output_file] = [bldg_id.bldg_id]
+
+    for metadata_file, bldg_id_list in metadata_to_bldg_id_mapping.items():
+        # Use scan_parquet for lazy evaluation and better memory efficiency
+        metadata_df_filtered = pl.scan_parquet(metadata_file).filter(pl.col("bldg_id").is_in(bldg_id_list)).collect()
+        # Write the filtered dataframe back to the same file
+        metadata_df_filtered.write_parquet(metadata_file)
+
+    return
+
+
 def _process_download_results(
     future: concurrent.futures.Future,
     bldg_id: BuildingID,
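_process_metadata_results trims each downloaded metadata file down to just the requested buildings. The scan_parquet/filter/collect combination is Polars' lazy pipeline; nothing is materialized until collect(). A standalone sketch with hypothetical file name and IDs:

    import polars as pl

    wanted_ids = [7, 42]  # illustrative building IDs
    filtered = (
        pl.scan_parquet("metadata.parquet")  # lazy: nothing is read yet
        .filter(pl.col("bldg_id").is_in(wanted_ids))
        .collect()  # materializes only the matching rows
    )
    filtered.write_parquet("metadata.parquet")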
@@ -965,37 +1140,73 @@ def _process_download_results(
         console.print(f"[red]Download failed for bldg_id {bldg_id}: {e}[/red]")


-def _download_metadata_with_progress(
+def _download_metadata_with_progress(
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+) -> tuple[list[Path], list[str]]:
     """Download metadata file with progress tracking."""
-
-
-
-
+    metadata_urls = _resolve_unique_metadata_urls(bldg_ids)
+    downloaded_urls: list[str] = []
+    for bldg_id in bldg_ids:
+        output_file = (
+            output_dir
+            / bldg_id.get_release_name()
+            / "metadata"
+            / f"state={bldg_id.state}"
+            / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+            / "metadata.parquet"
+        )
+        download_url = bldg_id.get_metadata_url()
+        if download_url == "":
+            failed_downloads.append(str(output_file))
+            continue
+        if download_url in downloaded_urls:
+            continue
+        downloaded_urls.append(download_url)
+        if download_url in metadata_urls:
+            metadata_urls.remove(download_url)
+        metadata_task = progress.add_task(
+            f"[yellow]Downloading metadata: {download_url}",
+            total=0,  # Will be updated when we get the file size
+        )
+        # Get file size first
+        response = requests.head(download_url, timeout=30)
+        response.raise_for_status()
+        total_size = int(response.headers.get("content-length", 0))
+        progress.update(metadata_task, total=total_size)

-
-
-
-
-
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            _download_with_progress_metadata(download_url, output_file, progress, metadata_task)
+            downloaded_paths.append(output_file)
+        except Exception as e:
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Download failed for metadata {bldg_id.bldg_id}: {e}[/red]")
+
+    return downloaded_paths, failed_downloads

-    # Get file size first
-    response = requests.head(download_url, timeout=30)
-    response.raise_for_status()
-    total_size = int(response.headers.get("content-length", 0))
-    progress.update(metadata_task, total=total_size)

-
+def download_weather_file_with_progress(
+    bldg_id: BuildingID, output_dir: Path, progress: Progress, task_id: TaskID
+) -> Path:
+    """Download weather file with progress tracking."""
+    download_url = bldg_id.get_weather_file_url()
+    if download_url == "":
+        raise NoWeatherFileError()
     output_file = (
         output_dir
-        / bldg_id.get_release_name()
-        / "metadata"
-        / f"state={bldg_id.state}"
-        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
-        / "metadata.parquet"
+        / bldg_id.get_release_name()
+        / "weather"
+        / f"state={bldg_id.state}"
+        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+        / f"{bldg_id.get_weather_station_name()}.csv"
     )
     output_file.parent.mkdir(parents=True, exist_ok=True)
-    _download_with_progress(download_url, output_file, progress, metadata_task)
-
+    _download_with_progress(download_url, output_file, progress, task_id)
     return output_file


@@ -1244,19 +1455,19 @@ def _download_aggregate_load_curves_parallel(
     )


-def _download_metadata_single(
+def _download_metadata(
     bldg_ids: list[BuildingID],
     output_dir: Path,
     progress: Progress,
     downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
 ) -> None:
     """Download metadata file (only one needed per release)."""
     if not bldg_ids:
         return
-
-
-    metadata_file = _download_metadata_with_progress(bldg, output_dir, progress)
-    downloaded_paths.append(metadata_file)
+    _download_metadata_with_progress(bldg_ids, output_dir, progress, downloaded_paths, failed_downloads, console)
+    _process_metadata_results(bldg_ids, output_dir, downloaded_paths)


 def download_annual_load_curve_with_progress(
@@ -1302,7 +1513,7 @@ def download_annual_load_curve_with_progress(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         with open(output_file, "wb") as file:
             file.write(response.content)
@@ -1369,6 +1580,97 @@ def _download_annual_load_curves_parallel(
             console.print(f"[red]Download failed for annual load curve {bldg_id.bldg_id}: {e}[/red]")


+def _download_weather_files_parallel(
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    max_workers: int,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+    weather_states: Union[list[str], None] = None,
+) -> None:
+    """Download weather files in parallel with progress tracking."""
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
+    # Break if weather_states is empty
+    if len(weather_states) == 0:
+        for bldg_id in bldg_ids:
+            output_file = (
+                output_dir
+                / bldg_id.get_release_name()
+                / "weather"
+                / f"state={bldg_id.state}"
+                / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                / f"{bldg_id.get_weather_station_name()}.csv"
+            )
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+        return
+    # Create progress tasks for weather file downloads
+    weather_file_tasks = {}
+    for i, bldg_id in enumerate(bldg_ids):
+        if bldg_id.state in weather_states:
+            task_id = progress.add_task(
+                f"[magenta]Weather file {bldg_id.bldg_id} (upgrade {bldg_id.upgrade_id})",
+                total=0,  # Will be updated when we get the file size
+            )
+            weather_file_tasks[i] = task_id
+        else:
+            output_file = (
+                output_dir
+                / bldg_id.get_release_name()
+                / "weather"
+                / f"state={bldg_id.state}"
+                / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                / f"{bldg_id.get_weather_station_name()}.csv"
+            )
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Create a modified version of the download function that uses the specific task IDs
+        def download_weather_file_with_task_id(bldg_id: BuildingID, output_dir: Path, task_id: TaskID) -> Path:
+            return download_weather_file_with_progress(bldg_id, output_dir, progress, task_id)
+
+        future_to_bldg = {
+            executor.submit(download_weather_file_with_task_id, bldg_id, output_dir, weather_file_tasks[i]): bldg_id
+            for i, bldg_id in enumerate(bldg_ids)
+        }
+
+        for future in concurrent.futures.as_completed(future_to_bldg):
+            bldg_id = future_to_bldg[future]
+            try:
+                output_file = future.result()
+                downloaded_paths.append(output_file)
+            except NoWeatherFileError:
+                output_file = (
+                    output_dir
+                    / bldg_id.get_release_name()
+                    / "weather"
+                    / f"state={bldg_id.state}"
+                    / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                    / f"{bldg_id.get_weather_station_name()}.csv"
+                )
+                failed_downloads.append(str(output_file))
+                console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+                raise
+            except Exception as e:
+                output_file = (
+                    output_dir
+                    / bldg_id.get_release_name()
+                    / "weather"
+                    / f"state={bldg_id.state}"
+                    / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                    / f"{bldg_id.get_weather_station_name()}.csv"
+                )
+                failed_downloads.append(str(output_file))
+                console.print(f"[red]Download failed for weather file {bldg_id.bldg_id}: {e}[/red]")
+                raise
+
+
 def _print_download_summary(downloaded_paths: list[Path], failed_downloads: list[str], console: Console) -> None:
     """Print a summary of the download results."""
     console.print("\n[bold green]Download complete![/bold green]")
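The executor block above is the standard future-to-key pattern: submit every job, map each Future back to its input, then harvest results with as_completed so failures can be attributed to a specific building. A minimal self-contained sketch of that pattern (the fetch function and station names are stand-ins):

    import concurrent.futures

    def fetch(station: str) -> str:
        return f"{station}.csv"  # stand-in for download_weather_file_with_progress

    stations = ["G0100010", "G0100030"]  # hypothetical station names
    with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
        future_to_station = {executor.submit(fetch, s): s for s in stations}
        for future in concurrent.futures.as_completed(future_to_station):
            station = future_to_station[future]
            try:
                print(station, "->", future.result())
            except Exception as exc:
                print(station, "failed:", exc)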
@@ -1380,7 +1682,11 @@ def _print_download_summary(downloaded_paths: list[Path], failed_downloads: list[str], console: Console) -> None:


 def fetch_bldg_data(
-    bldg_ids: list[BuildingID],
+    bldg_ids: list[BuildingID],
+    file_type: tuple[str, ...],
+    output_dir: Path,
+    max_workers: int = 5,
+    weather_states: Union[list[str], None] = None,
 ) -> tuple[list[Path], list[str]]:
     """Download building data for a given list of building ids

@@ -1395,19 +1701,27 @@ def fetch_bldg_data(
     file_type_obj = _parse_requested_file_type(file_type)
     console = Console()

+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
     downloaded_paths: list[Path] = []
     failed_downloads: list[str] = []

     # Calculate total files to download
     total_files = 0
     if file_type_obj.metadata:
-
+        unique_metadata_urls = _resolve_unique_metadata_urls(bldg_ids)
+        total_files += len(unique_metadata_urls)  # Add metadata file
     if file_type_obj.load_curve_15min:
         total_files += len(bldg_ids)  # Add 15-minute load curve files
     if file_type_obj.load_curve_monthly:
         total_files += len(bldg_ids)  # Add 15-minute load curve files
     if file_type_obj.load_curve_annual:
         total_files += len(bldg_ids)  # Add annual load curve files
+    if file_type_obj.weather:
+        available_bldg_ids = [bldg_id for bldg_id in bldg_ids if bldg_id.state in weather_states]
+        total_files += len(available_bldg_ids) * len(weather_states)  # Add weather map files

     console.print(f"\n[bold blue]Starting download of {total_files} files...[/bold blue]")

@@ -1425,45 +1739,90 @@ def fetch_bldg_data(
         console=console,
         transient=False,
     ) as progress:
-
-
-
-
-
+        _execute_downloads(
+            file_type_obj,
+            bldg_ids,
+            output_dir,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+            weather_states,
+        )

-
-        if file_type_obj.metadata:
-            _download_metadata_single(bldg_ids, output_dir, progress, downloaded_paths)
+    _print_download_summary(downloaded_paths, failed_downloads, console)

-
-        if file_type_obj.load_curve_15min:
-            _download_15min_load_curves_parallel(
-                bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+    return downloaded_paths, failed_downloads

-        if file_type_obj.load_curve_monthly:
-            aggregate_time_step = "monthly"
-            _download_aggregate_load_curves_parallel(
-                bldg_ids,
-                output_dir,
-                aggregate_time_step,
-                max_workers,
-                progress,
-                downloaded_paths,
-                failed_downloads,
-                console,
-            )

-
-
-
-
-
+def _execute_downloads(
+    file_type_obj: RequestedFileTypes,
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    max_workers: int,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+    weather_states: Union[list[str], None] = None,
+) -> None:
+    """Execute all requested downloads based on file type configuration."""
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
+    # Download building data if requested.
+    if file_type_obj.hpxml or file_type_obj.schedule:
+        _download_building_data_parallel(
+            bldg_ids, file_type_obj, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )

-
+    # Get metadata if requested. Only one building is needed to get the metadata.
+    if file_type_obj.metadata:
+        _download_metadata(bldg_ids, output_dir, progress, downloaded_paths, failed_downloads, console)

-
+    # Get 15 min load profile timeseries if requested.
+    if file_type_obj.load_curve_15min:
+        _download_15min_load_curves_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
+
+    if file_type_obj.load_curve_monthly:
+        aggregate_time_step = "monthly"
+        _download_aggregate_load_curves_parallel(
+            bldg_ids,
+            output_dir,
+            aggregate_time_step,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+        )
+
+    # Get annual load curve if requested.
+    if file_type_obj.load_curve_annual:
+        _download_annual_load_curves_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
+
+    # Get weather files if requested.
+    if file_type_obj.weather:
+        _download_weather_files_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console, weather_states
+        )


 if __name__ == "__main__":  # pragma: no cover
-
+    bldg_ids = [
+        BuildingID(
+            bldg_id=67, release_year="2024", res_com="comstock", weather="tmy3", upgrade_id="0", release_number="2"
+        ),
+    ]
+    file_type = ("weather",)
+    output_dir = Path("data")
+    weather_states: list[str] = []
+    downloaded_paths, failed_downloads = fetch_bldg_data(bldg_ids, file_type, output_dir, weather_states=weather_states)
+    print(downloaded_paths)
+    print(failed_downloads)
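As the new __main__ block hints, weather downloads are gated on weather_states: _download_weather_files_parallel records every building as failed when the list is empty, so a caller who actually wants weather CSVs must pass the states of the requested buildings. A hypothetical end-to-end sketch (the argument values are illustrative, not taken from the diff):

    from pathlib import Path

    bldg_ids = fetch_bldg_ids("comstock", "2024", "tmy3", "2", "AL", "0")
    downloaded, failed = fetch_bldg_data(
        bldg_ids,
        file_type=("weather",),
        output_dir=Path("data"),
        weather_states=["AL"],  # must cover the buildings' states
    )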