buildstock-fetch 1.1.0__py3-none-any.whl → 1.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of buildstock-fetch might be problematic. See the registry's advisory for details.

buildstock_fetch/main.py CHANGED
```diff
@@ -71,6 +71,12 @@ class UnknownAggregationFunctionError(ValueError):
     pass
 
 
+class NoWeatherFileError(ValueError):
+    """Raised when weather file is not available for a release."""
+
+    pass
+
+
 METADATA_DIR = Path(
     str(files("buildstock_fetch").joinpath("data").joinpath("building_data").joinpath("combined_metadata.parquet"))
 )
```
```diff
@@ -83,6 +89,7 @@ LOAD_CURVE_COLUMN_AGGREGATION = Path(
         .joinpath("2024_resstock_load_curve_columns.csv")
     )
 )
+WEATHER_FILE_DIR = Path(str(files("buildstock_fetch").joinpath("data").joinpath("weather_station_map")))
 
 
 @dataclass
```
```diff
@@ -95,6 +102,7 @@ class RequestedFileTypes:
     load_curve_daily: bool = False
     load_curve_monthly: bool = False
     load_curve_annual: bool = False
+    weather: bool = False
 
 
 @dataclass
```
```diff
@@ -274,6 +282,85 @@ class BuildingID:
         else:
             return ""
 
+    def get_weather_file_url(self) -> str:
+        """Generate the S3 download URL for this building."""
+        if self.get_weather_station_name() == "":
+            return ""
+        return self._build_weather_url()
+
+    def _build_weather_url(self) -> str:
+        """Build the weather file URL based on release year and weather type."""
+        if self.release_year == "2021":
+            return self._build_2021_weather_url()
+        elif self.release_year == "2022":
+            return self._build_2022_weather_url()
+        elif self.release_year == "2023":
+            return self._build_2023_weather_url()
+        elif self.release_year == "2024":
+            return self._build_2024_weather_url()
+        elif self.release_year == "2025":
+            return self._build_2025_weather_url()
+        else:
+            return ""
+
+    def _build_2021_weather_url(self) -> str:
+        """Build weather URL for 2021 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_tmy3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2022_weather_url(self) -> str:
+        """Build weather URL for 2022 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2023_weather_url(self) -> str:
+        """Build weather URL for 2023 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2024_weather_url(self) -> str:
+        """Build weather URL for 2024 release."""
+        if self.res_com == "comstock" and self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        else:
+            if self.weather == "tmy3":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_TMY3.csv"
+            elif self.weather == "amy2018":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2018.csv"
+            elif self.weather == "amy2012":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2012.csv"
+            else:
+                return ""
+
+    def _build_2025_weather_url(self) -> str:
+        """Build weather URL for 2025 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
     def get_annual_load_curve_filename(self) -> str:
         """Generate the filename for the annual load curve."""
         if self.release_year == "2021":
```
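The year-specific builders differ in two details: whether files live under `weather/{weather}/` or `weather/state={state}/`, and the filename suffix (lowercase `_tmy3` in 2021, `_TMY3` elsewhere, `_2018`/`_2012` for AMY data). A minimal sketch of that dispatch, with a placeholder base URL and station ID (neither is the package's real value) and the 2024 ComStock special case collapsed:

```python
# Sketch only: BASE_URL and the station ID are invented placeholders.
BASE_URL = "https://example-bucket.s3.amazonaws.com/some-release/"

def build_weather_url(release_year: str, weather: str, state: str, station: str) -> str:
    suffix = {"tmy3": "TMY3", "amy2018": "2018", "amy2012": "2012"}.get(weather, "")
    if not suffix:
        return ""
    if release_year == "2022":
        # 2022-style layout partitions weather files by state
        return f"{BASE_URL}weather/state={state}/{station}_{suffix}.csv"
    # 2021/2023/2025-style layout partitions by weather type
    return f"{BASE_URL}weather/{weather}/{station}_{suffix}.csv"

print(build_weather_url("2022", "tmy3", "MA", "G2500170"))
# https://example-bucket.s3.amazonaws.com/some-release/weather/state=MA/G2500170_TMY3.csv
```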
```diff
@@ -303,6 +390,28 @@ class BuildingID:
         else:
             return ""
 
+    def get_weather_station_name(self) -> str:
+        """Get the weather station name for this building."""
+        weather_map_df = pl.read_parquet(WEATHER_FILE_DIR)
+
+        # Filter by multiple fields for a more specific match
+        weather_station_map = weather_map_df.filter(
+            (pl.col("product") == self.res_com)
+            & (pl.col("release_year") == self.release_year)
+            & (pl.col("weather_file") == self.weather)
+            & (pl.col("release_version") == self.release_number)
+            & (pl.col("bldg_id") == self.bldg_id)
+        )
+
+        # Check if we found a match
+        if weather_station_map.height > 0:
+            # Return the weather station name from the first (and should be only) match
+            weather_station_name = weather_station_map.select("weather_station_name").item()
+            return str(weather_station_name) if weather_station_name is not None else ""
+        else:
+            # No match found, return empty string
+            return ""
+
     def _build_annual_load_state_url(self) -> str:
         """Build the state-level URL for annual load curve data.
 
```
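`get_weather_station_name` is a multi-column lookup against the packaged station map. A self-contained illustration of the same polars pattern on a toy frame (all column values below are invented):

```python
import polars as pl

# Toy stand-in for the packaged weather_station_map parquet
weather_map_df = pl.DataFrame({
    "product": ["comstock", "resstock"],
    "release_year": ["2024", "2024"],
    "weather_file": ["tmy3", "tmy3"],
    "release_version": ["2", "1"],
    "bldg_id": [67, 67],
    "weather_station_name": ["STATION_A", "STATION_B"],
})

match = weather_map_df.filter(
    (pl.col("product") == "comstock")
    & (pl.col("release_year") == "2024")
    & (pl.col("weather_file") == "tmy3")
    & (pl.col("release_version") == "2")
    & (pl.col("bldg_id") == 67)
)

# .item() requires exactly one row and one column, hence the height check above
print(match.select("weather_station_name").item() if match.height == 1 else "")
```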
```diff
@@ -430,6 +539,11 @@ def _validate_release_name(release_name: str) -> bool:
     return release_name in valid_release_names
 
 
+def _resolve_unique_metadata_urls(bldg_ids: list[BuildingID]) -> list[str]:
+    """Resolve the unique metadata URLs for a list of building IDs."""
+    return list({bldg_id.get_metadata_url() for bldg_id in bldg_ids})
+
+
 def fetch_bldg_ids(
     product: str, release_year: str, weather_file: str, release_version: str, state: str, upgrade_id: str
 ) -> list[BuildingID]:
```
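The set comprehension deduplicates URLs but does not preserve input order, which is fine here since the result is only used for counting and membership tests:

```python
urls = [
    "https://host/release_a/metadata.parquet",
    "https://host/release_a/metadata.parquet",
    "https://host/release_b/metadata.parquet",
]
# Duplicates collapse; the resulting iteration order is arbitrary
print(sorted({u for u in urls}))  # two unique URLs
```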
```diff
@@ -499,13 +613,13 @@ def fetch_bldg_ids(
 def _download_with_progress(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
     """Download a file with progress tracking."""
     # Get file size first
-    response = requests.head(url, timeout=30)
+    response = requests.head(url, timeout=30, verify=True)
     response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
     progress.update(task_id, total=total_size)
 
     # Download with streaming
-    response = requests.get(url, stream=True, timeout=30)
+    response = requests.get(url, stream=True, timeout=30, verify=True)
     response.raise_for_status()
 
     downloaded_size = 0
```
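The recurring `verify=True` additions in this release do not change behavior: certificate verification is already the requests default. They simply make the intent explicit and guard against accidental overrides, e.g.:

```python
import requests

# verify=True is the requests library default; spelling it out documents that
# TLS certificate checking is intended. The URL here is a placeholder.
response = requests.get("https://example.com/data.parquet", stream=True, timeout=30, verify=True)
response.raise_for_status()
```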
```diff
@@ -522,6 +636,65 @@ def _download_with_progress(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
     return downloaded_size
 
 
+def _download_with_progress_metadata(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
+    """Download a metadata file with progress tracking and append to existing file if it exists."""
+    # Get file size first
+    response = requests.head(url, timeout=30, verify=True)
+    response.raise_for_status()
+    total_size = int(response.headers.get("content-length", 0))
+    progress.update(task_id, total=total_size)
+
+    # Download with streaming
+    response = requests.get(url, stream=True, timeout=30, verify=True)
+    response.raise_for_status()
+
+    downloaded_size = 0
+
+    # Check if output file already exists
+    if output_file.exists():
+        # Read existing parquet file
+        existing_df = pl.read_parquet(output_file)
+
+        # Download new data to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as temp_file:
+            temp_path = Path(temp_file.name)
+
+        try:
+            # Download to temp file
+            with open(temp_path, "wb") as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
+                        downloaded_size += len(chunk)
+                        if total_size > 0:
+                            progress.update(task_id, completed=downloaded_size)
+
+            # Read new data
+            new_df = pl.read_parquet(temp_path)
+
+            # Concatenate existing and new data, removing duplicates
+            combined_df = pl.concat([existing_df, new_df]).unique()
+
+            # Write combined data back to original file
+            combined_df.write_parquet(output_file)
+
+        finally:
+            # Clean up temp file
+            if temp_path.exists():
+                temp_path.unlink()
+    else:
+        # File doesn't exist, download normally
+        with open(str(output_file), "wb") as file:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    file.write(chunk)
+                    downloaded_size += len(chunk)
+                    if total_size > 0:
+                        progress.update(task_id, completed=downloaded_size)
+
+    return downloaded_size
+
+
 def _get_time_step_grouping_key(aggregate_time_step: str) -> tuple[str, str]:
     """Get the grouping key and format string for a given time step.
 
```
```diff
@@ -618,7 +791,7 @@ def _download_and_process_aggregate(
 ) -> int:
     """Download aggregate time step load curve to temporary file, process with Polars, and save result."""
     # Get file size first for progress tracking
-    response = requests.head(url, timeout=30)
+    response = requests.head(url, timeout=30, verify=True)
     response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
     progress.update(task_id, total=total_size)
```
```diff
@@ -635,7 +808,7 @@ def _download_and_process_aggregate(
     session.mount("https://", retry_strategy)
 
     # Download with streaming to temp file
-    response = session.get(url, stream=True, timeout=60)
+    response = session.get(url, stream=True, timeout=60, verify=True)
     response.raise_for_status()
 
     downloaded_size = 0
```
```diff
@@ -706,7 +879,7 @@ def download_bldg_data(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         output_file.write_bytes(response.content)
 
```
```diff
@@ -763,33 +936,6 @@ def download_bldg_data(
     return downloaded_paths
 
 
-def download_metadata(bldg_id: BuildingID, output_dir: Path) -> Path:
-    """Download the metadata for a given building.
-
-    Args:
-        bldg_id: A BuildingID object to download metadata for.
-        output_dir: Directory to save the downloaded metadata.
-    """
-
-    download_url = bldg_id.get_metadata_url()
-    if download_url == "":
-        message = f"Metadata is not available for {bldg_id.get_release_name()}"
-        raise NoMetadataError(message)
-    response = requests.get(download_url, timeout=30)
-    response.raise_for_status()
-    output_file = (
-        output_dir
-        / bldg_id.get_release_name()
-        / "metadata"
-        / f"state={bldg_id.state}"
-        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
-        / "metadata.parquet"
-    )
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-    output_file.write_bytes(response.content)
-    return output_file
-
-
 def download_15min_load_curve(bldg_id: BuildingID, output_dir: Path) -> Path:
     """Download the 15 min load profile timeseries for a given building.
 
```
```diff
@@ -802,7 +948,7 @@ def download_15min_load_curve(bldg_id: BuildingID, output_dir: Path) -> Path:
     if download_url == "":
         message = f"15 min load profile timeseries is not available for {bldg_id.get_release_name()}"
         raise No15minLoadCurveError(message)
-    response = requests.get(download_url, timeout=30)
+    response = requests.get(download_url, timeout=30, verify=True)
     response.raise_for_status()
     output_file = (
         output_dir
```
```diff
@@ -850,7 +996,7 @@ def download_15min_load_curve_with_progress(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         output_file.write_bytes(response.content)
 
```
```diff
@@ -900,7 +1046,7 @@ def download_aggregate_time_step_load_curve_with_progress(
     with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as temp_file:
         temp_path = Path(temp_file.name)
     try:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         temp_path.write_bytes(response.content)
 
```
```diff
@@ -936,9 +1082,38 @@ def _parse_requested_file_type(file_type: tuple[str, ...]) -> RequestedFileTypes:
         file_type_obj.load_curve_monthly = True
     if "load_curve_annual" in file_type:
         file_type_obj.load_curve_annual = True
+    if "weather" in file_type:
+        file_type_obj.weather = True
     return file_type_obj
 
 
+def _process_metadata_results(bldg_ids: list[BuildingID], output_dir: Path, downloaded_paths: list[Path]) -> None:
+    """Process the results of a completed metadata download."""
+    metadata_to_bldg_id_mapping: dict[Path, list[int]] = {}
+    for bldg_id in bldg_ids:
+        output_file = (
+            output_dir
+            / bldg_id.get_release_name()
+            / "metadata"
+            / f"state={bldg_id.state}"
+            / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+            / "metadata.parquet"
+        )
+        if output_file in downloaded_paths:
+            if output_file in metadata_to_bldg_id_mapping:
+                metadata_to_bldg_id_mapping[output_file].append(bldg_id.bldg_id)
+            else:
+                metadata_to_bldg_id_mapping[output_file] = [bldg_id.bldg_id]
+
+    for metadata_file, bldg_id_list in metadata_to_bldg_id_mapping.items():
+        # Use scan_parquet for lazy evaluation and better memory efficiency
+        metadata_df_filtered = pl.scan_parquet(metadata_file).filter(pl.col("bldg_id").is_in(bldg_id_list)).collect()
+        # Write the filtered dataframe back to the same file
+        metadata_df_filtered.write_parquet(metadata_file)
+
+    return
+
+
 def _process_download_results(
     future: concurrent.futures.Future,
     bldg_id: BuildingID,
```
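`_process_metadata_results` shrinks each downloaded metadata file down to just the requested buildings. A self-contained sketch of the lazy filter-and-rewrite, with a temporary file standing in for a downloaded `metadata.parquet`:

```python
import tempfile
from pathlib import Path

import polars as pl

# Hypothetical metadata file standing in for a downloaded metadata.parquet
metadata_file = Path(tempfile.mkdtemp()) / "metadata.parquet"
pl.DataFrame({"bldg_id": [1, 2, 3, 4], "sqft": [900, 1200, 1500, 2000]}).write_parquet(metadata_file)

bldg_id_list = [2, 4]
# scan_parquet defers reading; rows are only materialized at collect()
filtered = pl.scan_parquet(metadata_file).filter(pl.col("bldg_id").is_in(bldg_id_list)).collect()
filtered.write_parquet(metadata_file)  # shrink the file to just the requested buildings

print(pl.read_parquet(metadata_file)["bldg_id"].to_list())  # [2, 4]
```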
```diff
@@ -965,37 +1140,73 @@ def _process_download_results(
         console.print(f"[red]Download failed for bldg_id {bldg_id}: {e}[/red]")
 
 
-def _download_metadata_with_progress(bldg: BuildingID, output_dir: Path, progress: Progress) -> Path:
+def _download_metadata_with_progress(
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+) -> tuple[list[Path], list[str]]:
     """Download metadata file with progress tracking."""
-    download_url = bldg.get_metadata_url()
-    if download_url == "":
-        message = f"Metadata is not available for {bldg.get_release_name()}"
-        raise NoMetadataError(message)
+    metadata_urls = _resolve_unique_metadata_urls(bldg_ids)
+    downloaded_urls: list[str] = []
+    for bldg_id in bldg_ids:
+        output_file = (
+            output_dir
+            / bldg_id.get_release_name()
+            / "metadata"
+            / f"state={bldg_id.state}"
+            / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+            / "metadata.parquet"
+        )
+        download_url = bldg_id.get_metadata_url()
+        if download_url == "":
+            failed_downloads.append(str(output_file))
+            continue
+        if download_url in downloaded_urls:
+            continue
+        downloaded_urls.append(download_url)
+        if download_url in metadata_urls:
+            metadata_urls.remove(download_url)
+        metadata_task = progress.add_task(
+            f"[yellow]Downloading metadata: {download_url}",
+            total=0,  # Will be updated when we get the file size
+        )
+        # Get file size first
+        response = requests.head(download_url, timeout=30)
+        response.raise_for_status()
+        total_size = int(response.headers.get("content-length", 0))
+        progress.update(metadata_task, total=total_size)
 
-    # Create metadata task with progress tracking
-    metadata_task = progress.add_task(
-        "[yellow]Downloading metadata",
-        total=0,  # Will be updated when we get the file size
-    )
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            _download_with_progress_metadata(download_url, output_file, progress, metadata_task)
+            downloaded_paths.append(output_file)
+        except Exception as e:
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Download failed for metadata {bldg_id.bldg_id}: {e}[/red]")
+
+    return downloaded_paths, failed_downloads
 
-    # Get file size first
-    response = requests.head(download_url, timeout=30)
-    response.raise_for_status()
-    total_size = int(response.headers.get("content-length", 0))
-    progress.update(metadata_task, total=total_size)
 
-    # Download with progress
+def download_weather_file_with_progress(
+    bldg_id: BuildingID, output_dir: Path, progress: Progress, task_id: TaskID
+) -> Path:
+    """Download weather file with progress tracking."""
+    download_url = bldg_id.get_weather_file_url()
+    if download_url == "":
+        raise NoWeatherFileError()
     output_file = (
         output_dir
-        / bldg.get_release_name()
-        / "metadata"
-        / f"state={bldg.state}"
-        / f"upgrade={str(int(bldg.upgrade_id)).zfill(2)}"
-        / "metadata.parquet"
+        / bldg_id.get_release_name()
+        / "weather"
+        / f"state={bldg_id.state}"
+        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+        / f"{bldg_id.get_weather_station_name()}.csv"
     )
     output_file.parent.mkdir(parents=True, exist_ok=True)
-    _download_with_progress(download_url, output_file, progress, metadata_task)
-
+    _download_with_progress(download_url, output_file, progress, task_id)
    return output_file
 
 
```
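Both helpers lean on rich's task API: create a task with an unknown total, resize it once `content-length` arrives, then advance it per chunk. A minimal sketch (the byte counts are invented):

```python
import time

from rich.progress import Progress

with Progress() as progress:
    # total=0 mirrors the code above: the real size arrives with the HEAD response
    task = progress.add_task("[yellow]Downloading metadata", total=0)
    progress.update(task, total=1024)  # e.g. from response.headers["content-length"]
    done = 0
    while done < 1024:
        done += 256  # stand-in for a received chunk
        progress.update(task, completed=done)
        time.sleep(0.05)
```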
```diff
@@ -1244,19 +1455,19 @@ def _download_aggregate_load_curves_parallel(
     )
 
 
-def _download_metadata_single(
+def _download_metadata(
     bldg_ids: list[BuildingID],
     output_dir: Path,
     progress: Progress,
     downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
 ) -> None:
     """Download metadata file (only one needed per release)."""
     if not bldg_ids:
         return
-
-    bldg = bldg_ids[0]
-    metadata_file = _download_metadata_with_progress(bldg, output_dir, progress)
-    downloaded_paths.append(metadata_file)
+    _download_metadata_with_progress(bldg_ids, output_dir, progress, downloaded_paths, failed_downloads, console)
+    _process_metadata_results(bldg_ids, output_dir, downloaded_paths)
 
 
 def download_annual_load_curve_with_progress(
```
```diff
@@ -1302,7 +1513,7 @@ def download_annual_load_curve_with_progress(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         with open(output_file, "wb") as file:
             file.write(response.content)
```
```diff
@@ -1369,6 +1580,97 @@ def _download_annual_load_curves_parallel(
             console.print(f"[red]Download failed for annual load curve {bldg_id.bldg_id}: {e}[/red]")
 
 
+def _download_weather_files_parallel(
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    max_workers: int,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+    weather_states: Union[list[str], None] = None,
+) -> None:
+    """Download weather files in parallel with progress tracking."""
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
+    # Break if weather_states is empty
+    if len(weather_states) == 0:
+        for bldg_id in bldg_ids:
+            output_file = (
+                output_dir
+                / bldg_id.get_release_name()
+                / "weather"
+                / f"state={bldg_id.state}"
+                / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                / f"{bldg_id.get_weather_station_name()}.csv"
+            )
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+        return
+    # Create progress tasks for weather file downloads
+    weather_file_tasks = {}
+    for i, bldg_id in enumerate(bldg_ids):
+        if bldg_id.state in weather_states:
+            task_id = progress.add_task(
+                f"[magenta]Weather file {bldg_id.bldg_id} (upgrade {bldg_id.upgrade_id})",
+                total=0,  # Will be updated when we get the file size
+            )
+            weather_file_tasks[i] = task_id
+        else:
+            output_file = (
+                output_dir
+                / bldg_id.get_release_name()
+                / "weather"
+                / f"state={bldg_id.state}"
+                / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                / f"{bldg_id.get_weather_station_name()}.csv"
+            )
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Create a modified version of the download function that uses the specific task IDs
+        def download_weather_file_with_task_id(bldg_id: BuildingID, output_dir: Path, task_id: TaskID) -> Path:
+            return download_weather_file_with_progress(bldg_id, output_dir, progress, task_id)
+
+        future_to_bldg = {
+            executor.submit(download_weather_file_with_task_id, bldg_id, output_dir, weather_file_tasks[i]): bldg_id
+            for i, bldg_id in enumerate(bldg_ids)
+        }
+
+        for future in concurrent.futures.as_completed(future_to_bldg):
+            bldg_id = future_to_bldg[future]
+            try:
+                output_file = future.result()
+                downloaded_paths.append(output_file)
+            except NoWeatherFileError:
+                output_file = (
+                    output_dir
+                    / bldg_id.get_release_name()
+                    / "weather"
+                    / f"state={bldg_id.state}"
+                    / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                    / f"{bldg_id.get_weather_station_name()}.csv"
+                )
+                failed_downloads.append(str(output_file))
+                console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+                raise
+            except Exception as e:
+                output_file = (
+                    output_dir
+                    / bldg_id.get_release_name()
+                    / "weather"
+                    / f"state={bldg_id.state}"
+                    / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                    / f"{bldg_id.get_weather_station_name()}.csv"
+                )
+                failed_downloads.append(str(output_file))
+                console.print(f"[red]Download failed for weather file {bldg_id.bldg_id}: {e}[/red]")
+                raise
+
+
 def _print_download_summary(downloaded_paths: list[Path], failed_downloads: list[str], console: Console) -> None:
     """Print a summary of the download results."""
     console.print("\n[bold green]Download complete![/bold green]")
```
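The parallel driver follows the standard `concurrent.futures` fan-out: map each submitted future back to its building, then harvest with `as_completed`. A sketch of that pattern with a hypothetical `fetch` worker:

```python
import concurrent.futures

def fetch(bldg_id: int) -> str:
    # Hypothetical worker standing in for download_weather_file_with_progress
    if bldg_id % 2 == 0:
        raise ValueError("no weather file")
    return f"weather_{bldg_id}.csv"

bldg_ids = [1, 2, 3]
downloaded, failed = [], []
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
    future_to_bldg = {executor.submit(fetch, b): b for b in bldg_ids}
    for future in concurrent.futures.as_completed(future_to_bldg):
        bldg = future_to_bldg[future]
        try:
            downloaded.append(future.result())
        except ValueError:
            failed.append(bldg)

print(sorted(downloaded), sorted(failed))  # ['weather_1.csv', 'weather_3.csv'] [2]
```

Unlike this sketch, the version above re-raises inside both `except` handlers after recording the failure, so the first failed weather download propagates out of the harvesting loop.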
```diff
@@ -1380,7 +1682,11 @@ def _print_download_summary(downloaded_paths: list[Path], failed_downloads: list[str], console: Console) -> None:
 
 
 def fetch_bldg_data(
-    bldg_ids: list[BuildingID], file_type: tuple[str, ...], output_dir: Path, max_workers: int = 5
+    bldg_ids: list[BuildingID],
+    file_type: tuple[str, ...],
+    output_dir: Path,
+    max_workers: int = 5,
+    weather_states: Union[list[str], None] = None,
 ) -> tuple[list[Path], list[str]]:
     """Download building data for a given list of building ids
 
```
```diff
@@ -1395,19 +1701,27 @@ def fetch_bldg_data(
     file_type_obj = _parse_requested_file_type(file_type)
     console = Console()
 
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
     downloaded_paths: list[Path] = []
     failed_downloads: list[str] = []
 
     # Calculate total files to download
     total_files = 0
     if file_type_obj.metadata:
-        total_files += 1  # Add metadata file
+        unique_metadata_urls = _resolve_unique_metadata_urls(bldg_ids)
+        total_files += len(unique_metadata_urls)  # Add metadata file
     if file_type_obj.load_curve_15min:
         total_files += len(bldg_ids)  # Add 15-minute load curve files
     if file_type_obj.load_curve_monthly:
         total_files += len(bldg_ids)  # Add 15-minute load curve files
     if file_type_obj.load_curve_annual:
         total_files += len(bldg_ids)  # Add annual load curve files
+    if file_type_obj.weather:
+        available_bldg_ids = [bldg_id for bldg_id in bldg_ids if bldg_id.state in weather_states]
+        total_files += len(available_bldg_ids) * len(weather_states)  # Add weather map files
 
     console.print(f"\n[bold blue]Starting download of {total_files} files...[/bold blue]")
 
```
```diff
@@ -1425,45 +1739,90 @@ def fetch_bldg_data(
         console=console,
         transient=False,
     ) as progress:
-        # Download building data if requested.
-        if file_type_obj.hpxml or file_type_obj.schedule:
-            _download_building_data_parallel(
-                bldg_ids, file_type_obj, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+        _execute_downloads(
+            file_type_obj,
+            bldg_ids,
+            output_dir,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+            weather_states,
+        )
 
-        # Get metadata if requested. Only one building is needed to get the metadata.
-        if file_type_obj.metadata:
-            _download_metadata_single(bldg_ids, output_dir, progress, downloaded_paths)
+    _print_download_summary(downloaded_paths, failed_downloads, console)
 
-        # Get 15 min load profile timeseries if requested.
-        if file_type_obj.load_curve_15min:
-            _download_15min_load_curves_parallel(
-                bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+    return downloaded_paths, failed_downloads
 
-        if file_type_obj.load_curve_monthly:
-            aggregate_time_step = "monthly"
-            _download_aggregate_load_curves_parallel(
-                bldg_ids,
-                output_dir,
-                aggregate_time_step,
-                max_workers,
-                progress,
-                downloaded_paths,
-                failed_downloads,
-                console,
-            )
 
-        # Get annual load curve if requested.
-        if file_type_obj.load_curve_annual:
-            _download_annual_load_curves_parallel(
-                bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+def _execute_downloads(
+    file_type_obj: RequestedFileTypes,
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    max_workers: int,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+    weather_states: Union[list[str], None] = None,
+) -> None:
+    """Execute all requested downloads based on file type configuration."""
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
+    # Download building data if requested.
+    if file_type_obj.hpxml or file_type_obj.schedule:
+        _download_building_data_parallel(
+            bldg_ids, file_type_obj, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
 
-    _print_download_summary(downloaded_paths, failed_downloads, console)
+    # Get metadata if requested. Only one building is needed to get the metadata.
+    if file_type_obj.metadata:
+        _download_metadata(bldg_ids, output_dir, progress, downloaded_paths, failed_downloads, console)
 
-    return downloaded_paths, failed_downloads
+    # Get 15 min load profile timeseries if requested.
+    if file_type_obj.load_curve_15min:
+        _download_15min_load_curves_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
+
+    if file_type_obj.load_curve_monthly:
+        aggregate_time_step = "monthly"
+        _download_aggregate_load_curves_parallel(
+            bldg_ids,
+            output_dir,
+            aggregate_time_step,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+        )
+
+    # Get annual load curve if requested.
+    if file_type_obj.load_curve_annual:
+        _download_annual_load_curves_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
+
+    # Get weather files if requested.
+    if file_type_obj.weather:
+        _download_weather_files_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console, weather_states
+        )
 
 
 if __name__ == "__main__":  # pragma: no cover
-    print(fetch_bldg_ids("comstock", "2021", "tmy3", "1", "MA", "0")[:3])
+    bldg_ids = [
+        BuildingID(
+            bldg_id=67, release_year="2024", res_com="comstock", weather="tmy3", upgrade_id="0", release_number="2"
+        ),
+    ]
+    file_type = ("weather",)
+    output_dir = Path("data")
+    weather_states: list[str] = []
+    downloaded_paths, failed_downloads = fetch_bldg_data(bldg_ids, file_type, output_dir, weather_states=weather_states)
+    print(downloaded_paths)
+    print(failed_downloads)
```