buildstock-fetch 1.1.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


buildstock_fetch/main.py CHANGED
@@ -3,6 +3,7 @@ import json
 import tempfile
 import zipfile
 from dataclasses import asdict, dataclass
+from datetime import timedelta
 from importlib.resources import files
 from pathlib import Path
 from typing import Optional, Union
@@ -71,18 +72,18 @@ class UnknownAggregationFunctionError(ValueError):
     pass


+class NoWeatherFileError(ValueError):
+    """Raised when weather file is not available for a release."""
+
+    pass
+
+
 METADATA_DIR = Path(
     str(files("buildstock_fetch").joinpath("data").joinpath("building_data").joinpath("combined_metadata.parquet"))
 )
 RELEASE_JSON_FILE = Path(str(files("buildstock_fetch").joinpath("data").joinpath("buildstock_releases.json")))
-LOAD_CURVE_COLUMN_AGGREGATION = Path(
-    str(
-        files("buildstock_fetch")
-        .joinpath("data")
-        .joinpath("load_curve_column_map")
-        .joinpath("2024_resstock_load_curve_columns.csv")
-    )
-)
+LOAD_CURVE_COLUMN_AGGREGATION = Path(str(files("buildstock_fetch").joinpath("data").joinpath("load_curve_column_map")))
+WEATHER_FILE_DIR = Path(str(files("buildstock_fetch").joinpath("data").joinpath("weather_station_map")))


 @dataclass
@@ -95,6 +96,7 @@ class RequestedFileTypes:
     load_curve_daily: bool = False
     load_curve_monthly: bool = False
     load_curve_annual: bool = False
+    weather: bool = False


 @dataclass
@@ -274,6 +276,85 @@ class BuildingID:
         else:
             return ""

+    def get_weather_file_url(self) -> str:
+        """Generate the S3 download URL for this building."""
+        if self.get_weather_station_name() == "":
+            return ""
+        return self._build_weather_url()
+
+    def _build_weather_url(self) -> str:
+        """Build the weather file URL based on release year and weather type."""
+        if self.release_year == "2021":
+            return self._build_2021_weather_url()
+        elif self.release_year == "2022":
+            return self._build_2022_weather_url()
+        elif self.release_year == "2023":
+            return self._build_2023_weather_url()
+        elif self.release_year == "2024":
+            return self._build_2024_weather_url()
+        elif self.release_year == "2025":
+            return self._build_2025_weather_url()
+        else:
+            return ""
+
+    def _build_2021_weather_url(self) -> str:
+        """Build weather URL for 2021 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_tmy3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2022_weather_url(self) -> str:
+        """Build weather URL for 2022 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2023_weather_url(self) -> str:
+        """Build weather URL for 2023 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
+    def _build_2024_weather_url(self) -> str:
+        """Build weather URL for 2024 release."""
+        if self.res_com == "comstock" and self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        else:
+            if self.weather == "tmy3":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_TMY3.csv"
+            elif self.weather == "amy2018":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2018.csv"
+            elif self.weather == "amy2012":
+                return f"{self.base_url}weather/state={self.state}/{self.get_weather_station_name()}_2012.csv"
+            else:
+                return ""
+
+    def _build_2025_weather_url(self) -> str:
+        """Build weather URL for 2025 release."""
+        if self.weather == "tmy3":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_TMY3.csv"
+        elif self.weather == "amy2018":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2018.csv"
+        elif self.weather == "amy2012":
+            return f"{self.base_url}weather/{self.weather}/{self.get_weather_station_name()}_2012.csv"
+        else:
+            return ""
+
     def get_annual_load_curve_filename(self) -> str:
         """Generate the filename for the annual load curve."""
         if self.release_year == "2021":
@@ -303,6 +384,28 @@ class BuildingID:
         else:
             return ""

+    def get_weather_station_name(self) -> str:
+        """Get the weather station name for this building."""
+        weather_map_df = pl.read_parquet(WEATHER_FILE_DIR)
+
+        # Filter by multiple fields for a more specific match
+        weather_station_map = weather_map_df.filter(
+            (pl.col("product") == self.res_com)
+            & (pl.col("release_year") == self.release_year)
+            & (pl.col("weather_file") == self.weather)
+            & (pl.col("release_version") == self.release_number)
+            & (pl.col("bldg_id") == self.bldg_id)
+        )
+
+        # Check if we found a match
+        if weather_station_map.height > 0:
+            # Return the weather station name from the first (and should be only) match
+            weather_station_name = weather_station_map.select("weather_station_name").item()
+            return str(weather_station_name) if weather_station_name is not None else ""
+        else:
+            # No match found, return empty string
+            return ""
+
     def _build_annual_load_state_url(self) -> str:
         """Build the state-level URL for annual load curve data.

@@ -430,6 +533,11 @@ def _validate_release_name(release_name: str) -> bool:
     return release_name in valid_release_names


+def _resolve_unique_metadata_urls(bldg_ids: list[BuildingID]) -> list[str]:
+    """Resolve the unique metadata URLs for a list of building IDs."""
+    return list({bldg_id.get_metadata_url() for bldg_id in bldg_ids})
+
+
 def fetch_bldg_ids(
     product: str, release_year: str, weather_file: str, release_version: str, state: str, upgrade_id: str
 ) -> list[BuildingID]:
@@ -499,13 +607,13 @@ def fetch_bldg_ids(
 def _download_with_progress(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
     """Download a file with progress tracking."""
     # Get file size first
-    response = requests.head(url, timeout=30)
+    response = requests.head(url, timeout=30, verify=True)
     response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
     progress.update(task_id, total=total_size)

     # Download with streaming
-    response = requests.get(url, stream=True, timeout=30)
+    response = requests.get(url, stream=True, timeout=30, verify=True)
     response.raise_for_status()

     downloaded_size = 0
@@ -522,6 +630,65 @@ def _download_with_progress(url: str, output_file: Path, progress: Progress, tas
     return downloaded_size


+def _download_with_progress_metadata(url: str, output_file: Path, progress: Progress, task_id: TaskID) -> int:
+    """Download a metadata file with progress tracking and append to existing file if it exists."""
+    # Get file size first
+    response = requests.head(url, timeout=30, verify=True)
+    response.raise_for_status()
+    total_size = int(response.headers.get("content-length", 0))
+    progress.update(task_id, total=total_size)
+
+    # Download with streaming
+    response = requests.get(url, stream=True, timeout=30, verify=True)
+    response.raise_for_status()
+
+    downloaded_size = 0
+
+    # Check if output file already exists
+    if output_file.exists():
+        # Read existing parquet file
+        existing_df = pl.read_parquet(output_file)
+
+        # Download new data to temporary file
+        with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as temp_file:
+            temp_path = Path(temp_file.name)
+
+        try:
+            # Download to temp file
+            with open(temp_path, "wb") as file:
+                for chunk in response.iter_content(chunk_size=8192):
+                    if chunk:
+                        file.write(chunk)
+                        downloaded_size += len(chunk)
+                        if total_size > 0:
+                            progress.update(task_id, completed=downloaded_size)
+
+            # Read new data
+            new_df = pl.read_parquet(temp_path)
+
+            # Concatenate existing and new data, removing duplicates
+            combined_df = pl.concat([existing_df, new_df]).unique()
+
+            # Write combined data back to original file
+            combined_df.write_parquet(output_file)
+
+        finally:
+            # Clean up temp file
+            if temp_path.exists():
+                temp_path.unlink()
+    else:
+        # File doesn't exist, download normally
+        with open(str(output_file), "wb") as file:
+            for chunk in response.iter_content(chunk_size=8192):
+                if chunk:
+                    file.write(chunk)
+                    downloaded_size += len(chunk)
+                    if total_size > 0:
+                        progress.update(task_id, completed=downloaded_size)
+
+    return downloaded_size
+
+
 def _get_time_step_grouping_key(aggregate_time_step: str) -> tuple[str, str]:
     """Get the grouping key and format string for a given time step.
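Note: the append path in `_download_with_progress_metadata` above rests on Polars' `concat` followed by `unique` to merge a freshly downloaded metadata file into one already on disk. A minimal sketch of just that merge step, using toy frames rather than the real metadata schema:

    import polars as pl

    # Rows present in both the existing file and the new download collapse to one copy,
    # so repeating a request for an overlapping set of buildings does not duplicate rows.
    existing_df = pl.DataFrame({"bldg_id": [1, 2], "state": ["MA", "MA"]})
    new_df = pl.DataFrame({"bldg_id": [2, 3], "state": ["MA", "MA"]})
    combined_df = pl.concat([existing_df, new_df]).unique()
    print(combined_df.sort("bldg_id"))  # bldg_id 1, 2, 3, each exactly once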
 
@@ -579,10 +746,19 @@ def _create_aggregation_expressions(load_curve: pl.DataFrame, column_aggregation
     return agg_exprs


-def _aggregate_load_curve_aggregate(load_curve: pl.DataFrame, aggregate_time_step: str) -> pl.DataFrame:
-    """Aggregate the 15-minute load curve to specified time step based on aggregation rules."""
+def _aggregate_load_curve_aggregate(
+    load_curve: pl.DataFrame, aggregate_time_step: str, release_year: str
+) -> pl.DataFrame:
+    """Aggregate the 15-minute load curve to specified time step based on aggregation rules.
+
+    Removes the last row to ensure complete aggregation periods.
+    """
     # Read the aggregation rules from CSV
-    aggregation_rules = pl.read_csv(LOAD_CURVE_COLUMN_AGGREGATION)
+    if release_year == "2024":
+        load_curve_map = LOAD_CURVE_COLUMN_AGGREGATION.joinpath("2024_resstock_load_curve_columns.csv")
+    elif release_year == "2022":
+        load_curve_map = LOAD_CURVE_COLUMN_AGGREGATION.joinpath("2022_resstock_load_curve_columns.csv")
+    aggregation_rules = pl.read_csv(load_curve_map)

     # Create a dictionary mapping column names to their aggregation functions
     column_aggregations = dict(zip(aggregation_rules["name"], aggregation_rules["Aggregate_function"]))
@@ -595,6 +771,13 @@ def _aggregate_load_curve_aggregate(load_curve: pl.DataFrame, aggregate_time_ste
     # Convert timestamp to datetime if it's not already
     load_curve = load_curve.with_columns(pl.col("timestamp").cast(pl.Datetime))

+    # We want to subtract 15 minutes because the original load curve provides information
+    # for the previous 15 minutes for each timestamp. For example, the first timestamp is 00:00:15,
+    # and the columns correspond to consumption from 00:00:00 to 00:00:15. When aggregating,
+    # we want the 00:00:00 timestamp to correspond to the consumption from 00:00:00 to whenever the
+    # next timestamp is.
+    load_curve = load_curve.with_columns((pl.col("timestamp") - timedelta(minutes=15)).alias("timestamp"))
+
     # Get the grouping key configuration
     grouping_key, format_string = _get_time_step_grouping_key(aggregate_time_step)
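The comment above explains why timestamps are shifted back by 15 minutes before grouping: each stamp labels the interval that ended at it, while aggregation wants interval start times. A hedged illustration of the idea, using `dt.truncate` for hourly bucketing (the package's own grouping goes through `_get_time_step_grouping_key`, not shown here, and the column name is invented):

    from datetime import datetime, timedelta

    import polars as pl

    # Toy 15-minute load curve with end-of-interval timestamps.
    load_curve = pl.DataFrame({
        "timestamp": [
            datetime(2018, 1, 1, 0, 15),
            datetime(2018, 1, 1, 0, 30),
            datetime(2018, 1, 1, 0, 45),
            datetime(2018, 1, 1, 1, 0),
        ],
        "electricity_kwh": [1.0, 1.0, 1.0, 1.0],
    })

    # Shift each stamp to the start of its interval, then sum within each hour:
    # all four readings land in the 00:00 bucket instead of being split 3/1.
    hourly = (
        load_curve.with_columns((pl.col("timestamp") - timedelta(minutes=15)).alias("timestamp"))
        .group_by(pl.col("timestamp").dt.truncate("1h"))
        .agg(pl.col("electricity_kwh").sum())
    )
    print(hourly)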
 
@@ -614,11 +797,11 @@ def _aggregate_load_curve_aggregate(load_curve: pl.DataFrame, aggregate_time_ste


 def _download_and_process_aggregate(
-    url: str, output_file: Path, progress: Progress, task_id: TaskID, aggregate_time_step: str
+    url: str, output_file: Path, progress: Progress, task_id: TaskID, aggregate_time_step: str, release_year: str
 ) -> int:
     """Download aggregate time step load curve to temporary file, process with Polars, and save result."""
     # Get file size first for progress tracking
-    response = requests.head(url, timeout=30)
+    response = requests.head(url, timeout=30, verify=True)
     response.raise_for_status()
     total_size = int(response.headers.get("content-length", 0))
     progress.update(task_id, total=total_size)
@@ -635,7 +818,7 @@ def _download_and_process_aggregate(
     session.mount("https://", retry_strategy)

     # Download with streaming to temp file
-    response = session.get(url, stream=True, timeout=60)
+    response = session.get(url, stream=True, timeout=60, verify=True)
     response.raise_for_status()

     downloaded_size = 0
@@ -649,7 +832,7 @@ def _download_and_process_aggregate(

         # Process with Polars
         load_curve_15min = pl.read_parquet(temp_path)
-        load_curve_aggregate = _aggregate_load_curve_aggregate(load_curve_15min, aggregate_time_step)
+        load_curve_aggregate = _aggregate_load_curve_aggregate(load_curve_15min, aggregate_time_step, release_year)

         # Save processed file to final destination
         load_curve_aggregate.write_parquet(output_file)
@@ -706,7 +889,7 @@ def download_bldg_data(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         output_file.write_bytes(response.content)

@@ -763,33 +946,6 @@ def download_bldg_data(
     return downloaded_paths


-def download_metadata(bldg_id: BuildingID, output_dir: Path) -> Path:
-    """Download the metadata for a given building.
-
-    Args:
-        bldg_id: A BuildingID object to download metadata for.
-        output_dir: Directory to save the downloaded metadata.
-    """
-
-    download_url = bldg_id.get_metadata_url()
-    if download_url == "":
-        message = f"Metadata is not available for {bldg_id.get_release_name()}"
-        raise NoMetadataError(message)
-    response = requests.get(download_url, timeout=30)
-    response.raise_for_status()
-    output_file = (
-        output_dir
-        / bldg_id.get_release_name()
-        / "metadata"
-        / f"state={bldg_id.state}"
-        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
-        / "metadata.parquet"
-    )
-    output_file.parent.mkdir(parents=True, exist_ok=True)
-    output_file.write_bytes(response.content)
-    return output_file
-
-
 def download_15min_load_curve(bldg_id: BuildingID, output_dir: Path) -> Path:
     """Download the 15 min load profile timeseries for a given building.

@@ -802,7 +958,7 @@ def download_15min_load_curve(bldg_id: BuildingID, output_dir: Path) -> Path:
     if download_url == "":
         message = f"15 min load profile timeseries is not available for {bldg_id.get_release_name()}"
         raise No15minLoadCurveError(message)
-    response = requests.get(download_url, timeout=30)
+    response = requests.get(download_url, timeout=30, verify=True)
     response.raise_for_status()
     output_file = (
         output_dir
@@ -850,7 +1006,7 @@ def download_15min_load_curve_with_progress(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         output_file.write_bytes(response.content)

@@ -894,19 +1050,23 @@ def download_aggregate_time_step_load_curve_with_progress(

     # Download with progress tracking if progress object is provided
     if progress and task_id is not None:
-        _download_and_process_aggregate(download_url, output_file, progress, task_id, aggregate_time_step)
+        _download_and_process_aggregate(
+            download_url, output_file, progress, task_id, aggregate_time_step, bldg_id.release_year
+        )
     else:
         # For non-progress downloads, still use temp file approach for consistency
         with tempfile.NamedTemporaryFile(delete=False, suffix=".parquet") as temp_file:
             temp_path = Path(temp_file.name)
             try:
-                response = requests.get(download_url, timeout=30)
+                response = requests.get(download_url, timeout=30, verify=True)
                 response.raise_for_status()
                 temp_path.write_bytes(response.content)

                 # Process with Polars
                 load_curve_15min = pl.read_parquet(temp_path)
-                load_curve_aggregate = _aggregate_load_curve_aggregate(load_curve_15min, aggregate_time_step)
+                load_curve_aggregate = _aggregate_load_curve_aggregate(
+                    load_curve_15min, aggregate_time_step, bldg_id.release_year
+                )

                 # Save processed file to final destination
                 load_curve_aggregate.write_parquet(output_file)
@@ -936,9 +1096,38 @@ def _parse_requested_file_type(file_type: tuple[str, ...]) -> RequestedFileTypes
         file_type_obj.load_curve_monthly = True
     if "load_curve_annual" in file_type:
         file_type_obj.load_curve_annual = True
+    if "weather" in file_type:
+        file_type_obj.weather = True
     return file_type_obj


+def _process_metadata_results(bldg_ids: list[BuildingID], output_dir: Path, downloaded_paths: list[Path]) -> None:
+    """Process the results of a completed metadata download."""
+    metadata_to_bldg_id_mapping: dict[Path, list[int]] = {}
+    for bldg_id in bldg_ids:
+        output_file = (
+            output_dir
+            / bldg_id.get_release_name()
+            / "metadata"
+            / f"state={bldg_id.state}"
+            / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+            / "metadata.parquet"
+        )
+        if output_file in downloaded_paths:
+            if output_file in metadata_to_bldg_id_mapping:
+                metadata_to_bldg_id_mapping[output_file].append(bldg_id.bldg_id)
+            else:
+                metadata_to_bldg_id_mapping[output_file] = [bldg_id.bldg_id]
+
+    for metadata_file, bldg_id_list in metadata_to_bldg_id_mapping.items():
+        # Use scan_parquet for lazy evaluation and better memory efficiency
+        metadata_df_filtered = pl.scan_parquet(metadata_file).filter(pl.col("bldg_id").is_in(bldg_id_list)).collect()
+        # Write the filtered dataframe back to the same file
+        metadata_df_filtered.write_parquet(metadata_file)
+
+    return
+
+
 def _process_download_results(
     future: concurrent.futures.Future,
     bldg_id: BuildingID,
@@ -965,37 +1154,73 @@ def _process_download_results(
         console.print(f"[red]Download failed for bldg_id {bldg_id}: {e}[/red]")


-def _download_metadata_with_progress(bldg: BuildingID, output_dir: Path, progress: Progress) -> Path:
+def _download_metadata_with_progress(
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+) -> tuple[list[Path], list[str]]:
     """Download metadata file with progress tracking."""
-    download_url = bldg.get_metadata_url()
-    if download_url == "":
-        message = f"Metadata is not available for {bldg.get_release_name()}"
-        raise NoMetadataError(message)
+    metadata_urls = _resolve_unique_metadata_urls(bldg_ids)
+    downloaded_urls: list[str] = []
+    for bldg_id in bldg_ids:
+        output_file = (
+            output_dir
+            / bldg_id.get_release_name()
+            / "metadata"
+            / f"state={bldg_id.state}"
+            / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+            / "metadata.parquet"
+        )
+        download_url = bldg_id.get_metadata_url()
+        if download_url == "":
+            failed_downloads.append(str(output_file))
+            continue
+        if download_url in downloaded_urls:
+            continue
+        downloaded_urls.append(download_url)
+        if download_url in metadata_urls:
+            metadata_urls.remove(download_url)
+        metadata_task = progress.add_task(
+            f"[yellow]Downloading metadata: {download_url}",
+            total=0,  # Will be updated when we get the file size
+        )
+        # Get file size first
+        response = requests.head(download_url, timeout=30)
+        response.raise_for_status()
+        total_size = int(response.headers.get("content-length", 0))
+        progress.update(metadata_task, total=total_size)

-    # Create metadata task with progress tracking
-    metadata_task = progress.add_task(
-        "[yellow]Downloading metadata",
-        total=0,  # Will be updated when we get the file size
-    )
+        output_file.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            _download_with_progress_metadata(download_url, output_file, progress, metadata_task)
+            downloaded_paths.append(output_file)
+        except Exception as e:
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Download failed for metadata {bldg_id.bldg_id}: {e}[/red]")

-    # Get file size first
-    response = requests.head(download_url, timeout=30)
-    response.raise_for_status()
-    total_size = int(response.headers.get("content-length", 0))
-    progress.update(metadata_task, total=total_size)
+    return downloaded_paths, failed_downloads

-    # Download with progress
+
+def download_weather_file_with_progress(
+    bldg_id: BuildingID, output_dir: Path, progress: Progress, task_id: TaskID
+) -> Path:
+    """Download weather file with progress tracking."""
+    download_url = bldg_id.get_weather_file_url()
+    if download_url == "":
+        raise NoWeatherFileError()
     output_file = (
         output_dir
-        / bldg.get_release_name()
-        / "metadata"
-        / f"state={bldg.state}"
-        / f"upgrade={str(int(bldg.upgrade_id)).zfill(2)}"
-        / "metadata.parquet"
+        / bldg_id.get_release_name()
+        / "weather"
+        / f"state={bldg_id.state}"
+        / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+        / f"{bldg_id.get_weather_station_name()}.csv"
     )
     output_file.parent.mkdir(parents=True, exist_ok=True)
-    _download_with_progress(download_url, output_file, progress, metadata_task)
-
+    _download_with_progress(download_url, output_file, progress, task_id)
     return output_file


@@ -1244,19 +1469,19 @@ def _download_aggregate_load_curves_parallel(
     )


-def _download_metadata_single(
+def _download_metadata(
     bldg_ids: list[BuildingID],
     output_dir: Path,
     progress: Progress,
     downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
 ) -> None:
     """Download metadata file (only one needed per release)."""
     if not bldg_ids:
         return
-
-    bldg = bldg_ids[0]
-    metadata_file = _download_metadata_with_progress(bldg, output_dir, progress)
-    downloaded_paths.append(metadata_file)
+    _download_metadata_with_progress(bldg_ids, output_dir, progress, downloaded_paths, failed_downloads, console)
+    _process_metadata_results(bldg_ids, output_dir, downloaded_paths)


 def download_annual_load_curve_with_progress(
@@ -1302,7 +1527,7 @@ def download_annual_load_curve_with_progress(
     if progress and task_id is not None:
         _download_with_progress(download_url, output_file, progress, task_id)
     else:
-        response = requests.get(download_url, timeout=30)
+        response = requests.get(download_url, timeout=30, verify=True)
         response.raise_for_status()
         with open(output_file, "wb") as file:
             file.write(response.content)
@@ -1369,6 +1594,97 @@ def _download_annual_load_curves_parallel(
             console.print(f"[red]Download failed for annual load curve {bldg_id.bldg_id}: {e}[/red]")


+def _download_weather_files_parallel(
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    max_workers: int,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+    weather_states: Union[list[str], None] = None,
+) -> None:
+    """Download weather files in parallel with progress tracking."""
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
+    # Break if weather_states is empty
+    if len(weather_states) == 0:
+        for bldg_id in bldg_ids:
+            output_file = (
+                output_dir
+                / bldg_id.get_release_name()
+                / "weather"
+                / f"state={bldg_id.state}"
+                / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                / f"{bldg_id.get_weather_station_name()}.csv"
+            )
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+        return
+    # Create progress tasks for weather file downloads
+    weather_file_tasks = {}
+    for i, bldg_id in enumerate(bldg_ids):
+        if bldg_id.state in weather_states:
+            task_id = progress.add_task(
+                f"[magenta]Weather file {bldg_id.bldg_id} (upgrade {bldg_id.upgrade_id})",
+                total=0,  # Will be updated when we get the file size
+            )
+            weather_file_tasks[i] = task_id
+        else:
+            output_file = (
+                output_dir
+                / bldg_id.get_release_name()
+                / "weather"
+                / f"state={bldg_id.state}"
+                / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                / f"{bldg_id.get_weather_station_name()}.csv"
+            )
+            failed_downloads.append(str(output_file))
+            console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+
+    with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+        # Create a modified version of the download function that uses the specific task IDs
+        def download_weather_file_with_task_id(bldg_id: BuildingID, output_dir: Path, task_id: TaskID) -> Path:
+            return download_weather_file_with_progress(bldg_id, output_dir, progress, task_id)
+
+        future_to_bldg = {
+            executor.submit(download_weather_file_with_task_id, bldg_id, output_dir, weather_file_tasks[i]): bldg_id
+            for i, bldg_id in enumerate(bldg_ids)
+        }
+
+        for future in concurrent.futures.as_completed(future_to_bldg):
+            bldg_id = future_to_bldg[future]
+            try:
+                output_file = future.result()
+                downloaded_paths.append(output_file)
+            except NoWeatherFileError:
+                output_file = (
+                    output_dir
+                    / bldg_id.get_release_name()
+                    / "weather"
+                    / f"state={bldg_id.state}"
+                    / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                    / f"{bldg_id.get_weather_station_name()}.csv"
+                )
+                failed_downloads.append(str(output_file))
+                console.print(f"[red]Weather file not available for {bldg_id.get_release_name()}[/red]")
+                raise
+            except Exception as e:
+                output_file = (
+                    output_dir
+                    / bldg_id.get_release_name()
+                    / "weather"
+                    / f"state={bldg_id.state}"
+                    / f"upgrade={str(int(bldg_id.upgrade_id)).zfill(2)}"
+                    / f"{bldg_id.get_weather_station_name()}.csv"
+                )
+                failed_downloads.append(str(output_file))
+                console.print(f"[red]Download failed for weather file {bldg_id.bldg_id}: {e}[/red]")
+                raise
+
 def _print_download_summary(downloaded_paths: list[Path], failed_downloads: list[str], console: Console) -> None:
     """Print a summary of the download results."""
     console.print("\n[bold green]Download complete![/bold green]")
@@ -1380,7 +1696,11 @@ def _print_download_summary(downloaded_paths: list[Path], failed_downloads: list


 def fetch_bldg_data(
-    bldg_ids: list[BuildingID], file_type: tuple[str, ...], output_dir: Path, max_workers: int = 5
+    bldg_ids: list[BuildingID],
+    file_type: tuple[str, ...],
+    output_dir: Path,
+    max_workers: int = 5,
+    weather_states: Union[list[str], None] = None,
 ) -> tuple[list[Path], list[str]]:
     """Download building data for a given list of building ids

@@ -1395,19 +1715,29 @@ def fetch_bldg_data(
     file_type_obj = _parse_requested_file_type(file_type)
     console = Console()

+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
     downloaded_paths: list[Path] = []
     failed_downloads: list[str] = []

     # Calculate total files to download
     total_files = 0
     if file_type_obj.metadata:
-        total_files += 1  # Add metadata file
+        unique_metadata_urls = _resolve_unique_metadata_urls(bldg_ids)
+        total_files += len(unique_metadata_urls)  # Add metadata file
     if file_type_obj.load_curve_15min:
         total_files += len(bldg_ids)  # Add 15-minute load curve files
+    if file_type_obj.load_curve_hourly:
+        total_files += len(bldg_ids)  # Add hourly load curve files
     if file_type_obj.load_curve_monthly:
-        total_files += len(bldg_ids)  # Add 15-minute load curve files
+        total_files += len(bldg_ids)  # Add monthly load curve files
     if file_type_obj.load_curve_annual:
         total_files += len(bldg_ids)  # Add annual load curve files
+    if file_type_obj.weather:
+        available_bldg_ids = [bldg_id for bldg_id in bldg_ids if bldg_id.state in weather_states]
+        total_files += len(available_bldg_ids) * len(weather_states)  # Add weather map files

     console.print(f"\n[bold blue]Starting download of {total_files} files...[/bold blue]")

@@ -1425,45 +1755,103 @@ def fetch_bldg_data(
         console=console,
         transient=False,
     ) as progress:
-        # Download building data if requested.
-        if file_type_obj.hpxml or file_type_obj.schedule:
-            _download_building_data_parallel(
-                bldg_ids, file_type_obj, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+        _execute_downloads(
+            file_type_obj,
+            bldg_ids,
+            output_dir,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+            weather_states,
+        )

-        # Get metadata if requested. Only one building is needed to get the metadata.
-        if file_type_obj.metadata:
-            _download_metadata_single(bldg_ids, output_dir, progress, downloaded_paths)
+    _print_download_summary(downloaded_paths, failed_downloads, console)

-        # Get 15 min load profile timeseries if requested.
-        if file_type_obj.load_curve_15min:
-            _download_15min_load_curves_parallel(
-                bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+    return downloaded_paths, failed_downloads

-        if file_type_obj.load_curve_monthly:
-            aggregate_time_step = "monthly"
-            _download_aggregate_load_curves_parallel(
-                bldg_ids,
-                output_dir,
-                aggregate_time_step,
-                max_workers,
-                progress,
-                downloaded_paths,
-                failed_downloads,
-                console,
-            )

-        # Get annual load curve if requested.
-        if file_type_obj.load_curve_annual:
-            _download_annual_load_curves_parallel(
-                bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
-            )
+def _execute_downloads(
+    file_type_obj: RequestedFileTypes,
+    bldg_ids: list[BuildingID],
+    output_dir: Path,
+    max_workers: int,
+    progress: Progress,
+    downloaded_paths: list[Path],
+    failed_downloads: list[str],
+    console: Console,
+    weather_states: Union[list[str], None] = None,
+) -> None:
+    """Execute all requested downloads based on file type configuration."""
+    # Initialize weather_states to empty list if None
+    if weather_states is None:
+        weather_states = []
+
+    # Download building data if requested.
+    if file_type_obj.hpxml or file_type_obj.schedule:
+        _download_building_data_parallel(
+            bldg_ids, file_type_obj, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )

-    _print_download_summary(downloaded_paths, failed_downloads, console)
+    # Get metadata if requested. Only one building is needed to get the metadata.
+    if file_type_obj.metadata:
+        _download_metadata(bldg_ids, output_dir, progress, downloaded_paths, failed_downloads, console)

-    return downloaded_paths, failed_downloads
+    # Get 15 min load profile timeseries if requested.
+    if file_type_obj.load_curve_15min:
+        _download_15min_load_curves_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
+
+    if file_type_obj.load_curve_hourly:
+        aggregate_time_step = "hourly"
+        _download_aggregate_load_curves_parallel(
+            bldg_ids,
+            output_dir,
+            aggregate_time_step,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+        )
+
+    if file_type_obj.load_curve_monthly:
+        aggregate_time_step = "monthly"
+        _download_aggregate_load_curves_parallel(
+            bldg_ids,
+            output_dir,
+            aggregate_time_step,
+            max_workers,
+            progress,
+            downloaded_paths,
+            failed_downloads,
+            console,
+        )
+
+    # Get annual load curve if requested.
+    if file_type_obj.load_curve_annual:
+        _download_annual_load_curves_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console
+        )
+
+    # Get weather files if requested.
+    if file_type_obj.weather:
+        _download_weather_files_parallel(
+            bldg_ids, output_dir, max_workers, progress, downloaded_paths, failed_downloads, console, weather_states
+        )


 if __name__ == "__main__":  # pragma: no cover
-    print(fetch_bldg_ids("comstock", "2021", "tmy3", "1", "MA", "0")[:3])
+    bldg_ids = [
+        BuildingID(
+            bldg_id=67, release_year="2024", res_com="comstock", weather="tmy3", upgrade_id="0", release_number="2"
+        ),
+    ]
+    file_type = ("weather",)
+    output_dir = Path("data")
+    weather_states: list[str] = []
+    downloaded_paths, failed_downloads = fetch_bldg_data(bldg_ids, file_type, output_dir, weather_states=weather_states)
+    print(downloaded_paths)
+    print(failed_downloads)
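Taken together, the new `weather` file type only downloads files for buildings whose state appears in `weather_states`; anything else is reported under failed downloads rather than fetched. A hedged usage sketch (the release and building values are illustrative and assume that release is actually queryable through `fetch_bldg_ids`):

    from pathlib import Path

    from buildstock_fetch.main import fetch_bldg_data, fetch_bldg_ids

    # Grab a few Massachusetts buildings, then request their weather files and metadata.
    bldg_ids = fetch_bldg_ids("resstock", "2022", "tmy3", "1", "MA", "0")[:3]

    downloaded, failed = fetch_bldg_data(
        bldg_ids,
        file_type=("weather", "metadata"),
        output_dir=Path("data"),
        weather_states=["MA"],  # buildings in states not listed here end up in `failed`
    )
    print(downloaded)
    print(failed)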