PyPI - goesgcp - Versions diffs - 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl - Mend

goesgcp 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

goesgcp/main.py CHANGED Viewed

@@ -13,6 +13,9 @@ from datetime import datetime, timedelta, timezone
 from pyproj import CRS, Transformer
 from google.api_core.exceptions import GoogleAPIError
+import warnings
+warnings.filterwarnings('ignore')
 def list_blobs(connection, bucket_name, prefix):
     """
@@ -54,11 +57,11 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
     files_metadata = []
     # Generate the list of dates from start to end
-    current_time = start
-    while current_time < end:
-        year = current_time.year
-        julian_day = str(current_time.timetuple().tm_yday).zfill(3)  # Julian day
-        hour = current_time.hour
+    temp = start
+    while temp <= end:
+        year = temp.year
+        julian_day = str(temp.timetuple().tm_yday).zfill(3)  # Julian day
+        hour = temp.hour
         # Generate the directory prefix
         prefix = f"{base_prefix}/{get_directory_prefix(year, julian_day, hour)}"
@@ -71,26 +74,27 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
             if pattern in blob.name:
                 files_metadata.append({
                     'file_name': blob.name,
-                    'last_modified': blob.updated
                 })
         # Move to the next hour
-        current_time += timedelta(hours=1)
+        temp += timedelta(hours=1)
     # Create a DataFrame from the list of files
     df = pd.DataFrame(files_metadata)
+    # Transform file_name to datetime
+    df['last_modified'] = pd.to_datetime(df['file_name'].str.extract(r'(\d{4}\d{3}\d{2}\d{2})').squeeze(), format='%Y%j%H%M')
     if df.empty:
         print("No files found matching the pattern.")
         return pd.DataFrame()
     # Ensure 'last_modified' is in the correct datetime format without timezone
-    df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize(None)
-    start = pd.to_datetime(start).tz_localize(None)
-    end = pd.to_datetime(end).tz_localize(None)
-    # Filter the DataFrame based on the date range
-    df = df[(df['last_modified'] >= start) & (df['last_modified'] < end)]
+    df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize('UTC')
+    # Filter the DataFrame based on the date range (inclusive)
+    df = df[(df['last_modified'] >= start) & (df['last_modified'] <= end)]
     # Filter the DataFrame based on the hour range
     df['hour'] = df['last_modified'].dt.hour
@@ -153,13 +157,22 @@ def crop_reproject(args):
     Crops and reprojects a GOES-16 file to EPSG:4326.
     """
-    file, output = args
+    file, output, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format = args
     # Open the file
     ds = xr.open_dataset(file, engine="netcdf4")
+    if var_name is None:
+        # Get all variables are 2D
+        var_names = [var for var in ds.data_vars if len(ds[var].dims) == 2]
+        # Remove DQF variables
+        var_names = [var for var in var_names if 'DQF' not in var]
+    else:
+        var_names = [var_name]
     # Select only var_name and goes_imager_projection
-    ds = ds[[var_name, "goes_imager_projection"]]
+    ds = ds[var_names + ["goes_imager_projection"]]
     # Get projection
     sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
@@ -215,14 +228,13 @@ def crop_reproject(args):
     ds = ds.rename({"x": "lon", "y": "lat"})
     # Add resolution to attributes
-    ds[var_name].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
+    for var in var_names:
+        ds[var].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
+        ds[var].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
     # Crop using lat/lon coordinates, in parallel
     ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)
-    # Add comments
-    ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
     # Add global metadata comments
     ds.attrs['comments'] = "Data processed by goesgcp, author: Helvecio B. L. Neto (helvecioblneto@gmail.com)"
@@ -249,20 +261,18 @@ def crop_reproject(args):
     output_file = f"{output_directory}{file.split('/')[-1]}"
     ds.to_netcdf(output_file, mode='w', format='NETCDF4_CLASSIC')
-    # Fechar o dataset
     ds.close()
-    return
 def process_file(args):
-    """ Downloads and processes a file in parallel. """
-    bucket_name, blob_name, local_path = args
+    """
+    Downloads and processes a GOES-16 file.
+    """
+    bucket_name, blob_name, local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, \
+    save_format, retries = args
-    # Download options
-    retries = 5
     attempt = 0
     while attempt < retries:
         try:
             # Connect to the bucket
@@ -282,31 +292,28 @@ def process_file(args):
                     log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
     # Crop the file
-    crop_reproject((local_path, output_path))
+    crop_reproject((local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format))
     # Remove the local file
     pathlib.Path(local_path).unlink()
+# Create connection
+storage_client = storage.Client.create_anonymous_client()
 def main():
     ''' Main function to download and process GOES-16 files. '''
-    global output_path, var_name, \
-          lat_min, lat_max, lon_min, lon_max, \
-          max_attempts, parallel, recent, resolution, storage_client, \
-            satellite, product, op_mode, channel, save_format
     epilog = """
     Example usage:
-    - To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product:
+    - To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product,
+    change resolution to 0.045, and crop the files between latitudes -35 and 5 and longitudes -80 and -30:
-    goesgcp --satellite goes16 --product ABI-L2-CMIP --recent 3
+    goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
     - To download files from the GOES-16 satellite for the ABI-L2-CMIPF product between 2022-12-15 and 2022-12-20:
-    goesgcp --start '2022-12-15 00:00:00' --end '2022-12-20 10:00:00' --bt_hour 5 6 --save_format by_date --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
+    goesgcp --satellite goes-16 --product ABI-L2-CMIPF --start '2022-12-15 09:00:00' --end '2022-12-15 09:50:00' --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
     """
@@ -329,14 +336,14 @@ def main():
     ]
     # Set arguments
-    parser = argparse.ArgumentParser(description='Converts GOES-16 L2 data to netCDF',
+    parser = argparse.ArgumentParser(description='Download and process GOES Satellite data files from GCP.',
                                     epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)
     # Satellite and product settings
     parser.add_argument('--satellite', type=str, default='goes-16', choices=['goes-16', 'goes-18'], help='Name of the satellite (e.g., goes16)')
-    parser.add_argument('--product', type=str, default='ABI-L2-CMIP', help='Name of the satellite product', choices=product_names)
-    parser.add_argument('--var_name', type=str, default='CMI', help='Variable name to extract (e.g., CMI)')
+    parser.add_argument('--product', type=str, default='ABI-L2-CMIPF', help='Name of the satellite product', choices=product_names)
+    parser.add_argument('--var_name', type=str, default=None, help='Variable name to extract (e.g., CMI)')
     parser.add_argument('--channel', type=int, default=13, help='Channel to use (e.g., 13)')
     parser.add_argument('--op_mode', type=str, default='M6C', help='Operational mode to use (e.g., M6C)')
@@ -407,9 +414,6 @@ def main():
     # Create output directory
     pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
-    # Create connection
-    storage_client = storage.Client.create_anonymous_client()
     # Check if the bucket exists
     try:
         storage_client.get_bucket(bucket_name)
@@ -445,7 +449,9 @@ def main():
     if parallel: # Run in parallel
         # Create a list of tasks
-        tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in files_list]
+        tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}", output_path, var_name,
+        lat_min, lat_max, lon_min, lon_max, resolution,
+        save_format, max_attempts) for file in files_list]
         # Download files in parallel
         with Pool(processes=args.processes) as pool:
@@ -455,11 +461,12 @@ def main():
     else: # Run in serial
         for file in files_list:
             local_path = f"tmp/{file.split('/')[-1]}"
-            process_file((bucket_name, file, local_path))
+            process_file((bucket_name, file, local_path, output_path, var_name,
+            lat_min, lat_max, lon_min, lon_max, resolution,
+            save_format, max_attempts))
             loading_bar.update(1)
         loading_bar.close()
-    # Remove temporary directory
     shutil.rmtree('tmp/')
 if __name__ == '__main__':

{goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.2
 Name: goesgcp
-Version: 2.0.1
+Version: 2.0.2
 Summary: A package to download and process GOES-16/17 data
 Home-page: https://github.com/helvecioneto/goesgcp
 Author: Helvecio B. L. Neto
@@ -89,7 +89,7 @@ goesgcp [OPTIONS]
 | `--save_format`       | Format for saving output files (default: `by_date`).                      |
 #### Available GOES Products
-A comprehensive list of available GOES products can be found at the following link: [https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-goes16](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-goes16)
+A comprehensive list of available GOES products can be found at the following link: [https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16](https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16)
 ### Examples
@@ -97,7 +97,7 @@ A comprehensive list of available GOES products can be found at the following li
 In the example below, the command downloads the 3 most recent files from the GOES-16 satellite for the product ABI-L2-CMIPF. It focuses on the variable CMI (Cloud and Moisture Imagery) from channel 13, which is commonly used for infrared observations. The downloaded files are saved to the specified output directory output/.
 ```bash
-goesgcp --satellite goes-16 --product ABI-L2-CMIPF --var_name CMI --channel 13 --recent 3 --output "output/"
+goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --output "output/"
 ```
 #### Download Data for a Specific Time Range

goesgcp-2.0.2.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
+goesgcp/main.py,sha256=tDkn46WP0Nwet_3EwW6Rx-A-ASipuc8X251qZOpeeMU,18851
+goesgcp-2.0.2.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
+goesgcp-2.0.2.dist-info/METADATA,sha256=mab7nr7wRRptYsLU_vZykjWZKqXyIT72aV5RiL-CBmQ,6119
+goesgcp-2.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
+goesgcp-2.0.2.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
+goesgcp-2.0.2.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
+goesgcp-2.0.2.dist-info/RECORD,,

goesgcp-2.0.1.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
-goesgcp/main.py,sha256=F2Z0J4DVF2oeejlIN6WEXi3K8eueZxGWIH5JulEWhXE,18178
-goesgcp-2.0.1.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
-goesgcp-2.0.1.dist-info/METADATA,sha256=a1MA1uUCdoG5Ihpr8szCYyVpzjCkmSePv0EnNZTKaLo,6149
-goesgcp-2.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
-goesgcp-2.0.1.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
-goesgcp-2.0.1.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
-goesgcp-2.0.1.dist-info/RECORD,,

{goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/LICENSE RENAMED Viewed

File without changes

{goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/WHEEL RENAMED Viewed

File without changes

{goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/top_level.txt RENAMED Viewed

File without changes

goesgcp 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl

goesgcp 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl