PyPI - goesgcp - Versions diffs - 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl - Mend

goesgcp 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (8) hide show

goesgcp/main.py CHANGED Viewed

@@ -1,16 +1,16 @@
 import pathlib
 import shutil
+import time
 import xarray as xr
 import argparse
 import sys
 import tqdm
 from distutils.util import strtobool
-from concurrent.futures import ThreadPoolExecutor
 from multiprocessing import Pool
 from google.cloud import storage
 from datetime import datetime, timedelta, timezone
 from pyproj import CRS, Transformer
+from google.api_core.exceptions import GoogleAPIError
 def list_blobs(connection, bucket_name, prefix):
@@ -21,6 +21,7 @@ def list_blobs(connection, bucket_name, prefix):
     bucket = connection.bucket(bucket_name)
     blobs = bucket.list_blobs(prefix=prefix)
     return blobs
 def get_directory_prefix(year, julian_day, hour):
@@ -44,6 +45,8 @@ def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files):
     while len(files) < min_files:
         year = current_time.year
         julian_day = current_time.timetuple().tm_yday  # Get the Julian day
+        # Add 3 digits to the Julian day
+        julian_day = str(julian_day).zfill(3)
         hour = current_time.hour
         # Generate the directory prefix for the current date and time
@@ -75,7 +78,7 @@ def crop_reproject(args):
     file, output = args
     # Open the file
-    ds = xr.open_dataset(file, engine='netcdf4')
+    ds = xr.open_dataset(file, engine="netcdf4")
     # Select only var_name and goes_imager_projection
     ds = ds[[var_name, "goes_imager_projection"]]
@@ -154,21 +157,43 @@ def crop_reproject(args):
     return
-def download_file(args):
-    """Downloads a file from a GCP bucket."""
+def process_file(args):
+    """ Downloads and processes a file in parallel. """
     bucket_name, blob_name, local_path = args
-    # Create a client
-    bucket = storage_client.bucket(bucket_name)
-    blob = bucket.blob(blob_name)
+    # Download options
+    retries = 5
+    attempt = 0
+    while attempt < retries:
+        try:
+            # Connect to the bucket
+            bucket = storage_client.bucket(bucket_name)
+            blob = bucket.blob(blob_name)
-    # Download the file
-    blob.download_to_filename(local_path, timeout=120)
+            # Download the file
+            blob.download_to_filename(local_path, timeout=120)
+            break  # Exit the loop if the download is successful
+        except (GoogleAPIError, Exception) as e:  # Catch any exception
+            attempt += 1
+            if attempt < retries:
+                time.sleep(2 ** attempt)  # Backoff exponencial
+            else:
+                # Log the error to a file
+                with open('fail.log', 'a') as log_file:
+                    log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
+    # Crop the file
+    crop_reproject((local_path, output_path))
+    # Remove the local file
+    pathlib.Path(local_path).unlink()
 def main():
+    ''' Main function to download and process GOES-16 files. '''
     global output_path, var_name, \
           lat_min, lat_max, lon_min, lon_max, \
@@ -212,9 +237,9 @@ def main():
     # Parse arguments
     args = parser.parse_args()
-    # if len(sys.argv) == 1:
-    #     parser.print_help(sys.stderr)
-    #     sys.exit(1)
+    if len(sys.argv) == 1:
+        parser.print_help(sys.stderr)
+        sys.exit(1)
     # Set global variables
     output_path = args.output
@@ -261,42 +286,27 @@ def main():
     pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
     # Download files
-    print(f"Downloading {len(recent_files)} files...")
+    print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
     loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
                         bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
                         [Elapsed:{elapsed} Remaining:<{remaining}]')
-    if parallel:
-        # Download all files to a temporary directory
-        with ThreadPoolExecutor(max_workers=args.processes) as executor:
-            for file in recent_files:
-                local_path = f"tmp/{file.split('/')[-1]}"
-                executor.submit(download_file, (bucket_name, file, local_path))
-                loading_bar.update(1)
-        loading_bar.close()
+    if parallel: # Run in parallel
+        # Create a list of tasks
+        tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
-        # Process files
-        print(f"\nProcessing {len(recent_files)} files...")
-        load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
-                            bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
-                            [Elapsed:{elapsed} Remaining:<{remaining}]')
-        # Process files in parallel
+        # Download files in parallel
         with Pool(processes=args.processes) as pool:
-            for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
-                load_bar2.update(1)
-        load_bar2.close()
-    else:
+            for _ in pool.imap_unordered(process_file, tasks):
+                loading_bar.update(1)
+        loading_bar.close()
+    else: # Run in serial
         for file in recent_files:
             local_path = f"tmp/{file.split('/')[-1]}"
-            download_file((bucket_name, file, local_path))
-            crop_reproject((local_path, output_path))
+            process_file((bucket_name, file, local_path))
             loading_bar.update(1)
         loading_bar.close()
     # Remove temporary directory
     shutil.rmtree('tmp/')

{goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: goesgcp
-Version: 1.0.7
+Version: 1.0.9
 Summary: A package to download and process GOES-16/17 data
 Home-page: https://github.com/helvecioneto/goesgcp
 Author: Helvecio B. L. Neto

goesgcp-1.0.9.dist-info/RECORD ADDED Viewed

@@ -0,0 +1,8 @@
+goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
+goesgcp/main.py,sha256=Rk_VWU7Xg6WZWqG0SRBikUjcinMX6risIHBnv6KiKpA,11188
+goesgcp-1.0.9.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
+goesgcp-1.0.9.dist-info/METADATA,sha256=DrTSCYr4w0CWovIn9Xd2O2tV1pleZu-GNcKnG9CmgZw,2993
+goesgcp-1.0.9.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
+goesgcp-1.0.9.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
+goesgcp-1.0.9.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
+goesgcp-1.0.9.dist-info/RECORD,,

{goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/WHEEL RENAMED Viewed

@@ -1,5 +1,5 @@
 Wheel-Version: 1.0
-Generator: setuptools (75.6.0)
+Generator: setuptools (75.7.0)
 Root-Is-Purelib: true
 Tag: py3-none-any

goesgcp-1.0.7.dist-info/RECORD DELETED Viewed

@@ -1,8 +0,0 @@
-goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
-goesgcp/main.py,sha256=zCrAfAjc9Vzs9-7klATCCp6hhe1w4tsu_FiMBuBAafc,10893
-goesgcp-1.0.7.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
-goesgcp-1.0.7.dist-info/METADATA,sha256=eXzz34_yexZ4zx4EmaDRrpwiHFoBtJBzJ_hnVLQD-4Q,2993
-goesgcp-1.0.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
-goesgcp-1.0.7.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
-goesgcp-1.0.7.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
-goesgcp-1.0.7.dist-info/RECORD,,

{goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/LICENSE RENAMED Viewed

File without changes

{goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/entry_points.txt RENAMED Viewed

File without changes

{goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/top_level.txt RENAMED Viewed

File without changes

goesgcp 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

goesgcp 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl