PyPI - goesgcp - Versions diffs - 1.0.6__tar.gz → 1.0.8__tar.gz - Mend

goesgcp 1.0.6.tar.gz → 1.0.8.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

{goesgcp-1.0.6 → goesgcp-1.0.8}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: goesgcp
-Version: 1.0.6
+Version: 1.0.8
 Summary: A package to download and process GOES-16/17 data
 Home-page: https://github.com/helvecioneto/goesgcp
 Author: Helvecio B. L. Neto

{goesgcp-1.0.6 → goesgcp-1.0.8}/goesgcp/main.py RENAMED Viewed

@@ -1,15 +1,16 @@
 import pathlib
 import shutil
+import time
 import xarray as xr
 import argparse
 import sys
 import tqdm
-from concurrent.futures import ThreadPoolExecutor
+from distutils.util import strtobool
 from multiprocessing import Pool
 from google.cloud import storage
 from datetime import datetime, timedelta, timezone
 from pyproj import CRS, Transformer
+from google.api_core.exceptions import GoogleAPIError
 def list_blobs(connection, bucket_name, prefix):
@@ -74,7 +75,7 @@ def crop_reproject(args):
     file, output = args
     # Open the file
-    ds = xr.open_dataset(file, engine='netcdf4')
+    ds = xr.open_dataset(file, engine="netcdf4")
     # Select only var_name and goes_imager_projection
     ds = ds[[var_name, "goes_imager_projection"]]
@@ -153,21 +154,43 @@ def crop_reproject(args):
     return
-def download_file(args):
-    """Downloads a file from a GCP bucket."""
+def process_file(args):
+    """ Downloads and processes a file in parallel. """
     bucket_name, blob_name, local_path = args
-    # Create a client
-    bucket = storage_client.bucket(bucket_name)
-    blob = bucket.blob(blob_name)
+    # Download options
+    retries = 5
+    attempt = 0
+    while attempt < retries:
+        try:
+            # Connect to the bucket
+            bucket = storage_client.bucket(bucket_name)
+            blob = bucket.blob(blob_name)
-    # Download the file
-    blob.download_to_filename(local_path, timeout=120)
+            # Download the file
+            blob.download_to_filename(local_path, timeout=120)
+            break  # Exit the loop if the download is successful
+        except (GoogleAPIError, Exception) as e:  # Catch any exception
+            attempt += 1
+            if attempt < retries:
+                time.sleep(2 ** attempt)  # Backoff exponencial
+            else:
+                # Log the error to a file
+                with open('fail.log', 'a') as log_file:
+                    log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
+    # Crop the file
+    crop_reproject((local_path, output_path))
+    # Remove the local file
+    pathlib.Path(local_path).unlink()
 def main():
+    ''' Main function to download and process GOES-16 files. '''
     global output_path, var_name, \
           lat_min, lat_max, lon_min, lon_max, \
@@ -204,7 +227,7 @@ def main():
     parser.add_argument('--output', type=str, default='output/', help='Path for saving output files')
     # Other settings
-    parser.add_argument('--parallel', type=bool, default=True, help='Use parallel processing')
+    parser.add_argument('--parallel', type=lambda x: bool(strtobool(x)), default=True, help='Use parallel processing')
     parser.add_argument('--processes', type=int, default=4, help='Number of processes for parallel execution')
     parser.add_argument('--max_attempts', type=int, default=3, help='Number of attempts to download a file')
@@ -260,31 +283,26 @@ def main():
     pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
     # Download files
-    print(f"Downloading {len(recent_files)} files...")
+    print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
     loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
                         bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
                         [Elapsed:{elapsed} Remaining:<{remaining}]')
-    # Download all files to a temporary directory
-    with ThreadPoolExecutor(max_workers=args.processes) as executor:
+    if parallel: # Run in parallel
+        # Create a list of tasks
+        tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
+        # Download files in parallel
+        with Pool(processes=args.processes) as pool:
+            for _ in pool.imap_unordered(process_file, tasks):
+                loading_bar.update(1)
+        loading_bar.close()
+    else: # Run in serial
         for file in recent_files:
             local_path = f"tmp/{file.split('/')[-1]}"
-            executor.submit(download_file, (bucket_name, file, local_path))
+            process_file((bucket_name, file, local_path))
             loading_bar.update(1)
-    loading_bar.close()
-    # Process files
-    print(f"\nProcessing {len(recent_files)} files...")
-    load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
-                        bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
-                        [Elapsed:{elapsed} Remaining:<{remaining}]')
-    # Process files in parallel
-    with Pool(processes=args.processes) as pool:
-        for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
-            load_bar2.update(1)
-    load_bar2.close()
+        loading_bar.close()
     # Remove temporary directory
     shutil.rmtree('tmp/')

{goesgcp-1.0.6 → goesgcp-1.0.8}/goesgcp.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: goesgcp
-Version: 1.0.6
+Version: 1.0.8
 Summary: A package to download and process GOES-16/17 data
 Home-page: https://github.com/helvecioneto/goesgcp
 Author: Helvecio B. L. Neto

{goesgcp-1.0.6 → goesgcp-1.0.8}/setup.py RENAMED Viewed

@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
 setup(
     name="goesgcp",
-    version='1.0.6',
+    version='1.0.8',
     author="Helvecio B. L. Neto",
     author_email="helvecioblneto@gmail.com",
     description="A package to download and process GOES-16/17 data",