goesgcp 1.0.6__tar.gz → 1.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -1,15 +1,16 @@
1
1
  import pathlib
2
2
  import shutil
3
+ import time
3
4
  import xarray as xr
4
5
  import argparse
5
6
  import sys
6
7
  import tqdm
7
- from concurrent.futures import ThreadPoolExecutor
8
+ from distutils.util import strtobool
8
9
  from multiprocessing import Pool
9
10
  from google.cloud import storage
10
11
  from datetime import datetime, timedelta, timezone
11
12
  from pyproj import CRS, Transformer
12
-
13
+ from google.api_core.exceptions import GoogleAPIError
13
14
 
14
15
 
15
16
  def list_blobs(connection, bucket_name, prefix):
@@ -74,7 +75,7 @@ def crop_reproject(args):
74
75
  file, output = args
75
76
 
76
77
  # Open the file
77
- ds = xr.open_dataset(file, engine='netcdf4')
78
+ ds = xr.open_dataset(file, engine="netcdf4")
78
79
 
79
80
  # Select only var_name and goes_imager_projection
80
81
  ds = ds[[var_name, "goes_imager_projection"]]
@@ -153,21 +154,43 @@ def crop_reproject(args):
153
154
  return
154
155
 
155
156
 
156
-
157
- def download_file(args):
158
- """Downloads a file from a GCP bucket."""
157
+ def process_file(args):
158
+ """ Downloads and processes a file in parallel. """
159
159
 
160
160
  bucket_name, blob_name, local_path = args
161
161
 
162
- # Create a client
163
- bucket = storage_client.bucket(bucket_name)
164
- blob = bucket.blob(blob_name)
162
+ # Download options
163
+ retries = 5
164
+ attempt = 0
165
+
166
+ while attempt < retries:
167
+ try:
168
+ # Connect to the bucket
169
+ bucket = storage_client.bucket(bucket_name)
170
+ blob = bucket.blob(blob_name)
165
171
 
166
- # Download the file
167
- blob.download_to_filename(local_path, timeout=120)
172
+ # Download the file
173
+ blob.download_to_filename(local_path, timeout=120)
174
+ break # Exit the loop if the download is successful
175
+ except (GoogleAPIError, Exception) as e: # Catch any exception
176
+ attempt += 1
177
+ if attempt < retries:
178
+ time.sleep(2 ** attempt) # Exponential backoff
179
+ else:
180
+ # Log the error to a file
181
+ with open('fail.log', 'a') as log_file:
182
+ log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
183
+
184
+ # Crop the file
185
+ crop_reproject((local_path, output_path))
186
+
187
+ # Remove the local file
188
+ pathlib.Path(local_path).unlink()
168
189
 
169
190
 
170
191
  def main():
192
+ ''' Main function to download and process GOES-16 files. '''
193
+
171
194
 
172
195
  global output_path, var_name, \
173
196
  lat_min, lat_max, lon_min, lon_max, \
@@ -204,7 +227,7 @@ def main():
204
227
  parser.add_argument('--output', type=str, default='output/', help='Path for saving output files')
205
228
 
206
229
  # Other settings
207
- parser.add_argument('--parallel', type=bool, default=True, help='Use parallel processing')
230
+ parser.add_argument('--parallel', type=lambda x: bool(strtobool(x)), default=True, help='Use parallel processing')
208
231
  parser.add_argument('--processes', type=int, default=4, help='Number of processes for parallel execution')
209
232
  parser.add_argument('--max_attempts', type=int, default=3, help='Number of attempts to download a file')
210
233
 
@@ -260,31 +283,26 @@ def main():
260
283
  pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
261
284
 
262
285
  # Download files
263
- print(f"Downloading {len(recent_files)} files...")
286
+ print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
264
287
  loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
265
288
  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
266
289
  [Elapsed:{elapsed} Remaining:<{remaining}]')
267
290
 
268
- # Download all files to a temporary directory
269
- with ThreadPoolExecutor(max_workers=args.processes) as executor:
291
+ if parallel: # Run in parallel
292
+ # Create a list of tasks
293
+ tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
294
+
295
+ # Download files in parallel
296
+ with Pool(processes=args.processes) as pool:
297
+ for _ in pool.imap_unordered(process_file, tasks):
298
+ loading_bar.update(1)
299
+ loading_bar.close()
300
+ else: # Run in serial
270
301
  for file in recent_files:
271
302
  local_path = f"tmp/{file.split('/')[-1]}"
272
- executor.submit(download_file, (bucket_name, file, local_path))
303
+ process_file((bucket_name, file, local_path))
273
304
  loading_bar.update(1)
274
- loading_bar.close()
275
-
276
- # Process files
277
- print(f"\nProcessing {len(recent_files)} files...")
278
- load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
279
- bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
280
- [Elapsed:{elapsed} Remaining:<{remaining}]')
281
-
282
-
283
- # Process files in parallel
284
- with Pool(processes=args.processes) as pool:
285
- for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
286
- load_bar2.update(1)
287
- load_bar2.close()
305
+ loading_bar.close()
288
306
 
289
307
  # Remove temporary directory
290
308
  shutil.rmtree('tmp/')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.6
3
+ Version: 1.0.8
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
13
13
 
14
14
  setup(
15
15
  name="goesgcp",
16
- version='1.0.6',
16
+ version='1.0.8',
17
17
  author="Helvecio B. L. Neto",
18
18
  author_email="helvecioblneto@gmail.com",
19
19
  description="A package to download and process GOES-16/17 data",
File without changes
File without changes
File without changes
File without changes