goesgcp 1.0.7__tar.gz → 1.0.8__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -1,16 +1,16 @@
1
1
  import pathlib
2
2
  import shutil
3
+ import time
3
4
  import xarray as xr
4
5
  import argparse
5
6
  import sys
6
7
  import tqdm
7
8
  from distutils.util import strtobool
8
- from concurrent.futures import ThreadPoolExecutor
9
9
  from multiprocessing import Pool
10
10
  from google.cloud import storage
11
11
  from datetime import datetime, timedelta, timezone
12
12
  from pyproj import CRS, Transformer
13
-
13
+ from google.api_core.exceptions import GoogleAPIError
14
14
 
15
15
 
16
16
  def list_blobs(connection, bucket_name, prefix):
@@ -75,7 +75,7 @@ def crop_reproject(args):
75
75
  file, output = args
76
76
 
77
77
  # Open the file
78
- ds = xr.open_dataset(file, engine='netcdf4')
78
+ ds = xr.open_dataset(file, engine="netcdf4")
79
79
 
80
80
  # Select only var_name and goes_imager_projection
81
81
  ds = ds[[var_name, "goes_imager_projection"]]
@@ -154,21 +154,43 @@ def crop_reproject(args):
154
154
  return
155
155
 
156
156
 
157
-
158
- def download_file(args):
159
- """Downloads a file from a GCP bucket."""
157
+ def process_file(args):
158
+ """ Downloads and processes a file in parallel. """
160
159
 
161
160
  bucket_name, blob_name, local_path = args
162
161
 
163
- # Create a client
164
- bucket = storage_client.bucket(bucket_name)
165
- blob = bucket.blob(blob_name)
162
+ # Download options
163
+ retries = 5
164
+ attempt = 0
165
+
166
+ while attempt < retries:
167
+ try:
168
+ # Connect to the bucket
169
+ bucket = storage_client.bucket(bucket_name)
170
+ blob = bucket.blob(blob_name)
171
+
172
+ # Download the file
173
+ blob.download_to_filename(local_path, timeout=120)
174
+ break # Exit the loop if the download is successful
175
+ except (GoogleAPIError, Exception) as e: # Catch any exception
176
+ attempt += 1
177
+ if attempt < retries:
178
+ time.sleep(2 ** attempt) # Exponential backoff
179
+ else:
180
+ # Log the error to a file
181
+ with open('fail.log', 'a') as log_file:
182
+ log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
166
183
 
167
- # Download the file
168
- blob.download_to_filename(local_path, timeout=120)
184
+ # Crop the file
185
+ crop_reproject((local_path, output_path))
186
+
187
+ # Remove the local file
188
+ pathlib.Path(local_path).unlink()
169
189
 
170
190
 
171
191
  def main():
192
+ ''' Main function to download and process GOES-16 files. '''
193
+
172
194
 
173
195
  global output_path, var_name, \
174
196
  lat_min, lat_max, lon_min, lon_max, \
@@ -212,9 +234,9 @@ def main():
212
234
  # Parse arguments
213
235
  args = parser.parse_args()
214
236
 
215
- # if len(sys.argv) == 1:
216
- # parser.print_help(sys.stderr)
217
- # sys.exit(1)
237
+ if len(sys.argv) == 1:
238
+ parser.print_help(sys.stderr)
239
+ sys.exit(1)
218
240
 
219
241
  # Set global variables
220
242
  output_path = args.output
@@ -261,42 +283,27 @@ def main():
261
283
  pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
262
284
 
263
285
  # Download files
264
- print(f"Downloading {len(recent_files)} files...")
286
+ print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
265
287
  loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
266
288
  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
267
289
  [Elapsed:{elapsed} Remaining:<{remaining}]')
268
290
 
269
- if parallel:
270
- # Download all files to a temporary directory
271
- with ThreadPoolExecutor(max_workers=args.processes) as executor:
272
- for file in recent_files:
273
- local_path = f"tmp/{file.split('/')[-1]}"
274
- executor.submit(download_file, (bucket_name, file, local_path))
275
- loading_bar.update(1)
276
- loading_bar.close()
277
-
278
- # Process files
279
- print(f"\nProcessing {len(recent_files)} files...")
280
- load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
281
- bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
282
- [Elapsed:{elapsed} Remaining:<{remaining}]')
291
+ if parallel: # Run in parallel
292
+ # Create a list of tasks
293
+ tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
283
294
 
284
-
285
- # Process files in parallel
295
+ # Download and process files in parallel
286
296
  with Pool(processes=args.processes) as pool:
287
- for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
288
- load_bar2.update(1)
289
- load_bar2.close()
290
- else:
297
+ for _ in pool.imap_unordered(process_file, tasks):
298
+ loading_bar.update(1)
299
+ loading_bar.close()
300
+ else: # Run in serial
291
301
  for file in recent_files:
292
302
  local_path = f"tmp/{file.split('/')[-1]}"
293
- download_file((bucket_name, file, local_path))
294
- crop_reproject((local_path, output_path))
303
+ process_file((bucket_name, file, local_path))
295
304
  loading_bar.update(1)
296
305
  loading_bar.close()
297
306
 
298
-
299
-
300
307
  # Remove temporary directory
301
308
  shutil.rmtree('tmp/')
302
309
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.7
3
+ Version: 1.0.8
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
13
13
 
14
14
  setup(
15
15
  name="goesgcp",
16
- version='1.0.7',
16
+ version='1.0.8',
17
17
  author="Helvecio B. L. Neto",
18
18
  author_email="helvecioblneto@gmail.com",
19
19
  description="A package to download and process GOES-16/17 data",
File without changes
File without changes
File without changes
File without changes