goesgcp 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
goesgcp/main.py CHANGED
@@ -1,16 +1,16 @@
1
1
  import pathlib
2
2
  import shutil
3
+ import time
3
4
  import xarray as xr
4
5
  import argparse
5
6
  import sys
6
7
  import tqdm
7
8
  from distutils.util import strtobool
8
- from concurrent.futures import ThreadPoolExecutor
9
9
  from multiprocessing import Pool
10
10
  from google.cloud import storage
11
11
  from datetime import datetime, timedelta, timezone
12
12
  from pyproj import CRS, Transformer
13
-
13
+ from google.api_core.exceptions import GoogleAPIError
14
14
 
15
15
 
16
16
  def list_blobs(connection, bucket_name, prefix):
@@ -21,6 +21,7 @@ def list_blobs(connection, bucket_name, prefix):
21
21
  bucket = connection.bucket(bucket_name)
22
22
 
23
23
  blobs = bucket.list_blobs(prefix=prefix)
24
+
24
25
  return blobs
25
26
 
26
27
  def get_directory_prefix(year, julian_day, hour):
@@ -44,6 +45,8 @@ def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files):
44
45
  while len(files) < min_files:
45
46
  year = current_time.year
46
47
  julian_day = current_time.timetuple().tm_yday # Get the Julian day
48
+ # Zero-pad the Julian day to 3 digits (e.g. 7 -> "007")
49
+ julian_day = str(julian_day).zfill(3)
47
50
  hour = current_time.hour
48
51
 
49
52
  # Generate the directory prefix for the current date and time
@@ -75,7 +78,7 @@ def crop_reproject(args):
75
78
  file, output = args
76
79
 
77
80
  # Open the file
78
- ds = xr.open_dataset(file, engine='netcdf4')
81
+ ds = xr.open_dataset(file, engine="netcdf4")
79
82
 
80
83
  # Select only var_name and goes_imager_projection
81
84
  ds = ds[[var_name, "goes_imager_projection"]]
@@ -154,21 +157,43 @@ def crop_reproject(args):
154
157
  return
155
158
 
156
159
 
157
-
158
- def download_file(args):
159
- """Downloads a file from a GCP bucket."""
160
+ def process_file(args):
161
+ """ Downloads a file from the bucket (with retries), crops/reprojects it, and removes the local copy. """
160
162
 
161
163
  bucket_name, blob_name, local_path = args
162
164
 
163
- # Create a client
164
- bucket = storage_client.bucket(bucket_name)
165
- blob = bucket.blob(blob_name)
165
+ # Download options
166
+ retries = 5
167
+ attempt = 0
168
+
169
+ while attempt < retries:
170
+ try:
171
+ # Connect to the bucket
172
+ bucket = storage_client.bucket(bucket_name)
173
+ blob = bucket.blob(blob_name)
166
174
 
167
- # Download the file
168
- blob.download_to_filename(local_path, timeout=120)
175
+ # Download the file
176
+ blob.download_to_filename(local_path, timeout=120)
177
+ break # Exit the loop if the download is successful
178
+ except (GoogleAPIError, Exception) as e: # Catch any exception
179
+ attempt += 1
180
+ if attempt < retries:
181
+ time.sleep(2 ** attempt) # Exponential backoff
182
+ else:
183
+ # Log the error to a file
184
+ with open('fail.log', 'a') as log_file:
185
+ log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
186
+
187
+ # Crop the file
188
+ crop_reproject((local_path, output_path))
189
+
190
+ # Remove the local file
191
+ pathlib.Path(local_path).unlink()
169
192
 
170
193
 
171
194
  def main():
195
+ ''' Main function to download and process GOES-16 files. '''
196
+
172
197
 
173
198
  global output_path, var_name, \
174
199
  lat_min, lat_max, lon_min, lon_max, \
@@ -212,9 +237,9 @@ def main():
212
237
  # Parse arguments
213
238
  args = parser.parse_args()
214
239
 
215
- # if len(sys.argv) == 1:
216
- # parser.print_help(sys.stderr)
217
- # sys.exit(1)
240
+ if len(sys.argv) == 1:
241
+ parser.print_help(sys.stderr)
242
+ sys.exit(1)
218
243
 
219
244
  # Set global variables
220
245
  output_path = args.output
@@ -261,42 +286,27 @@ def main():
261
286
  pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
262
287
 
263
288
  # Download files
264
- print(f"Downloading {len(recent_files)} files...")
289
+ print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
265
290
  loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
266
291
  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
267
292
  [Elapsed:{elapsed} Remaining:<{remaining}]')
268
293
 
269
- if parallel:
270
- # Download all files to a temporary directory
271
- with ThreadPoolExecutor(max_workers=args.processes) as executor:
272
- for file in recent_files:
273
- local_path = f"tmp/{file.split('/')[-1]}"
274
- executor.submit(download_file, (bucket_name, file, local_path))
275
- loading_bar.update(1)
276
- loading_bar.close()
294
+ if parallel: # Run in parallel
295
+ # Create a list of tasks
296
+ tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
277
297
 
278
- # Process files
279
- print(f"\nProcessing {len(recent_files)} files...")
280
- load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
281
- bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
282
- [Elapsed:{elapsed} Remaining:<{remaining}]')
283
-
284
-
285
- # Process files in parallel
298
+ # Download and process files in parallel
286
299
  with Pool(processes=args.processes) as pool:
287
- for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
288
- load_bar2.update(1)
289
- load_bar2.close()
290
- else:
300
+ for _ in pool.imap_unordered(process_file, tasks):
301
+ loading_bar.update(1)
302
+ loading_bar.close()
303
+ else: # Run in serial
291
304
  for file in recent_files:
292
305
  local_path = f"tmp/{file.split('/')[-1]}"
293
- download_file((bucket_name, file, local_path))
294
- crop_reproject((local_path, output_path))
306
+ process_file((bucket_name, file, local_path))
295
307
  loading_bar.update(1)
296
308
  loading_bar.close()
297
309
 
298
-
299
-
300
310
  # Remove temporary directory
301
311
  shutil.rmtree('tmp/')
302
312
 
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.7
3
+ Version: 1.0.9
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -0,0 +1,8 @@
1
+ goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
+ goesgcp/main.py,sha256=Rk_VWU7Xg6WZWqG0SRBikUjcinMX6risIHBnv6KiKpA,11188
3
+ goesgcp-1.0.9.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
+ goesgcp-1.0.9.dist-info/METADATA,sha256=DrTSCYr4w0CWovIn9Xd2O2tV1pleZu-GNcKnG9CmgZw,2993
5
+ goesgcp-1.0.9.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
6
+ goesgcp-1.0.9.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
+ goesgcp-1.0.9.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
+ goesgcp-1.0.9.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (75.6.0)
2
+ Generator: setuptools (75.7.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,8 +0,0 @@
1
- goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
- goesgcp/main.py,sha256=zCrAfAjc9Vzs9-7klATCCp6hhe1w4tsu_FiMBuBAafc,10893
3
- goesgcp-1.0.7.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
- goesgcp-1.0.7.dist-info/METADATA,sha256=eXzz34_yexZ4zx4EmaDRrpwiHFoBtJBzJ_hnVLQD-4Q,2993
5
- goesgcp-1.0.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
- goesgcp-1.0.7.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
- goesgcp-1.0.7.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
- goesgcp-1.0.7.dist-info/RECORD,,