goesgcp 1.0.7__py3-none-any.whl → 1.0.8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goesgcp/main.py +46 -39
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.8.dist-info}/METADATA +1 -1
- goesgcp-1.0.8.dist-info/RECORD +8 -0
- goesgcp-1.0.7.dist-info/RECORD +0 -8
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.8.dist-info}/LICENSE +0 -0
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.8.dist-info}/WHEEL +0 -0
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.8.dist-info}/entry_points.txt +0 -0
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.8.dist-info}/top_level.txt +0 -0
goesgcp/main.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import pathlib
|
|
2
2
|
import shutil
|
|
3
|
+
import time
|
|
3
4
|
import xarray as xr
|
|
4
5
|
import argparse
|
|
5
6
|
import sys
|
|
6
7
|
import tqdm
|
|
7
8
|
from distutils.util import strtobool
|
|
8
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
9
9
|
from multiprocessing import Pool
|
|
10
10
|
from google.cloud import storage
|
|
11
11
|
from datetime import datetime, timedelta, timezone
|
|
12
12
|
from pyproj import CRS, Transformer
|
|
13
|
-
|
|
13
|
+
from google.api_core.exceptions import GoogleAPIError
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def list_blobs(connection, bucket_name, prefix):
|
|
@@ -75,7 +75,7 @@ def crop_reproject(args):
|
|
|
75
75
|
file, output = args
|
|
76
76
|
|
|
77
77
|
# Open the file
|
|
78
|
-
ds = xr.open_dataset(file, engine=
|
|
78
|
+
ds = xr.open_dataset(file, engine="netcdf4")
|
|
79
79
|
|
|
80
80
|
# Select only var_name and goes_imager_projection
|
|
81
81
|
ds = ds[[var_name, "goes_imager_projection"]]
|
|
@@ -154,21 +154,43 @@ def crop_reproject(args):
|
|
|
154
154
|
return
|
|
155
155
|
|
|
156
156
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
"""Downloads a file from a GCP bucket."""
|
|
157
|
+
def process_file(args):
|
|
158
|
+
""" Downloads and processes a file in parallel. """
|
|
160
159
|
|
|
161
160
|
bucket_name, blob_name, local_path = args
|
|
162
161
|
|
|
163
|
-
#
|
|
164
|
-
|
|
165
|
-
|
|
162
|
+
# Download options
|
|
163
|
+
retries = 5
|
|
164
|
+
attempt = 0
|
|
165
|
+
|
|
166
|
+
while attempt < retries:
|
|
167
|
+
try:
|
|
168
|
+
# Connect to the bucket
|
|
169
|
+
bucket = storage_client.bucket(bucket_name)
|
|
170
|
+
blob = bucket.blob(blob_name)
|
|
171
|
+
|
|
172
|
+
# Download the file
|
|
173
|
+
blob.download_to_filename(local_path, timeout=120)
|
|
174
|
+
break # Exit the loop if the download is successful
|
|
175
|
+
except (GoogleAPIError, Exception) as e: # Catch any exception
|
|
176
|
+
attempt += 1
|
|
177
|
+
if attempt < retries:
|
|
178
|
+
time.sleep(2 ** attempt) # Backoff exponencial
|
|
179
|
+
else:
|
|
180
|
+
# Log the error to a file
|
|
181
|
+
with open('fail.log', 'a') as log_file:
|
|
182
|
+
log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
|
|
166
183
|
|
|
167
|
-
#
|
|
168
|
-
|
|
184
|
+
# Crop the file
|
|
185
|
+
crop_reproject((local_path, output_path))
|
|
186
|
+
|
|
187
|
+
# Remove the local file
|
|
188
|
+
pathlib.Path(local_path).unlink()
|
|
169
189
|
|
|
170
190
|
|
|
171
191
|
def main():
|
|
192
|
+
''' Main function to download and process GOES-16 files. '''
|
|
193
|
+
|
|
172
194
|
|
|
173
195
|
global output_path, var_name, \
|
|
174
196
|
lat_min, lat_max, lon_min, lon_max, \
|
|
@@ -212,9 +234,9 @@ def main():
|
|
|
212
234
|
# Parse arguments
|
|
213
235
|
args = parser.parse_args()
|
|
214
236
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
237
|
+
if len(sys.argv) == 1:
|
|
238
|
+
parser.print_help(sys.stderr)
|
|
239
|
+
sys.exit(1)
|
|
218
240
|
|
|
219
241
|
# Set global variables
|
|
220
242
|
output_path = args.output
|
|
@@ -261,42 +283,27 @@ def main():
|
|
|
261
283
|
pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
|
|
262
284
|
|
|
263
285
|
# Download files
|
|
264
|
-
print(f"Downloading {len(recent_files)} files...")
|
|
286
|
+
print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
|
|
265
287
|
loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
|
|
266
288
|
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
|
|
267
289
|
[Elapsed:{elapsed} Remaining:<{remaining}]')
|
|
268
290
|
|
|
269
|
-
if parallel:
|
|
270
|
-
#
|
|
271
|
-
|
|
272
|
-
for file in recent_files:
|
|
273
|
-
local_path = f"tmp/{file.split('/')[-1]}"
|
|
274
|
-
executor.submit(download_file, (bucket_name, file, local_path))
|
|
275
|
-
loading_bar.update(1)
|
|
276
|
-
loading_bar.close()
|
|
277
|
-
|
|
278
|
-
# Process files
|
|
279
|
-
print(f"\nProcessing {len(recent_files)} files...")
|
|
280
|
-
load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
|
|
281
|
-
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
|
|
282
|
-
[Elapsed:{elapsed} Remaining:<{remaining}]')
|
|
291
|
+
if parallel: # Run in parallel
|
|
292
|
+
# Create a list of tasks
|
|
293
|
+
tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
|
|
283
294
|
|
|
284
|
-
|
|
285
|
-
# Process files in parallel
|
|
295
|
+
# Download files in parallel
|
|
286
296
|
with Pool(processes=args.processes) as pool:
|
|
287
|
-
for _ in pool.imap_unordered(
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
else:
|
|
297
|
+
for _ in pool.imap_unordered(process_file, tasks):
|
|
298
|
+
loading_bar.update(1)
|
|
299
|
+
loading_bar.close()
|
|
300
|
+
else: # Run in serial
|
|
291
301
|
for file in recent_files:
|
|
292
302
|
local_path = f"tmp/{file.split('/')[-1]}"
|
|
293
|
-
|
|
294
|
-
crop_reproject((local_path, output_path))
|
|
303
|
+
process_file((bucket_name, file, local_path))
|
|
295
304
|
loading_bar.update(1)
|
|
296
305
|
loading_bar.close()
|
|
297
306
|
|
|
298
|
-
|
|
299
|
-
|
|
300
307
|
# Remove temporary directory
|
|
301
308
|
shutil.rmtree('tmp/')
|
|
302
309
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
+
goesgcp/main.py,sha256=_7QyMp7MRfAvCb5ChqTc2dyeyQwc5ftH5nJJz6HiD4Y,11100
|
|
3
|
+
goesgcp-1.0.8.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
+
goesgcp-1.0.8.dist-info/METADATA,sha256=IlkX413bUXozaKP2s65cj8aq2HbAPRbdFNEkPwiaA0o,2993
|
|
5
|
+
goesgcp-1.0.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
6
|
+
goesgcp-1.0.8.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
+
goesgcp-1.0.8.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
+
goesgcp-1.0.8.dist-info/RECORD,,
|
goesgcp-1.0.7.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
-
goesgcp/main.py,sha256=zCrAfAjc9Vzs9-7klATCCp6hhe1w4tsu_FiMBuBAafc,10893
|
|
3
|
-
goesgcp-1.0.7.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
-
goesgcp-1.0.7.dist-info/METADATA,sha256=eXzz34_yexZ4zx4EmaDRrpwiHFoBtJBzJ_hnVLQD-4Q,2993
|
|
5
|
-
goesgcp-1.0.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
6
|
-
goesgcp-1.0.7.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
-
goesgcp-1.0.7.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
-
goesgcp-1.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|