goesgcp-1.0.6-py3-none-any.whl → goesgcp-1.0.8-py3-none-any.whl
This diff compares the contents of two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- goesgcp/main.py +48 -30
- {goesgcp-1.0.6.dist-info → goesgcp-1.0.8.dist-info}/METADATA +1 -1
- goesgcp-1.0.8.dist-info/RECORD +8 -0
- goesgcp-1.0.6.dist-info/RECORD +0 -8
- {goesgcp-1.0.6.dist-info → goesgcp-1.0.8.dist-info}/LICENSE +0 -0
- {goesgcp-1.0.6.dist-info → goesgcp-1.0.8.dist-info}/WHEEL +0 -0
- {goesgcp-1.0.6.dist-info → goesgcp-1.0.8.dist-info}/entry_points.txt +0 -0
- {goesgcp-1.0.6.dist-info → goesgcp-1.0.8.dist-info}/top_level.txt +0 -0
goesgcp/main.py
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
import pathlib
|
|
2
2
|
import shutil
|
|
3
|
+
import time
|
|
3
4
|
import xarray as xr
|
|
4
5
|
import argparse
|
|
5
6
|
import sys
|
|
6
7
|
import tqdm
|
|
7
|
-
from
|
|
8
|
+
from distutils.util import strtobool
|
|
8
9
|
from multiprocessing import Pool
|
|
9
10
|
from google.cloud import storage
|
|
10
11
|
from datetime import datetime, timedelta, timezone
|
|
11
12
|
from pyproj import CRS, Transformer
|
|
12
|
-
|
|
13
|
+
from google.api_core.exceptions import GoogleAPIError
|
|
13
14
|
|
|
14
15
|
|
|
15
16
|
def list_blobs(connection, bucket_name, prefix):
|
|
@@ -74,7 +75,7 @@ def crop_reproject(args):
|
|
|
74
75
|
file, output = args
|
|
75
76
|
|
|
76
77
|
# Open the file
|
|
77
|
-
ds = xr.open_dataset(file, engine=
|
|
78
|
+
ds = xr.open_dataset(file, engine="netcdf4")
|
|
78
79
|
|
|
79
80
|
# Select only var_name and goes_imager_projection
|
|
80
81
|
ds = ds[[var_name, "goes_imager_projection"]]
|
|
@@ -153,21 +154,43 @@ def crop_reproject(args):
|
|
|
153
154
|
return
|
|
154
155
|
|
|
155
156
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
"""Downloads a file from a GCP bucket."""
|
|
157
|
+
def process_file(args):
|
|
158
|
+
""" Downloads and processes a file in parallel. """
|
|
159
159
|
|
|
160
160
|
bucket_name, blob_name, local_path = args
|
|
161
161
|
|
|
162
|
-
#
|
|
163
|
-
|
|
164
|
-
|
|
162
|
+
# Download options
|
|
163
|
+
retries = 5
|
|
164
|
+
attempt = 0
|
|
165
|
+
|
|
166
|
+
while attempt < retries:
|
|
167
|
+
try:
|
|
168
|
+
# Connect to the bucket
|
|
169
|
+
bucket = storage_client.bucket(bucket_name)
|
|
170
|
+
blob = bucket.blob(blob_name)
|
|
165
171
|
|
|
166
|
-
|
|
167
|
-
|
|
172
|
+
# Download the file
|
|
173
|
+
blob.download_to_filename(local_path, timeout=120)
|
|
174
|
+
break # Exit the loop if the download is successful
|
|
175
|
+
except (GoogleAPIError, Exception) as e: # Catch any exception
|
|
176
|
+
attempt += 1
|
|
177
|
+
if attempt < retries:
|
|
178
|
+
time.sleep(2 ** attempt) # Backoff exponencial
|
|
179
|
+
else:
|
|
180
|
+
# Log the error to a file
|
|
181
|
+
with open('fail.log', 'a') as log_file:
|
|
182
|
+
log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
|
|
183
|
+
|
|
184
|
+
# Crop the file
|
|
185
|
+
crop_reproject((local_path, output_path))
|
|
186
|
+
|
|
187
|
+
# Remove the local file
|
|
188
|
+
pathlib.Path(local_path).unlink()
|
|
168
189
|
|
|
169
190
|
|
|
170
191
|
def main():
|
|
192
|
+
''' Main function to download and process GOES-16 files. '''
|
|
193
|
+
|
|
171
194
|
|
|
172
195
|
global output_path, var_name, \
|
|
173
196
|
lat_min, lat_max, lon_min, lon_max, \
|
|
@@ -204,7 +227,7 @@ def main():
|
|
|
204
227
|
parser.add_argument('--output', type=str, default='output/', help='Path for saving output files')
|
|
205
228
|
|
|
206
229
|
# Other settings
|
|
207
|
-
parser.add_argument('--parallel', type=bool, default=True, help='Use parallel processing')
|
|
230
|
+
parser.add_argument('--parallel', type=lambda x: bool(strtobool(x)), default=True, help='Use parallel processing')
|
|
208
231
|
parser.add_argument('--processes', type=int, default=4, help='Number of processes for parallel execution')
|
|
209
232
|
parser.add_argument('--max_attempts', type=int, default=3, help='Number of attempts to download a file')
|
|
210
233
|
|
|
@@ -260,31 +283,26 @@ def main():
|
|
|
260
283
|
pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
|
|
261
284
|
|
|
262
285
|
# Download files
|
|
263
|
-
print(f"Downloading {len(recent_files)} files...")
|
|
286
|
+
print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
|
|
264
287
|
loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
|
|
265
288
|
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
|
|
266
289
|
[Elapsed:{elapsed} Remaining:<{remaining}]')
|
|
267
290
|
|
|
268
|
-
|
|
269
|
-
|
|
291
|
+
if parallel: # Run in parallel
|
|
292
|
+
# Create a list of tasks
|
|
293
|
+
tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
|
|
294
|
+
|
|
295
|
+
# Download files in parallel
|
|
296
|
+
with Pool(processes=args.processes) as pool:
|
|
297
|
+
for _ in pool.imap_unordered(process_file, tasks):
|
|
298
|
+
loading_bar.update(1)
|
|
299
|
+
loading_bar.close()
|
|
300
|
+
else: # Run in serial
|
|
270
301
|
for file in recent_files:
|
|
271
302
|
local_path = f"tmp/{file.split('/')[-1]}"
|
|
272
|
-
|
|
303
|
+
process_file((bucket_name, file, local_path))
|
|
273
304
|
loading_bar.update(1)
|
|
274
|
-
|
|
275
|
-
|
|
276
|
-
# Process files
|
|
277
|
-
print(f"\nProcessing {len(recent_files)} files...")
|
|
278
|
-
load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
|
|
279
|
-
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
|
|
280
|
-
[Elapsed:{elapsed} Remaining:<{remaining}]')
|
|
281
|
-
|
|
282
|
-
|
|
283
|
-
# Process files in parallel
|
|
284
|
-
with Pool(processes=args.processes) as pool:
|
|
285
|
-
for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
|
|
286
|
-
load_bar2.update(1)
|
|
287
|
-
load_bar2.close()
|
|
305
|
+
loading_bar.close()
|
|
288
306
|
|
|
289
307
|
# Remove temporary directory
|
|
290
308
|
shutil.rmtree('tmp/')
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
+
goesgcp/main.py,sha256=_7QyMp7MRfAvCb5ChqTc2dyeyQwc5ftH5nJJz6HiD4Y,11100
|
|
3
|
+
goesgcp-1.0.8.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
+
goesgcp-1.0.8.dist-info/METADATA,sha256=IlkX413bUXozaKP2s65cj8aq2HbAPRbdFNEkPwiaA0o,2993
|
|
5
|
+
goesgcp-1.0.8.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
6
|
+
goesgcp-1.0.8.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
+
goesgcp-1.0.8.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
+
goesgcp-1.0.8.dist-info/RECORD,,
|
goesgcp-1.0.6.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
-
goesgcp/main.py,sha256=3cvmzeFyMh7smI3jkMnY0PUrird9i3lntkFt9c_xwZo,10462
|
|
3
|
-
goesgcp-1.0.6.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
-
goesgcp-1.0.6.dist-info/METADATA,sha256=-3huDGZYV4-ZFxmKQcU-1avXQJWPQW_RrIQ841XrlGI,2993
|
|
5
|
-
goesgcp-1.0.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
6
|
-
goesgcp-1.0.6.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
-
goesgcp-1.0.6.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
-
goesgcp-1.0.6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|