goesgcp 1.0.7__py3-none-any.whl → 1.0.9__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goesgcp/main.py +49 -39
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/METADATA +1 -1
- goesgcp-1.0.9.dist-info/RECORD +8 -0
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/WHEEL +1 -1
- goesgcp-1.0.7.dist-info/RECORD +0 -8
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/LICENSE +0 -0
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/entry_points.txt +0 -0
- {goesgcp-1.0.7.dist-info → goesgcp-1.0.9.dist-info}/top_level.txt +0 -0
goesgcp/main.py
CHANGED
|
@@ -1,16 +1,16 @@
|
|
|
1
1
|
import pathlib
|
|
2
2
|
import shutil
|
|
3
|
+
import time
|
|
3
4
|
import xarray as xr
|
|
4
5
|
import argparse
|
|
5
6
|
import sys
|
|
6
7
|
import tqdm
|
|
7
8
|
from distutils.util import strtobool
|
|
8
|
-
from concurrent.futures import ThreadPoolExecutor
|
|
9
9
|
from multiprocessing import Pool
|
|
10
10
|
from google.cloud import storage
|
|
11
11
|
from datetime import datetime, timedelta, timezone
|
|
12
12
|
from pyproj import CRS, Transformer
|
|
13
|
-
|
|
13
|
+
from google.api_core.exceptions import GoogleAPIError
|
|
14
14
|
|
|
15
15
|
|
|
16
16
|
def list_blobs(connection, bucket_name, prefix):
|
|
@@ -21,6 +21,7 @@ def list_blobs(connection, bucket_name, prefix):
|
|
|
21
21
|
bucket = connection.bucket(bucket_name)
|
|
22
22
|
|
|
23
23
|
blobs = bucket.list_blobs(prefix=prefix)
|
|
24
|
+
|
|
24
25
|
return blobs
|
|
25
26
|
|
|
26
27
|
def get_directory_prefix(year, julian_day, hour):
|
|
@@ -44,6 +45,8 @@ def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files):
|
|
|
44
45
|
while len(files) < min_files:
|
|
45
46
|
year = current_time.year
|
|
46
47
|
julian_day = current_time.timetuple().tm_yday # Get the Julian day
|
|
48
|
+
# Add 3 digits to the Julian day
|
|
49
|
+
julian_day = str(julian_day).zfill(3)
|
|
47
50
|
hour = current_time.hour
|
|
48
51
|
|
|
49
52
|
# Generate the directory prefix for the current date and time
|
|
@@ -75,7 +78,7 @@ def crop_reproject(args):
|
|
|
75
78
|
file, output = args
|
|
76
79
|
|
|
77
80
|
# Open the file
|
|
78
|
-
ds = xr.open_dataset(file, engine=
|
|
81
|
+
ds = xr.open_dataset(file, engine="netcdf4")
|
|
79
82
|
|
|
80
83
|
# Select only var_name and goes_imager_projection
|
|
81
84
|
ds = ds[[var_name, "goes_imager_projection"]]
|
|
@@ -154,21 +157,43 @@ def crop_reproject(args):
|
|
|
154
157
|
return
|
|
155
158
|
|
|
156
159
|
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
"""Downloads a file from a GCP bucket."""
|
|
160
|
+
def process_file(args):
|
|
161
|
+
""" Downloads and processes a file in parallel. """
|
|
160
162
|
|
|
161
163
|
bucket_name, blob_name, local_path = args
|
|
162
164
|
|
|
163
|
-
#
|
|
164
|
-
|
|
165
|
-
|
|
165
|
+
# Download options
|
|
166
|
+
retries = 5
|
|
167
|
+
attempt = 0
|
|
168
|
+
|
|
169
|
+
while attempt < retries:
|
|
170
|
+
try:
|
|
171
|
+
# Connect to the bucket
|
|
172
|
+
bucket = storage_client.bucket(bucket_name)
|
|
173
|
+
blob = bucket.blob(blob_name)
|
|
166
174
|
|
|
167
|
-
|
|
168
|
-
|
|
175
|
+
# Download the file
|
|
176
|
+
blob.download_to_filename(local_path, timeout=120)
|
|
177
|
+
break # Exit the loop if the download is successful
|
|
178
|
+
except (GoogleAPIError, Exception) as e: # Catch any exception
|
|
179
|
+
attempt += 1
|
|
180
|
+
if attempt < retries:
|
|
181
|
+
time.sleep(2 ** attempt) # Backoff exponencial
|
|
182
|
+
else:
|
|
183
|
+
# Log the error to a file
|
|
184
|
+
with open('fail.log', 'a') as log_file:
|
|
185
|
+
log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
|
|
186
|
+
|
|
187
|
+
# Crop the file
|
|
188
|
+
crop_reproject((local_path, output_path))
|
|
189
|
+
|
|
190
|
+
# Remove the local file
|
|
191
|
+
pathlib.Path(local_path).unlink()
|
|
169
192
|
|
|
170
193
|
|
|
171
194
|
def main():
|
|
195
|
+
''' Main function to download and process GOES-16 files. '''
|
|
196
|
+
|
|
172
197
|
|
|
173
198
|
global output_path, var_name, \
|
|
174
199
|
lat_min, lat_max, lon_min, lon_max, \
|
|
@@ -212,9 +237,9 @@ def main():
|
|
|
212
237
|
# Parse arguments
|
|
213
238
|
args = parser.parse_args()
|
|
214
239
|
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
240
|
+
if len(sys.argv) == 1:
|
|
241
|
+
parser.print_help(sys.stderr)
|
|
242
|
+
sys.exit(1)
|
|
218
243
|
|
|
219
244
|
# Set global variables
|
|
220
245
|
output_path = args.output
|
|
@@ -261,42 +286,27 @@ def main():
|
|
|
261
286
|
pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
|
|
262
287
|
|
|
263
288
|
# Download files
|
|
264
|
-
print(f"Downloading {len(recent_files)} files...")
|
|
289
|
+
print(f"GOESGCP: Downloading and processing {len(recent_files)} files...")
|
|
265
290
|
loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
|
|
266
291
|
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
|
|
267
292
|
[Elapsed:{elapsed} Remaining:<{remaining}]')
|
|
268
293
|
|
|
269
|
-
if parallel:
|
|
270
|
-
#
|
|
271
|
-
|
|
272
|
-
for file in recent_files:
|
|
273
|
-
local_path = f"tmp/{file.split('/')[-1]}"
|
|
274
|
-
executor.submit(download_file, (bucket_name, file, local_path))
|
|
275
|
-
loading_bar.update(1)
|
|
276
|
-
loading_bar.close()
|
|
294
|
+
if parallel: # Run in parallel
|
|
295
|
+
# Create a list of tasks
|
|
296
|
+
tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in recent_files]
|
|
277
297
|
|
|
278
|
-
#
|
|
279
|
-
print(f"\nProcessing {len(recent_files)} files...")
|
|
280
|
-
load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
|
|
281
|
-
bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
|
|
282
|
-
[Elapsed:{elapsed} Remaining:<{remaining}]')
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
# Process files in parallel
|
|
298
|
+
# Download files in parallel
|
|
286
299
|
with Pool(processes=args.processes) as pool:
|
|
287
|
-
for _ in pool.imap_unordered(
|
|
288
|
-
|
|
289
|
-
|
|
290
|
-
else:
|
|
300
|
+
for _ in pool.imap_unordered(process_file, tasks):
|
|
301
|
+
loading_bar.update(1)
|
|
302
|
+
loading_bar.close()
|
|
303
|
+
else: # Run in serial
|
|
291
304
|
for file in recent_files:
|
|
292
305
|
local_path = f"tmp/{file.split('/')[-1]}"
|
|
293
|
-
|
|
294
|
-
crop_reproject((local_path, output_path))
|
|
306
|
+
process_file((bucket_name, file, local_path))
|
|
295
307
|
loading_bar.update(1)
|
|
296
308
|
loading_bar.close()
|
|
297
309
|
|
|
298
|
-
|
|
299
|
-
|
|
300
310
|
# Remove temporary directory
|
|
301
311
|
shutil.rmtree('tmp/')
|
|
302
312
|
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
+
goesgcp/main.py,sha256=Rk_VWU7Xg6WZWqG0SRBikUjcinMX6risIHBnv6KiKpA,11188
|
|
3
|
+
goesgcp-1.0.9.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
+
goesgcp-1.0.9.dist-info/METADATA,sha256=DrTSCYr4w0CWovIn9Xd2O2tV1pleZu-GNcKnG9CmgZw,2993
|
|
5
|
+
goesgcp-1.0.9.dist-info/WHEEL,sha256=A3WOREP4zgxI0fKrHUG8DC8013e3dK3n7a6HDbcEIwE,91
|
|
6
|
+
goesgcp-1.0.9.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
+
goesgcp-1.0.9.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
+
goesgcp-1.0.9.dist-info/RECORD,,
|
goesgcp-1.0.7.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
-
goesgcp/main.py,sha256=zCrAfAjc9Vzs9-7klATCCp6hhe1w4tsu_FiMBuBAafc,10893
|
|
3
|
-
goesgcp-1.0.7.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
-
goesgcp-1.0.7.dist-info/METADATA,sha256=eXzz34_yexZ4zx4EmaDRrpwiHFoBtJBzJ_hnVLQD-4Q,2993
|
|
5
|
-
goesgcp-1.0.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
6
|
-
goesgcp-1.0.7.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
-
goesgcp-1.0.7.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
-
goesgcp-1.0.7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|