goesgcp 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goesgcp/main.py +53 -46
- {goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/METADATA +3 -3
- goesgcp-2.0.2.dist-info/RECORD +8 -0
- goesgcp-2.0.1.dist-info/RECORD +0 -8
- {goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/LICENSE +0 -0
- {goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/WHEEL +0 -0
- {goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/entry_points.txt +0 -0
- {goesgcp-2.0.1.dist-info → goesgcp-2.0.2.dist-info}/top_level.txt +0 -0
goesgcp/main.py
CHANGED
|
@@ -13,6 +13,9 @@ from datetime import datetime, timedelta, timezone
|
|
|
13
13
|
from pyproj import CRS, Transformer
|
|
14
14
|
from google.api_core.exceptions import GoogleAPIError
|
|
15
15
|
|
|
16
|
+
import warnings
|
|
17
|
+
warnings.filterwarnings('ignore')
|
|
18
|
+
|
|
16
19
|
|
|
17
20
|
def list_blobs(connection, bucket_name, prefix):
|
|
18
21
|
"""
|
|
@@ -54,11 +57,11 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
|
|
|
54
57
|
files_metadata = []
|
|
55
58
|
|
|
56
59
|
# Generate the list of dates from start to end
|
|
57
|
-
|
|
58
|
-
while
|
|
59
|
-
year =
|
|
60
|
-
julian_day = str(
|
|
61
|
-
hour =
|
|
60
|
+
temp = start
|
|
61
|
+
while temp <= end:
|
|
62
|
+
year = temp.year
|
|
63
|
+
julian_day = str(temp.timetuple().tm_yday).zfill(3) # Julian day
|
|
64
|
+
hour = temp.hour
|
|
62
65
|
|
|
63
66
|
# Generate the directory prefix
|
|
64
67
|
prefix = f"{base_prefix}/{get_directory_prefix(year, julian_day, hour)}"
|
|
@@ -71,26 +74,27 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
|
|
|
71
74
|
if pattern in blob.name:
|
|
72
75
|
files_metadata.append({
|
|
73
76
|
'file_name': blob.name,
|
|
74
|
-
'last_modified': blob.updated
|
|
75
77
|
})
|
|
76
78
|
|
|
77
79
|
# Move to the next hour
|
|
78
|
-
|
|
80
|
+
temp += timedelta(hours=1)
|
|
79
81
|
|
|
80
82
|
# Create a DataFrame from the list of files
|
|
81
83
|
df = pd.DataFrame(files_metadata)
|
|
82
84
|
|
|
85
|
+
# Transform file_name to datetime
|
|
86
|
+
df['last_modified'] = pd.to_datetime(df['file_name'].str.extract(r'(\d{4}\d{3}\d{2}\d{2})').squeeze(), format='%Y%j%H%M')
|
|
87
|
+
|
|
88
|
+
|
|
83
89
|
if df.empty:
|
|
84
90
|
print("No files found matching the pattern.")
|
|
85
91
|
return pd.DataFrame()
|
|
86
92
|
|
|
87
93
|
# Ensure 'last_modified' is in the correct datetime format without timezone
|
|
88
|
-
df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize(
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
# Filter the DataFrame based on the date range
|
|
93
|
-
df = df[(df['last_modified'] >= start) & (df['last_modified'] < end)]
|
|
94
|
+
df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize('UTC')
|
|
95
|
+
|
|
96
|
+
# Filter the DataFrame based on the date range (inclusive)
|
|
97
|
+
df = df[(df['last_modified'] >= start) & (df['last_modified'] <= end)]
|
|
94
98
|
|
|
95
99
|
# Filter the DataFrame based on the hour range
|
|
96
100
|
df['hour'] = df['last_modified'].dt.hour
|
|
@@ -153,13 +157,22 @@ def crop_reproject(args):
|
|
|
153
157
|
Crops and reprojects a GOES-16 file to EPSG:4326.
|
|
154
158
|
"""
|
|
155
159
|
|
|
156
|
-
file, output = args
|
|
160
|
+
file, output, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format = args
|
|
157
161
|
|
|
158
162
|
# Open the file
|
|
159
163
|
ds = xr.open_dataset(file, engine="netcdf4")
|
|
160
164
|
|
|
165
|
+
if var_name is None:
|
|
166
|
+
# Get all variables that are 2D
|
|
167
|
+
var_names = [var for var in ds.data_vars if len(ds[var].dims) == 2]
|
|
168
|
+
|
|
169
|
+
# Remove DQF variables
|
|
170
|
+
var_names = [var for var in var_names if 'DQF' not in var]
|
|
171
|
+
else:
|
|
172
|
+
var_names = [var_name]
|
|
173
|
+
|
|
161
174
|
# Select only var_name and goes_imager_projection
|
|
162
|
-
ds = ds[[
|
|
175
|
+
ds = ds[var_names + ["goes_imager_projection"]]
|
|
163
176
|
|
|
164
177
|
# Get projection
|
|
165
178
|
sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
|
|
@@ -215,14 +228,13 @@ def crop_reproject(args):
|
|
|
215
228
|
ds = ds.rename({"x": "lon", "y": "lat"})
|
|
216
229
|
|
|
217
230
|
# Add resolution to attributes
|
|
218
|
-
|
|
231
|
+
for var in var_names:
|
|
232
|
+
ds[var].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
|
|
233
|
+
ds[var].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
|
|
219
234
|
|
|
220
235
|
# Crop using lat/lon coordinates, in parallel
|
|
221
236
|
ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)
|
|
222
237
|
|
|
223
|
-
# Add comments
|
|
224
|
-
ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
|
|
225
|
-
|
|
226
238
|
# Add global metadata comments
|
|
227
239
|
ds.attrs['comments'] = "Data processed by goesgcp, author: Helvecio B. L. Neto (helvecioblneto@gmail.com)"
|
|
228
240
|
|
|
@@ -249,20 +261,18 @@ def crop_reproject(args):
|
|
|
249
261
|
output_file = f"{output_directory}{file.split('/')[-1]}"
|
|
250
262
|
ds.to_netcdf(output_file, mode='w', format='NETCDF4_CLASSIC')
|
|
251
263
|
|
|
252
|
-
# Fechar o dataset
|
|
253
264
|
ds.close()
|
|
254
|
-
return
|
|
255
265
|
|
|
256
266
|
|
|
257
267
|
def process_file(args):
|
|
258
|
-
"""
|
|
259
|
-
|
|
260
|
-
|
|
268
|
+
"""
|
|
269
|
+
Downloads and processes a GOES-16 file.
|
|
270
|
+
"""
|
|
271
|
+
|
|
272
|
+
bucket_name, blob_name, local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, \
|
|
273
|
+
save_format, retries = args
|
|
261
274
|
|
|
262
|
-
# Download options
|
|
263
|
-
retries = 5
|
|
264
275
|
attempt = 0
|
|
265
|
-
|
|
266
276
|
while attempt < retries:
|
|
267
277
|
try:
|
|
268
278
|
# Connect to the bucket
|
|
@@ -282,31 +292,28 @@ def process_file(args):
|
|
|
282
292
|
log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
|
|
283
293
|
|
|
284
294
|
# Crop the file
|
|
285
|
-
crop_reproject((local_path, output_path))
|
|
295
|
+
crop_reproject((local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format))
|
|
286
296
|
|
|
287
297
|
# Remove the local file
|
|
288
298
|
pathlib.Path(local_path).unlink()
|
|
289
299
|
|
|
300
|
+
# Create connection
|
|
301
|
+
storage_client = storage.Client.create_anonymous_client()
|
|
290
302
|
|
|
291
303
|
def main():
|
|
292
304
|
''' Main function to download and process GOES-16 files. '''
|
|
293
305
|
|
|
294
|
-
|
|
295
|
-
global output_path, var_name, \
|
|
296
|
-
lat_min, lat_max, lon_min, lon_max, \
|
|
297
|
-
max_attempts, parallel, recent, resolution, storage_client, \
|
|
298
|
-
satellite, product, op_mode, channel, save_format
|
|
299
|
-
|
|
300
306
|
epilog = """
|
|
301
307
|
Example usage:
|
|
302
308
|
|
|
303
|
-
- To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product
|
|
309
|
+
- To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product,
|
|
310
|
+
change resolution to 0.045, and crop the files between latitudes -35 and 5 and longitudes -80 and -30:
|
|
304
311
|
|
|
305
|
-
goesgcp --satellite
|
|
312
|
+
goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
|
|
306
313
|
|
|
307
314
|
- To download files from the GOES-16 satellite for the ABI-L2-CMIPF product between 2022-12-15 and 2022-12-20:
|
|
308
315
|
|
|
309
|
-
goesgcp --start '2022-12-15
|
|
316
|
+
goesgcp --satellite goes-16 --product ABI-L2-CMIPF --start '2022-12-15 09:00:00' --end '2022-12-15 09:50:00' --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
|
|
310
317
|
|
|
311
318
|
"""
|
|
312
319
|
|
|
@@ -329,14 +336,14 @@ def main():
|
|
|
329
336
|
]
|
|
330
337
|
|
|
331
338
|
# Set arguments
|
|
332
|
-
parser = argparse.ArgumentParser(description='
|
|
339
|
+
parser = argparse.ArgumentParser(description='Download and process GOES Satellite data files from GCP.',
|
|
333
340
|
epilog=epilog,
|
|
334
341
|
formatter_class=argparse.RawDescriptionHelpFormatter)
|
|
335
342
|
|
|
336
343
|
# Satellite and product settings
|
|
337
344
|
parser.add_argument('--satellite', type=str, default='goes-16', choices=['goes-16', 'goes-18'], help='Name of the satellite (e.g., goes16)')
|
|
338
|
-
parser.add_argument('--product', type=str, default='ABI-L2-
|
|
339
|
-
parser.add_argument('--var_name', type=str, default=
|
|
345
|
+
parser.add_argument('--product', type=str, default='ABI-L2-CMIPF', help='Name of the satellite product', choices=product_names)
|
|
346
|
+
parser.add_argument('--var_name', type=str, default=None, help='Variable name to extract (e.g., CMI)')
|
|
340
347
|
parser.add_argument('--channel', type=int, default=13, help='Channel to use (e.g., 13)')
|
|
341
348
|
parser.add_argument('--op_mode', type=str, default='M6C', help='Operational mode to use (e.g., M6C)')
|
|
342
349
|
|
|
@@ -407,9 +414,6 @@ def main():
|
|
|
407
414
|
# Create output directory
|
|
408
415
|
pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
|
|
409
416
|
|
|
410
|
-
# Create connection
|
|
411
|
-
storage_client = storage.Client.create_anonymous_client()
|
|
412
|
-
|
|
413
417
|
# Check if the bucket exists
|
|
414
418
|
try:
|
|
415
419
|
storage_client.get_bucket(bucket_name)
|
|
@@ -445,7 +449,9 @@ def main():
|
|
|
445
449
|
|
|
446
450
|
if parallel: # Run in parallel
|
|
447
451
|
# Create a list of tasks
|
|
448
|
-
tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}"
|
|
452
|
+
tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}", output_path, var_name,
|
|
453
|
+
lat_min, lat_max, lon_min, lon_max, resolution,
|
|
454
|
+
save_format, max_attempts) for file in files_list]
|
|
449
455
|
|
|
450
456
|
# Download files in parallel
|
|
451
457
|
with Pool(processes=args.processes) as pool:
|
|
@@ -455,11 +461,12 @@ def main():
|
|
|
455
461
|
else: # Run in serial
|
|
456
462
|
for file in files_list:
|
|
457
463
|
local_path = f"tmp/{file.split('/')[-1]}"
|
|
458
|
-
process_file((bucket_name, file, local_path
|
|
464
|
+
process_file((bucket_name, file, local_path, output_path, var_name,
|
|
465
|
+
lat_min, lat_max, lon_min, lon_max, resolution,
|
|
466
|
+
save_format, max_attempts))
|
|
459
467
|
loading_bar.update(1)
|
|
460
468
|
loading_bar.close()
|
|
461
469
|
|
|
462
|
-
# Remove temporary directory
|
|
463
470
|
shutil.rmtree('tmp/')
|
|
464
471
|
|
|
465
472
|
if __name__ == '__main__':
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.2
|
|
2
2
|
Name: goesgcp
|
|
3
|
-
Version: 2.0.1
|
|
3
|
+
Version: 2.0.2
|
|
4
4
|
Summary: A package to download and process GOES-16/17 data
|
|
5
5
|
Home-page: https://github.com/helvecioneto/goesgcp
|
|
6
6
|
Author: Helvecio B. L. Neto
|
|
@@ -89,7 +89,7 @@ goesgcp [OPTIONS]
|
|
|
89
89
|
| `--save_format` | Format for saving output files (default: `by_date`). |
|
|
90
90
|
|
|
91
91
|
#### Available GOES Products
|
|
92
|
-
A comprehensive list of available GOES products can be found at the following link: [https://
|
|
92
|
+
A comprehensive list of available GOES products can be found at the following link: [https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16](https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16)
|
|
93
93
|
|
|
94
94
|
### Examples
|
|
95
95
|
|
|
@@ -97,7 +97,7 @@ A comprehensive list of available GOES products can be found at the following li
|
|
|
97
97
|
In the example below, the command downloads the 3 most recent files from the GOES-16 satellite for the product ABI-L2-CMIPF. It focuses on the variable CMI (Cloud and Moisture Imagery) from channel 13, which is commonly used for infrared observations. The downloaded files are saved to the specified output directory output/.
|
|
98
98
|
|
|
99
99
|
```bash
|
|
100
|
-
goesgcp --satellite goes-16 --product ABI-L2-CMIPF --
|
|
100
|
+
goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --output "output/"
|
|
101
101
|
```
|
|
102
102
|
|
|
103
103
|
#### Download Data for a Specific Time Range
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
+
goesgcp/main.py,sha256=tDkn46WP0Nwet_3EwW6Rx-A-ASipuc8X251qZOpeeMU,18851
|
|
3
|
+
goesgcp-2.0.2.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
+
goesgcp-2.0.2.dist-info/METADATA,sha256=mab7nr7wRRptYsLU_vZykjWZKqXyIT72aV5RiL-CBmQ,6119
|
|
5
|
+
goesgcp-2.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
6
|
+
goesgcp-2.0.2.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
+
goesgcp-2.0.2.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
+
goesgcp-2.0.2.dist-info/RECORD,,
|
goesgcp-2.0.1.dist-info/RECORD
DELETED
|
@@ -1,8 +0,0 @@
|
|
|
1
|
-
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
-
goesgcp/main.py,sha256=F2Z0J4DVF2oeejlIN6WEXi3K8eueZxGWIH5JulEWhXE,18178
|
|
3
|
-
goesgcp-2.0.1.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
-
goesgcp-2.0.1.dist-info/METADATA,sha256=a1MA1uUCdoG5Ihpr8szCYyVpzjCkmSePv0EnNZTKaLo,6149
|
|
5
|
-
goesgcp-2.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
|
|
6
|
-
goesgcp-2.0.1.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
-
goesgcp-2.0.1.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
-
goesgcp-2.0.1.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|