goesgcp 2.0.0__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
goesgcp/main.py CHANGED
@@ -13,6 +13,9 @@ from datetime import datetime, timedelta, timezone
13
13
  from pyproj import CRS, Transformer
14
14
  from google.api_core.exceptions import GoogleAPIError
15
15
 
16
+ import warnings
17
+ warnings.filterwarnings('ignore')
18
+
16
19
 
17
20
  def list_blobs(connection, bucket_name, prefix):
18
21
  """
@@ -54,11 +57,11 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
54
57
  files_metadata = []
55
58
 
56
59
  # Generate the list of dates from start to end
57
- current_time = start
58
- while current_time < end:
59
- year = current_time.year
60
- julian_day = str(current_time.timetuple().tm_yday).zfill(3) # Julian day
61
- hour = current_time.hour
60
+ temp = start
61
+ while temp <= end:
62
+ year = temp.year
63
+ julian_day = str(temp.timetuple().tm_yday).zfill(3) # Julian day
64
+ hour = temp.hour
62
65
 
63
66
  # Generate the directory prefix
64
67
  prefix = f"{base_prefix}/{get_directory_prefix(year, julian_day, hour)}"
@@ -71,26 +74,27 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
71
74
  if pattern in blob.name:
72
75
  files_metadata.append({
73
76
  'file_name': blob.name,
74
- 'last_modified': blob.updated
75
77
  })
76
78
 
77
79
  # Move to the next hour
78
- current_time += timedelta(hours=1)
80
+ temp += timedelta(hours=1)
79
81
 
80
82
  # Create a DataFrame from the list of files
81
83
  df = pd.DataFrame(files_metadata)
82
84
 
85
+ # Transform file_name to datetime
86
+ df['last_modified'] = pd.to_datetime(df['file_name'].str.extract(r'(\d{4}\d{3}\d{2}\d{2})').squeeze(), format='%Y%j%H%M')
87
+
88
+
83
89
  if df.empty:
84
90
  print("No files found matching the pattern.")
85
91
  return pd.DataFrame()
86
92
 
87
93
  # Ensure 'last_modified' is in the correct datetime format without timezone
88
- df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize(None)
89
- start = pd.to_datetime(start).tz_localize(None)
90
- end = pd.to_datetime(end).tz_localize(None)
91
-
92
- # Filter the DataFrame based on the date range
93
- df = df[(df['last_modified'] >= start) & (df['last_modified'] < end)]
94
+ df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize('UTC')
95
+
96
+ # Filter the DataFrame based on the date range (inclusive)
97
+ df = df[(df['last_modified'] >= start) & (df['last_modified'] <= end)]
94
98
 
95
99
  # Filter the DataFrame based on the hour range
96
100
  df['hour'] = df['last_modified'].dt.hour
@@ -153,13 +157,22 @@ def crop_reproject(args):
153
157
  Crops and reprojects a GOES-16 file to EPSG:4326.
154
158
  """
155
159
 
156
- file, output = args
160
+ file, output, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format = args
157
161
 
158
162
  # Open the file
159
163
  ds = xr.open_dataset(file, engine="netcdf4")
160
164
 
165
+ if var_name is None:
166
+ # Get all variables that are 2D
167
+ var_names = [var for var in ds.data_vars if len(ds[var].dims) == 2]
168
+
169
+ # Remove DQF variables
170
+ var_names = [var for var in var_names if 'DQF' not in var]
171
+ else:
172
+ var_names = [var_name]
173
+
161
174
  # Select only var_name and goes_imager_projection
162
- ds = ds[[var_name, "goes_imager_projection"]]
175
+ ds = ds[var_names + ["goes_imager_projection"]]
163
176
 
164
177
  # Get projection
165
178
  sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
@@ -215,14 +228,13 @@ def crop_reproject(args):
215
228
  ds = ds.rename({"x": "lon", "y": "lat"})
216
229
 
217
230
  # Add resolution to attributes
218
- ds[var_name].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
231
+ for var in var_names:
232
+ ds[var].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
233
+ ds[var].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
219
234
 
220
235
  # Crop using lat/lon coordinates, in parallel
221
236
  ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)
222
237
 
223
- # Add comments
224
- ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
225
-
226
238
  # Add global metadata comments
227
239
  ds.attrs['comments'] = "Data processed by goesgcp, author: Helvecio B. L. Neto (helvecioblneto@gmail.com)"
228
240
 
@@ -249,20 +261,18 @@ def crop_reproject(args):
249
261
  output_file = f"{output_directory}{file.split('/')[-1]}"
250
262
  ds.to_netcdf(output_file, mode='w', format='NETCDF4_CLASSIC')
251
263
 
252
- # Fechar o dataset
253
264
  ds.close()
254
- return
255
265
 
256
266
 
257
267
  def process_file(args):
258
- """ Downloads and processes a file in parallel. """
259
-
260
- bucket_name, blob_name, local_path = args
268
+ """
269
+ Downloads and processes a GOES-16 file.
270
+ """
271
+
272
+ bucket_name, blob_name, local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, \
273
+ save_format, retries = args
261
274
 
262
- # Download options
263
- retries = 5
264
275
  attempt = 0
265
-
266
276
  while attempt < retries:
267
277
  try:
268
278
  # Connect to the bucket
@@ -282,46 +292,59 @@ def process_file(args):
282
292
  log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
283
293
 
284
294
  # Crop the file
285
- crop_reproject((local_path, output_path))
295
+ crop_reproject((local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format))
286
296
 
287
297
  # Remove the local file
288
298
  pathlib.Path(local_path).unlink()
289
299
 
300
+ # Create connection
301
+ storage_client = storage.Client.create_anonymous_client()
290
302
 
291
303
  def main():
292
304
  ''' Main function to download and process GOES-16 files. '''
293
305
 
294
-
295
- global output_path, var_name, \
296
- lat_min, lat_max, lon_min, lon_max, \
297
- max_attempts, parallel, recent, resolution, storage_client, \
298
- satellite, product, domain, op_mode, channel, save_format
299
-
300
306
  epilog = """
301
307
  Example usage:
302
308
 
303
- - To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product:
309
+ - To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product,
310
+ change resolution to 0.045, and crop the files between latitudes -35 and 5 and longitudes -80 and -30:
304
311
 
305
- goesgcp --satellite goes16 --product ABI-L2-CMIP --recent 3"
312
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
306
313
 
307
314
  - To download files from the GOES-16 satellite for the ABI-L2-CMIPF product between 2022-12-15 and 2022-12-20:
308
315
 
309
- goesgcp --start '2022-12-15 00:00:00' --end '2022-12-20 10:00:00' --bt_hour 5 6 --save_format by_date --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
316
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --start '2022-12-15 09:00:00' --end '2022-12-15 09:50:00' --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
310
317
 
311
318
  """
312
319
 
320
+ product_names = [
321
+ "ABI-L1b-RadF", "ABI-L1b-RadC", "ABI-L1b-RadM", "ABI-L2-ACHAC", "ABI-L2-ACHAF", "ABI-L2-ACHAM",
322
+ "ABI-L2-ACHTF", "ABI-L2-ACHTM", "ABI-L2-ACMC", "ABI-L2-ACMF", "ABI-L2-ACMM", "ABI-L2-ACTPC",
323
+ "ABI-L2-ACTPF", "ABI-L2-ACTPM", "ABI-L2-ADPC", "ABI-L2-ADPF", "ABI-L2-ADPM", "ABI-L2-AICEF",
324
+ "ABI-L2-AITAF", "ABI-L2-AODC", "ABI-L2-AODF", "ABI-L2-BRFC", "ABI-L2-BRFF", "ABI-L2-BRFM",
325
+ "ABI-L2-CMIPC", "ABI-L2-CMIPF", "ABI-L2-CMIPM", "ABI-L2-CODC", "ABI-L2-CODF", "ABI-L2-CPSC",
326
+ "ABI-L2-CPSF", "ABI-L2-CPSM", "ABI-L2-CTPC", "ABI-L2-CTPF", "ABI-L2-DMWC", "ABI-L2-DMWF",
327
+ "ABI-L2-DMWM", "ABI-L2-DMWVC", "ABI-L2-DMWVF", "ABI-L2-DMWVF", "ABI-L2-DSIC", "ABI-L2-DSIF",
328
+ "ABI-L2-DSIM", "ABI-L2-DSRC", "ABI-L2-DSRF", "ABI-L2-DSRM", "ABI-L2-FDCC", "ABI-L2-FDCF",
329
+ "ABI-L2-FDCM", "ABI-L2-LSAC", "ABI-L2-LSAF", "ABI-L2-LSAM", "ABI-L2-LSTC", "ABI-L2-LSTF",
330
+ "ABI-L2-LSTM", "ABI-L2-LVMPC", "ABI-L2-LVMPF", "ABI-L2-LVMPM", "ABI-L2-LVTPC", "ABI-L2-LVTPF",
331
+ "ABI-L2-LVTPM", "ABI-L2-MCMIPC", "ABI-L2-MCMIPF", "ABI-L2-MCMIPM", "ABI-L2-RRQPEF",
332
+ "ABI-L2-RSRC", "ABI-L2-RSRF", "ABI-L2-SSTF", "ABI-L2-TPWC", "ABI-L2-TPWF", "ABI-L2-TPWM",
333
+ "ABI-L2-VAAF", "EXIS-L1b-SFEU", "EXIS-L1b-SFXR", "GLM-L2-LCFA", "MAG-L1b-GEOF", "SEIS-L1b-EHIS",
334
+ "SEIS-L1b-MPSH", "SEIS-L1b-MPSL", "SEIS-L1b-SGPS", "SUVI-L1b-Fe093", "SUVI-L1b-Fe131",
335
+ "SUVI-L1b-Fe171", "SUVI-L1b-Fe195", "SUVI-L1b-Fe284", "SUVI-L1b-He303"
336
+ ]
313
337
 
314
338
  # Set arguments
315
- parser = argparse.ArgumentParser(description='Converts GOES-16 L2 data to netCDF',
339
+ parser = argparse.ArgumentParser(description='Download and process GOES Satellite data files from GCP.',
316
340
  epilog=epilog,
317
341
  formatter_class=argparse.RawDescriptionHelpFormatter)
318
342
 
319
343
  # Satellite and product settings
320
- parser.add_argument('--satellite', type=str, default='goes-16', help='Name of the satellite (e.g., goes16)')
321
- parser.add_argument('--product', type=str, default='ABI-L2-CMIP', help='Name of the satellite product')
322
- parser.add_argument('--var_name', type=str, default='CMI', help='Variable name to extract (e.g., CMI)')
344
+ parser.add_argument('--satellite', type=str, default='goes-16', choices=['goes-16', 'goes-18'], help='Name of the satellite (e.g., goes16)')
345
+ parser.add_argument('--product', type=str, default='ABI-L2-CMIPF', help='Name of the satellite product', choices=product_names)
346
+ parser.add_argument('--var_name', type=str, default=None, help='Variable name to extract (e.g., CMI)')
323
347
  parser.add_argument('--channel', type=int, default=13, help='Channel to use (e.g., 13)')
324
- parser.add_argument('--domain', type=str, default='F', help='Domain to use (e.g., F or C)')
325
348
  parser.add_argument('--op_mode', type=str, default='M6C', help='Operational mode to use (e.g., M6C)')
326
349
 
327
350
  # Recent files settings
@@ -361,7 +384,6 @@ def main():
361
384
  output_path = args.output
362
385
  satellite = args.satellite
363
386
  product = args.product
364
- domain = args.domain
365
387
  op_mode = args.op_mode
366
388
  channel = str(args.channel).zfill(2)
367
389
  var_name = args.var_name
@@ -392,9 +414,6 @@ def main():
392
414
  # Create output directory
393
415
  pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
394
416
 
395
- # Create connection
396
- storage_client = storage.Client.create_anonymous_client()
397
-
398
417
  # Check if the bucket exists
399
418
  try:
400
419
  storage_client.get_bucket(bucket_name)
@@ -403,16 +422,16 @@ def main():
403
422
  sys.exit(1)
404
423
 
405
424
  # Set pattern for the files
406
- pattern = "OR_"+product+domain+"-"+op_mode+channel+"_G" + satellite[-2:]
425
+ pattern = "OR_"+product+"-"+op_mode+channel+"_G" + satellite[-2:]
407
426
 
408
427
  # Check operational mode if is recent or specific date
409
428
  if start and end:
410
429
  files_list = get_files_period(storage_client, bucket_name,
411
- product + domain, pattern, start, end,
430
+ product, pattern, start, end,
412
431
  bt_hour, bt_min, freq)
413
432
  else:
414
433
  # Get recent files
415
- files_list = get_recent_files(storage_client, bucket_name, product + domain, pattern, recent)
434
+ files_list = get_recent_files(storage_client, bucket_name, product, pattern, recent)
416
435
 
417
436
  # Check if any files were found
418
437
  if not files_list:
@@ -430,7 +449,9 @@ def main():
430
449
 
431
450
  if parallel: # Run in parallel
432
451
  # Create a list of tasks
433
- tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in files_list]
452
+ tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}", output_path, var_name,
453
+ lat_min, lat_max, lon_min, lon_max, resolution,
454
+ save_format, max_attempts) for file in files_list]
434
455
 
435
456
  # Download files in parallel
436
457
  with Pool(processes=args.processes) as pool:
@@ -440,11 +461,12 @@ def main():
440
461
  else: # Run in serial
441
462
  for file in files_list:
442
463
  local_path = f"tmp/{file.split('/')[-1]}"
443
- process_file((bucket_name, file, local_path))
464
+ process_file((bucket_name, file, local_path, output_path, var_name,
465
+ lat_min, lat_max, lon_min, lon_max, resolution,
466
+ save_format, max_attempts))
444
467
  loading_bar.update(1)
445
468
  loading_bar.close()
446
469
 
447
- # Remove temporary directory
448
470
  shutil.rmtree('tmp/')
449
471
 
450
472
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: goesgcp
3
- Version: 2.0.0
3
+ Version: 2.0.2
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -33,18 +33,32 @@ Dynamic: requires-dist
33
33
  Dynamic: summary
34
34
 
35
35
  # goesgcp
36
+ <!-- badges: start -->
37
+ [![pypi](https://badge.fury.io/py/goesgcp.svg)](https://pypi.python.org/pypi/goesgcp)
38
+ [![Downloads](https://img.shields.io/pypi/dm/goesgcp.svg)](https://pypi.python.org/pypi/goesgcp)
39
+ [![Contributors](https://img.shields.io/github/contributors/helvecioneto/goesgcp.svg)](https://github.com/helvecioneto/goesgcp/graphs/contributors)
40
+ [![License](https://img.shields.io/pypi/l/goesgcp.svg)](https://github.com/helvecioneto/goesgcp/blob/main/LICENSE)
41
+ <!-- badges: end -->
36
42
 
37
- goesgcp is a utility script for downloading and reprojecting GOES-R satellite data. The script uses the `google.cloud` library to download data from the Google Cloud Platform (GCP) and the `pyproj` library to reproject the data to EPSG:4326 and crop it to a specified bounding box.
38
43
 
44
+ `goesgcp` is a Python utility designed for downloading and reprojecting GOES-R satellite data. This script leverages the `google.cloud` library for accessing data from the Google Cloud Platform (GCP) and `pyproj` for reprojecting data to EPSG:4326, as well as cropping it to a user-defined bounding box.
45
+
46
+ ## Features
47
+
48
+ - **Download GOES-R satellite data**: Supports GOES-16 and GOES-17.
49
+ - **Reprojection and cropping**: Reprojects data to EPSG:4326 and crops to a specified bounding box.
50
+ - **Flexible command-line interface**: Customize download options, variables, channels, time range, and output format.
51
+ - **Efficient processing**: Handles large datasets with optimized performance.
39
52
 
40
53
  ## Installation
41
54
 
42
- You can install the necessary dependencies using `pip`:
55
+ Install the package and its dependencies via `pip`:
43
56
 
44
57
  ```bash
45
58
  pip install goesgcp
46
59
  ```
47
60
 
61
+
48
62
  ## Usage
49
63
 
50
64
  ### Command-Line Arguments
@@ -74,19 +88,31 @@ goesgcp [OPTIONS]
74
88
  | `--bt_minute` | Minute of the hour to download data (default: [0, 15, 30, 45]). |
75
89
  | `--save_format` | Format for saving output files (default: `by_date`). |
76
90
 
91
+ #### Available GOES Products
92
+ A comprehensive list of available GOES products can be found at the following link: [https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16](https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16)
93
+
77
94
  ### Examples
78
95
 
79
- To download most 3 recent data for the GOES-16 satellite, ABI-L2-CMIPF product, variable CMI, and channel 13, run the following command:
96
+ #### Download Recent Data
97
+ In the example below, the command downloads the 3 most recent files from the GOES-16 satellite for the product ABI-L2-CMIPF. Because `--channel` is not given, the default channel 13 is used, which is commonly used for infrared observations; the variable of interest is CMI (Cloud and Moisture Imagery). The resulting files are saved to the specified output directory `output/`.
80
98
 
81
99
  ```bash
82
- goesgcp --satellite goes16 --product ABI-L2-CMIPF --var_name CMI --channel 13 --recent 3 --output "output/"
100
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --output "output/"
83
101
  ```
84
102
 
85
- To download data for a specific date range, use the `--start` and `--end` options:
103
+ #### Download Data for a Specific Time Range
104
+ This command retrieves GOES-16 satellite data for the product ABI-L2-CMIPF within the date range 2022-12-15 00:00:00 to 2022-12-20 10:00:00, focusing on hours 5:00 and 6:00 AM. The data is cropped to the geographic bounds of -35° to 5° latitude and -80° to -30° longitude, reprojected with a resolution of 0.045 degrees, and saved in a by_date format for easy organization.
86
105
 
87
106
  ```bash
88
- goesgcp --start '2022-12-15 00:00:00' --end '2022-12-20 10:00:00' --bt_hour 5 6 --save_format by_date --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
107
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --start '2022-12-15 00:00:00' --end '2022-12-20 10:00:00' --bt_hour 5 6 --save_format by_date --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
89
108
  ```
90
109
 
110
+ ### Contributing
111
+ Contributions are welcome! If you encounter issues or have suggestions for improvements, please submit them via GitHub issues or pull requests.
112
+
91
113
  ### Credits
92
- And this is a otimization by Helvecio Neto - 2025
114
+ This project was developed and optimized by Helvecio Neto (2025).
115
+ It builds upon NOAA GOES-R data and leverages resources provided by the Google Cloud Platform.
116
+
117
+ ### License
118
+ This project is licensed under the MIT License.
@@ -0,0 +1,8 @@
1
+ goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
+ goesgcp/main.py,sha256=tDkn46WP0Nwet_3EwW6Rx-A-ASipuc8X251qZOpeeMU,18851
3
+ goesgcp-2.0.2.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
+ goesgcp-2.0.2.dist-info/METADATA,sha256=mab7nr7wRRptYsLU_vZykjWZKqXyIT72aV5RiL-CBmQ,6119
5
+ goesgcp-2.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ goesgcp-2.0.2.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
+ goesgcp-2.0.2.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
+ goesgcp-2.0.2.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
- goesgcp/main.py,sha256=5vCAcSuMgFRzTLKZL4IUGgXVBxM3Buw19l-2MrpGR9M,16837
3
- goesgcp-2.0.0.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
- goesgcp-2.0.0.dist-info/METADATA,sha256=fgCGPkdI51BKwv7kpgiZgHP9WZbXhSdWcxkPhaZ2fKk,4066
5
- goesgcp-2.0.0.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- goesgcp-2.0.0.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
- goesgcp-2.0.0.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
- goesgcp-2.0.0.dist-info/RECORD,,