goesgcp 2.0.1__py3-none-any.whl → 2.0.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
goesgcp/main.py CHANGED
@@ -13,6 +13,9 @@ from datetime import datetime, timedelta, timezone
13
13
  from pyproj import CRS, Transformer
14
14
  from google.api_core.exceptions import GoogleAPIError
15
15
 
16
+ import warnings
17
+ warnings.filterwarnings('ignore')
18
+
16
19
 
17
20
  def list_blobs(connection, bucket_name, prefix):
18
21
  """
@@ -54,11 +57,11 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
54
57
  files_metadata = []
55
58
 
56
59
  # Generate the list of dates from start to end
57
- current_time = start
58
- while current_time < end:
59
- year = current_time.year
60
- julian_day = str(current_time.timetuple().tm_yday).zfill(3) # Julian day
61
- hour = current_time.hour
60
+ temp = start
61
+ while temp <= end:
62
+ year = temp.year
63
+ julian_day = str(temp.timetuple().tm_yday).zfill(3) # Julian day
64
+ hour = temp.hour
62
65
 
63
66
  # Generate the directory prefix
64
67
  prefix = f"{base_prefix}/{get_directory_prefix(year, julian_day, hour)}"
@@ -71,26 +74,27 @@ def get_files_period(connection, bucket_name, base_prefix, pattern,
71
74
  if pattern in blob.name:
72
75
  files_metadata.append({
73
76
  'file_name': blob.name,
74
- 'last_modified': blob.updated
75
77
  })
76
78
 
77
79
  # Move to the next hour
78
- current_time += timedelta(hours=1)
80
+ temp += timedelta(hours=1)
79
81
 
80
82
  # Create a DataFrame from the list of files
81
83
  df = pd.DataFrame(files_metadata)
82
84
 
85
+ # Transform file_name to datetime
86
+ df['last_modified'] = pd.to_datetime(df['file_name'].str.extract(r'(\d{4}\d{3}\d{2}\d{2})').squeeze(), format='%Y%j%H%M')
87
+
88
+
83
89
  if df.empty:
84
90
  print("No files found matching the pattern.")
85
91
  return pd.DataFrame()
86
92
 
87
93
  # Ensure 'last_modified' is in the correct datetime format without timezone
88
- df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize(None)
89
- start = pd.to_datetime(start).tz_localize(None)
90
- end = pd.to_datetime(end).tz_localize(None)
91
-
92
- # Filter the DataFrame based on the date range
93
- df = df[(df['last_modified'] >= start) & (df['last_modified'] < end)]
94
+ df['last_modified'] = pd.to_datetime(df['last_modified']).dt.tz_localize('UTC')
95
+
96
+ # Filter the DataFrame based on the date range (inclusive)
97
+ df = df[(df['last_modified'] >= start) & (df['last_modified'] <= end)]
94
98
 
95
99
  # Filter the DataFrame based on the hour range
96
100
  df['hour'] = df['last_modified'].dt.hour
@@ -153,13 +157,22 @@ def crop_reproject(args):
153
157
  Crops and reprojects a GOES-16 file to EPSG:4326.
154
158
  """
155
159
 
156
- file, output = args
160
+ file, output, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format = args
157
161
 
158
162
  # Open the file
159
163
  ds = xr.open_dataset(file, engine="netcdf4")
160
164
 
165
+ if var_name is None:
166
+ # Get all variables that are 2D
167
+ var_names = [var for var in ds.data_vars if len(ds[var].dims) == 2]
168
+
169
+ # Remove DQF variables
170
+ var_names = [var for var in var_names if 'DQF' not in var]
171
+ else:
172
+ var_names = [var_name]
173
+
161
174
  # Select only var_name and goes_imager_projection
162
- ds = ds[[var_name, "goes_imager_projection"]]
175
+ ds = ds[var_names + ["goes_imager_projection"]]
163
176
 
164
177
  # Get projection
165
178
  sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
@@ -215,14 +228,13 @@ def crop_reproject(args):
215
228
  ds = ds.rename({"x": "lon", "y": "lat"})
216
229
 
217
230
  # Add resolution to attributes
218
- ds[var_name].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
231
+ for var in var_names:
232
+ ds[var].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
233
+ ds[var].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
219
234
 
220
235
  # Crop using lat/lon coordinates, in parallel
221
236
  ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)
222
237
 
223
- # Add comments
224
- ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
225
-
226
238
  # Add global metadata comments
227
239
  ds.attrs['comments'] = "Data processed by goesgcp, author: Helvecio B. L. Neto (helvecioblneto@gmail.com)"
228
240
 
@@ -249,20 +261,18 @@ def crop_reproject(args):
249
261
  output_file = f"{output_directory}{file.split('/')[-1]}"
250
262
  ds.to_netcdf(output_file, mode='w', format='NETCDF4_CLASSIC')
251
263
 
252
- # Fechar o dataset
253
264
  ds.close()
254
- return
255
265
 
256
266
 
257
267
  def process_file(args):
258
- """ Downloads and processes a file in parallel. """
259
-
260
- bucket_name, blob_name, local_path = args
268
+ """
269
+ Downloads and processes a GOES-16 file.
270
+ """
271
+
272
+ bucket_name, blob_name, local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, \
273
+ save_format, retries = args
261
274
 
262
- # Download options
263
- retries = 5
264
275
  attempt = 0
265
-
266
276
  while attempt < retries:
267
277
  try:
268
278
  # Connect to the bucket
@@ -282,31 +292,28 @@ def process_file(args):
282
292
  log_file.write(f"Failed to download {blob_name} after {retries} attempts. Error: {e}\n")
283
293
 
284
294
  # Crop the file
285
- crop_reproject((local_path, output_path))
295
+ crop_reproject((local_path, output_path, var_name, lat_min, lat_max, lon_min, lon_max, resolution, save_format))
286
296
 
287
297
  # Remove the local file
288
298
  pathlib.Path(local_path).unlink()
289
299
 
300
+ # Create connection
301
+ storage_client = storage.Client.create_anonymous_client()
290
302
 
291
303
  def main():
292
304
  ''' Main function to download and process GOES-16 files. '''
293
305
 
294
-
295
- global output_path, var_name, \
296
- lat_min, lat_max, lon_min, lon_max, \
297
- max_attempts, parallel, recent, resolution, storage_client, \
298
- satellite, product, op_mode, channel, save_format
299
-
300
306
  epilog = """
301
307
  Example usage:
302
308
 
303
- - To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product:
309
+ - To download recent 3 files from the GOES-16 satellite for the ABI-L2-CMIPF product,
310
+ change resolution to 0.045, and crop the files between latitudes -35 and 5 and longitudes -80 and -30:
304
311
 
305
- goesgcp --satellite goes16 --product ABI-L2-CMIP --recent 3
312
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
306
313
 
307
314
  - To download files from the GOES-16 satellite for the ABI-L2-CMIPF product between 2022-12-15 and 2022-12-20:
308
315
 
309
- goesgcp --start '2022-12-15 00:00:00' --end '2022-12-20 10:00:00' --bt_hour 5 6 --save_format by_date --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
316
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --start '2022-12-15 09:00:00' --end '2022-12-15 09:50:00' --resolution 0.045 --lat_min -35 --lat_max 5 --lon_min -80 --lon_max -30
310
317
 
311
318
  """
312
319
 
@@ -329,14 +336,14 @@ def main():
329
336
  ]
330
337
 
331
338
  # Set arguments
332
- parser = argparse.ArgumentParser(description='Converts GOES-16 L2 data to netCDF',
339
+ parser = argparse.ArgumentParser(description='Download and process GOES Satellite data files from GCP.',
333
340
  epilog=epilog,
334
341
  formatter_class=argparse.RawDescriptionHelpFormatter)
335
342
 
336
343
  # Satellite and product settings
337
344
  parser.add_argument('--satellite', type=str, default='goes-16', choices=['goes-16', 'goes-18'], help='Name of the satellite (e.g., goes16)')
338
- parser.add_argument('--product', type=str, default='ABI-L2-CMIP', help='Name of the satellite product', choices=product_names)
339
- parser.add_argument('--var_name', type=str, default='CMI', help='Variable name to extract (e.g., CMI)')
345
+ parser.add_argument('--product', type=str, default='ABI-L2-CMIPF', help='Name of the satellite product', choices=product_names)
346
+ parser.add_argument('--var_name', type=str, default=None, help='Variable name to extract (e.g., CMI)')
340
347
  parser.add_argument('--channel', type=int, default=13, help='Channel to use (e.g., 13)')
341
348
  parser.add_argument('--op_mode', type=str, default='M6C', help='Operational mode to use (e.g., M6C)')
342
349
 
@@ -407,9 +414,6 @@ def main():
407
414
  # Create output directory
408
415
  pathlib.Path(output_path).mkdir(parents=True, exist_ok=True)
409
416
 
410
- # Create connection
411
- storage_client = storage.Client.create_anonymous_client()
412
-
413
417
  # Check if the bucket exists
414
418
  try:
415
419
  storage_client.get_bucket(bucket_name)
@@ -445,7 +449,9 @@ def main():
445
449
 
446
450
  if parallel: # Run in parallel
447
451
  # Create a list of tasks
448
- tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}") for file in files_list]
452
+ tasks = [(bucket_name, file, f"tmp/{file.split('/')[-1]}", output_path, var_name,
453
+ lat_min, lat_max, lon_min, lon_max, resolution,
454
+ save_format, max_attempts) for file in files_list]
449
455
 
450
456
  # Download files in parallel
451
457
  with Pool(processes=args.processes) as pool:
@@ -455,11 +461,12 @@ def main():
455
461
  else: # Run in serial
456
462
  for file in files_list:
457
463
  local_path = f"tmp/{file.split('/')[-1]}"
458
- process_file((bucket_name, file, local_path))
464
+ process_file((bucket_name, file, local_path, output_path, var_name,
465
+ lat_min, lat_max, lon_min, lon_max, resolution,
466
+ save_format, max_attempts))
459
467
  loading_bar.update(1)
460
468
  loading_bar.close()
461
469
 
462
- # Remove temporary directory
463
470
  shutil.rmtree('tmp/')
464
471
 
465
472
  if __name__ == '__main__':
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.2
2
2
  Name: goesgcp
3
- Version: 2.0.1
3
+ Version: 2.0.2
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -89,7 +89,7 @@ goesgcp [OPTIONS]
89
89
  | `--save_format` | Format for saving output files (default: `by_date`). |
90
90
 
91
91
  #### Available GOES Products
92
- A comprehensive list of available GOES products can be found at the following link: [https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-goes16](https://github.com/awslabs/open-data-docs/tree/main/docs/noaa/noaa-goes16)
92
+ A comprehensive list of available GOES products can be found at the following link: [https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16](https://console.cloud.google.com/storage/browser/gcp-public-data-goes-16)
93
93
 
94
94
  ### Examples
95
95
 
@@ -97,7 +97,7 @@ A comprehensive list of available GOES products can be found at the following li
97
97
  In the example below, the command downloads the 3 most recent files from the GOES-16 satellite for the product ABI-L2-CMIPF. It focuses on the variable CMI (Cloud and Moisture Imagery) from channel 13, which is commonly used for infrared observations. The downloaded files are saved to the specified output directory output/.
98
98
 
99
99
  ```bash
100
- goesgcp --satellite goes-16 --product ABI-L2-CMIPF --var_name CMI --channel 13 --recent 3 --output "output/"
100
+ goesgcp --satellite goes-16 --product ABI-L2-CMIPF --recent 3 --output "output/"
101
101
  ```
102
102
 
103
103
  #### Download Data for a Specific Time Range
@@ -0,0 +1,8 @@
1
+ goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
+ goesgcp/main.py,sha256=tDkn46WP0Nwet_3EwW6Rx-A-ASipuc8X251qZOpeeMU,18851
3
+ goesgcp-2.0.2.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
+ goesgcp-2.0.2.dist-info/METADATA,sha256=mab7nr7wRRptYsLU_vZykjWZKqXyIT72aV5RiL-CBmQ,6119
5
+ goesgcp-2.0.2.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
+ goesgcp-2.0.2.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
+ goesgcp-2.0.2.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
+ goesgcp-2.0.2.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
- goesgcp/main.py,sha256=F2Z0J4DVF2oeejlIN6WEXi3K8eueZxGWIH5JulEWhXE,18178
3
- goesgcp-2.0.1.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
- goesgcp-2.0.1.dist-info/METADATA,sha256=a1MA1uUCdoG5Ihpr8szCYyVpzjCkmSePv0EnNZTKaLo,6149
5
- goesgcp-2.0.1.dist-info/WHEEL,sha256=In9FTNxeP60KnTkGw7wk6mJPYd_dQSjEZmXdBdMCI-8,91
6
- goesgcp-2.0.1.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
- goesgcp-2.0.1.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
- goesgcp-2.0.1.dist-info/RECORD,,