goesgcp 1.0.4__py3-none-any.whl → 1.0.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
goesgcp/main.py CHANGED
@@ -5,9 +5,10 @@ import argparse
5
5
  import sys
6
6
  import tqdm
7
7
  from concurrent.futures import ThreadPoolExecutor
8
+ from multiprocessing import Pool
8
9
  from google.cloud import storage
9
10
  from datetime import datetime, timedelta, timezone
10
- from pyproj import CRS
11
+ from pyproj import CRS, Transformer
11
12
 
12
13
 
13
14
 
@@ -64,71 +65,120 @@ def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files):
64
65
  # Return only the names of the most recent files, according to the minimum requested
65
66
  return [file[0] for file in files[:min_files]]
66
67
 
67
- def download_file(connection, bucket_name, blob_name, local_path):
68
- """Downloads a file from a GCP bucket."""
69
- bucket = connection.bucket(bucket_name)
70
- blob = bucket.blob(blob_name)
71
- blob.download_to_filename(local_path)
72
68
 
73
- def crop_reproject(file, output):
69
+ def crop_reproject(args):
74
70
  """
75
71
  Crops and reprojects a GOES-16 file to EPSG:4326.
76
72
  """
77
73
 
74
+ file, output = args
75
+
76
+ # Open the file
77
+ ds = xr.open_dataset(file, engine='netcdf4')
78
78
 
79
- ds = xr.open_dataset(file)
80
79
  # Select only var_name and goes_imager_projection
81
80
  ds = ds[[var_name, "goes_imager_projection"]]
81
+
82
82
  # Get projection
83
83
  sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
84
84
  ds = ds.assign_coords({
85
85
  "x": ds["x"].values * sat_height,
86
86
  "y": ds["y"].values * sat_height,
87
87
  })
88
- # Set CRS
88
+ # Set CRS from goes_imager_projection
89
89
  crs = CRS.from_cf(ds["goes_imager_projection"].attrs)
90
90
  ds = ds.rio.write_crs(crs)
91
91
 
92
- # Reproject to EPSG:4326 using parallel processing
93
- ds = ds.rio.reproject(dst_crs="EPSG:4326",
94
- resolution=(resolution, resolution),
95
- num_threads=-1)
92
+ # Try to reduce the size of the dataset
93
+ try:
94
+ # Create a transformer
95
+ transformer = Transformer.from_crs(CRS.from_epsg(4326), crs)
96
+ # Calculate the margin
97
+ margin_ratio = 0.40 # 40% margin
98
+
99
+ # Get the bounding box
100
+ min_x, min_y = transformer.transform(lat_min, lon_min)
101
+ max_x, max_y = transformer.transform(lat_max, lon_max)
102
+
103
+ # Calculate the range
104
+ x_range = abs(max_x - min_x)
105
+ y_range = abs(max_y - min_y)
106
+
107
+ margin_x = x_range * margin_ratio
108
+ margin_y = y_range * margin_ratio
109
+
110
+ # Expand the bounding box
111
+ min_x -= margin_x
112
+ max_x += margin_x
113
+ min_y -= margin_y
114
+ max_y += margin_y
115
+
116
+ # Select the region
117
+ if ds["y"].values[0] > ds["y"].values[-1]: # Eixo y decrescente
118
+ ds_ = ds.sel(x=slice(min_x, max_x), y=slice(max_y, min_y))
119
+ else: # Eixo y crescente
120
+ ds_ = ds.sel(x=slice(min_x, max_x), y=slice(min_y, max_y))
121
+ # Sort by y
122
+ if ds_["y"].values[0] > ds_["y"].values[-1]:
123
+ ds_ = ds_.sortby("y")
124
+ # Assign to ds
125
+ ds = ds_
126
+ except:
127
+ pass
128
+
129
+ # Reproject to EPSG:4326
130
+ ds = ds.rio.reproject("EPSG:4326", resolution=resolution)
96
131
 
97
132
  # Rename lat/lon coordinates
98
133
  ds = ds.rename({"x": "lon", "y": "lat"})
99
134
 
100
- # # Crop using lat/lon coordinates, in parallel
101
- ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)
135
+ # Add resolution to attributes
136
+ ds[var_name].attrs['resolution'] = "x={:.2f} y={:.2f} degree".format(resolution, resolution)
102
137
 
103
- # Remove any previous file
104
- if pathlib.Path(f'{output}{file.split("/")[-1]}.nc').exists():
105
- pathlib.Path(f'{output}{file.split("/")[-1]}.nc').unlink()
138
+ # Crop using lat/lon coordinates, in parallel
139
+ ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)
106
140
 
107
141
  # Add comments
108
- ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by helvecioblneto@gmail.com'
142
+ ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by goesgcp'
109
143
 
110
- # # Save as netcdf
111
- ds.to_netcdf(f'{output}{file.split("/")[-1]}')
144
+ # Add global metadata comments
145
+ ds.attrs['comments'] = "Data processed by goesgcp, author: Helvecio B. L. Neto (helvecioblneto@gmail.com)"
146
+
147
+ # Save as netcdf overwriting the original file
148
+ ds.to_netcdf(f'{output}{file.split("/")[-1]}', mode='w', format='NETCDF4_CLASSIC')
112
149
 
113
- # Remove original file
114
- pathlib.Path(file).unlink()
150
+ # Close the dataset
151
+ ds.close()
115
152
 
116
153
  return
117
154
 
118
155
 
119
156
 
157
+ def download_file(args):
158
+ """Downloads a file from a GCP bucket."""
159
+
160
+ bucket_name, blob_name, local_path = args
161
+
162
+ # Create a client
163
+ bucket = storage_client.bucket(bucket_name)
164
+ blob = bucket.blob(blob_name)
165
+
166
+ # Download the file
167
+ blob.download_to_filename(local_path, timeout=120)
168
+
169
+
120
170
  def main():
121
171
 
122
172
  global output_path, var_name, \
123
173
  lat_min, lat_max, lon_min, lon_max, \
124
- max_attempts, parallel, recent, resolution
174
+ max_attempts, parallel, recent, resolution, storage_client
125
175
 
126
176
  epilog = """
127
177
  Example usage:
128
178
 
129
- - To download recent files from the GOES-16 satellite for the ABI-L2-CMIPF product, extracting the CMI variable from channel 13, in the last 30 minutes:
179
+ - To download recent 10 files from the GOES-16 satellite for the ABI-L2-CMIPF product:
130
180
 
131
- goesgcp --satellite goes16 --product ABI-L2-CMIP --domain F --var_name CMI --channel 13 --recent 10 --output_path "output/"
181
+ goesgcp --satellite goes16 --product ABI-L2-CMIP --recent 10 --output_path "output/"
132
182
  """
133
183
 
134
184
 
@@ -146,11 +196,11 @@ def main():
146
196
  parser.add_argument('--recent', type=int, default=3, help='Number of recent files to download')
147
197
 
148
198
  # Geographic bounding box
149
- parser.add_argument('--lat_min', type=float, default=-56, help='Minimum latitude of the bounding box')
150
- parser.add_argument('--lat_max', type=float, default=35, help='Maximum latitude of the bounding box')
151
- parser.add_argument('--lon_min', type=float, default=-116, help='Minimum longitude of the bounding box')
152
- parser.add_argument('--lon_max', type=float, default=-25, help='Maximum longitude of the bounding box')
153
- parser.add_argument('--resolution', type=float, default=0.045, help='Resolution of the output file')
199
+ parser.add_argument('--lat_min', type=float, default=-81.3282, help='Minimum latitude of the bounding box')
200
+ parser.add_argument('--lat_max', type=float, default=81.3282, help='Maximum latitude of the bounding box')
201
+ parser.add_argument('--lon_min', type=float, default=-156.2995, help='Minimum longitude of the bounding box')
202
+ parser.add_argument('--lon_max', type=float, default=6.2995, help='Maximum longitude of the bounding box')
203
+ parser.add_argument('--resolution', type=float, default=0.03208, help='Resolution of the output file')
154
204
  parser.add_argument('--output', type=str, default='output/', help='Path for saving output files')
155
205
 
156
206
  # Other settings
@@ -205,28 +255,36 @@ def main():
205
255
  if not recent_files:
206
256
  print(f"No files found with the pattern {pattern}. Exiting...")
207
257
  sys.exit(1)
208
- print('Downloading files...')
209
- # Loading bar
258
+
259
+ # Create a temporary directory
260
+ pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
261
+
262
+ # Download files
263
+ print(f"Downloading {len(recent_files)} files...")
210
264
  loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
211
265
  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
212
266
  [Elapsed:{elapsed} Remaining:<{remaining}]')
213
267
 
214
- # Create a temporary directory
215
- pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
216
-
217
268
  # Download all files to a temporary directory
218
269
  with ThreadPoolExecutor(max_workers=args.processes) as executor:
219
270
  for file in recent_files:
220
- download_file(storage_client, bucket_name, file, f'tmp/{file.split("/")[-1]}')
271
+ local_path = f"tmp/{file.split('/')[-1]}"
272
+ executor.submit(download_file, (bucket_name, file, local_path))
221
273
  loading_bar.update(1)
222
274
  loading_bar.close()
223
275
 
224
- print('Cropping and reprojecting files...')
225
- # Crop and reproject all files in serial mode
226
- for file in recent_files:
227
- crop_reproject(f'tmp/{file.split("/")[-1]}', output_path)
228
- loading_bar.update(1)
229
- loading_bar.close()
276
+ # Process files
277
+ print(f"\nProcessing {len(recent_files)} files...")
278
+ load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
279
+ bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
280
+ [Elapsed:{elapsed} Remaining:<{remaining}]')
281
+
282
+
283
+ # Process files in parallel
284
+ with Pool(processes=args.processes) as pool:
285
+ for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
286
+ load_bar2.update(1)
287
+ load_bar2.close()
230
288
 
231
289
  # Remove temporary directory
232
290
  shutil.rmtree('tmp/')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.4
3
+ Version: 1.0.6
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -46,28 +46,26 @@ The script uses the `argparse` module for handling command-line arguments. Below
46
46
  goesgcp [OPTIONS]
47
47
  ```
48
48
 
49
- | Option | Description |
50
- |----------------------|-----------------------------------------------------------------------------|
51
- | `--satellite` | Name of the satellite (e.g., goes16). |
52
- | `--product` | Name of the satellite product (e.g., ABI-L2-CMIPF). |
53
- | `--var_name` | Variable name to extract (e.g., CMI). |
54
- | `--channel` | Channel to use (e.g., 13). | |
55
- | `--between_minutes` | Filter data between these minutes (default: `[0, 60]`). |
56
- | `--output_path` | Path for saving output files (default: `output/`). | |
57
- | `--lat_min` | Minimum latitude of the bounding box (default: `-56`). |
58
- | `--lat_max` | Maximum latitude of the bounding box (default: `35`). |
59
- | `--lon_min` | Minimum longitude of the bounding box (default: `-116`). |
60
- | `--lon_max` | Maximum longitude of the bounding box (default: `-25`). |
61
- | `--max_attempts` | Number of attempts to download a file before logging a failure (default: `3`).|
49
+ | Option | Description |
50
+ |----------------------|----------------------------------------------------------------------------|
51
+ | `--satellite` | Name of the satellite (e.g., goes16). |
52
+ | `--product` | Name of the satellite product (e.g., ABI-L2-CMIPF). |
53
+ | `--var_name` | Variable name to extract (e.g., CMI). |
54
+ | `--channel` | Channel to use (e.g., 13). |
55
+ | `--output` | Path for saving output files (default: `output/`). |
56
+ | `--lat_min` | Minimum latitude of the bounding box (default: `-81.3282`). |
57
+ | `--lat_max` | Maximum latitude of the bounding box (default: `81.3282`). |
58
+ | `--lon_min` | Minimum longitude of the bounding box (default: `-156.2995`). |
59
+ | `--lon_max` | Maximum longitude of the bounding box (default: `6.2995`). |
60
+ | `--resolution` | Resolution of the reprojected data, in degrees (default: `0.03208`). |
62
61
 
63
62
  ### Examples
64
63
 
65
- To download and process recent data for the GOES-16 satellite, ABI-L2-CMIPF product, variable CMI, and channel 13, run the following command:
64
+ To download the 3 most recent files for the GOES-16 satellite, ABI-L2-CMIPF product, variable CMI, and channel 13, run the following command:
66
65
 
67
66
  ```bash
68
- goesgcp --satellite goes16 --product ABI-L2-CMIPF --var_name CMI --channel 13 --recent 3 --output_path "output/"
67
+ goesgcp --satellite goes16 --product ABI-L2-CMIPF --var_name CMI --channel 13 --recent 3 --output "output/"
69
68
  ```
70
69
 
71
70
  ### Credits
72
- All the credit goes to the original author of the **goes2go** library.
73
71
  And this is an optimization by Helvecio Neto - 2025
@@ -0,0 +1,8 @@
1
+ goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
+ goesgcp/main.py,sha256=3cvmzeFyMh7smI3jkMnY0PUrird9i3lntkFt9c_xwZo,10462
3
+ goesgcp-1.0.6.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
+ goesgcp-1.0.6.dist-info/METADATA,sha256=-3huDGZYV4-ZFxmKQcU-1avXQJWPQW_RrIQ841XrlGI,2993
5
+ goesgcp-1.0.6.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
+ goesgcp-1.0.6.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
+ goesgcp-1.0.6.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
+ goesgcp-1.0.6.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
- goesgcp/main.py,sha256=4ATwrk6E_mzyeiEDZ11f6z_1tbAHDvTHQa4Jvr4CqIQ,8745
3
- goesgcp-1.0.4.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
- goesgcp-1.0.4.dist-info/METADATA,sha256=6CjGqrBkRDVWISzTiaMZRL2O7NXInZtjNq4vXhH59mY,3219
5
- goesgcp-1.0.4.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
- goesgcp-1.0.4.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
- goesgcp-1.0.4.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
- goesgcp-1.0.4.dist-info/RECORD,,