goesgcp 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
goesgcp/__init__.py ADDED
@@ -0,0 +1 @@
1
+ from .main import main
goesgcp/main.py ADDED
@@ -0,0 +1,235 @@
1
+ import pathlib
2
+ import shutil
3
+ import xarray as xr
4
+ import argparse
5
+ import sys
6
+ import tqdm
7
+ from concurrent.futures import ThreadPoolExecutor
8
+ from google.cloud import storage
9
+ from datetime import datetime, timedelta, timezone
10
+ from pyproj import CRS
11
+
12
+
13
+
14
def list_blobs(connection, bucket_name, prefix):
    """
    Enumerate the objects of a GCP bucket whose names start with a prefix.

    :param connection: Storage client exposing ``bucket(name)``.
    :param bucket_name: Name of the GCP bucket.
    :param prefix: Object-name prefix used to filter the listing.
    :return: The result of ``bucket.list_blobs(prefix=prefix)``, handed back
        unchanged to the caller.
    """
    return connection.bucket(bucket_name).list_blobs(prefix=prefix)
23
+
24
def get_directory_prefix(year, julian_day, hour):
    """
    Generate the ``YYYY/DDD/HH/`` directory path for a given date and hour.

    The day of year is zero-padded to three digits and the hour to two, so
    that e.g. day 5 becomes ``005``; without the padding, prefixes for the
    first 99 days of a year would not match the bucket's directory names.

    :param year: Four-digit year.
    :param julian_day: Day of the year (1-366), as an int or a string.
    :param hour: Hour of the day (0-23), as an int or a string.
    :return: Directory prefix with a trailing slash.
    """
    return f"{year}/{str(julian_day).zfill(3)}/{str(hour).zfill(2)}/"
27
+
28
def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files,
                     max_empty_hours=48):
    """
    Fetches the most recent files in a GCP bucket.

    Starting at the current UTC hour, the hourly directories are scanned
    backwards in time until at least ``min_files`` matching files have been
    collected, or until ``max_empty_hours`` consecutive hourly directories
    contained no match (which would otherwise loop forever, e.g. when the
    pattern is wrong or the bucket no longer receives data).

    :param connection: Storage client passed through to ``list_blobs``.
    :param bucket_name: Name of the GCP bucket.
    :param base_prefix: Base directory prefix (before year/Julian day/hour).
    :param pattern: Substring that a blob name must contain to be selected.
    :param min_files: Minimum number of files to look for.
    :param max_empty_hours: Number of consecutive hours without any match
        after which the search gives up.
    :return: Names of up to ``min_files`` files, newest first; may be shorter
        (or empty) when the search gave up early.
    """
    files = []
    current_time = datetime.now(timezone.utc)
    empty_hours = 0

    while len(files) < min_files and empty_hours < max_empty_hours:
        # "%j" yields the zero-padded (3-digit) day of year used in the
        # bucket's directory names.
        directory = get_directory_prefix(
            current_time.year, current_time.strftime("%j"), current_time.hour
        )
        prefix = f"{base_prefix}/{directory}"

        # Simple substring match; "re" could be used for richer patterns.
        matches = [
            (blob.name, blob.updated)
            for blob in list_blobs(connection, bucket_name, prefix)
            if pattern in blob.name
        ]

        if matches:
            files.extend(matches)
            empty_hours = 0
        else:
            empty_hours += 1

        # Go back one hour
        current_time -= timedelta(hours=1)

    # Newest first, by modification time
    files.sort(key=lambda item: item[1], reverse=True)

    return [name for name, _ in files[:min_files]]
66
+
67
def download_file(connection, bucket_name, blob_name, local_path):
    """
    Fetch a single object from a GCP bucket and store it on disk.

    :param connection: Storage client exposing ``bucket(name)``.
    :param bucket_name: Name of the GCP bucket.
    :param blob_name: Full name of the object inside the bucket.
    :param local_path: Destination file path on the local machine.
    """
    remote = connection.bucket(bucket_name).blob(blob_name)
    remote.download_to_filename(local_path)
72
+
73
def crop_reproject(file, output):
    """
    Crops and reprojects a GOES-16 file to EPSG:4326.

    The result is written to ``output`` under the same file name as the
    input, and the input file is deleted afterwards.

    Reads the module-level settings ``var_name``, ``resolution``,
    ``lon_min``, ``lat_min``, ``lon_max`` and ``lat_max`` (set by ``main``).

    :param file: Path of the downloaded netCDF file to process.
    :param output: Directory in which the processed file is saved.
    """
    source_path = pathlib.Path(file)
    # One single destination path, used both for removing a stale file and
    # for writing, so the two can never disagree.
    output_file = pathlib.Path(output) / source_path.name
    output_file.parent.mkdir(parents=True, exist_ok=True)

    # NOTE(review): the ``.rio`` accessor used below is provided by
    # rioxarray, which is not imported in the visible part of this module -
    # confirm it is imported somewhere before this function runs.
    with xr.open_dataset(file) as source:
        # Select only var_name and goes_imager_projection
        ds = source[[var_name, "goes_imager_projection"]]

        # NOTE(review): assumes x/y are stored as scan angles, so scaling by
        # the perspective point height yields projection coordinates -
        # confirm against the product specification.
        sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
        ds = ds.assign_coords({
            "x": ds["x"].values * sat_height,
            "y": ds["y"].values * sat_height,
        })

        # Set CRS from the CF grid-mapping attributes
        crs = CRS.from_cf(ds["goes_imager_projection"].attrs)
        ds = ds.rio.write_crs(crs)

        # Reproject to EPSG:4326 using parallel processing
        ds = ds.rio.reproject(dst_crs="EPSG:4326",
                              resolution=(resolution, resolution),
                              num_threads=-1)

        # Rename lat/lon coordinates
        ds = ds.rename({"x": "lon", "y": "lat"})

        # Crop using lat/lon coordinates
        ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)

        # Add comments
        ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by helvecioblneto@gmail.com'

        # Remove any previous file
        if output_file.exists():
            output_file.unlink()

        # Save as netcdf while the source dataset is still open
        ds.to_netcdf(output_file)

    # Remove original file only after its handle has been released
    source_path.unlink()

    return
117
+
118
+
119
+
120
def _str_to_bool(value):
    """Parse a command-line boolean such as ``true``/``false``/``1``/``0``."""
    if isinstance(value, bool):
        return value
    text = str(value).strip().lower()
    if text in ("1", "true", "t", "yes", "y", "on"):
        return True
    if text in ("0", "false", "f", "no", "n", "off"):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _download_with_retry(connection, bucket_name, blob_name, local_path, attempts):
    """Download one blob, retrying up to ``attempts`` times before re-raising."""
    attempts = max(1, attempts)
    for attempt in range(1, attempts + 1):
        try:
            download_file(connection, bucket_name, blob_name, local_path)
            return
        except Exception:
            # Retry boundary: only the final failure is propagated.
            if attempt == attempts:
                raise


def _progress_bar(total):
    """Create the progress bar shared by the download and processing phases."""
    return tqdm.tqdm(total=total, ncols=100, position=0, leave=True,
                     bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} '
                                '[Elapsed:{elapsed} Remaining:<{remaining}]')


def main():
    """
    Command-line entry point.

    Parses the arguments, looks up the most recent files of the requested
    product in the public bucket, downloads them to a temporary directory,
    then crops/reprojects each file into the output directory.

    Side effects: sets the module-level settings read by ``crop_reproject``
    (``var_name``, ``resolution``, bounding box, ...) and exits the process
    with status 1 when called without arguments, when the bucket cannot be
    reached, or when no file matches.
    """
    global output_path, var_name, \
        lat_min, lat_max, lon_min, lon_max, \
        max_attempts, parallel, recent, resolution

    # Function-scope imports keep this block self-contained.
    import tempfile
    from concurrent.futures import as_completed

    epilog = """
    Example usage:

    - To download recent files from the GOES-16 satellite for the ABI-L2-CMIPF product, extracting the CMI variable from channel 13, in the last 30 minutes:

    goesgcp --satellite goes16 --product ABI-L2-CMIP --domain F --var_name CMI --channel 13 --recent 10 --output_path "output/"
    """

    # Set arguments
    parser = argparse.ArgumentParser(description='Converts GOES-16 L2 data to netCDF',
                                     epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Satellite and product settings
    parser.add_argument('--satellite', type=str, default='goes-16', help='Name of the satellite (e.g., goes16)')
    parser.add_argument('--product', type=str, default='ABI-L2-CMIP', help='Name of the satellite product')
    parser.add_argument('--var_name', type=str, default='CMI', help='Variable name to extract (e.g., CMI)')
    parser.add_argument('--channel', type=int, default=13, help='Channel to use (e.g., 13)')
    parser.add_argument('--domain', type=str, default='F', help='Domain to use (e.g., F or C)')
    parser.add_argument('--recent', type=int, default=3, help='Number of recent files to download')

    # Geographic bounding box
    parser.add_argument('--lat_min', type=float, default=-56, help='Minimum latitude of the bounding box')
    parser.add_argument('--lat_max', type=float, default=35, help='Maximum latitude of the bounding box')
    parser.add_argument('--lon_min', type=float, default=-116, help='Minimum longitude of the bounding box')
    parser.add_argument('--lon_max', type=float, default=-25, help='Maximum longitude of the bounding box')
    parser.add_argument('--resolution', type=float, default=0.045, help='Resolution of the output file')
    parser.add_argument('--output_path', type=str, default='output/', help='Path for saving output files')

    # Other settings
    # type=bool would treat every non-empty string (even "False") as True.
    parser.add_argument('--parallel', type=_str_to_bool, default=True, help='Use parallel processing')
    parser.add_argument('--processes', type=int, default=4, help='Number of processes for parallel execution')
    parser.add_argument('--max_attempts', type=int, default=3, help='Number of attempts to download a file')

    # Parse arguments
    args = parser.parse_args()

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Set global variables
    output_path = args.output_path
    satellite = args.satellite
    product = args.product
    domain = args.domain
    channel = str(args.channel).zfill(2)
    var_name = args.var_name
    lat_min = args.lat_min
    lat_max = args.lat_max
    lon_min = args.lon_min
    lon_max = args.lon_max
    resolution = args.resolution
    max_attempts = args.max_attempts
    parallel = args.parallel
    recent = args.recent

    # NOTE(review): the public buckets appear to be named
    # "gcp-public-data-goes-NN"; accept the hyphen-less spelling shown in the
    # help text ("goes16") by normalising it - confirm the bucket naming.
    if satellite.startswith("goes") and not satellite.startswith("goes-"):
        satellite = "goes-" + satellite[len("goes"):]

    # Set bucket name and pattern
    bucket_name = "gcp-public-data-" + satellite
    pattern = "OR_" + product + domain + "-M6C" + channel + "_G" + satellite[-2:]
    min_files = recent

    # Honour --output_path; the trailing slash keeps plain string
    # concatenation with a file name working.
    output = f"{pathlib.Path(output_path)}/"
    pathlib.Path(output).mkdir(parents=True, exist_ok=True)

    # Create connection
    storage_client = storage.Client.create_anonymous_client()

    # Check if the bucket exists
    try:
        storage_client.get_bucket(bucket_name)
    except Exception as e:
        print(f"Bucket {bucket_name} not found. Exiting...")
        print(f"Reason: {e}", file=sys.stderr)
        sys.exit(1)

    # Search for recent files
    recent_files = get_recent_files(storage_client, bucket_name, product + domain, pattern, min_files)

    # Check if any files were found
    if not recent_files:
        print(f"No files found with the pattern {pattern}. Exiting...")
        sys.exit(1)

    # A private temporary directory is removed automatically, even when a
    # download or a conversion fails, and never clashes with a user's "tmp/".
    with tempfile.TemporaryDirectory(prefix="goesgcp_") as tmp_dir:
        local_files = [f"{tmp_dir}/{name.split('/')[-1]}" for name in recent_files]

        print('Downloading files...')
        workers = max(1, args.processes) if parallel else 1
        with _progress_bar(len(recent_files)) as loading_bar:
            with ThreadPoolExecutor(max_workers=workers) as executor:
                futures = [
                    executor.submit(_download_with_retry, storage_client,
                                    bucket_name, blob_name, local_path, max_attempts)
                    for blob_name, local_path in zip(recent_files, local_files)
                ]
                for future in as_completed(futures):
                    # Re-raises any download error in the main thread.
                    future.result()
                    loading_bar.update(1)

        print('Cropping and reprojecting files...')
        # Crop and reproject all files in serial mode, with a fresh bar
        with _progress_bar(len(local_files)) as loading_bar:
            for local_path in local_files:
                crop_reproject(local_path, output)
                loading_bar.update(1)
233
+
234
# Run the command-line interface only when executed as a script.
if __name__ == "__main__":
    main()
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -0,0 +1,8 @@
1
+ goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
+ goesgcp/main.py,sha256=t_qeof0nwuaWdLW5kbk5TNOk3n6QNuDoOJLHSC6neng,8738
3
+ goesgcp-1.0.1.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
+ goesgcp-1.0.1.dist-info/METADATA,sha256=XzgKLPCfdQ-ZzCmyZXTYFKvh5oHD-ochZN9QQd0kAmM,3219
5
+ goesgcp-1.0.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
+ goesgcp-1.0.1.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
+ goesgcp-1.0.1.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
+ goesgcp-1.0.1.dist-info/RECORD,,
@@ -0,0 +1 @@
1
+ goesgcp
@@ -1,6 +0,0 @@
1
- goesgcp-1.0.0.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
2
- goesgcp-1.0.0.dist-info/METADATA,sha256=Q2Un1vXOc29k4JDPf9mUQVgIn8ErQGm86TfFHPZojkM,3219
3
- goesgcp-1.0.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
4
- goesgcp-1.0.0.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
5
- goesgcp-1.0.0.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
6
- goesgcp-1.0.0.dist-info/RECORD,,
@@ -1 +0,0 @@
1
-