goesgcp 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- goesgcp/__init__.py +1 -0
- goesgcp/main.py +235 -0
- {goesgcp-1.0.0.dist-info → goesgcp-1.0.1.dist-info}/METADATA +1 -1
- goesgcp-1.0.1.dist-info/RECORD +8 -0
- goesgcp-1.0.1.dist-info/top_level.txt +1 -0
- goesgcp-1.0.0.dist-info/RECORD +0 -6
- goesgcp-1.0.0.dist-info/top_level.txt +0 -1
- {goesgcp-1.0.0.dist-info → goesgcp-1.0.1.dist-info}/LICENSE +0 -0
- {goesgcp-1.0.0.dist-info → goesgcp-1.0.1.dist-info}/WHEEL +0 -0
- {goesgcp-1.0.0.dist-info → goesgcp-1.0.1.dist-info}/entry_points.txt +0 -0
goesgcp/__init__.py
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
from .main import main
|
goesgcp/main.py
ADDED
|
@@ -0,0 +1,235 @@
|
|
|
1
|
+
import pathlib
|
|
2
|
+
import shutil
|
|
3
|
+
import xarray as xr
|
|
4
|
+
import argparse
|
|
5
|
+
import sys
|
|
6
|
+
import tqdm
|
|
7
|
+
from concurrent.futures import ThreadPoolExecutor
|
|
8
|
+
from google.cloud import storage
|
|
9
|
+
from datetime import datetime, timedelta, timezone
|
|
10
|
+
from pyproj import CRS
|
|
11
|
+
|
|
12
|
+
|
|
13
|
+
|
|
14
|
+
def list_blobs(connection, bucket_name, prefix):
    """
    Look up blobs in a GCP bucket whose names start with ``prefix``.

    :param connection: Storage client exposing ``bucket(name)``.
    :param bucket_name: Name of the GCP bucket.
    :param prefix: Object-name prefix passed through to the bucket listing.
    :return: Whatever ``bucket.list_blobs(prefix=...)`` returns; callers in
        this module iterate it and read ``name`` / ``updated`` on each item.
    """
    return connection.bucket(bucket_name).list_blobs(prefix=prefix)
|
|
23
|
+
|
|
24
|
+
def get_directory_prefix(year, julian_day, hour):
    """
    Build the ``year/day-of-year/hour/`` directory path.

    The day of year is zero-padded to three digits and the hour to two
    (e.g. ``2024/005/03/``), which is the layout used by the public GOES
    buckets. Without the padding, days 1-99 produce a prefix that matches
    no objects.

    :param year: Four-digit year.
    :param julian_day: Day of the year (1-366), as an int or numeric string.
    :param hour: Hour of the day (0-23), as an int or numeric string.
    :return: Prefix string ending with a trailing slash.
    """
    return f"{year}/{str(julian_day).zfill(3)}/{str(hour).zfill(2)}/"
|
|
27
|
+
|
|
28
|
+
def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files,
                     max_lookback_hours=168):
    """
    Fetches the most recent files in a GCP bucket.

    Starting at the current UTC hour, the hourly directories are listed one
    by one, walking backwards in time, until at least ``min_files`` matching
    blobs have been collected or ``max_lookback_hours`` directories have been
    inspected. The bound guarantees termination when nothing matches (wrong
    pattern, empty product, ...), so callers can rely on an empty list to
    detect "no files found".

    :param connection: Storage client passed through to ``list_blobs``.
    :param bucket_name: Name of the GCP bucket.
    :param base_prefix: Base directory prefix (before year/Julian day/hour).
    :param pattern: Substring that a blob name must contain to be selected.
    :param min_files: Number of files wanted.
    :param max_lookback_hours: Maximum number of hourly directories to inspect
        before giving up (default: 168, i.e. one week).
    :return: Names of up to ``min_files`` matching blobs, newest first
        (ordered by each blob's ``updated`` value). May be shorter than
        ``min_files``, or empty, if the look-back window is exhausted.
    """
    files = []
    current_time = datetime.now(timezone.utc)

    for _ in range(max_lookback_hours):
        if len(files) >= min_files:
            break

        # Directory for the hour currently being inspected
        hour_dir = get_directory_prefix(
            current_time.year,
            current_time.timetuple().tm_yday,  # day of the year
            current_time.hour,
        )
        prefix = f"{base_prefix}/{hour_dir}"

        # Keep only blobs whose name contains the pattern
        # ("re" could be used here for more complex patterns)
        files.extend(
            (blob.name, blob.updated)
            for blob in list_blobs(connection, bucket_name, prefix)
            if pattern in blob.name
        )

        # Go back one hour
        current_time -= timedelta(hours=1)

    # Newest first, by modification time
    files.sort(key=lambda item: item[1], reverse=True)

    # Only the names of the requested number of most recent files
    return [name for name, _updated in files[:min_files]]
|
|
66
|
+
|
|
67
|
+
def download_file(connection, bucket_name, blob_name, local_path):
    """
    Copy one object from a GCP bucket to the local filesystem.

    :param connection: Storage client exposing ``bucket(name)``.
    :param bucket_name: Name of the GCP bucket holding the object.
    :param blob_name: Full object name inside the bucket.
    :param local_path: Destination file path on disk.
    """
    target_blob = connection.bucket(bucket_name).blob(blob_name)
    target_blob.download_to_filename(local_path)
|
|
72
|
+
|
|
73
|
+
def crop_reproject(file, output):
    """
    Crops and reprojects a GOES-16 file to EPSG:4326.

    The result is written to ``output`` under the same file name as ``file``
    and the input file is deleted afterwards.

    Reads the module-level settings ``var_name``, ``resolution``,
    ``lon_min``, ``lat_min``, ``lon_max`` and ``lat_max``; these are assigned
    by ``main()`` and must exist before this function is called.

    :param file: Path of the downloaded netCDF file to process.
    :param output: Directory in which the processed file is written.
    """
    # NOTE(review): the ``.rio`` accessor comes from rioxarray, which this
    # module never imports explicitly -- presumably it is registered as a side
    # effect of xarray's backend discovery; confirm.
    source_path = pathlib.Path(file)
    # The source name already carries its extension, so it is reused as is
    target_path = pathlib.Path(output) / source_path.name

    # The context manager closes the source handle before the file is removed
    with xr.open_dataset(file) as source_ds:
        # Select only var_name and goes_imager_projection
        ds = source_ds[[var_name, "goes_imager_projection"]]

        # Scale the x/y coordinates by the satellite height taken from the
        # projection attributes (presumably scan angle -> metres; confirm)
        sat_height = ds["goes_imager_projection"].attrs["perspective_point_height"]
        ds = ds.assign_coords({
            "x": ds["x"].values * sat_height,
            "y": ds["y"].values * sat_height,
        })

        # Set CRS from the CF grid-mapping attributes
        crs = CRS.from_cf(ds["goes_imager_projection"].attrs)
        ds = ds.rio.write_crs(crs)

        # Reproject to EPSG:4326
        ds = ds.rio.reproject(dst_crs="EPSG:4326",
                              resolution=(resolution, resolution),
                              num_threads=-1)

        # Rename lat/lon coordinates
        ds = ds.rename({"x": "lon", "y": "lat"})

        # Crop to the requested bounding box
        ds = ds.rio.clip_box(minx=lon_min, miny=lat_min, maxx=lon_max, maxy=lat_max)

        # Add comments
        ds[var_name].attrs['comments'] = 'Cropped and reprojected to EPSG:4326 by helvecioblneto@gmail.com'

        # Remove any previous output; this must be the very path written below
        if target_path.exists():
            target_path.unlink()

        # Save as netcdf
        ds.to_netcdf(target_path)

    # Remove original file
    source_path.unlink()

    return
|
|
117
|
+
|
|
118
|
+
|
|
119
|
+
|
|
120
|
+
def _str_to_bool(value):
    """Parse a command-line boolean such as 'true', 'False', '1' or 'no'."""
    if isinstance(value, bool):
        return value
    normalized = value.strip().lower()
    if normalized in ('true', 't', 'yes', 'y', '1'):
        return True
    if normalized in ('false', 'f', 'no', 'n', '0'):
        return False
    raise argparse.ArgumentTypeError(f"expected a boolean value, got {value!r}")


def _download_with_retries(connection, bucket_name, blob_name, local_path, attempts):
    """Call download_file up to ``attempts`` times, re-raising the last failure."""
    attempts = max(attempts, 1)
    for attempt in range(1, attempts + 1):
        try:
            download_file(connection, bucket_name, blob_name, local_path)
            return
        except Exception:
            # Retry boundary: only give up once the attempts are used up
            if attempt == attempts:
                raise


def main():
    """
    Command-line entry point.

    Parses the arguments, looks up the most recent matching files in the
    public GOES bucket, downloads them to a temporary directory, then crops
    and reprojects each one into the output directory.

    Side effects: assigns the module-level settings read by
    ``crop_reproject`` (``var_name``, ``resolution``, ``lat_min``,
    ``lat_max``, ``lon_min``, ``lon_max``) as well as ``output_path``,
    ``max_attempts``, ``parallel`` and ``recent``. Exits with status 1 when
    called without arguments, when the bucket cannot be accessed, or when no
    file matches.
    """
    global output_path, var_name, \
        lat_min, lat_max, lon_min, lon_max, \
        max_attempts, parallel, recent, resolution

    # Local import: only this function needs it
    import tempfile

    epilog = """
    Example usage:

    - To download recent files from the GOES-16 satellite for the ABI-L2-CMIPF product, extracting the CMI variable from channel 13, in the last 30 minutes:

    goesgcp --satellite goes16 --product ABI-L2-CMIP --domain F --var_name CMI --channel 13 --recent 10 --output_path "output/"
    """

    # Set arguments
    parser = argparse.ArgumentParser(description='Converts GOES-16 L2 data to netCDF',
                                     epilog=epilog,
                                     formatter_class=argparse.RawDescriptionHelpFormatter)

    # Satellite and product settings
    parser.add_argument('--satellite', type=str, default='goes-16', help='Name of the satellite (e.g., goes16)')
    parser.add_argument('--product', type=str, default='ABI-L2-CMIP', help='Name of the satellite product')
    parser.add_argument('--var_name', type=str, default='CMI', help='Variable name to extract (e.g., CMI)')
    parser.add_argument('--channel', type=int, default=13, help='Channel to use (e.g., 13)')
    parser.add_argument('--domain', type=str, default='F', help='Domain to use (e.g., F or C)')
    parser.add_argument('--recent', type=int, default=3, help='Number of recent files to download')

    # Geographic bounding box
    parser.add_argument('--lat_min', type=float, default=-56, help='Minimum latitude of the bounding box')
    parser.add_argument('--lat_max', type=float, default=35, help='Maximum latitude of the bounding box')
    parser.add_argument('--lon_min', type=float, default=-116, help='Minimum longitude of the bounding box')
    parser.add_argument('--lon_max', type=float, default=-25, help='Maximum longitude of the bounding box')
    parser.add_argument('--resolution', type=float, default=0.045, help='Resolution of the output file')
    parser.add_argument('--output_path', type=str, default='output/', help='Path for saving output files')

    # Other settings
    # type=bool would turn any non-empty string (including "False") into True
    parser.add_argument('--parallel', type=_str_to_bool, default=True, help='Use parallel processing')
    parser.add_argument('--processes', type=int, default=4, help='Number of processes for parallel execution')
    parser.add_argument('--max_attempts', type=int, default=3, help='Number of attempts to download a file')

    # Parse arguments
    args = parser.parse_args()

    if len(sys.argv) == 1:
        parser.print_help(sys.stderr)
        sys.exit(1)

    # Set global variables
    output_path = args.output_path
    product = args.product
    domain = args.domain
    channel = str(args.channel).zfill(2)
    var_name = args.var_name
    lat_min = args.lat_min
    lat_max = args.lat_max
    lon_min = args.lon_min
    lon_max = args.lon_max
    resolution = args.resolution
    max_attempts = args.max_attempts
    parallel = args.parallel
    recent = args.recent

    # Accept both "goes16" (as shown in the help text) and "goes-16"; the
    # bucket name always uses the hyphenated form
    satellite = args.satellite.strip().lower()
    if satellite.startswith("goes") and not satellite.startswith("goes-"):
        satellite = "goes-" + satellite[len("goes"):]

    # Set bucket name and pattern
    bucket_name = "gcp-public-data-" + satellite
    pattern = "OR_" + product + domain + "-M6C" + channel + "_G" + satellite[-2:]
    min_files = recent

    # Honour --output_path; keep a trailing slash so that plain string
    # concatenation with a file name still yields a path inside the directory
    output = output_path or './'
    if not output.endswith('/'):
        output += '/'
    pathlib.Path(output).mkdir(parents=True, exist_ok=True)

    # Create connection
    storage_client = storage.Client.create_anonymous_client()

    # Check if the bucket exists
    try:
        storage_client.get_bucket(bucket_name)
    except Exception:
        print(f"Bucket {bucket_name} not found. Exiting...")
        sys.exit(1)

    # Search for recent files
    recent_files = get_recent_files(storage_client, bucket_name, product + domain, pattern, min_files)

    # Check if any files were found
    if not recent_files:
        print(f"No files found with the pattern {pattern}. Exiting...")
        sys.exit(1)

    bar_options = dict(
        total=len(recent_files), ncols=100, position=0, leave=True,
        bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} [Elapsed:{elapsed} Remaining:<{remaining}]',
    )

    # --processes sets the worker count; --parallel False forces serial downloads
    workers = max(args.processes, 1) if parallel else 1

    # A uniquely named temporary directory avoids deleting a pre-existing
    # "tmp/" folder and is cleaned up even if a step below fails
    with tempfile.TemporaryDirectory(prefix='goesgcp_tmp_', dir='.') as tmp_dir:
        local_paths = [f'{tmp_dir}/{blob_name.split("/")[-1]}' for blob_name in recent_files]

        # Download all files to the temporary directory
        print('Downloading files...')
        with tqdm.tqdm(**bar_options) as loading_bar, \
                ThreadPoolExecutor(max_workers=workers) as executor:
            pending = [
                executor.submit(_download_with_retries, storage_client, bucket_name,
                                blob_name, local_path, max_attempts)
                for blob_name, local_path in zip(recent_files, local_paths)
            ]
            for future in pending:
                future.result()  # re-raises a download error, if any
                loading_bar.update(1)

        # Crop and reproject all files in serial mode, with a fresh bar
        print('Cropping and reprojecting files...')
        with tqdm.tqdm(**bar_options) as loading_bar:
            for local_path in local_paths:
                crop_reproject(local_path, output)
                loading_bar.update(1)


if __name__ == '__main__':
    main()
|
|
@@ -0,0 +1,8 @@
|
|
|
1
|
+
goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
|
|
2
|
+
goesgcp/main.py,sha256=t_qeof0nwuaWdLW5kbk5TNOk3n6QNuDoOJLHSC6neng,8738
|
|
3
|
+
goesgcp-1.0.1.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
4
|
+
goesgcp-1.0.1.dist-info/METADATA,sha256=XzgKLPCfdQ-ZzCmyZXTYFKvh5oHD-ochZN9QQd0kAmM,3219
|
|
5
|
+
goesgcp-1.0.1.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
6
|
+
goesgcp-1.0.1.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
7
|
+
goesgcp-1.0.1.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
|
|
8
|
+
goesgcp-1.0.1.dist-info/RECORD,,
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
goesgcp
|
goesgcp-1.0.0.dist-info/RECORD
DELETED
|
@@ -1,6 +0,0 @@
|
|
|
1
|
-
goesgcp-1.0.0.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
|
|
2
|
-
goesgcp-1.0.0.dist-info/METADATA,sha256=Q2Un1vXOc29k4JDPf9mUQVgIn8ErQGm86TfFHPZojkM,3219
|
|
3
|
-
goesgcp-1.0.0.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
|
|
4
|
-
goesgcp-1.0.0.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
|
|
5
|
-
goesgcp-1.0.0.dist-info/top_level.txt,sha256=AbpHGcgLb-kRsJGnwFEktk7uzpZOCcBY74-YBdrKVGs,1
|
|
6
|
-
goesgcp-1.0.0.dist-info/RECORD,,
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
|
|
File without changes
|
|
File without changes
|
|
File without changes
|