goesgcp 1.0.5__py3-none-any.whl → 1.0.7__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
goesgcp/main.py CHANGED
@@ -4,6 +4,8 @@ import xarray as xr
4
4
  import argparse
5
5
  import sys
6
6
  import tqdm
7
+ from distutils.util import strtobool
8
+ from concurrent.futures import ThreadPoolExecutor
7
9
  from multiprocessing import Pool
8
10
  from google.cloud import storage
9
11
  from datetime import datetime, timedelta, timezone
@@ -65,10 +67,13 @@ def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files):
65
67
  return [file[0] for file in files[:min_files]]
66
68
 
67
69
 
68
- def crop_reproject(file, output):
70
+ def crop_reproject(args):
69
71
  """
70
72
  Crops and reprojects a GOES-16 file to EPSG:4326.
71
73
  """
74
+
75
+ file, output = args
76
+
72
77
  # Open the file
73
78
  ds = xr.open_dataset(file, engine='netcdf4')
74
79
 
@@ -162,13 +167,6 @@ def download_file(args):
162
167
  # Download the file
163
168
  blob.download_to_filename(local_path, timeout=120)
164
169
 
165
- # Crop and reproject the file
166
- crop_reproject(local_path, output_path)
167
-
168
- # Remove the file
169
- pathlib.Path(local_path).unlink()
170
-
171
-
172
170
 
173
171
  def main():
174
172
 
@@ -207,16 +205,16 @@ def main():
207
205
  parser.add_argument('--output', type=str, default='output/', help='Path for saving output files')
208
206
 
209
207
  # Other settings
210
- parser.add_argument('--parallel', type=bool, default=True, help='Use parallel processing')
208
+ parser.add_argument('--parallel', type=lambda x: bool(strtobool(x)), default=True, help='Use parallel processing')
211
209
  parser.add_argument('--processes', type=int, default=4, help='Number of processes for parallel execution')
212
210
  parser.add_argument('--max_attempts', type=int, default=3, help='Number of attempts to download a file')
213
211
 
214
212
  # Parse arguments
215
213
  args = parser.parse_args()
216
214
 
217
- if len(sys.argv) == 1:
218
- parser.print_help(sys.stderr)
219
- sys.exit(1)
215
+ # if len(sys.argv) == 1:
216
+ # parser.print_help(sys.stderr)
217
+ # sys.exit(1)
220
218
 
221
219
  # Set global variables
222
220
  output_path = args.output
@@ -262,18 +260,42 @@ def main():
262
260
  # Create a temporary directory
263
261
  pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
264
262
 
265
- print(f"Downloading and processing {len(recent_files)} files...")
266
-
267
- # Process files in parallel
263
+ # Download files
264
+ print(f"Downloading {len(recent_files)} files...")
268
265
  loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
269
266
  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
270
267
  [Elapsed:{elapsed} Remaining:<{remaining}]')
271
268
 
272
- # Download all files to a temporary directory
273
- with Pool(processes=args.processes) as pool:
274
- for _ in pool.imap_unordered(download_file, [(bucket_name,
275
- file, f'tmp/{file.split("/")[-1]}') for file in recent_files]):
269
+ if parallel:
270
+ # Download all files to a temporary directory
271
+ with ThreadPoolExecutor(max_workers=args.processes) as executor:
272
+ for file in recent_files:
273
+ local_path = f"tmp/{file.split('/')[-1]}"
274
+ executor.submit(download_file, (bucket_name, file, local_path))
275
+ loading_bar.update(1)
276
+ loading_bar.close()
277
+
278
+ # Process files
279
+ print(f"\nProcessing {len(recent_files)} files...")
280
+ load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
281
+ bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
282
+ [Elapsed:{elapsed} Remaining:<{remaining}]')
283
+
284
+
285
+ # Process files in parallel
286
+ with Pool(processes=args.processes) as pool:
287
+ for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
288
+ load_bar2.update(1)
289
+ load_bar2.close()
290
+ else:
291
+ for file in recent_files:
292
+ local_path = f"tmp/{file.split('/')[-1]}"
293
+ download_file((bucket_name, file, local_path))
294
+ crop_reproject((local_path, output_path))
276
295
  loading_bar.update(1)
296
+ loading_bar.close()
297
+
298
+
277
299
 
278
300
  # Remove temporary directory
279
301
  shutil.rmtree('tmp/')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.5
3
+ Version: 1.0.7
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -0,0 +1,8 @@
1
+ goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
+ goesgcp/main.py,sha256=zCrAfAjc9Vzs9-7klATCCp6hhe1w4tsu_FiMBuBAafc,10893
3
+ goesgcp-1.0.7.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
+ goesgcp-1.0.7.dist-info/METADATA,sha256=eXzz34_yexZ4zx4EmaDRrpwiHFoBtJBzJ_hnVLQD-4Q,2993
5
+ goesgcp-1.0.7.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
+ goesgcp-1.0.7.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
+ goesgcp-1.0.7.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
+ goesgcp-1.0.7.dist-info/RECORD,,
@@ -1,8 +0,0 @@
1
- goesgcp/__init__.py,sha256=MigXIT7A1M9YZuH2MyjKReSziFwzbZX2boVYsLosR6s,22
2
- goesgcp/main.py,sha256=mSHN0R0-xIJZkKo0U82dUWgFfX56KNqgiNv_YefdwVs,9967
3
- goesgcp-1.0.5.dist-info/LICENSE,sha256=AHeZifD4UyBZI61Ug5lETXgX3Anp_XfAvFXQqrW9AnU,1078
4
- goesgcp-1.0.5.dist-info/METADATA,sha256=LL90blorVD0KmRNCVYnycaAcTgGoiPgyVtJVkAhTPwI,2993
5
- goesgcp-1.0.5.dist-info/WHEEL,sha256=PZUExdf71Ui_so67QXpySuHtCi3-J3wvF4ORK6k_S8U,91
6
- goesgcp-1.0.5.dist-info/entry_points.txt,sha256=6afMW51WnUR9VZ_xvDoiB8JQb2OFiLuzRtV6dPL__OQ,46
7
- goesgcp-1.0.5.dist-info/top_level.txt,sha256=C-C3vipI0AwEDW9nWFkJ6D0TkcKkIYlyyM15LMskUEc,8
8
- goesgcp-1.0.5.dist-info/RECORD,,