goesgcp 1.0.5__tar.gz → 1.0.6__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.5
3
+ Version: 1.0.6
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -4,6 +4,7 @@ import xarray as xr
4
4
  import argparse
5
5
  import sys
6
6
  import tqdm
7
+ from concurrent.futures import ThreadPoolExecutor
7
8
  from multiprocessing import Pool
8
9
  from google.cloud import storage
9
10
  from datetime import datetime, timedelta, timezone
@@ -65,10 +66,13 @@ def get_recent_files(connection, bucket_name, base_prefix, pattern, min_files):
65
66
  return [file[0] for file in files[:min_files]]
66
67
 
67
68
 
68
- def crop_reproject(file, output):
69
+ def crop_reproject(args):
69
70
  """
70
71
  Crops and reprojects a GOES-16 file to EPSG:4326.
71
72
  """
73
+
74
+ file, output = args
75
+
72
76
  # Open the file
73
77
  ds = xr.open_dataset(file, engine='netcdf4')
74
78
 
@@ -162,13 +166,6 @@ def download_file(args):
162
166
  # Download the file
163
167
  blob.download_to_filename(local_path, timeout=120)
164
168
 
165
- # Crop and reproject the file
166
- crop_reproject(local_path, output_path)
167
-
168
- # Remove the file
169
- pathlib.Path(local_path).unlink()
170
-
171
-
172
169
 
173
170
  def main():
174
171
 
@@ -262,18 +259,32 @@ def main():
262
259
  # Create a temporary directory
263
260
  pathlib.Path('tmp/').mkdir(parents=True, exist_ok=True)
264
261
 
265
- print(f"Downloading and processing {len(recent_files)} files...")
266
-
267
- # Process files in parallel
262
+ # Download files
263
+ print(f"Downloading {len(recent_files)} files...")
268
264
  loading_bar = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
269
265
  bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
270
266
  [Elapsed:{elapsed} Remaining:<{remaining}]')
271
267
 
272
268
  # Download all files to a temporary directory
273
- with Pool(processes=args.processes) as pool:
274
- for _ in pool.imap_unordered(download_file, [(bucket_name,
275
- file, f'tmp/{file.split("/")[-1]}') for file in recent_files]):
269
+ with ThreadPoolExecutor(max_workers=args.processes) as executor:
270
+ for file in recent_files:
271
+ local_path = f"tmp/{file.split('/')[-1]}"
272
+ executor.submit(download_file, (bucket_name, file, local_path))
276
273
  loading_bar.update(1)
274
+ loading_bar.close()
275
+
276
+ # Process files
277
+ print(f"\nProcessing {len(recent_files)} files...")
278
+ load_bar2 = tqdm.tqdm(total=len(recent_files), ncols=100, position=0, leave=True,
279
+ bar_format='{l_bar}{bar}| {n_fmt}/{total_fmt} + \
280
+ [Elapsed:{elapsed} Remaining:<{remaining}]')
281
+
282
+
283
+ # Process files in parallel
284
+ with Pool(processes=args.processes) as pool:
285
+ for _ in pool.imap_unordered(crop_reproject, [(f"tmp/{file.split('/')[-1]}", output_path) for file in recent_files]):
286
+ load_bar2.update(1)
287
+ load_bar2.close()
277
288
 
278
289
  # Remove temporary directory
279
290
  shutil.rmtree('tmp/')
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: goesgcp
3
- Version: 1.0.5
3
+ Version: 1.0.6
4
4
  Summary: A package to download and process GOES-16/17 data
5
5
  Home-page: https://github.com/helvecioneto/goesgcp
6
6
  Author: Helvecio B. L. Neto
@@ -13,7 +13,7 @@ with open('requirements.txt') as f:
13
13
 
14
14
  setup(
15
15
  name="goesgcp",
16
- version='1.0.5',
16
+ version='1.0.6',
17
17
  author="Helvecio B. L. Neto",
18
18
  author_email="helvecioblneto@gmail.com",
19
19
  description="A package to download and process GOES-16/17 data",
File without changes
File without changes
File without changes
File without changes