cloudnetpy 1.66.5__py3-none-any.whl → 1.66.7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloudnetpy/cli.py ADDED
@@ -0,0 +1,607 @@
+ import argparse
+ import gzip
+ import importlib
+ import logging
+ import os
+ import re
+ import shutil
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from dataclasses import dataclass
+ from pathlib import Path
+ from tempfile import TemporaryDirectory
+ from typing import TYPE_CHECKING, Final
+
+ import requests
+
+ from cloudnetpy import concat_lib, instruments
+ from cloudnetpy.categorize import generate_categorize
+ from cloudnetpy.exceptions import PlottingError
+ from cloudnetpy.plotting import generate_figure
+ from cloudnetpy.utils import md5sum
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+
+ cloudnet_api_url: Final = "https://cloudnet.fmi.fi/api/"
+
+
+ @dataclass
+ class Instrument:
+     id: str
+     pid: str
+     name: str
+
+
+ def run(args: argparse.Namespace, tmpdir: str):
+     cat_files = {}
+
+     # Instrument based products
+     if source_instruments := _get_source_instruments(args.products):
+         for product, possible_instruments in source_instruments.items():
+             meta = _fetch_raw_meta(possible_instruments, args)
+             instrument = _select_instrument(meta, product)
+             if not instrument:
+                 logging.info("No instrument found for %s", product)
+                 continue
+             meta = _filter_by_instrument(meta, instrument)
+             meta = _filter_by_suffix(meta, product)
+             if not meta:
+                 logging.info("No suitable data available for %s", product)
+                 continue
+             output_filepath = _process_instrument_product(
+                 product, meta, instrument, tmpdir, args
+             )
+             _plot(output_filepath, product, args)
+             cat_files[product] = output_filepath
+
+     prod_sources = _get_product_sources(args.products)
+
+     # Categorize based products
+     if "categorize" in args.products:
+         cat_filepath = _process_categorize(cat_files, args)
+         _plot(cat_filepath, "categorize", args)
+     else:
+         cat_filepath = None
+     cat_products = [p for p in prod_sources if "categorize" in prod_sources[p]]
+     for product in cat_products:
+         if cat_filepath is None:
+             cat_filepath = _fetch_product(args, "categorize")
+             if cat_filepath is None:
+                 logging.info("No categorize data available for %s", product)
+                 break
+         l2_filename = _process_cat_product(product, cat_filepath)
+         _plot(l2_filename, product, args)
+
+     # MWR-L1c based products
+     mwrpy_products = [p for p in prod_sources if "mwr-l1c" in prod_sources[p]]
+     for product in mwrpy_products:
+         if "mwr-l1c" in cat_files:
+             mwrpy_filepath = cat_files.get("mwr-l1c")
+         else:
+             mwrpy_filepath = _fetch_product(args, "mwr-l1c")
+             if mwrpy_filepath is None:
+                 logging.info("No MWR-L1c data available for %s", product)
+                 break
+         l2_filename = _process_mwrpy_product(product, mwrpy_filepath, args)
+         _plot(l2_filename, product, args)
+
+
+ def _process_categorize(input_files: dict, args: argparse.Namespace) -> str | None:
+     cat_filepath = _create_categorize_filepath(args)
+
+     input_files["model"] = _fetch_model(args)
+     if input_files["model"] is None:
+         logging.info("No model data available for this date.")
+         return None
+
+     for product in ("radar", "lidar", "disdrometer"):
+         if product not in input_files and (filepath := _fetch_product(args, product)):
+             input_files[product] = filepath
+
+     if mwr := _fetch_mwr(args):
+         input_files["mwr"] = mwr
+
+     try:
+         logging.info("Processing categorize...")
+         generate_categorize(input_files, cat_filepath)
+         logging.info("Processed categorize to %s", cat_filepath)
+     except NameError:
+         logging.info("No data available for this date.")
+         return None
+     return cat_filepath
+
+
+ def _fetch_mwr(args: argparse.Namespace) -> str | None:
+     mwr_sources = [
+         ("mwr-single", None),
+         ("mwr", None),
+         ("radar", "rpg-fmcw-35"),
+         ("radar", "rpg-fmcw-94"),
+     ]
+     for product, source in mwr_sources:
+         mwr = _fetch_product(args, product, source=source)
+         if mwr:
+             return mwr
+     return None
+
+
+ def _process_instrument_product(
+     product: str,
+     meta: list[dict],
+     instrument: Instrument,
+     tmpdir: str,
+     args: argparse.Namespace,
+ ) -> str | None:
+     output_filepath = _create_instrument_filepath(instrument, args)
+     site_meta = _read_site_meta(meta)
+     input_files: list[str] | str
+     input_files = _fetch_raw(meta, args)
+     if args.dl:
+         return None
+     input_folder = str(Path(input_files[0]).parent)
+     calibration = _get_calibration(instrument, args)
+     fun: Callable
+     match (product, instrument.id):
+         case ("radar", _id) if "mira" in _id:
+             fun = instruments.mira2nc
+         case ("radar", _id) if "rpg" in _id:
+             fun = instruments.rpg2nc
+             input_files = input_folder
+         case ("radar", _id) if "basta" in _id:
+             fun = instruments.basta2nc
+             _check_input(input_files)
+             input_files = input_files[0]
+         case ("radar", _id) if "copernicus" in _id:
+             fun = instruments.copernicus2nc
+         case ("radar", _id) if "galileo" in _id:
+             fun = instruments.galileo2nc
+         case ("disdrometer", _id) if "parsivel" in _id:
+             fun = instruments.parsivel2nc
+         case ("disdrometer", _id) if "thies" in _id:
+             fun = instruments.thies2nc
+             input_files = _concatenate_(input_files, tmpdir)
+         case ("lidar", _id) if "pollyxt" in _id:
+             fun = instruments.pollyxt2nc
+         case ("lidar", _id) if _id == "cl61d":
+             fun = instruments.ceilo2nc
+             variables = ["x_pol", "p_pol", "beta_att", "time", "tilt_angle"]
+             concat_file = str(Path(tmpdir) / "tmp.nc")
+             concat_lib.bundle_netcdf_files(
+                 input_files,
+                 args.date,
+                 concat_file,
+                 variables=variables,
+             )
+             input_files = concat_file
+             site_meta["model"] = instrument.id
+         case ("lidar", _id):
+             fun = instruments.ceilo2nc
+             input_files = _concatenate_(input_files, tmpdir)
+             site_meta["model"] = instrument.id
+             if factor := calibration.get("calibration_factor"):
+                 site_meta["calibration_factor"] = factor
+         case ("mwr", _id):
+             fun = instruments.hatpro2nc
+             input_files = input_folder
+         case ("mwr-l1c", _id):
+             fun = instruments.hatpro2l1c
+             coefficients = _fetch_coefficient_files(calibration, tmpdir)
+             site_meta = {**site_meta, **calibration}
+             site_meta["coefficientFiles"] = coefficients
+             input_files = input_folder
+         case ("mrr", _id):
+             fun = instruments.mrr2nc
+         case ("weather-station", _id):
+             fun = instruments.ws2nc
+     logging.info("Processing %s...", product)
+     fun(input_files, output_filepath, site_meta, date=args.date)
+     logging.info("Processed %s: %s", product, output_filepath)
+     return output_filepath
+
+
+ def _concatenate_(input_files: list[str], tmpdir: str) -> str:
+     if len(input_files) > 1:
+         concat_file = str(Path(tmpdir) / "tmp.nc")
+         try:
+             concat_lib.concatenate_files(input_files, concat_file)
+         except OSError:
+             concat_lib.concatenate_text_files(input_files, concat_file)
+         return concat_file
+     return input_files[0]
+
+
+ def _fetch_coefficient_files(calibration: dict, tmpdir: str) -> list:
+     if not (links := calibration.get("coefficientLinks")):
+         msg = "No calibration coefficients found"
+         raise ValueError(msg)
+     coefficient_paths = []
+     for filename in links:
+         res = requests.get(filename, timeout=60)
+         res.raise_for_status()
+         filepath = Path(tmpdir) / Path(filename).name
+         filepath.write_bytes(res.content)
+         coefficient_paths.append(str(filepath))
+     return coefficient_paths
+
+
+ def _get_calibration(instrument: Instrument, args) -> dict:
+     params = {
+         "date": args.date,
+         "instrumentPid": instrument.pid,
+     }
+     res = requests.get(
+         f"{cloudnet_api_url}calibration",
+         params=params,
+         timeout=60,
+     )
+     if res.status_code == 404:
+         return {}
+     return res.json().get("data", {})
+
+
+ def _create_instrument_filepath(
+     instrument: Instrument, args: argparse.Namespace
+ ) -> str:
+     folder = _create_output_folder("instrument", args)
+     pid = _shorten_pid(instrument.pid)
+     filename = f"{args.date.replace('-', '')}_{args.site}_{instrument.id}_{pid}.nc"
+     return str(folder / filename)
+
+
+ def _create_categorize_filepath(args: argparse.Namespace) -> str:
+     folder = _create_output_folder("geophysical", args)
+     filename = f"{args.date.replace('-', '')}_{args.site}_categorize.nc"
+     return str(folder / filename)
+
+
+ def _create_input_folder(end_point: str, args: argparse.Namespace) -> Path:
+     folder = args.input / args.site / args.date / end_point
+     folder.mkdir(parents=True, exist_ok=True)
+     return folder
+
+
+ def _create_output_folder(end_point: str, args: argparse.Namespace) -> Path:
+     folder = args.output / args.site / args.date / end_point
+     folder.mkdir(parents=True, exist_ok=True)
+     return folder
+
+
+ def _fetch_raw_meta(instruments: list[str], args: argparse.Namespace) -> list[dict]:
+     res = requests.get(
+         f"{cloudnet_api_url}raw-files/",
+         params={
+             "site": args.site,
+             "date": args.date,
+             "instrument": instruments,
+             "status": ["uploaded", "processed"],
+         },
+         timeout=60,
+     )
+     res.raise_for_status()
+     return res.json()
+
+
+ def _filter_by_instrument(meta: list[dict], instrument: Instrument) -> list[dict]:
+     return [m for m in meta if m["instrumentInfo"]["pid"] == instrument.pid]
+
+
+ def _filter_by_suffix(meta: list[dict], product: str) -> list[dict]:
+     if product == "radar":
+         meta = [m for m in meta if not m["filename"].lower().endswith(".lv0")]
+     elif product == "mwr":
+         meta = [
+             m for m in meta if re.search(r"\.(lwp|iwv)", m["filename"], re.IGNORECASE)
+         ]
+     elif product == "mwr-l1c":
+         meta = [m for m in meta if not m["filename"].lower().endswith(".nc")]
+     return meta
+
+
+ def _get_source_instruments(products: list[str]) -> dict[str, list[str]]:
+     source_instruments = {}
+     for product in products:
+         prod, model = _parse_instrument(product)
+         res = requests.get(f"{cloudnet_api_url}products/{prod}", timeout=60)
+         res.raise_for_status()
+         if sources := res.json().get("sourceInstruments", []):
+             source_instruments[prod] = [i["id"] for i in sources]
+             if match := [i for i in source_instruments[prod] if i == model]:
+                 source_instruments[prod] = match
+     return source_instruments
+
+
+ def _get_product_sources(products: list[str]) -> dict[str, list[str]]:
+     source_products = {}
+     for product in products:
+         prod, _ = _parse_instrument(product)
+         res = requests.get(f"{cloudnet_api_url}products/{prod}", timeout=60)
+         res.raise_for_status()
+         if sources := res.json().get("sourceProducts", []):
+             source_products[prod] = [i["id"] for i in sources]
+     return source_products
+
+
+ def _parse_instrument(s: str) -> tuple[str, str | None]:
+     if "[" in s and s.endswith("]"):
+         name = s[: s.index("[")]
+         value = s[s.index("[") + 1 : -1]
+     else:
+         name = s
+         value = None
+     return name, value
+
+
+ def _select_instrument(meta: list[dict], product: str) -> Instrument | None:
+     instruments = _get_unique_instruments(meta)
+     if len(instruments) == 0:
+         logging.info("No instruments found")
+         return None
+     if len(instruments) > 1:
+         logging.info("Multiple instruments found for %s", product)
+         logging.info("Please specify which one to use")
+         for i, instrument in enumerate(instruments):
+             logging.info("%d: %s", i + 1, instrument.name)
+         ind = int(input("Select: ")) - 1
+         selected_instrument = instruments[ind]
+     else:
+         selected_instrument = instruments[0]
+         logging.info("Single instrument found: %s", selected_instrument.name)
+     return selected_instrument
+
+
+ def _get_unique_instruments(meta: list[dict]) -> list[Instrument]:
+     unique_pids = {m["instrumentInfo"]["pid"] for m in meta}
+     unique_instruments = []
+     for pid in unique_pids:
+         for m in meta:
+             if m["instrumentInfo"]["pid"] == pid:
+                 i = m["instrumentInfo"]
+                 unique_instruments.append(
+                     Instrument(i["instrumentId"], i["pid"], i["name"])
+                 )
+                 break
+     return sorted(unique_instruments, key=lambda x: x.name)
+
+
+ def _fetch_product(
+     args: argparse.Namespace, product: str, source: str | None = None
+ ) -> str | None:
+     payload = {
+         "date": args.date,
+         "site": args.site,
+         "product": product,
+     }
+     url = f"{cloudnet_api_url}files"
+     res = requests.get(url, payload, timeout=60)
+     res.raise_for_status()
+     meta = res.json()
+     if source:
+         meta = [
+             m for m in meta if "instrument" in m and m["instrument"]["id"] == source
+         ]
+     if not meta:
+         logging.info("No data available for %s", product)
+         return None
+     if len(meta) > 1:
+         logging.info(
+             "Multiple files for %s ... taking the first but some logic needed", product
+         )
+     meta = meta[0]
+     suffix = "geophysical" if "geophysical" in meta["product"]["type"] else "instrument"
+     folder = _create_output_folder(suffix, args)
+     return _download_product_file(meta, folder)
+
+
+ def _fetch_model(args: argparse.Namespace) -> str | None:
+     payload = {
+         "date": args.date,
+         "site": args.site,
+     }
+     url = f"{cloudnet_api_url}model-files"
+     res = requests.get(url, payload, timeout=60)
+     res.raise_for_status()
+     meta = res.json()
+     if not meta:
+         logging.info("No model data available for this date")
+         return None
+     meta = meta[0]
+     folder = _create_output_folder("instrument", args)
+     return _download_product_file(meta, folder)
+
+
+ def _fetch_raw(metadata: list[dict], args: argparse.Namespace) -> list[str]:
+     pid = _shorten_pid(metadata[0]["instrumentInfo"]["pid"])
+     instrument = f"{metadata[0]['instrumentInfo']['instrumentId']}_{pid}"
+     folder = _create_input_folder(instrument, args)
+     filepaths = []
+     with ThreadPoolExecutor() as executor:
+         futures = [
+             executor.submit(_download_raw_file, meta, folder) for meta in metadata
+         ]
+         for future in as_completed(futures):
+             filepaths.append(future.result())
+     return filepaths
+
+
+ def _download_raw_file(meta: dict, folder: Path) -> str:
+     filepath = folder / meta["filename"]
+     possible_filepaths = [filepath]
+     if filepath.suffix == ".gz":
+         possible_filepaths.append(filepath.with_suffix(""))
+     for path in possible_filepaths:
+         if path.exists() and md5sum(path) == meta["checksum"]:
+             logging.info("Existing file found: %s", path)
+             return str(path)
+     logging.info("Downloading file: %s", filepath)
+     res = requests.get(meta["downloadUrl"], timeout=60)
+     res.raise_for_status()
+     filepath.write_bytes(res.content)
+     if filepath.suffix == ".gz":
+         filepath = _unzip_gz_file(filepath)
+     return str(filepath)
+
+
+ def _download_product_file(meta: dict, folder: Path) -> str:
+     filepath = folder / meta["filename"]
+     if filepath.exists():
+         logging.info("Existing file found: %s", filepath)
+         return str(filepath)
+     logging.info("Downloading file: %s", filepath)
+     res = requests.get(meta["downloadUrl"], timeout=60)
+     res.raise_for_status()
+     filepath.write_bytes(res.content)
+     return str(filepath)
+
+
+ def _unzip_gz_file(path_in: Path) -> Path:
+     if path_in.suffix != ".gz":
+         return path_in
+     path_out = path_in.with_suffix("")
+     logging.debug("Decompressing %s to %s", path_in, path_out)
+     with gzip.open(path_in, "rb") as file_in, open(path_out, "wb") as file_out:
+         shutil.copyfileobj(file_in, file_out)
+     path_in.unlink()
+     return path_out
+
+
+ def _read_site_meta(meta: list[dict]) -> dict:
+     return {
+         "latitude": meta[0]["site"]["latitude"],
+         "longitude": meta[0]["site"]["longitude"],
+         "altitude": meta[0]["site"]["altitude"],
+         "name": meta[0]["site"]["humanReadableName"],
+     }
+
+
+ def _shorten_pid(pid: str) -> str:
+     return pid.split(".")[-1][:8]
+
+
+ def _check_input(files: list) -> None:
+     if len(files) > 1:
+         msg = "Multiple input files found"
+         raise ValueError(msg)
+
+
+ def _plot(filepath: os.PathLike | str | None, product: str, args: argparse.Namespace):
+     if filepath is None or (not args.plot and not args.show):
+         return
+     res = requests.get(f"{cloudnet_api_url}products/variables", timeout=60)
+     res.raise_for_status()
+     variables = next(var["variables"] for var in res.json() if var["id"] == product)
+     variables = [var["id"].split("-")[-1] for var in variables]
+     image_name = str(filepath).replace(".nc", ".png") if args.plot else None
+     try:
+         generate_figure(
+             filepath,
+             variables,
+             show=args.show,
+             output_filename=image_name,
+         )
+     except PlottingError as e:
+         logging.info("Failed to plot %s: %s", product, e)
+     if args.plot:
+         logging.info("Plotted %s: %s", product, image_name)
+
+
+ def _process_cat_product(product: str, categorize_file: str) -> str:
+     output_file = categorize_file.replace("categorize", product)
+     module = importlib.import_module("cloudnetpy.products")
+     getattr(module, f"generate_{product}")(categorize_file, output_file)
+     logging.info("Processed %s: %s", product, output_file)
+     return output_file
+
+
+ def _process_mwrpy_product(
+     product: str, mwr_l1c_file: str, args: argparse.Namespace
+ ) -> str:
+     filename = f"{args.date}_{args.site}_{product}.nc"
+     output_file = _create_output_folder("geophysical", args) / filename
+     module = importlib.import_module("cloudnetpy.products")
+     getattr(module, f"generate_{product.replace('-', '_')}")(mwr_l1c_file, output_file)
+     logging.info("Processed %s: %s", product, output_file)
+     return str(output_file)
+
+
+ def _fetch_cloudnet_sites() -> list[str]:
+     res = requests.get(f"{cloudnet_api_url}sites", timeout=60)
+     res.raise_for_status()
+     return [site["id"] for site in res.json()]
+
+
+ def _parse_products(product_argument: str) -> list[str]:
+     products = product_argument.split(",")
+     res = requests.get(f"{cloudnet_api_url}products", timeout=60)
+     res.raise_for_status()
+     valid_options = [p["id"] for p in res.json()]
+     valid_products = []
+     for product in products:
+         prod, _ = _parse_instrument(product)
+         if prod in valid_options:
+             valid_products.append(prod)
+     return valid_products
+
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description="Command line interface for running CloudnetPy."
+     )
+     parser.add_argument(
+         "-s",
+         "--site",
+         type=str,
+         help="Site",
+         required=True,
+         choices=_fetch_cloudnet_sites(),
+         metavar="SITE",
+     )
+     parser.add_argument(
+         "-d", "--date", type=str, help="Date in YYYY-MM-DD", required=True
+     )
+     parser.add_argument(
+         "-p",
+         "--products",
+         type=_parse_products,
+         help=(
+             "Products to process, e.g. 'radar' or 'classification'. If the site "
+             "has many instruments, you can specify the instrument in brackets, "
+             "e.g. radar[mira-35]."
+         ),
+         required=True,
+     )
+     parser.add_argument("--input", type=Path, help="Input path", default="input/")
+     parser.add_argument("--output", type=Path, help="Output path", default="output/")
+     parser.add_argument(
+         "--plot",
+         help="Plot the processed data",
+         default=False,
+         action=argparse.BooleanOptionalAction,
+     )
+     parser.add_argument(
+         "--show",
+         help="Show plotted image",
+         default=False,
+         action=argparse.BooleanOptionalAction,
+     )
+     parser.add_argument(
+         "--dl",
+         help="Download raw data only",
+         default=False,
+         action=argparse.BooleanOptionalAction,
+     )
+     args = parser.parse_args()
+
+     logger = logging.getLogger()
+     logger.setLevel(logging.INFO)
+     handler = logging.StreamHandler()
+     formatter = logging.Formatter("%(levelname)s: %(message)s")
+     handler.setFormatter(formatter)
+     logger.handlers = [handler]
+
+     with TemporaryDirectory() as tmpdir:
+         run(args, tmpdir)
+
+
+ if __name__ == "__main__":
+     main()
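A minimal usage sketch of the new command line module. The option names come from the argparse definition above; the site, date, and product values are illustrative only, and the run requires network access to the Cloudnet API:

    # Hypothetical invocation of cloudnetpy.cli through its main() entry point,
    # which parses sys.argv like the installed console script would.
    import sys
    from cloudnetpy import cli

    sys.argv = [
        "cloudnetpy",
        "--site", "hyytiala",          # any site id returned by the Cloudnet API
        "--date", "2024-01-01",
        "--products", "radar,classification",
        "--plot",
    ]
    cli.main()  # downloads raw data, processes the products, writes PNGs next to the .nc files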
cloudnetpy/concat_lib.py CHANGED
@@ -1,8 +1,12 @@
  """Module for concatenating netCDF files."""

+ import shutil
+ from os import PathLike
+
  import netCDF4
  import numpy as np

+ from cloudnetpy import utils
  from cloudnetpy.exceptions import InconsistentDataError


@@ -264,3 +268,63 @@ def _update_fields(
          nc_old.variables[field][idx, :] = nc_new.variables[field][valid_ind, :]
      elif len(dimensions) == 2 and concat_ind == 1:
          nc_old.variables[field][:, idx] = nc_new.variables[field][:, valid_ind]
+
+
+ def concatenate_text_files(filenames: list, output_filename: str | PathLike) -> None:
+     """Concatenates text files."""
+     with open(output_filename, "wb") as target:
+         for filename in filenames:
+             with open(filename, "rb") as source:
+                 shutil.copyfileobj(source, target)
+
+
+ def bundle_netcdf_files(
+     files: list,
+     date: str,
+     output_file: str,
+     concat_dimensions: tuple[str, ...] = ("time", "profile"),
+     variables: list | None = None,
+ ) -> list:
+     """Concatenates several netCDF files into a daily file with
+     some extra data manipulation.
+     """
+     with netCDF4.Dataset(files[0]) as nc:
+         concat_dimension = None
+         for key in concat_dimensions:
+             if key in nc.dimensions:
+                 concat_dimension = key
+                 break
+     if concat_dimension is None:
+         msg = f"Dimensions {concat_dimensions} not found in the files."
+         raise KeyError(msg)
+     if len(files) == 1:
+         shutil.copy(files[0], output_file)
+         return files
+     valid_files = []
+     for file in files:
+         try:
+             with netCDF4.Dataset(file) as nc:
+                 time = nc.variables["time"]
+                 time_array = time[:]
+                 time_units = time.units
+         except OSError:
+             continue
+         epoch = utils.get_epoch(time_units)
+         for timestamp in time_array:
+             if utils.seconds2date(timestamp, epoch)[:3] == date.split("-"):
+                 valid_files.append(file)
+                 break
+     concatenate_files(
+         valid_files,
+         output_file,
+         concat_dimension=concat_dimension,
+         variables=variables,
+         ignore=[
+             "minimum",
+             "maximum",
+             "number_integrated_samples",
+             "Min_LWP",
+             "Max_LWP",
+         ],
+     )
+     return valid_files
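A short sketch of the new `bundle_netcdf_files` helper, assuming a set of single-day netCDF files that share a `time` dimension (the file names are hypothetical):

    from cloudnetpy import concat_lib

    files = ["chm15k_00.nc", "chm15k_06.nc", "chm15k_12.nc"]  # illustrative inputs
    used = concat_lib.bundle_netcdf_files(
        files,
        date="2024-01-01",              # only files with timestamps on this date are kept
        output_file="chm15k_daily.nc",
    )
    print(f"{len(used)} of {len(files)} files contained data for the date")

`concatenate_text_files` is the fallback the CLI uses when the inputs are not netCDF at all: it simply streams the raw bytes of each file into `output_filename` in order.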
cloudnetpy/plotting/plotting.py CHANGED
@@ -43,6 +43,8 @@ class PlotParameters:
              instruments and model).
          footer_text: The text to display in the footer of the plot.
          plot_meta: Additional metadata for the plot.
+         raise_on_empty: Whether to raise an error if no data is found for a
+             plotted variable.
      """

      dpi: float = 120
@@ -55,6 +57,7 @@ class PlotParameters:
      show_sources: bool = False
      footer_text: str | None = None
      plot_meta: PlotMeta | None = None
+     raise_on_empty: bool = False


  class Dimensions:
@@ -492,7 +495,7 @@ class Plot2D(Plot):
              smoothed_data = uniform_filter(self._data[valid_time_ind, :], sigma_units)
              self._data[valid_time_ind, :] = smoothed_data

-         if self._data.mask.all():
+         if self._data.mask.all() and figure_data.options.raise_on_empty:
              msg = "All data is masked"
              raise PlottingError(msg)

@@ -603,7 +606,7 @@ class Plot1D(Plot):
              raise PlottingError(msg)
          self._data = self._data[:, freq_ind]
          self._data[np.isnan(self._data)] = ma.masked
-         if self._data.mask.all():
+         if self._data.mask.all() and figure_data.options.raise_on_empty:
              msg = "All data is masked"
              raise PlottingError(msg)
          self._data_orig = self._data_orig[:, freq_ind]
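The effect of the new flag: a fully masked variable no longer aborts the whole figure by default. A sketch, assuming `generate_figure` accepts a `PlotParameters` instance through an `options` argument as in other 1.66.x plotting code (the file name is illustrative):

    from cloudnetpy.plotting import generate_figure
    from cloudnetpy.plotting.plotting import PlotParameters

    # Opt back in to the old strict behaviour: raise PlottingError when a
    # requested variable is completely masked.
    options = PlotParameters(raise_on_empty=True)
    generate_figure(
        "20240101_site_categorize.nc",
        ["Z"],
        show=False,
        output_filename="20240101_site_categorize.png",
        options=options,
    )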
cloudnetpy/utils.py CHANGED
@@ -1,6 +1,8 @@
  """This module contains general helper functions."""

+ import base64
  import datetime
+ import hashlib
  import logging
  import os
  import re
@@ -140,7 +142,7 @@ def rebin_2d(
      x_in: np.ndarray,
      array: ma.MaskedArray,
      x_new: np.ndarray,
-     statistic: str = "mean",
+     statistic: Literal["mean", "std"] = "mean",
      n_min: int = 1,
      *,
      mask_zeros: bool = True,
@@ -178,14 +180,12 @@ def rebin_2d(
      masked_result = ma.array(result)

      # Fill bins with not enough profiles
-     empty_indices = []
-     for ind in range(len(edges) - 1):
-         is_data = np.where((x_in > edges[ind]) & (x_in <= edges[ind + 1]))[0]
-         if len(is_data) < n_min:
-             masked_result[ind, :] = ma.masked
-             empty_indices.append(ind)
+     x_hist, _ = np.histogram(x_in, bins=edges)
+     empty_mask = x_hist < n_min
+     masked_result[empty_mask, :] = ma.masked
+     empty_indices = list(np.nonzero(empty_mask)[0])
      if len(empty_indices) > 0:
-         logging.debug("No radar data in %s bins", len(empty_indices))
+         logging.debug("No data in %s bins", len(empty_indices))

      return masked_result, empty_indices
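The replaced loop and the new histogram version compute the same per-bin counts in one vectorised pass; a standalone sketch of the idea:

    import numpy as np
    from numpy import ma

    x_in = np.array([0.1, 0.2, 1.5])
    edges = np.array([0.0, 1.0, 2.0, 3.0])       # three bins
    result = ma.zeros((len(edges) - 1, 2))
    n_min = 2

    x_hist, _ = np.histogram(x_in, bins=edges)   # counts per bin: [2, 1, 0]
    empty_mask = x_hist < n_min                  # [False, True, True]
    result[empty_mask, :] = ma.masked            # mask under-populated bins
    empty_indices = list(np.nonzero(empty_mask)[0])  # [1, 2]

One boundary subtlety: np.histogram bins are closed on the left, while the old comparison `(x_in > edges[ind]) & (x_in <= edges[ind + 1])` was closed on the right, so values lying exactly on a bin edge can now be counted into the neighbouring bin.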
 
@@ -1028,3 +1028,23 @@ def remove_masked_blocks(array: ma.MaskedArray, limit: int = 50) -> np.ndarray:
      mask = np.bincount(labeled_array) < limit
      mask[0] = True
      return mask[labeled_array]
+
+
+ def sha256sum(filename: str | os.PathLike) -> str:
+     """Calculates the SHA-256 hash of a file."""
+     return _calc_hash_sum(filename, "sha256", is_base64=False)
+
+
+ def md5sum(filename: str | os.PathLike, *, is_base64: bool = False) -> str:
+     """Calculates the MD5 hash of a file."""
+     return _calc_hash_sum(filename, "md5", is_base64=is_base64)
+
+
+ def _calc_hash_sum(filename, method, *, is_base64: bool) -> str:
+     hash_sum = getattr(hashlib, method)()
+     with open(filename, "rb") as f:
+         for byte_block in iter(lambda: f.read(4096), b""):
+             hash_sum.update(byte_block)
+     if is_base64:
+         return base64.encodebytes(hash_sum.digest()).decode("utf-8").strip()
+     return hash_sum.hexdigest()
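Usage sketch of the new hashing helpers (the path is illustrative; the base64 form is the encoding object stores such as S3 expect in a `Content-MD5` header, which is presumably why it is offered):

    from cloudnetpy import utils

    utils.sha256sum("20240101_site_radar.nc")               # hex digest
    utils.md5sum("20240101_site_radar.nc")                  # hex digest
    utils.md5sum("20240101_site_radar.nc", is_base64=True)  # base64-encoded digest

The CLI above uses `md5sum` to skip re-downloading raw files whose checksum already matches the API metadata.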
cloudnetpy/version.py CHANGED
@@ -1,4 +1,4 @@
  MAJOR = 1
  MINOR = 66
- PATCH = 5
+ PATCH = 7
  __version__ = f"{MAJOR}.{MINOR}.{PATCH}"
cloudnetpy-1.66.7.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloudnetpy
- Version: 1.66.5
+ Version: 1.66.7
  Summary: Python package for Cloudnet processing
  Author: Simo Tukiainen
  License: MIT License
cloudnetpy-1.66.7.dist-info/RECORD CHANGED
@@ -1,14 +1,15 @@
  cloudnetpy/__init__.py,sha256=X_FqY-4yg5GUj5Edo14SToLEos6JIsC3fN-v1FUgQoA,43
+ cloudnetpy/cli.py,sha256=yucdLuNPkuSO9T8Kwi5KYY0M0e8cgzxFRTnyL5nbCbs,20779
  cloudnetpy/cloudnetarray.py,sha256=Ol1ha4RPAmFZANL__U5CaMKX4oYMXYR6OnjoCZ9w3eo,7077
- cloudnetpy/concat_lib.py,sha256=8Ek059RMLAfbbXCkX90cgnhw_8ZpcDrxw1yPvwtuitU,9846
+ cloudnetpy/concat_lib.py,sha256=QxGWGsA_6el3Ma5-1y2MtrgFCC9Ohpe3yo6EzrPAiRI,11773
  cloudnetpy/constants.py,sha256=RDB9aqpBRztk3QVCFgsmi9fwhtLuit_0WJrt0D6sDcc,736
  cloudnetpy/datasource.py,sha256=j7N4g59HPvOBWle-W9bOUF0BfRLgvR4zwOi_B50cI7Q,7921
  cloudnetpy/exceptions.py,sha256=ns48useL9RN3mPh7CqIiLA19VI9OmVbyRsKTkwbThF8,1760
  cloudnetpy/metadata.py,sha256=v_VDo2vbdTxB0zIsfP69IcrwSKiRlLpsGdq6JPI4CoA,5306
  cloudnetpy/output.py,sha256=YrWRBEZg0QNZRVnd9ziAziH-eJSh7O5JuWiH4ZxM0_s,15584
  cloudnetpy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cloudnetpy/utils.py,sha256=RIqxZoB62JmMOOfYH6_fqpaudH8wqS9WDbpnR2HCcTM,29040
- cloudnetpy/version.py,sha256=RRwzP1UIktb7j48KlcmVNqYg-tOvF82DH-sZMh_TCHI,72
+ cloudnetpy/utils.py,sha256=uhSdfx1ha6AqYl-Rlvlf053lIF-UnzcIAjjnh3fbS6U,29725
+ cloudnetpy/version.py,sha256=ygpYycrewbHhKNGdgO-MC2aazPsrdzjDgRJgIzCuVEI,72
  cloudnetpy/categorize/__init__.py,sha256=s-SJaysvVpVVo5kidiruWQO6p3gv2TXwY1wEHYO5D6I,44
  cloudnetpy/categorize/atmos_utils.py,sha256=9-ymI6i1xASf-XAFyO87FaTfvq6bF89N1i_27OkUp-M,10104
  cloudnetpy/categorize/attenuation.py,sha256=Y_-fzmQTltWTqIZTulJhovC7a6ifpMcaAazDJcnMIOc,990
@@ -100,7 +101,7 @@ cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py,sha256=Ra3r4V
  cloudnetpy/model_evaluation/tests/unit/test_tools.py,sha256=Ia_VrLdV2NstX5gbx_3AZTOAlrgLAy_xFZ8fHYVX0xI,3817
  cloudnetpy/plotting/__init__.py,sha256=lg9Smn4BI0dVBgnDLC3JVJ4GmwoSnO-qoSd4ApvwV6Y,107
  cloudnetpy/plotting/plot_meta.py,sha256=ZvaKU3eXy1KFxQomnsEu3mCYpwwBYKAYk7oAwOzAGSg,16143
- cloudnetpy/plotting/plotting.py,sha256=5mLDRZKcpgO0V9fUdk_Xf1E8BVQGrVihiBdfifdWWPk,35208
+ cloudnetpy/plotting/plotting.py,sha256=RGtRMfrZ6wFsFCXZ540I18a7p_O0x3S7FGIAoyuyOxw,35425
  cloudnetpy/products/__init__.py,sha256=2hRb5HG9hNrxH1if5laJkLeFeaZCd5W1q3hh4ewsX0E,273
  cloudnetpy/products/classification.py,sha256=AKb9GCatvhS5KR0c9LfN96nUvzi02175ZCQlvMH1Dws,8077
  cloudnetpy/products/der.py,sha256=soypE7uSEP4uHUCCQVEhyXsKY6e9mzV9B_2S5GUizqk,12729
@@ -114,8 +115,9 @@ cloudnetpy/products/mie_lu_tables.nc,sha256=It4fYpqJXlqOgL8jeZ-PxGzP08PMrELIDVe5
  cloudnetpy/products/mwr_tools.py,sha256=rd7UC67O4fsIE5SaHVZ4qWvUJTj41ZGwgQWPwZzOM14,5377
  cloudnetpy/products/product_tools.py,sha256=01Zc6xV8CSuYcIcLpchFf5POL3_c629-YMNDZJ51udA,10853
  docs/source/conf.py,sha256=IKiFWw6xhUd8NrCg0q7l596Ck1d61XWeVjIFHVSG9Og,1490
- cloudnetpy-1.66.5.dist-info/LICENSE,sha256=wcZF72bdaoG9XugpyE95Juo7lBQOwLuTKBOhhtANZMM,1094
- cloudnetpy-1.66.5.dist-info/METADATA,sha256=Fn9VQMj4YeMVPSsIKg2zuBzgrU4E_YDzXh3pcW6R4k0,5784
- cloudnetpy-1.66.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- cloudnetpy-1.66.5.dist-info/top_level.txt,sha256=ibSPWRr6ojS1i11rtBFz2_gkIe68mggj7aeswYfaOo0,16
- cloudnetpy-1.66.5.dist-info/RECORD,,
+ cloudnetpy-1.66.7.dist-info/LICENSE,sha256=wcZF72bdaoG9XugpyE95Juo7lBQOwLuTKBOhhtANZMM,1094
+ cloudnetpy-1.66.7.dist-info/METADATA,sha256=US5sPm8mcOCkypxYLuFgDxeOYh4rxMRp-3gdQDQbERw,5784
+ cloudnetpy-1.66.7.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ cloudnetpy-1.66.7.dist-info/entry_points.txt,sha256=HhY7LwCFk4qFgDlXx_Fy983ZTd831WlhtdPIzV-Y3dY,51
+ cloudnetpy-1.66.7.dist-info/top_level.txt,sha256=ibSPWRr6ojS1i11rtBFz2_gkIe68mggj7aeswYfaOo0,16
+ cloudnetpy-1.66.7.dist-info/RECORD,,
cloudnetpy-1.66.7.dist-info/WHEEL CHANGED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.1.0)
+ Generator: setuptools (75.2.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

cloudnetpy-1.66.7.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ cloudnetpy = cloudnetpy.cli:main
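With this new entry point, installing the wheel exposes a `cloudnetpy` console command that calls the `cloudnetpy.cli:main` function shown above, so the CLI can be invoked as, e.g., `cloudnetpy -s hyytiala -d 2024-01-01 -p radar --plot` (site and date illustrative).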