cloudnetpy 1.66.5__py3-none-any.whl → 1.66.6__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
cloudnetpy/cli.py ADDED
@@ -0,0 +1,605 @@
+ import argparse
+ import gzip
+ import importlib
+ import logging
+ import os
+ import re
+ import shutil
+ from collections.abc import Generator
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from dataclasses import dataclass
+ from pathlib import Path
+ from tempfile import TemporaryDirectory
+ from typing import TYPE_CHECKING, Final
+
+ import requests
+
+ from cloudnetpy import concat_lib, instruments
+ from cloudnetpy.categorize import generate_categorize
+ from cloudnetpy.exceptions import PlottingError
+ from cloudnetpy.plotting import generate_figure
+ from cloudnetpy.utils import md5sum
+
+ if TYPE_CHECKING:
+     from collections.abc import Callable
+
+
+ cloudnet_api_url: Final = "https://cloudnet.fmi.fi/api/"
+
+
+ @dataclass
+ class Instrument:
+     id: str
+     pid: str
+     name: str
+
+
+ def run(args: argparse.Namespace, tmpdir: str):
+     cat_files = {}
+
+     # Instrument based products
+     if source_instruments := _get_source_instruments(args.products):
+         for product, possible_instruments in source_instruments.items():
+             meta = _fetch_raw_meta(possible_instruments, args)
+             instrument = _select_instrument(meta, product)
+             if not instrument:
+                 logging.info("No instrument found for %s", product)
+                 continue
+             meta = _filter_by_instrument(meta, instrument)
+             meta = _filter_by_suffix(meta, product)
+             if not meta:
+                 logging.info("No suitable data available for %s", product)
+                 continue
+             output_filepath = _process_instrument_product(
+                 product, meta, instrument, tmpdir, args
+             )
+             _plot(output_filepath, product, args)
+             cat_files[product] = output_filepath
+
+     prod_sources = _get_product_sources(args.products)
+
+     # Categorize based products
+     if "categorize" in args.products:
+         cat_filepath = _process_categorize(cat_files, args)
+         _plot(cat_filepath, "categorize", args)
+     else:
+         cat_filepath = None
+     cat_products = [p for p in prod_sources if "categorize" in prod_sources[p]]
+     for product in cat_products:
+         if cat_filepath is None:
+             cat_filepath = _fetch_product(args, "categorize")
+             if cat_filepath is None:
+                 logging.info("No categorize data available for %s", product)
+                 break
+         l2_filename = _process_cat_product(product, cat_filepath)
+         _plot(l2_filename, product, args)
+
+     # MWR-L1c based products
+     mwrpy_products = [p for p in prod_sources if "mwr-l1c" in prod_sources[p]]
+     for product in mwrpy_products:
+         if "mwr-l1c" in cat_files:
+             mwrpy_filepath = cat_files.get("mwr-l1c")
+         else:
+             mwrpy_filepath = _fetch_product(args, "mwr-l1c")
+             if mwrpy_filepath is None:
+                 logging.info("No MWR-L1c data available for %s", product)
+                 break
+         l2_filename = _process_mwrpy_product(product, mwrpy_filepath, args)
+         _plot(l2_filename, product, args)
+
+
+ def _process_categorize(input_files: dict, args: argparse.Namespace) -> str | None:
+     cat_filepath = _create_categorize_filepath(args)
+
+     input_files["model"] = _fetch_model(args)
+     if input_files["model"] is None:
+         logging.info("No model data available for this date.")
+         return None
+
+     for product in ("radar", "lidar", "disdrometer"):
+         if product not in input_files and (filepath := _fetch_product(args, product)):
+             input_files[product] = filepath
+
+     if mwr := _fetch_mwr(args):
+         input_files["mwr"] = mwr
+
+     try:
+         logging.info("Processing categorize...")
+         generate_categorize(input_files, cat_filepath)
+         logging.info("Processed categorize to %s", cat_filepath)
+     except NameError:
+         logging.info("No data available for this date.")
+         return None
+     return cat_filepath
+
+
+ def _fetch_mwr(args: argparse.Namespace) -> str | None:
+     mwr_sources = [
+         ("mwr-single", None),
+         ("mwr", None),
+         ("radar", "rpg-fmcw-35"),
+         ("radar", "rpg-fmcw-94"),
+     ]
+     for product, source in mwr_sources:
+         mwr = _fetch_product(args, product, source=source)
+         if mwr:
+             return mwr
+     return None
+
+
+ def _process_instrument_product(
+     product: str,
+     meta: list[dict],
+     instrument: Instrument,
+     tmpdir: str,
+     args: argparse.Namespace,
+ ) -> str | None:
+     output_filepath = _create_instrument_filepath(instrument, args)
+     site_meta = _read_site_meta(meta)
+     input_files: list[str] | str
+     input_files = _fetch_raw(meta, args)
+     if args.dl:
+         return None
+     input_folder = str(Path(input_files[0]).parent)
+     calibration = _get_calibration(instrument, args)
+     fun: Callable
+     match (product, instrument.id):
+         case ("radar", _id) if "mira" in _id:
+             fun = instruments.mira2nc
+         case ("radar", _id) if "rpg" in _id:
+             fun = instruments.rpg2nc
+             input_files = input_folder
+         case ("radar", _id) if "basta" in _id:
+             fun = instruments.basta2nc
+             _check_input(input_files)
+             input_files = input_files[0]
+         case ("radar", _id) if "copernicus" in _id:
+             fun = instruments.copernicus2nc
+         case ("radar", _id) if "galileo" in _id:
+             fun = instruments.galileo2nc
+         case ("disdrometer", _id) if "parsivel" in _id:
+             fun = instruments.parsivel2nc
+         case ("disdrometer", _id) if "thies" in _id:
+             fun = instruments.thies2nc
+             input_files = _concatenate_(input_files, tmpdir)
+         case ("lidar", _id) if "pollyxt" in _id:
+             fun = instruments.pollyxt2nc
+         case ("lidar", _id) if _id == "cl61d":
+             fun = instruments.ceilo2nc
+             variables = ["x_pol", "p_pol", "beta_att", "time", "tilt_angle"]
+             concat_file = str(Path(tmpdir) / "tmp.nc")
+             concat_lib.bundle_netcdf_files(
+                 input_files,
+                 args.date,
+                 concat_file,
+                 variables=variables,
+             )
+             input_files = concat_file
+             site_meta["model"] = instrument.id
+         case ("lidar", _id):
+             fun = instruments.ceilo2nc
+             input_files = _concatenate_(input_files, tmpdir)
+             site_meta["model"] = instrument.id
+             if factor := calibration.get("calibration_factor"):
+                 site_meta["calibration_factor"] = factor
+         case ("mwr", _id):
+             fun = instruments.hatpro2nc
+             input_files = input_folder
+         case ("mwr-l1c", _id):
+             fun = instruments.hatpro2l1c
+             coefficients = _fetch_coefficient_files(calibration, tmpdir)
+             site_meta = {**site_meta, **calibration}
+             site_meta["coefficientFiles"] = coefficients
+             input_files = input_folder
+         case ("mrr", _id):
+             fun = instruments.mrr2nc
+         case ("weather-station", _id):
+             fun = instruments.ws2nc
+     logging.info("Processing %s...", product)
+     fun(input_files, output_filepath, site_meta, date=args.date)
+     logging.info("Processed %s: %s", product, output_filepath)
+     return output_filepath
+
+
+ def _concatenate_(input_files: list[str], tmpdir: str) -> str:
+     if len(input_files) > 1:
+         concat_file = str(Path(tmpdir) / "tmp.nc")
+         try:
+             concat_lib.concatenate_files(input_files, concat_file)
+         except OSError:
+             concat_lib.concatenate_text_files(input_files, concat_file)
+         return concat_file
+     return input_files[0]
+
+
+ def _fetch_coefficient_files(calibration: dict, tmpdir: str) -> list:
+     if not (links := calibration.get("coefficientLinks")):
+         msg = "No calibration coefficients found"
+         raise ValueError(msg)
+     coefficient_paths = []
+     for filename in links:
+         res = requests.get(filename, timeout=60)
+         res.raise_for_status()
+         filepath = Path(tmpdir) / Path(filename).name
+         filepath.write_bytes(res.content)
+         coefficient_paths.append(str(filepath))
+     return coefficient_paths
+
+
+ def _get_calibration(instrument: Instrument, args) -> dict:
+     params = {
+         "date": args.date,
+         "instrumentPid": instrument.pid,
+     }
+     res = requests.get(
+         f"{cloudnet_api_url}calibration",
+         params=params,
+         timeout=60,
+     )
+     if res.status_code == 404:
+         return {}
+     return res.json().get("data", {})
+
+
+ def _create_instrument_filepath(
+     instrument: Instrument, args: argparse.Namespace
+ ) -> str:
+     folder = _create_output_folder("instrument", args)
+     pid = _shorten_pid(instrument.pid)
+     filename = f"{args.date.replace('-', '')}_{args.site}_{instrument.id}_{pid}.nc"
+     return str(folder / filename)
+
+
+ def _create_categorize_filepath(args: argparse.Namespace) -> str:
+     folder = _create_output_folder("geophysical", args)
+     filename = f"{args.date.replace('-', '')}_{args.site}_categorize.nc"
+     return str(folder / filename)
+
+
+ def _create_input_folder(end_point: str, args: argparse.Namespace) -> Path:
+     folder = args.input / args.site / args.date / end_point
+     folder.mkdir(parents=True, exist_ok=True)
+     return folder
+
+
+ def _create_output_folder(end_point: str, args: argparse.Namespace) -> Path:
+     folder = args.output / args.site / args.date / end_point
+     folder.mkdir(parents=True, exist_ok=True)
+     return folder
+
+
+ def _fetch_raw_meta(instruments: list[str], args: argparse.Namespace) -> list[dict]:
+     res = requests.get(
+         f"{cloudnet_api_url}raw-files/",
+         params={
+             "site": args.site,
+             "date": args.date,
+             "instrument": instruments,
+             "status": ["uploaded", "processed"],
+         },
+         timeout=60,
+     )
+     res.raise_for_status()
+     return res.json()
+
+
+ def _filter_by_instrument(meta: list[dict], instrument: Instrument) -> list[dict]:
+     return [m for m in meta if m["instrumentInfo"]["pid"] == instrument.pid]
+
+
+ def _filter_by_suffix(meta: list[dict], product: str) -> list[dict]:
+     if product == "radar":
+         meta = [m for m in meta if not m["filename"].lower().endswith(".lv0")]
+     elif product == "mwr":
+         meta = [
+             m for m in meta if re.search(r"\.(lwp|iwv)", m["filename"], re.IGNORECASE)
+         ]
+     elif product == "mwr-l1c":
+         meta = [m for m in meta if not m["filename"].lower().endswith(".nc")]
+     return meta
+
+
+ def _get_source_instruments(products: list[str]) -> dict[str, list[str]]:
+     source_instruments = {}
+     for product in products:
+         prod, model = _parse_instrument(product)
+         res = requests.get(f"{cloudnet_api_url}products/{prod}", timeout=60)
+         res.raise_for_status()
+         if sources := res.json().get("sourceInstruments", []):
+             source_instruments[prod] = [i["id"] for i in sources]
+             if match := [i for i in source_instruments[prod] if i == model]:
+                 source_instruments[prod] = match
+     return source_instruments
+
+
+ def _get_product_sources(products: list[str]) -> dict[str, list[str]]:
+     source_products = {}
+     for product in products:
+         prod, _ = _parse_instrument(product)
+         res = requests.get(f"{cloudnet_api_url}products/{prod}", timeout=60)
+         res.raise_for_status()
+         if sources := res.json().get("sourceProducts", []):
+             source_products[prod] = [i["id"] for i in sources]
+     return source_products
+
+
+ def _parse_instrument(s: str) -> tuple[str, str | None]:
+     if "[" in s and s.endswith("]"):
+         name = s[: s.index("[")]
+         value = s[s.index("[") + 1 : -1]
+     else:
+         name = s
+         value = None
+     return name, value
+
+
+ def _select_instrument(meta: list[dict], product: str) -> Instrument | None:
+     instruments = _get_unique_instruments(meta)
+     if len(instruments) == 0:
+         logging.info("No instruments found")
+         return None
+     if len(instruments) > 1:
+         logging.info("Multiple instruments found for %s", product)
+         logging.info("Please specify which one to use")
+         for i, instrument in enumerate(instruments):
+             logging.info("%d: %s", i + 1, instrument.name)
+         ind = int(input("Select: ")) - 1
+         selected_instrument = instruments[ind]
+     else:
+         selected_instrument = instruments[0]
+         logging.info("Single instrument found: %s", selected_instrument.name)
+     return selected_instrument
+
+
+ def _get_unique_instruments(meta: list[dict]) -> list[Instrument]:
+     unique_pids = {m["instrumentInfo"]["pid"] for m in meta}
+     unique_instruments = []
+     for pid in unique_pids:
+         for m in meta:
+             if m["instrumentInfo"]["pid"] == pid:
+                 i = m["instrumentInfo"]
+                 unique_instruments.append(
+                     Instrument(i["instrumentId"], i["pid"], i["name"])
+                 )
+                 break
+     return sorted(unique_instruments, key=lambda x: x.name)
+
+
+ def _fetch_product(
+     args: argparse.Namespace, product: str, source: str | None = None
+ ) -> str | None:
+     payload = {
+         "date": args.date,
+         "site": args.site,
+         "product": product,
+     }
+     url = f"{cloudnet_api_url}files"
+     res = requests.get(url, payload, timeout=60)
+     res.raise_for_status()
+     meta = res.json()
+     if source:
+         meta = [
+             m for m in meta if "instrument" in m and m["instrument"]["id"] == source
+         ]
+     if not meta:
+         logging.info("No data available for %s", product)
+         return None
+     if len(meta) > 1:
+         logging.info(
+             "Multiple files for %s ... taking the first but some logic needed", product
+         )
+     meta = meta[0]
+     suffix = "geophysical" if "geophysical" in meta["product"]["type"] else "instrument"
+     folder = _create_output_folder(suffix, args)
+     return _download_product_file(meta, folder)
+
+
+ def _fetch_model(args: argparse.Namespace) -> str | None:
+     payload = {
+         "date": args.date,
+         "site": args.site,
+     }
+     url = f"{cloudnet_api_url}model-files"
+     res = requests.get(url, payload, timeout=60)
+     res.raise_for_status()
+     meta = res.json()
+     if not meta:
+         logging.info("No model data available for this date")
+         return None
+     meta = meta[0]
+     folder = _create_output_folder("instrument", args)
+     return _download_product_file(meta, folder)
+
+
+ def _fetch_raw(metadata: list[dict], args: argparse.Namespace) -> list[str]:
+     pid = _shorten_pid(metadata[0]["instrumentInfo"]["pid"])
+     instrument = f"{metadata[0]['instrumentInfo']['instrumentId']}_{pid}"
+     folder = _create_input_folder(instrument, args)
+     filepaths = []
+     with ThreadPoolExecutor() as executor:
+         futures = [
+             executor.submit(_download_raw_file, meta, folder) for meta in metadata
+         ]
+         for future in as_completed(futures):
+             filepaths.append(future.result())
+     return filepaths
+
+
+ def _download_raw_file(meta: dict, folder: Path) -> str:
+     filepath = folder / meta["filename"]
+     possible_filepaths = [filepath]
+     if filepath.suffix == ".gz":
+         possible_filepaths.append(filepath.with_suffix(""))
+     for path in possible_filepaths:
+         if path.exists() and md5sum(path) == meta["checksum"]:
+             logging.info("Existing file found: %s", path)
+             return str(path)
+     logging.info("Downloading file: %s", filepath)
+     res = requests.get(meta["downloadUrl"], timeout=60)
+     res.raise_for_status()
+     filepath.write_bytes(res.content)
+     if filepath.suffix == ".gz":
+         filepath = _unzip_gz_file(filepath)
+     return str(filepath)
+
+
+ def _download_product_file(meta: dict, folder: Path) -> str:
+     filepath = folder / meta["filename"]
+     if filepath.exists():
+         logging.info("Existing file found: %s", filepath)
+         return str(filepath)
+     logging.info("Downloading file: %s", filepath)
+     res = requests.get(meta["downloadUrl"], timeout=60)
+     res.raise_for_status()
+     filepath.write_bytes(res.content)
+     return str(filepath)
+
+
+ def _unzip_gz_file(path_in: Path) -> Path:
+     if path_in.suffix != ".gz":
+         return path_in
+     path_out = path_in.with_suffix("")
+     logging.debug("Decompressing %s to %s", path_in, path_out)
+     with gzip.open(path_in, "rb") as file_in, open(path_out, "wb") as file_out:
+         shutil.copyfileobj(file_in, file_out)
+     path_in.unlink()
+     return path_out
+
+
+ def _read_site_meta(meta: list[dict]) -> dict:
+     return {
+         "latitude": meta[0]["site"]["latitude"],
+         "longitude": meta[0]["site"]["longitude"],
+         "altitude": meta[0]["site"]["altitude"],
+         "name": meta[0]["site"]["humanReadableName"],
+     }
+
+
+ def _shorten_pid(pid: str) -> str:
+     return pid.split(".")[-1][:8]
+
+
+ def _check_input(files: list) -> None:
+     if len(files) > 1:
+         msg = "Multiple input files found"
+         raise ValueError(msg)
+
+
+ def _plot(filepath: os.PathLike | str | None, product: str, args: argparse.Namespace):
+     if filepath is None or (not args.plot and not args.show):
+         return
+     res = requests.get(f"{cloudnet_api_url}products/variables", timeout=60)
+     res.raise_for_status()
+     variables = next(var["variables"] for var in res.json() if var["id"] == product)
+     variables = [var["id"].split("-")[-1] for var in variables]
+     image_name = str(filepath).replace(".nc", ".png") if args.plot else None
+     try:
+         generate_figure(
+             filepath,
+             variables,
+             show=args.show,
+             output_filename=image_name,
+         )
+     except PlottingError as e:
+         logging.info("Failed to plot %s: %s", product, e)
+     if args.plot:
+         logging.info("Plotted %s: %s", product, image_name)
+
+
+ def _process_cat_product(product: str, categorize_file: str) -> str:
+     output_file = categorize_file.replace("categorize", product)
+     module = importlib.import_module("cloudnetpy.products")
+     getattr(module, f"generate_{product}")(categorize_file, output_file)
+     logging.info("Processed %s: %s", product, output_file)
+     return output_file
+
+
+ def _process_mwrpy_product(
+     product: str, mwr_l1c_file: str, args: argparse.Namespace
+ ) -> str:
+     filename = f"{args.date}_{args.site}_{product}.nc"
+     output_file = _create_output_folder("geophysical", args) / filename
+     module = importlib.import_module("cloudnetpy.products")
+     getattr(module, f"generate_{product.replace('-', '_')}")(mwr_l1c_file, output_file)
+     logging.info("Processed %s: %s", product, output_file)
+     return str(output_file)
+
+
+ def _fetch_cloudnet_sites() -> list[str]:
+     res = requests.get(f"{cloudnet_api_url}sites", timeout=60)
+     res.raise_for_status()
+     return [site["id"] for site in res.json()]
+
+
+ def _parse_products(product_argument: str) -> Generator:
+     products = product_argument.split(",")
+     res = requests.get(f"{cloudnet_api_url}products", timeout=60)
+     res.raise_for_status()
+     valid_options = [p["id"] for p in res.json()]
+     for product in products:
+         prod, _ = _parse_instrument(product)
+         if prod in valid_options:
+             yield product
+
+
+ def main():
+     parser = argparse.ArgumentParser(
+         description="Command line interface for running CloudnetPy."
+     )
+     parser.add_argument(
+         "-s",
+         "--site",
+         type=str,
+         help="Site",
+         required=True,
+         choices=_fetch_cloudnet_sites(),
+     )
+     parser.add_argument(
+         "-d", "--date", type=str, help="Date in YYYY-MM-DD", required=True
+     )
+     parser.add_argument(
+         "-p",
+         "--products",
+         type=_parse_products,
+         help=(
+             "Products to process, e.g. 'radar' or 'classification'. If the site "
+             "has many instruments, you can specify the instrument in brackets, "
+             "e.g. radar[mira-35]."
+         ),
+         required=True,
+     )
+     parser.add_argument("--input", type=Path, help="Input path", default="input/")
+     parser.add_argument("--output", type=Path, help="Output path", default="output/")
+     parser.add_argument(
+         "--plot",
+         help="Plot the processed data",
+         default=False,
+         action=argparse.BooleanOptionalAction,
+     )
+     parser.add_argument(
+         "--show",
+         help="Show plotted image",
+         default=False,
+         action=argparse.BooleanOptionalAction,
+     )
+     parser.add_argument(
+         "--dl",
+         help="Download raw data only",
+         default=False,
+         action=argparse.BooleanOptionalAction,
+     )
+     args = parser.parse_args()
+
+     logger = logging.getLogger()
+     logger.setLevel(logging.INFO)
+     handler = logging.StreamHandler()
+     formatter = logging.Formatter("%(levelname)s: %(message)s")
+     handler.setFormatter(formatter)
+     logger.handlers = [handler]
+
+     with TemporaryDirectory() as tmpdir:
+         run(args, tmpdir)
+
+
+ if __name__ == "__main__":
+     main()
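
The new module is exposed as a console script (see the entry_points.txt addition at the end of this diff), so besides the shell it can also be driven programmatically. A minimal, hedged sketch of an invocation; the site id, date, and product list below are placeholders, and note that merely building the parser already queries the Cloudnet API for valid site and product ids:

# Hypothetical invocation sketch, not part of the package. Equivalent to:
#   cloudnetpy -s hyytiala -d 2024-01-01 -p radar,classification --plot
# Requires network access to https://cloudnet.fmi.fi/api/.
import sys

from cloudnetpy import cli

sys.argv = [
    "cloudnetpy",
    "--site", "hyytiala",
    "--date", "2024-01-01",
    "--products", "radar,classification",
    "--plot",
]
cli.main()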
cloudnetpy/concat_lib.py CHANGED
@@ -1,8 +1,12 @@
  """Module for concatenating netCDF files."""

+ import shutil
+ from os import PathLike
+
  import netCDF4
  import numpy as np

+ from cloudnetpy import utils
  from cloudnetpy.exceptions import InconsistentDataError


@@ -264,3 +268,63 @@ def _update_fields(
              nc_old.variables[field][idx, :] = nc_new.variables[field][valid_ind, :]
          elif len(dimensions) == 2 and concat_ind == 1:
              nc_old.variables[field][:, idx] = nc_new.variables[field][:, valid_ind]
+
+
+ def concatenate_text_files(filenames: list, output_filename: str | PathLike) -> None:
+     """Concatenates text files."""
+     with open(output_filename, "wb") as target:
+         for filename in filenames:
+             with open(filename, "rb") as source:
+                 shutil.copyfileobj(source, target)
+
+
+ def bundle_netcdf_files(
+     files: list,
+     date: str,
+     output_file: str,
+     concat_dimensions: tuple[str, ...] = ("time", "profile"),
+     variables: list | None = None,
+ ) -> list:
+     """Concatenates several netCDF files into a daily file with
+     some extra data manipulation.
+     """
+     with netCDF4.Dataset(files[0]) as nc:
+         concat_dimension = None
+         for key in concat_dimensions:
+             if key in nc.dimensions:
+                 concat_dimension = key
+                 break
+         if concat_dimension is None:
+             msg = f"Dimension '{concat_dimensions}' not found in the files."
+             raise KeyError(msg)
+     if len(files) == 1:
+         shutil.copy(files[0], output_file)
+         return files
+     valid_files = []
+     for file in files:
+         try:
+             with netCDF4.Dataset(file) as nc:
+                 time = nc.variables["time"]
+                 time_array = time[:]
+                 time_units = time.units
+         except OSError:
+             continue
+         epoch = utils.get_epoch(time_units)
+         for timestamp in time_array:
+             if utils.seconds2date(timestamp, epoch)[:3] == date.split("-"):
+                 valid_files.append(file)
+                 break
+     concatenate_files(
+         valid_files,
+         output_file,
+         concat_dimension=concat_dimension,
+         variables=variables,
+         ignore=[
+             "minimum",
+             "maximum",
+             "number_integrated_samples",
+             "Min_LWP",
+             "Max_LWP",
+         ],
+     )
+     return valid_files
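
These two helpers back the CLI above: `_concatenate_` tries netCDF concatenation first and falls back to `concatenate_text_files` for plain-text instrument files, while `bundle_netcdf_files` handles the cl61d lidar case. A small sketch of the text helper, with hypothetical paths:

# Hedged sketch, paths are placeholders: merges raw text chunks in name order
# into one daily file by streaming bytes (shutil.copyfileobj under the hood).
from pathlib import Path

from cloudnetpy import concat_lib

chunks = sorted(str(p) for p in Path("input/").glob("*.txt"))
concat_lib.concatenate_text_files(chunks, "output/daily.txt")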
cloudnetpy/plotting/plotting.py CHANGED
@@ -43,6 +43,8 @@ class PlotParameters:
              instruments and model).
          footer_text: The text to display in the footer of the plot.
          plot_meta: Additional metadata for the plot.
+         raise_on_empty: Whether to raise an error if no data is found for a
+             plotted variable.
      """

      dpi: float = 120
@@ -55,6 +57,7 @@ class PlotParameters:
      show_sources: bool = False
      footer_text: str | None = None
      plot_meta: PlotMeta | None = None
+     raise_on_empty: bool = False


  class Dimensions:
@@ -492,7 +495,7 @@ class Plot2D(Plot):
              smoothed_data = uniform_filter(self._data[valid_time_ind, :], sigma_units)
              self._data[valid_time_ind, :] = smoothed_data

-         if self._data.mask.all():
+         if self._data.mask.all() and figure_data.options.raise_on_empty:
              msg = "All data is masked"
              raise PlottingError(msg)

@@ -603,7 +606,7 @@ class Plot1D(Plot):
              raise PlottingError(msg)
          self._data = self._data[:, freq_ind]
          self._data[np.isnan(self._data)] = ma.masked
-         if self._data.mask.all():
+         if self._data.mask.all() and figure_data.options.raise_on_empty:
              msg = "All data is masked"
              raise PlottingError(msg)
          self._data_orig = self._data_orig[:, freq_ind]
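
The net effect is that a fully masked variable is now skipped silently by default, and the old fail-fast behavior becomes opt-in. A hedged sketch, assuming generate_figure accepts a PlotParameters instance through its options argument and using a placeholder file name:

# Opt back in to the pre-1.66.6 behavior where an all-masked variable raises
# PlottingError instead of producing an empty panel.
from cloudnetpy.plotting import generate_figure
from cloudnetpy.plotting.plotting import PlotParameters

options = PlotParameters()
options.raise_on_empty = True  # new field, defaults to False
generate_figure("20240101_site_categorize.nc", ["v"], show=False, options=options)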
cloudnetpy/utils.py CHANGED
@@ -1,6 +1,8 @@
  """This module contains general helper functions."""

+ import base64
  import datetime
+ import hashlib
  import logging
  import os
  import re
@@ -140,7 +142,7 @@ def rebin_2d(
      x_in: np.ndarray,
      array: ma.MaskedArray,
      x_new: np.ndarray,
-     statistic: str = "mean",
+     statistic: Literal["mean", "std"] = "mean",
      n_min: int = 1,
      *,
      mask_zeros: bool = True,
@@ -178,14 +180,12 @@ def rebin_2d(
      masked_result = ma.array(result)

      # Fill bins with not enough profiles
-     empty_indices = []
-     for ind in range(len(edges) - 1):
-         is_data = np.where((x_in > edges[ind]) & (x_in <= edges[ind + 1]))[0]
-         if len(is_data) < n_min:
-             masked_result[ind, :] = ma.masked
-             empty_indices.append(ind)
+     x_hist, _ = np.histogram(x_in, bins=edges)
+     empty_mask = x_hist < n_min
+     masked_result[empty_mask, :] = ma.masked
+     empty_indices = list(np.nonzero(empty_mask)[0])
      if len(empty_indices) > 0:
-         logging.debug("No radar data in %s bins", len(empty_indices))
+         logging.debug("No data in %s bins", len(empty_indices))

      return masked_result, empty_indices

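
The rewritten block above swaps a per-bin Python loop for a single np.histogram call. A standalone numpy sketch (not package code) of the counting logic, with made-up sample values:

# Count samples per bin, then flag bins holding fewer than n_min samples.
import numpy as np

x_in = np.array([0.1, 0.2, 1.5, 3.7])
edges = np.array([0.0, 1.0, 2.0, 3.0, 4.0])
n_min = 1
x_hist, _ = np.histogram(x_in, bins=edges)  # samples per bin: [2, 1, 0, 1]
empty_mask = x_hist < n_min                 # [False, False, True, False]
print(np.nonzero(empty_mask)[0].tolist())   # -> [2]

One subtlety of the change: the old loop used (left, right] intervals while np.histogram uses [left, right) for all but the last bin, so samples falling exactly on a bin edge can be counted into the neighboring bin.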
@@ -1028,3 +1028,23 @@ def remove_masked_blocks(array: ma.MaskedArray, limit: int = 50) -> np.ndarray:
      mask = np.bincount(labeled_array) < limit
      mask[0] = True
      return mask[labeled_array]
+
+
+ def sha256sum(filename: str | os.PathLike) -> str:
+     """Calculates hash of file using sha-256."""
+     return _calc_hash_sum(filename, "sha256", is_base64=False)
+
+
+ def md5sum(filename: str | os.PathLike, *, is_base64: bool = False) -> str:
+     """Calculates hash of file using md5."""
+     return _calc_hash_sum(filename, "md5", is_base64=is_base64)
+
+
+ def _calc_hash_sum(filename, method, *, is_base64: bool) -> str:
+     hash_sum = getattr(hashlib, method)()
+     with open(filename, "rb") as f:
+         for byte_block in iter(lambda: f.read(4096), b""):
+             hash_sum.update(byte_block)
+     if is_base64:
+         return base64.encodebytes(hash_sum.digest()).decode("utf-8").strip()
+     return hash_sum.hexdigest()
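
The CLI's _download_raw_file uses md5sum above to skip re-downloading raw files whose checksum already matches the API metadata. A short usage sketch with a placeholder path:

# Both helpers stream the file in 4096-byte blocks, so large netCDF files are
# hashed without being read into memory at once. "data.nc" is a placeholder.
from cloudnetpy.utils import md5sum, sha256sum

print(sha256sum("data.nc"))               # hex digest
print(md5sum("data.nc"))                  # hex digest
print(md5sum("data.nc", is_base64=True))  # base64-encoded raw digest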
cloudnetpy/version.py CHANGED
@@ -1,4 +1,4 @@
  MAJOR = 1
  MINOR = 66
- PATCH = 5
+ PATCH = 6
  __version__ = f"{MAJOR}.{MINOR}.{PATCH}"
{cloudnetpy-1.66.5.dist-info → cloudnetpy-1.66.6.dist-info}/METADATA RENAMED
@@ -1,6 +1,6 @@
  Metadata-Version: 2.1
  Name: cloudnetpy
- Version: 1.66.5
+ Version: 1.66.6
  Summary: Python package for Cloudnet processing
  Author: Simo Tukiainen
  License: MIT License
{cloudnetpy-1.66.5.dist-info → cloudnetpy-1.66.6.dist-info}/RECORD RENAMED
@@ -1,14 +1,15 @@
  cloudnetpy/__init__.py,sha256=X_FqY-4yg5GUj5Edo14SToLEos6JIsC3fN-v1FUgQoA,43
+ cloudnetpy/cli.py,sha256=KXFguR4nnVXRr4VV8htI2IKcqqQ7LZ7eZwSHj-uG_3w,20729
  cloudnetpy/cloudnetarray.py,sha256=Ol1ha4RPAmFZANL__U5CaMKX4oYMXYR6OnjoCZ9w3eo,7077
- cloudnetpy/concat_lib.py,sha256=8Ek059RMLAfbbXCkX90cgnhw_8ZpcDrxw1yPvwtuitU,9846
+ cloudnetpy/concat_lib.py,sha256=QxGWGsA_6el3Ma5-1y2MtrgFCC9Ohpe3yo6EzrPAiRI,11773
  cloudnetpy/constants.py,sha256=RDB9aqpBRztk3QVCFgsmi9fwhtLuit_0WJrt0D6sDcc,736
  cloudnetpy/datasource.py,sha256=j7N4g59HPvOBWle-W9bOUF0BfRLgvR4zwOi_B50cI7Q,7921
  cloudnetpy/exceptions.py,sha256=ns48useL9RN3mPh7CqIiLA19VI9OmVbyRsKTkwbThF8,1760
  cloudnetpy/metadata.py,sha256=v_VDo2vbdTxB0zIsfP69IcrwSKiRlLpsGdq6JPI4CoA,5306
  cloudnetpy/output.py,sha256=YrWRBEZg0QNZRVnd9ziAziH-eJSh7O5JuWiH4ZxM0_s,15584
  cloudnetpy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- cloudnetpy/utils.py,sha256=RIqxZoB62JmMOOfYH6_fqpaudH8wqS9WDbpnR2HCcTM,29040
- cloudnetpy/version.py,sha256=RRwzP1UIktb7j48KlcmVNqYg-tOvF82DH-sZMh_TCHI,72
+ cloudnetpy/utils.py,sha256=uhSdfx1ha6AqYl-Rlvlf053lIF-UnzcIAjjnh3fbS6U,29725
+ cloudnetpy/version.py,sha256=FiFGNpvgmpe5OMB-IflyE51XCkEqV1mD8JLRmF514Hg,72
  cloudnetpy/categorize/__init__.py,sha256=s-SJaysvVpVVo5kidiruWQO6p3gv2TXwY1wEHYO5D6I,44
  cloudnetpy/categorize/atmos_utils.py,sha256=9-ymI6i1xASf-XAFyO87FaTfvq6bF89N1i_27OkUp-M,10104
  cloudnetpy/categorize/attenuation.py,sha256=Y_-fzmQTltWTqIZTulJhovC7a6ifpMcaAazDJcnMIOc,990
@@ -100,7 +101,7 @@ cloudnetpy/model_evaluation/tests/unit/test_statistical_methods.py,sha256=Ra3r4V
  cloudnetpy/model_evaluation/tests/unit/test_tools.py,sha256=Ia_VrLdV2NstX5gbx_3AZTOAlrgLAy_xFZ8fHYVX0xI,3817
  cloudnetpy/plotting/__init__.py,sha256=lg9Smn4BI0dVBgnDLC3JVJ4GmwoSnO-qoSd4ApvwV6Y,107
  cloudnetpy/plotting/plot_meta.py,sha256=ZvaKU3eXy1KFxQomnsEu3mCYpwwBYKAYk7oAwOzAGSg,16143
- cloudnetpy/plotting/plotting.py,sha256=5mLDRZKcpgO0V9fUdk_Xf1E8BVQGrVihiBdfifdWWPk,35208
+ cloudnetpy/plotting/plotting.py,sha256=RGtRMfrZ6wFsFCXZ540I18a7p_O0x3S7FGIAoyuyOxw,35425
  cloudnetpy/products/__init__.py,sha256=2hRb5HG9hNrxH1if5laJkLeFeaZCd5W1q3hh4ewsX0E,273
  cloudnetpy/products/classification.py,sha256=AKb9GCatvhS5KR0c9LfN96nUvzi02175ZCQlvMH1Dws,8077
  cloudnetpy/products/der.py,sha256=soypE7uSEP4uHUCCQVEhyXsKY6e9mzV9B_2S5GUizqk,12729
@@ -114,8 +115,9 @@ cloudnetpy/products/mie_lu_tables.nc,sha256=It4fYpqJXlqOgL8jeZ-PxGzP08PMrELIDVe5
  cloudnetpy/products/mwr_tools.py,sha256=rd7UC67O4fsIE5SaHVZ4qWvUJTj41ZGwgQWPwZzOM14,5377
  cloudnetpy/products/product_tools.py,sha256=01Zc6xV8CSuYcIcLpchFf5POL3_c629-YMNDZJ51udA,10853
  docs/source/conf.py,sha256=IKiFWw6xhUd8NrCg0q7l596Ck1d61XWeVjIFHVSG9Og,1490
- cloudnetpy-1.66.5.dist-info/LICENSE,sha256=wcZF72bdaoG9XugpyE95Juo7lBQOwLuTKBOhhtANZMM,1094
- cloudnetpy-1.66.5.dist-info/METADATA,sha256=Fn9VQMj4YeMVPSsIKg2zuBzgrU4E_YDzXh3pcW6R4k0,5784
- cloudnetpy-1.66.5.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
- cloudnetpy-1.66.5.dist-info/top_level.txt,sha256=ibSPWRr6ojS1i11rtBFz2_gkIe68mggj7aeswYfaOo0,16
- cloudnetpy-1.66.5.dist-info/RECORD,,
+ cloudnetpy-1.66.6.dist-info/LICENSE,sha256=wcZF72bdaoG9XugpyE95Juo7lBQOwLuTKBOhhtANZMM,1094
+ cloudnetpy-1.66.6.dist-info/METADATA,sha256=20TMauFPuw3zoVxRuuqG5sjtL5Sh1VUiO96s_4M4bro,5784
+ cloudnetpy-1.66.6.dist-info/WHEEL,sha256=OVMc5UfuAQiSplgO0_WdW7vXVGAt9Hdd6qtN4HotdyA,91
+ cloudnetpy-1.66.6.dist-info/entry_points.txt,sha256=HhY7LwCFk4qFgDlXx_Fy983ZTd831WlhtdPIzV-Y3dY,51
+ cloudnetpy-1.66.6.dist-info/top_level.txt,sha256=ibSPWRr6ojS1i11rtBFz2_gkIe68mggj7aeswYfaOo0,16
+ cloudnetpy-1.66.6.dist-info/RECORD,,
{cloudnetpy-1.66.5.dist-info → cloudnetpy-1.66.6.dist-info}/WHEEL RENAMED
@@ -1,5 +1,5 @@
  Wheel-Version: 1.0
- Generator: setuptools (75.1.0)
+ Generator: setuptools (75.2.0)
  Root-Is-Purelib: true
  Tag: py3-none-any

cloudnetpy-1.66.6.dist-info/entry_points.txt ADDED
@@ -0,0 +1,2 @@
+ [console_scripts]
+ cloudnetpy = cloudnetpy.cli:main