pypromice 1.3.6__py3-none-any.whl → 1.4.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pypromice might be problematic. Click here for more details.

Files changed (53)
  1. pypromice/postprocess/bufr_to_csv.py +15 -3
  2. pypromice/postprocess/bufr_utilities.py +91 -18
  3. pypromice/postprocess/create_bufr_files.py +178 -0
  4. pypromice/postprocess/get_bufr.py +248 -397
  5. pypromice/postprocess/make_metadata_csv.py +214 -0
  6. pypromice/postprocess/real_time_utilities.py +41 -11
  7. pypromice/process/L0toL1.py +12 -5
  8. pypromice/process/L1toL2.py +69 -14
  9. pypromice/process/L2toL3.py +1034 -186
  10. pypromice/process/aws.py +139 -808
  11. pypromice/process/get_l2.py +90 -0
  12. pypromice/process/get_l2tol3.py +111 -0
  13. pypromice/process/join_l2.py +112 -0
  14. pypromice/process/join_l3.py +551 -120
  15. pypromice/process/load.py +161 -0
  16. pypromice/process/resample.py +147 -0
  17. pypromice/process/utilities.py +68 -0
  18. pypromice/process/write.py +503 -0
  19. pypromice/qc/github_data_issues.py +10 -16
  20. pypromice/qc/persistence.py +52 -30
  21. pypromice/resources/__init__.py +28 -0
  22. pypromice/{process/metadata.csv → resources/file_attributes.csv} +0 -2
  23. pypromice/resources/variable_aliases_GC-Net.csv +78 -0
  24. pypromice/resources/variables.csv +106 -0
  25. pypromice/station_configuration.py +118 -0
  26. pypromice/tx/get_l0tx.py +7 -4
  27. pypromice/tx/payload_formats.csv +1 -0
  28. pypromice/tx/tx.py +27 -6
  29. pypromice/utilities/__init__.py +0 -0
  30. pypromice/utilities/git.py +62 -0
  31. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/METADATA +4 -4
  32. pypromice-1.4.1.dist-info/RECORD +53 -0
  33. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/WHEEL +1 -1
  34. pypromice-1.4.1.dist-info/entry_points.txt +13 -0
  35. pypromice/postprocess/station_configurations.toml +0 -762
  36. pypromice/process/get_l3.py +0 -46
  37. pypromice/process/variables.csv +0 -92
  38. pypromice/qc/persistence_test.py +0 -150
  39. pypromice/test/test_config1.toml +0 -69
  40. pypromice/test/test_config2.toml +0 -54
  41. pypromice/test/test_email +0 -75
  42. pypromice/test/test_payload_formats.csv +0 -4
  43. pypromice/test/test_payload_types.csv +0 -7
  44. pypromice/test/test_percentile.py +0 -229
  45. pypromice/test/test_raw1.txt +0 -4468
  46. pypromice/test/test_raw_DataTable2.txt +0 -11167
  47. pypromice/test/test_raw_SlimTableMem1.txt +0 -1155
  48. pypromice/test/test_raw_transmitted1.txt +0 -15411
  49. pypromice/test/test_raw_transmitted2.txt +0 -28
  50. pypromice-1.3.6.dist-info/RECORD +0 -53
  51. pypromice-1.3.6.dist-info/entry_points.txt +0 -8
  52. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/LICENSE.txt +0 -0
  53. {pypromice-1.3.6.dist-info → pypromice-1.4.1.dist-info}/top_level.txt +0 -0
@@ -1,11 +1,23 @@
1
1
  import argparse
2
2
  from pathlib import Path
3
3
 
4
+ import pandas as pd
5
+
4
6
  from pypromice.postprocess.bufr_utilities import read_bufr_file
5
7
 
6
- if __name__ == "__main__":
8
+
9
+ def main():
7
10
  parser = argparse.ArgumentParser("BUFR to CSV converter")
8
- parser.add_argument("path", type=Path)
11
+ parser.add_argument("path", type=Path, nargs='+')
9
12
  args = parser.parse_args()
10
13
 
11
- print(read_bufr_file(args.path).to_csv())
14
+ paths = []
15
+ for path in args.path:
16
+ paths += list(path.parent.glob(path.name))
17
+
18
+ df = pd.concat([read_bufr_file(path) for path in paths])
19
+ print(df.to_csv())
20
+
21
+
22
+ if __name__ == "__main__":
23
+ main()
@@ -45,6 +45,7 @@ def round_converter(decimals: int):
45
45
 
46
46
  return round
47
47
 
48
+
48
49
  # Enforce precision
49
50
  # Note the sensor accuracies listed here:
50
51
  # https://essd.copernicus.org/articles/13/3819/2021/#section8
@@ -64,28 +65,82 @@ class BUFRVariables:
64
65
  * heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD: Corresponds to "#7#heightOfSensorAboveLocalGroundOrDeckOfMarinePlatform" which is the height of the anemometer relative to ground or deck of marine platform.
65
66
 
66
67
  """
67
- wmo_id: str
68
+
69
+ # Station type: "mobile" or "land"
70
+ # ===============================
71
+ # Fixed land station schema: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307080
72
+ # Mobile station schema: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307090
73
+
68
74
  station_type: str
75
+
76
+ # WMO station identifier
77
+ # Land stations: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/301090
78
+ # Mobile stations: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/301092
79
+ # ======================================================================================================
80
+ wmo_id: str
69
81
  timestamp: datetime.datetime
70
- relativeHumidity: float = attrs.field(converter=round_converter(0))
71
- airTemperature: float = attrs.field(converter=round_converter(1))
72
- pressure: float = attrs.field(converter=round_converter(1))
73
- windDirection: float = attrs.field(converter=round_converter(0))
74
- windSpeed: float = attrs.field(converter=round_converter(1))
75
- latitude: float = attrs.field(converter=round_converter(6))
76
- longitude: float = attrs.field(converter=round_converter(6))
82
+
83
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/005001
84
+ # Scale: 5, unit: degrees
85
+ # TODO: Test if eccodes does the rounding as well. The rounding was 6, which is larger than the scale.
86
+ latitude: float = attrs.field(converter=round_converter(5))
87
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/006001
88
+ # Scale: 5, unit: degrees
89
+ longitude: float = attrs.field(converter=round_converter(5))
90
+
91
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007030
92
+ # Scale: 1, unit: m
77
93
  heightOfStationGroundAboveMeanSeaLevel: float = attrs.field(
78
- converter=round_converter(2)
94
+ converter=round_converter(1)
79
95
  )
80
- #
96
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007031
97
+ # Scale: 1, unit: m
81
98
  heightOfBarometerAboveMeanSeaLevel: float = attrs.field(
82
- converter=round_converter(2),
99
+ converter=round_converter(1),
83
100
  )
101
+
102
+ # Pressure information
103
+ # ====================
104
+ # Definition table: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302031
105
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/010004
106
+ # Scale: -1, unit: Pa
107
+ nonCoordinatePressure: float = attrs.field(converter=round_converter(-1))
108
+ # There are two other pressure variables in the template: 007004 - pressure and 010062 24-hour pressure change
109
+
110
+ # Basic synoptic "instantaneous" data
111
+ # ===================================
112
+ # Definition table: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302035
113
+ # This section only includes the temperature and humidity data (302032).
114
+ # Precipitation and cloud data are currently ignored.
115
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007032
116
+ # Scale: 2, unit: m
117
+ # This is the first appearance of this variable id.
84
118
  heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformTempRH: float = attrs.field(
85
- converter=round_converter(4),
119
+ converter=round_converter(2),
86
120
  )
121
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/012101
122
+ # Scale: 2, unit: K
123
+ airTemperature: float = attrs.field(converter=round_converter(2))
124
+ # There is also a Dewpoint temperature in this template: 012103 which is currently unused.
125
+ # Relative humidity: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/013003
126
+ # Scale: 0, unit: %
127
+ relativeHumidity: float = attrs.field(converter=round_converter(0))
128
+
129
+ # Basic synoptic "period" data
130
+ # ============================
131
+ # Definition table: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302043
132
+ # Wind data: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/302042
133
+ # Wind direction: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/011001
134
+ # Scale: 0, unit: degrees
135
+ windDirection: float = attrs.field(converter=round_converter(0))
136
+ # Wind speed: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/011002
137
+ # Scale: 1, unit: m/s
138
+ windSpeed: float = attrs.field(converter=round_converter(1))
139
+ # https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/007032
140
+ # Scale: 2, unit: m
141
+ # This is the 7th appearance of this variable id.
87
142
  heightOfSensorAboveLocalGroundOrDeckOfMarinePlatformWSPD: float = attrs.field(
88
- converter=round_converter(4)
143
+ converter=round_converter(2)
89
144
  )
90
145
 
91
146
  def as_series(self) -> pd.Series:
@@ -129,6 +184,7 @@ STATION_CONFIGURATIONS = {
129
184
 
130
185
  BUFR_TEMPLATES = {
131
186
  "mobile": {
187
+ # Template definition: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307090
132
188
  "unexpandedDescriptors": (307090), # message template, "synopMobil"
133
189
  "edition": 4, # latest edition
134
190
  "masterTableNumber": 0,
@@ -144,6 +200,7 @@ BUFR_TEMPLATES = {
144
200
  "compressedData": 0,
145
201
  },
146
202
  "land": {
203
+ # Template definition: https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_D/307080
147
204
  "unexpandedDescriptors": (307080), # message template, "synopLand"
148
205
  "edition": 4, # latest edition
149
206
  "masterTableNumber": 0,
@@ -246,6 +303,11 @@ def set_station(ibufr, station_type: str, wmo_id: str):
246
303
  elif station_type == "land":
247
304
  # StationNumber for land stations is an integer
248
305
  wmo_id_int = int(wmo_id)
306
+ if wmo_id_int >= 1024:
307
+ raise ValueError(
308
+ f"Invalid WMO ID {wmo_id}. Land station number must be less than 1024."
309
+ "See https://vocabulary-manager.eumetsat.int/vocabularies/BUFR/WMO/32/TABLE_B/001002"
310
+ )
249
311
  station_config = dict(stationNumber=wmo_id_int)
250
312
  else:
251
313
  raise Exception(f"Unsupported station station type {station_type}")
@@ -280,7 +342,7 @@ def set_AWS_variables(
280
342
 
281
343
  set_bufr_value(ibufr, "relativeHumidity", variables.relativeHumidity)
282
344
  set_bufr_value(ibufr, "airTemperature", variables.airTemperature)
283
- set_bufr_value(ibufr, "pressure", variables.pressure)
345
+ set_bufr_value(ibufr, "nonCoordinatePressure", variables.nonCoordinatePressure)
284
346
  set_bufr_value(ibufr, "windDirection", variables.windDirection)
285
347
  set_bufr_value(ibufr, "windSpeed", variables.windSpeed)
286
348
 
@@ -372,7 +434,7 @@ def get_bufr_value(msgid: int, key: str) -> float:
372
434
  raise ValueError(f"Unsupported BUFR value type {type(value)} for key {key}")
373
435
 
374
436
 
375
- def read_bufr_message(fp: BinaryIO) -> Optional[BUFRVariables]:
437
+ def read_bufr_message(fp: BinaryIO, backwards_compatible: bool = False) -> Optional[BUFRVariables]:
376
438
  """
377
439
  Read and parse BUFR message from binary IO stream.
378
440
 
@@ -383,6 +445,8 @@ def read_bufr_message(fp: BinaryIO) -> Optional[BUFRVariables]:
383
445
  ----------
384
446
  fp
385
447
  Readable binary io stream
448
+ backwards_compatible
449
+ Use legacy pressure if nonCoordinatePressure is nan
386
450
 
387
451
  Returns
388
452
  -------
@@ -435,11 +499,19 @@ def read_bufr_message(fp: BinaryIO) -> Optional[BUFRVariables]:
435
499
  f"Unknown BUFR template unexpandedDescriptors: {unexpanded_descriptors}"
436
500
  )
437
501
 
502
+ nonCoordinatePressure = get_bufr_value(ibufr, "nonCoordinatePressure")
503
+ if math.isnan(nonCoordinatePressure) and backwards_compatible:
504
+ nonCoordinatePressure = get_bufr_value(ibufr, "pressure")
505
+ if not math.isnan(nonCoordinatePressure):
506
+ logger.warning(
507
+ f"nonCoordinatePressure is nan, using legacy pressure instead"
508
+ )
509
+
438
510
  variables = BUFRVariables(
439
511
  timestamp=timestamp,
440
512
  relativeHumidity=get_bufr_value(ibufr, "relativeHumidity"),
441
513
  airTemperature=get_bufr_value(ibufr, "airTemperature"),
442
- pressure=get_bufr_value(ibufr, "pressure"),
514
+ nonCoordinatePressure=nonCoordinatePressure,
443
515
  windDirection=get_bufr_value(ibufr, "windDirection"),
444
516
  windSpeed=get_bufr_value(ibufr, "windSpeed"),
445
517
  latitude=get_bufr_value(ibufr, "latitude"),
@@ -485,5 +557,6 @@ def read_bufr_file(path: PathLike) -> pd.DataFrame:
485
557
  message_vars = read_bufr_message(fp)
486
558
  if message_vars is None:
487
559
  break
488
- lines.append(message_vars)
489
- return pd.DataFrame(lines).rename_axis("message_index")
560
+ lines.append(message_vars.as_series())
561
+ data_frame = pd.DataFrame(lines).set_index("wmo_id")
562
+ return data_frame
@@ -0,0 +1,178 @@
1
+ import logging
2
+ from pathlib import Path
3
+ from typing import Sequence, List
4
+
5
+ import pandas as pd
6
+ from pypromice.station_configuration import load_station_configuration_mapping
7
+
8
+ from pypromice.postprocess.get_bufr import (
9
+ get_bufr,
10
+ DEFAULT_LIN_REG_TIME_LIMIT,
11
+ DEFAULT_POSITION_SEED_PATH,
12
+ )
13
+
14
+ main_logger = logging.getLogger(__name__)
15
+
16
+
17
+ def create_bufr_files(
18
+ input_files: Sequence[Path],
19
+ station_configuration_root: Path,
20
+ period_start: str,
21
+ period_end: str,
22
+ output_root: Path,
23
+ override: bool,
24
+ break_on_error: bool = False,
25
+ output_filename_suffix: str = "geus_",
26
+ ):
27
+ """
28
+ Generate hourly bufr files for all input files
29
+
30
+ Parameters
31
+ ----------
32
+ input_files
33
+ Paths to csv l3 hourly data files
34
+ station_configuration_root
35
+ Root directory containing station configuration toml files
36
+ period_start
37
+ Datetime string for period start. E.g. '2024-01-01T00:00' or '20240101'
38
+ period_end
39
+ Datetime string for period end
40
+ output_root
41
+ Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.
42
+ override
43
+ If False: Skip a period if the compiled output file exists.
44
+ break_on_error
45
+ If True: Stop processing if an error occurs
46
+ output_filename_suffix
47
+ Prefix prepended to the timestamp in the compiled output file name (despite the parameter name)
48
+
49
+ """
50
+ periods = pd.date_range(period_start, period_end, freq="h")
51
+ output_individual_root = output_root / "individual"
52
+ output_compiled_root = output_root / "compiled"
53
+ output_individual_root.mkdir(parents=True, exist_ok=True)
54
+ output_compiled_root.mkdir(parents=True, exist_ok=True)
55
+
56
+ station_configuration_mapping = load_station_configuration_mapping(
57
+ station_configuration_root,
58
+ skip_unexpected_fields=True,
59
+ )
60
+
61
+ for period in periods:
62
+ period: pd.Timestamp
63
+ date_str = period.strftime("%Y%m%dT%H%M")
64
+ main_logger.info(f"Processing {date_str}")
65
+ output_dir_path = output_individual_root / f"{date_str}"
66
+ output_file_path = (
67
+ output_compiled_root / f"{output_filename_suffix}{date_str}.bufr"
68
+ )
69
+
70
+ main_logger.info(f"{period}, {date_str}")
71
+ if override or not output_file_path.exists():
72
+ get_bufr(
73
+ bufr_out=output_dir_path,
74
+ input_files=input_files,
75
+ store_positions=False,
76
+ positions_filepath=None,
77
+ linear_regression_time_limit=DEFAULT_LIN_REG_TIME_LIMIT,
78
+ timestamps_pickle_filepath=None,
79
+ target_timestamp=period,
80
+ station_configuration_mapping=station_configuration_mapping,
81
+ positions_seed_path=DEFAULT_POSITION_SEED_PATH,
82
+ break_on_error=break_on_error,
83
+ )
84
+
85
+ with output_file_path.open("wb") as fp_dst:
86
+ for src_path in output_dir_path.glob("*.bufr"):
87
+ with src_path.open("rb") as fp_src:
88
+ fp_dst.write(fp_src.read())
89
+ else:
90
+ main_logger.info(f"Output file exists. Skipping {output_file_path}")
91
+
92
+
93
+ # %%
94
+
95
+
96
+ def main():
97
+ import argparse
98
+ import glob
99
+ import sys
100
+
101
+ logger_format_string = "%(asctime)s; %(levelname)s; %(name)s; %(message)s"
102
+ logging.basicConfig(
103
+ level=logging.ERROR,
104
+ stream=sys.stdout,
105
+ format=logger_format_string,
106
+ )
107
+
108
+ main_handler = logging.StreamHandler(sys.stdout)
109
+ main_handler.setLevel(logging.INFO)
110
+ formatter = logging.Formatter(logger_format_string)
111
+ main_handler.setFormatter(formatter)
112
+ main_logger.addHandler(main_handler)
113
+ main_logger.setLevel(logging.INFO)
114
+
115
+ parser = argparse.ArgumentParser("Create BUFR files from L3 tx .csv files.")
116
+ parser.add_argument(
117
+ "--input_files",
118
+ "--l3-filepath",
119
+ "-i",
120
+ type=Path,
121
+ nargs="+",
122
+ required=True,
123
+ help="Path to L3 tx .csv files. Can be direct paths or glob patterns",
124
+ )
125
+ parser.add_argument(
126
+ "--period_start",
127
+ "-s",
128
+ required=True,
129
+ help="Datetime string for period start. Eg '2024-01-01T00:00' or '20240101",
130
+ )
131
+ parser.add_argument(
132
+ "--period_end", "-e", required=True, help="Datetime string for period end"
133
+ )
134
+ parser.add_argument(
135
+ "--output_root",
136
+ "-o",
137
+ required=True,
138
+ type=Path,
139
+ help="Output dir for both bufr files for individual stations and compiled. Organized in two sub directories.",
140
+ )
141
+ parser.add_argument(
142
+ "--station_configuration_root",
143
+ "-c",
144
+ required=True,
145
+ type=Path,
146
+ help="Root directory containing station configuration toml files",
147
+ )
148
+ parser.add_argument(
149
+ "--override",
150
+ "-f",
151
+ default=False,
152
+ action="store_true",
153
+ help="Recreate and overide existing output files",
154
+ )
155
+ args = parser.parse_args()
156
+
157
+ # Interpret all input file paths as glob patterns if they don't exist
158
+ input_files: List[Path] = list()
159
+ for path in args.input_files:
160
+ if path.exists():
161
+ input_files.append(path)
162
+ else:
163
+ # The input path might be a glob pattern
164
+ input_files += map(Path, glob.glob(path.as_posix()))
165
+
166
+ main_logger.info(f"Processing {len(input_files)} input files")
167
+ create_bufr_files(
168
+ input_files=input_files,
169
+ period_start=args.period_start,
170
+ period_end=args.period_end,
171
+ output_root=args.output_root,
172
+ override=args.override,
173
+ station_configuration_root=args.station_configuration_root,
174
+ )
175
+
176
+
177
+ if __name__ == "__main__":
178
+ main()