pydartdiags 0.0.41__py3-none-any.whl → 0.0.42__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of pydartdiags might be problematic. Click here for more details.

@@ -3,6 +3,7 @@ import datetime as dt
3
3
  import numpy as np
4
4
  import os
5
5
  import yaml
6
+ import struct
6
7
 
7
8
  class obs_sequence:
8
9
  """Create an obs_sequence object from an ascii observation sequence file.
@@ -55,22 +56,40 @@ class obs_sequence:
55
56
 
56
57
  reversed_vert = {value: key for key, value in vert.items()}
57
58
 
58
- # synonyms for observation
59
- synonyms_for_obs = ['NCEP BUFR observation',
60
- 'AIRS observation',
61
- 'GTSPP observation',
62
- 'SST observation',
63
- 'observations',
64
- 'WOD observation']
65
59
 
66
- def __init__(self, file):
60
+ def __init__(self, file, synonyms=None):
67
61
  self.loc_mod = 'None'
68
62
  self.file = file
69
- self.header = self.read_header(file)
63
+ self.synonyms_for_obs = ['NCEP BUFR observation',
64
+ 'AIRS observation',
65
+ 'GTSPP observation',
66
+ 'SST observation',
67
+ 'observations',
68
+ 'WOD observation']
69
+ if synonyms:
70
+ if isinstance(synonyms, list):
71
+ self.synonyms_for_obs.extend(synonyms)
72
+ else:
73
+ self.synonyms_for_obs.append(synonyms)
74
+
75
+ module_dir = os.path.dirname(__file__)
76
+ self.default_composite_types = os.path.join(module_dir,"composite_types.yaml")
77
+
78
+ if self.is_binary(file):
79
+ self.header = self.read_binary_header(file)
80
+ else:
81
+ self.header = self.read_header(file)
82
+
70
83
  self.types = self.collect_obs_types(self.header)
71
84
  self.reverse_types = {v: k for k, v in self.types.items()}
72
85
  self.copie_names, self.n_copies = self.collect_copie_names(self.header)
73
- self.seq = self.obs_reader(file, self.n_copies)
86
+
87
+ if self.is_binary(file):
88
+ self.seq = self.obs_binary_reader(file, self.n_copies)
89
+ self.loc_mod = 'loc3d' # only loc3d supported for binary, & no way to check
90
+ else:
91
+ self.seq = self.obs_reader(file, self.n_copies)
92
+
74
93
  self.all_obs = self.create_all_obs() # uses up the generator
75
94
  # at this point you know if the seq is loc3d or loc1d
76
95
  if self.loc_mod == 'None':
@@ -88,8 +107,7 @@ class obs_sequence:
88
107
  if 'prior_ensemble_mean'.casefold() in map(str.casefold, self.columns):
89
108
  self.df['bias'] = (self.df['prior_ensemble_mean'] - self.df['observation'])
90
109
  self.df['sq_err'] = self.df['bias']**2 # squared error
91
- module_dir = os.path.dirname(__file__)
92
- self.default_composite_types = os.path.join(module_dir,"composite_types.yaml")
110
+
93
111
 
94
112
  def create_all_obs(self):
95
113
  """ steps through the generator to create a
@@ -128,7 +146,11 @@ class obs_sequence:
128
146
  raise ValueError("Neither 'loc3d' nor 'loc1d' could be found in the observation sequence.")
129
147
  typeI = obs.index('kind') # type of observation
130
148
  type_value = obs[typeI + 1]
131
- data.append(self.types[type_value]) # observation type
149
+ if not self.types:
150
+ data.append('Identity')
151
+ else:
152
+ data.append(self.types[type_value]) # observation type
153
+
132
154
  # any observation specific obs def info is between here and the end of the list
133
155
  time = obs[-2].split()
134
156
  data.append(int(time[0])) # seconds
@@ -257,9 +279,19 @@ class obs_sequence:
257
279
  heading.append('obs_err_var')
258
280
  return heading
259
281
 
282
+ @staticmethod
283
+ def is_binary(file):
284
+ """Check if a file is binary file."""
285
+ with open(file, 'rb') as f:
286
+ chunk = f.read(1024)
287
+ if b'\0' in chunk:
288
+ return True
289
+ return False
290
+
291
+
260
292
  @staticmethod
261
293
  def read_header(file):
262
- """Read the header and number of lines in the header of an obs_seq file"""
294
+ """Read the header and number of lines in the header of an ascii obs_seq file"""
263
295
  header = []
264
296
  with open(file, 'r') as f:
265
297
  for line in f:
@@ -270,6 +302,118 @@ class obs_sequence:
270
302
  header.append(line.strip())
271
303
  return header
272
304
 
305
@staticmethod
def read_binary_header(file):
    """Read the header of a binary obs_seq file written as Fortran unformatted records.

    The header is returned as a list of strings, in file order:

    * the first two records, decoded as text
    * the number of observation type definitions
    * one ``"<integer> <name>"`` entry per observation type definition
    * ``"num_copies: ... num_qc: ..."`` and ``"num_obs: ... max_num_obs: ..."``
      (both built from a single four-integer record)
    * one entry per copy / qc metadata record, decoded as text
    * ``"first: ... last: ..."``

    Parameters:
        file (str): Path of the binary obs_seq file.

    Returns:
        list: The header entries described above.

    Raises:
        ValueError: If the file ends before the header is complete, a record
            is shorter than the values it must hold, or a trailing record
            length does not match the leading one.
    """
    def _next_record(f, what, min_size=0):
        # One Fortran record: leading length | payload | trailing length.
        record_length = obs_sequence.read_record_length(f)
        if record_length is None:
            raise ValueError(f"Binary obs_seq header is truncated: missing {what}")
        record = f.read(record_length)
        if len(record) != record_length or len(record) < min_size:
            raise ValueError(f"Binary obs_seq header is truncated: incomplete {what}")
        obs_sequence.check_trailing_record_length(f, record_length)
        return record

    header = []
    with open(file, 'rb') as f:
        # Records 1 and 2 are plain text.
        for _ in range(2):
            header.append(_next_record(f, "leading text record").decode('utf-8').strip())

        # Record 3 holds the number of observation type definitions.
        record = _next_record(f, "number of obs type definitions", 4)
        obs_types_definitions = struct.unpack('i', record[:4])[0]
        header.append(str(obs_types_definitions))

        # Each definition record is an integer followed by the type name.
        for _ in range(obs_types_definitions):
            record = _next_record(f, "obs type definition", 4)
            integer_value = struct.unpack('i', record[:4])[0]
            string_value = record[4:].decode('utf-8').strip()
            header.append(f"{integer_value} {string_value}")

        # Slice the bytes (not the unpacked tuple) so only the four leading
        # integers are decoded.
        record = _next_record(f, "num_copies/num_qc/num_obs/max_num_obs record", 16)
        num_copies, num_qcs, num_obs, max_num_obs = struct.unpack('iiii', record[:16])
        header.append(f"num_copies: {num_copies} num_qc: {num_qcs}")
        header.append(f"num_obs: {num_obs} max_num_obs: {max_num_obs}")

        # Copy and qc metadata: one text record each.
        for _ in range(num_copies + num_qcs):
            header.append(_next_record(f, "copy/qc metadata record").decode('utf-8').strip())

        # First and last observation, stored as two integers.
        record = _next_record(f, "first/last record", 8)
        first, last = struct.unpack('ii', record[:8])
        header.append(f"first: {first} last: {last}")

    return header
416
+
273
417
  @staticmethod
274
418
  def collect_obs_types(header):
275
419
  """Create a dictionary for the observation types in the obs_seq header"""
@@ -299,7 +443,7 @@ class obs_sequence:
299
443
 
300
444
  @staticmethod
301
445
  def obs_reader(file, n):
302
- """Reads the obs sequence file and returns a generator of the obs"""
446
+ """Reads the ascii obs sequence file and returns a generator of the obs"""
303
447
  previous_line = ''
304
448
  with open(file, 'r') as f:
305
449
  for line in f:
@@ -339,6 +483,115 @@ class obs_sequence:
339
483
  previous_line = next_line
340
484
  yield obs
341
485
 
486
+ @staticmethod
487
+ def check_trailing_record_length(file, expected_length):
488
+ """Reads and checks the trailing record length from the binary file written by Fortran.
489
+
490
+ Parameters:
491
+ file (file): The file object.
492
+ expected_length (int): The expected length of the trailing record.
493
+
494
+ Assuming 4 bytes:
495
+ | Record Length (4 bytes) | Data (N bytes) | Trailing Record Length (4 bytes) |
496
+ """
497
+ trailing_record_length_bytes = file.read(4)
498
+ trailing_record_length = struct.unpack('i', trailing_record_length_bytes)[0]
499
+ if expected_length != trailing_record_length:
500
+ raise ValueError("Record length mismatch in Fortran binary file")
501
+
502
+ @staticmethod
503
+ def read_record_length(file):
504
+ """Reads and unpacks the record length from the file."""
505
+ record_length_bytes = file.read(4)
506
+ if not record_length_bytes:
507
+ return None # End of file
508
+ return struct.unpack('i', record_length_bytes)[0]
509
+
510
+
511
def obs_binary_reader(self, file, n):
    """Read a binary obs sequence file and return a generator of the obs.

    The header records are skipped first (``len(self.header) - 1`` records,
    because two header entries come from a single record). Each observation
    is then read as a fixed sequence of records and yielded as a list:

        ["OBS <number>", <n copy values>, "<linked list info>", "loc3d",
         "<x> <y> <z> <vert>", "kind", "<kind>", "<seconds> <days>",
         <obs error variance>]

    Binary files carry no location header, so 'loc3d' is always inserted.
    NOTE(review): only this fixed record sequence is read; observation types
    that store extra records per observation are presumably not supported.

    Parameters:
        file (str): Path of the binary obs_seq file.
        n (int): Number of copy records stored for each observation.

    Yields:
        list: One observation, laid out as described above.

    Raises:
        ValueError: If the file ends in the middle of an observation, a
            record is shorter than the values it must hold, or a trailing
            record length does not match the leading one.
    """
    def _next_record(f, what, min_size):
        # One Fortran record: leading length | payload | trailing length.
        record_length = obs_sequence.read_record_length(f)
        if record_length is None:
            raise ValueError(f"Binary obs_seq file is truncated: missing {what}")
        record = f.read(record_length)
        if len(record) != record_length or len(record) < min_size:
            raise ValueError(f"Binary obs_seq file is truncated: incomplete {what}")
        obs_sequence.check_trailing_record_length(f, record_length)
        return record

    header_length = len(self.header)
    with open(file, 'rb') as f:
        # Skip the header records without decoding them.
        for _ in range(header_length - 1):
            record_length = obs_sequence.read_record_length(f)
            if record_length is None:  # End of file
                break
            # Jump over the record data and its 4-byte trailing length.
            f.seek(record_length + 4, 1)

        obs_num = 0
        while True:
            # A clean end of file can only occur on an observation boundary;
            # probe one byte and step back if there is more data.
            if not f.read(1):
                break
            f.seek(-1, 1)

            obs_num += 1
            obs = [f"OBS {obs_num}"]

            # copies: one double per record
            for _ in range(n):
                record = _next_record(f, "copy value", 8)
                obs.append(struct.unpack('d', record[:8])[0])

            # linked list info
            record = _next_record(f, "linked list info", 12)
            int1, int2, int3 = struct.unpack('iii', record[:12])
            obs.append(f"{int1:<12} {int2:<10} {int3:<12}")

            # location (note no location header "loc3d" or "loc1d" for binary files)
            obs.append('loc3d')
            record = _next_record(f, "location", 28)
            x, y, z, vert = struct.unpack('dddi', record[:28])
            obs.append(f"{x} {y} {z} {vert}")

            # kind (type of observation) value
            obs.append('kind')
            record = _next_record(f, "kind", 4)
            obs.append(f"{struct.unpack('i', record[:4])[0]}")

            # time (seconds, days)
            record = _next_record(f, "time", 8)
            seconds, days = struct.unpack('ii', record[:8])
            obs.append(f"{seconds} {days}")

            # obs error variance
            record = _next_record(f, "obs error variance", 8)
            obs.append(struct.unpack('d', record[:8])[0])

            yield obs
594
+
342
595
  def composite_types(self, composite_types='use_default'):
343
596
  """
344
597
  Set up and construct composite types for the DataFrame.
@@ -504,3 +757,5 @@ def construct_composit(df_comp, composite, components):
504
757
  merged_df = merged_df.drop(columns=[col for col in merged_df.columns if col.endswith('_v')])
505
758
 
506
759
  return merged_df
760
+
761
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: pydartdiags
3
- Version: 0.0.41
3
+ Version: 0.0.42
4
4
  Summary: Observation Sequence Diagnostics for DART
5
5
  Home-page: https://github.com/NCAR/pyDARTdiags.git
6
6
  Author: Helen Kershaw
@@ -19,6 +19,11 @@ Requires-Dist: numpy>=1.26
19
19
  Requires-Dist: plotly>=5.22.0
20
20
  Requires-Dist: pyyaml>=6.0.2
21
21
 
22
+ [![License](https://img.shields.io/badge/License-Apache%202.0-blue.svg)](https://opensource.org/licenses/Apache-2.0)
23
+ [![codecov](https://codecov.io/gh/NCAR/pyDARTdiags/graph/badge.svg?token=VK55SQZSVD)](https://codecov.io/gh/NCAR/pyDARTdiags)
24
+ [![PyPI version](https://badge.fury.io/py/pydartdiags.svg)](https://pypi.org/project/pydartdiags/)
25
+
26
+
22
27
  # pyDARTdiags
23
28
 
24
29
  pyDARTdiags is a Python library for observation space diagnostics for the Data Assimilation Research Testbed ([DART](https://github.com/NCAR/DART)).
@@ -370,7 +375,7 @@ obsq.possible_vs_used(obs_seq.df)
370
375
  df_qc0 = obsq.select_by_dart_qc(obs_seq.df, 0)
371
376
  plots.plot_rank_histogram(df_qc0)
372
377
  ```
373
- ![Rank Histogram](docs/images/rankhist.png)
378
+ ![Rank Histogram](https://raw.githubusercontent.com/NCAR/pydartdiags/main/docs/images/rankhist.png)
374
379
 
375
380
 
376
381
  ### plot profile of RMSE and Bias
@@ -387,9 +392,9 @@ df_qc0 = obsq.select_by_dart_qc(obs_seq.df, 0) # only qc 0
387
392
  df_profile, figrmse, figbias = plots.plot_profile(df_qc0, plevels)
388
393
  ```
389
394
 
390
- ![RMSE Plot](docs/images/rmse.png)
395
+ ![RMSE Plot](https://raw.githubusercontent.com/NCAR/pydartdiags/main/docs/images/rmse.png)
391
396
 
392
- ![Bias Plot](docs/images/bias.png)
397
+ ![Bias Plot](https://raw.githubusercontent.com/NCAR/pydartdiags/main/docs/images/bias.png)
393
398
 
394
399
  ## Contributing
395
400
  Contributions are welcome! If you have a feature request, bug report, or a suggestion, please open an issue on our GitHub repository.
@@ -0,0 +1,10 @@
1
+ pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
+ pydartdiags/obs_sequence/obs_sequence.py,sha256=UBaNMJR3MOro47KyJMdgJ0p-aEqcrp817-SOtpFQ1bg,31511
4
+ pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
+ pydartdiags/plots/plots.py,sha256=_vZFgQ9qrmtwE_HAP6_nx3pV4JHRdnYckZ5xUxUH4io,6753
6
+ pydartdiags-0.0.42.dist-info/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
7
+ pydartdiags-0.0.42.dist-info/METADATA,sha256=oJe2bv1cjoIhlpJQF2tLIIGhS5zu5qJJzFYEZw9ffgQ,9857
8
+ pydartdiags-0.0.42.dist-info/WHEEL,sha256=GV9aMThwP_4oNCtvEC2ec3qUYutgWeAzklro_0m4WJQ,91
9
+ pydartdiags-0.0.42.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
10
+ pydartdiags-0.0.42.dist-info/RECORD,,
@@ -1,5 +1,5 @@
1
1
  Wheel-Version: 1.0
2
- Generator: setuptools (74.0.0)
2
+ Generator: setuptools (75.1.0)
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
5
5
 
@@ -1,10 +0,0 @@
1
- pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
- pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
3
- pydartdiags/obs_sequence/obs_sequence.py,sha256=WrQ4lFymM1y9KVBl-_SzMR7E_VfPQJ8b4kHcVnIyEOc,21817
4
- pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- pydartdiags/plots/plots.py,sha256=_vZFgQ9qrmtwE_HAP6_nx3pV4JHRdnYckZ5xUxUH4io,6753
6
- pydartdiags-0.0.41.dist-info/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
7
- pydartdiags-0.0.41.dist-info/METADATA,sha256=LP13-RMWfmd54Fifdp_r3GDhzfzvd3X-kMFAmR6cA5s,9345
8
- pydartdiags-0.0.41.dist-info/WHEEL,sha256=UvcQYKBHoFqaQd6LKyqHw9fxEolWLQnlzP0h_LgJAfI,91
9
- pydartdiags-0.0.41.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
10
- pydartdiags-0.0.41.dist-info/RECORD,,