pydartdiags 0.6.0__py3-none-any.whl → 0.6.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


pydartdiags/obs_sequence/obs_sequence.py

@@ -184,6 +184,14 @@ class ObsSequence:
             }
         self.df = self.df.rename(columns=rename_dict)
 
+        if self.is_binary(file):
+            # binary files do not have "OBS X" in them, so set the linked list from the df.
+            self.update_attributes_from_df()
+
+        # Replace MISSING_R8s with NaNs in posterior stats where DART_quality_control = 2
+        if self.has_posterior():
+            ObsSequence.replace_qc2_nan(self.df)
+
     def create_all_obs(self):
         """steps through the generator to create a
         list of all observations in the sequence
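
Taken together, the additions above change what a freshly loaded sequence contains: binary files have their linked list and related attributes rebuilt from the DataFrame, and posterior statistics that DART set to MISSING_R8 (-888888.0) for observations whose posterior forward operators failed (DART_quality_control = 2) are masked to NaN at read time. A minimal sketch of the observable effect, assuming the constructor takes a file path (as the hunk's self.is_binary(file) call suggests) and the import path implied by the wheel's RECORD; the file name is hypothetical:

    from pydartdiags.obs_sequence.obs_sequence import ObsSequence

    obs_seq = ObsSequence("obs_seq.final")  # hypothetical obs_seq file with posterior copies
    if obs_seq.has_posterior():
        qc2 = obs_seq.df["DART_quality_control"] == 2.0
        # QC=2 posterior stats are NaN rather than -888888.0, so pandas
        # mean()/std() skip them with no extra filtering.
        print(obs_seq.df.loc[qc2, "posterior_ensemble_mean"].head())
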
@@ -197,7 +205,7 @@ class ObsSequence:
     def obs_to_list(self, obs):
         """put single observation into a list"""
         data = []
-        data.append(obs[0].split()[1])  # obs_num
+        data.append(int(obs[0].split()[1]))  # obs_num
         data.extend(list(map(float, obs[1 : self.n_copies + 1])))  # all the copies
         data.append(obs[self.n_copies + 1])  # linked list info
         try:  # HK todo only have to check loc3d or loc1d for the first observation, the whole file is the same
@@ -219,9 +227,9 @@ class ObsSequence:
                 "Neither 'loc3d' nor 'loc1d' could be found in the observation sequence."
             )
         typeI = obs.index("kind")  # type of observation
-        type_value = obs[typeI + 1]
-        if not self.types:
-            data.append("Identity")
+        type_value = int(obs[typeI + 1])
+        if type_value < 0:
+            data.append(type_value)
         else:
             data.append(self.types[type_value])  # observation type
 
@@ -283,14 +291,22 @@ class ObsSequence:
                 + str(self.reversed_vert[data[self.n_copies + 5]])
             )  # location x, y, z, vert
             obs.append("kind")  # this is type of observation
-            obs.append(self.reverse_types[data[self.n_copies + 6]])  # observation type
+            obs_type = data[self.n_copies + 6]
+            if isinstance(obs_type, str):
+                obs.append(self.reverse_types[obs_type])  # observation type
+            else:
+                obs.append(obs_type)  # Identity obs negative integer
             # Convert metadata to a string and append !HK @todo you are not converting to string
             obs.extend(data[self.n_copies + 7])  # metadata
             obs.extend(data[self.n_copies + 8])  # external forward operator
         elif self.loc_mod == "loc1d":
             obs.append(data[self.n_copies + 2])  # 1d location
             obs.append("kind")  # this is type of observation
-            obs.append(self.reverse_types[data[self.n_copies + 3]])  # observation type
+            obs_type = data[self.n_copies + 3]
+            if isinstance(obs_type, str):
+                obs.append(self.reverse_types[obs_type])  # observation type
+            else:
+                obs.append(obs_type)  # Identity obs negative integer
             obs.extend(data[self.n_copies + 4])  # metadata
             obs.extend(data[self.n_copies + 5])  # external forward operator
             obs.append(" ".join(map(str, data[-4:-2])))  # seconds, days
@@ -316,14 +332,17 @@ class ObsSequence:
 
         This function writes the observation sequence stored in the obs_seq.DataFrame to a specified file.
         It updates the header with the number of observations, converts coordinates back to radians
-        if necessary, drops unnecessary columns, sorts the DataFrame by time, and generates a linked
-        list pattern for reading by DART programs.
+        if necessary, reverts NaNs back to MISSING_R8 for observations with QC=2, drops unnecessary
+        columns, sorts the DataFrame by time, and generates a linked list pattern for reading by DART
+        programs.
 
         Args:
             file (str): The path to the file where the observation sequence will be written.
 
         Notes:
             - Longitude and latitude are converted back to radians if the location model is 'loc3d'.
+            - The replacement of MISSING_R8 values with NaNs for any obs that failed the posterior
+              forward observation operators (QC2) is reverted.
             - The 'bias' and 'sq_err' columns are dropped if they exist in the DataFrame.
             - The DataFrame is sorted by the 'time' column.
             - An 'obs_num' column is added to the DataFrame to number the observations in time order.
@@ -334,7 +353,8 @@ class ObsSequence:
 
         """
 
-        self.create_header_from_dataframe()
+        # Update attributes, header, and linked list from the dataframe
+        self.update_attributes_from_df()
 
         with open(file, "w") as f:
 
@@ -358,15 +378,9 @@ class ObsSequence:
         if "midpoint" in df_copy.columns:
             df_copy = df_copy.drop(columns=["midpoint", "vlevels"])
 
-        # linked list for reading by dart programs
-        df_copy = df_copy.sort_values(
-            by=["time"], kind="stable"
-        )  # sort the DataFrame by time
-        df_copy.reset_index(drop=True, inplace=True)
-        df_copy["obs_num"] = df_copy.index + 1  # obs_num in time order
-        df_copy["linked_list"] = ObsSequence.generate_linked_list_pattern(
-            len(df_copy)
-        )  # linked list pattern
+        # Revert NaNs back to MISSING_R8s
+        if self.has_posterior():
+            ObsSequence.revert_qc2_nan(df_copy)
 
         def write_row(row):
             ob_write = self.list_to_obs(row.tolist())
@@ -390,13 +404,16 @@ class ObsSequence:
             dict: The types dictionary with keys sorted in numerical order.
         """
         # Create a dictionary of observation types from the dataframe
-        unique_types = df["type"].unique()
+        # Ignore Identity obs (negative integers)
+        unique_types = df.loc[
+            df["type"].apply(lambda x: isinstance(x, str)), "type"
+        ].unique()
 
         # Ensure all unique types are in reverse_types
         for obs_type in unique_types:
             if obs_type not in reverse_types:
-                new_id = int(max(reverse_types.values(), default=0)) + 1
-                reverse_types[obs_type] = str(new_id)
+                new_id = max(reverse_types.values(), default=0) + 1
+                reverse_types[obs_type] = new_id
 
         not_sorted_types = {
             reverse_types[obs_type]: obs_type for obs_type in unique_types
@@ -431,9 +448,7 @@ class ObsSequence:
         self.header.append(f"{len(self.types)}")
         for key, value in self.types.items():
             self.header.append(f"{key} {value}")
-        self.header.append(
-            f"num_copies: {self.n_non_qc} num_qc: {self.n_qc}"
-        )  # @todo HK not keeping track if num_qc changes
+        self.header.append(f"num_copies: {self.n_non_qc} num_qc: {self.n_qc}")
         self.header.append(f"num_obs: {num_obs:>10} max_num_obs: {num_obs:>10}")
         stats_cols = [
             "prior_bias",
@@ -692,7 +707,8 @@ class ObsSequence:
     def collect_obs_types(header):
         """Create a dictionary for the observation types in the obs_seq header"""
         num_obs_types = int(header[2])
-        types = dict([x.split() for x in header[3 : num_obs_types + 3]])
+        # The first line containing obs types is the 4th line in an obs_seq file.
+        types = {int(x.split()[0]): x.split()[1] for x in header[3 : num_obs_types + 3]}
         return types
 
     @staticmethod
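
A quick illustration of the new integer-keyed parse, using a fabricated header list (real obs_seq headers carry the type count on the third line and one "id name" pair per line after it):

    header = ["obs_sequence", "obs_type_definitions", "2",
              "1 RADIOSONDE_TEMPERATURE", "12 ACARS_TEMPERATURE"]
    num_obs_types = int(header[2])
    types = {int(x.split()[0]): x.split()[1] for x in header[3 : num_obs_types + 3]}
    assert types == {1: "RADIOSONDE_TEMPERATURE", 12: "ACARS_TEMPERATURE"}

Keying by int, rather than the old str-keyed dict, is what makes the self.types[type_value] lookup in obs_to_list work after type_value is converted with int().
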
@@ -856,18 +872,45 @@ class ObsSequence:
 
             # kind (type of observation) value
             obs.append("kind")
-            record_length_bytes = f.read(4)
-            record_length = struct.unpack("i", record_length_bytes)[0]
+            record_length = ObsSequence.read_record_length(f)
             record = f.read(record_length)
             kind = f"{struct.unpack('i', record)[0]}"
             obs.append(kind)
 
             ObsSequence.check_trailing_record_length(f, record_length)
 
+            # Skip metadata (obs_def) and go directly to the time record
+            while True:
+                pos = f.tell()
+                record_length = ObsSequence.read_record_length(f)
+                if record_length is None:
+                    break  # end of file
+
+                record = f.read(record_length)
+                # Check whether this record is likely the "time" record (8 bytes, unpacks as two ints)
+                if record_length == 8:
+                    try:
+                        seconds, days = struct.unpack("ii", record)
+                        # If unpack succeeds, this is the time record
+                        f.seek(pos)  # seek back so the main loop can process it
+                        break
+                    except struct.error:
+                        pass  # not the time record, keep skipping
+
+                ObsSequence.check_trailing_record_length(f, record_length)
+
             # time (seconds, days)
             record_length = ObsSequence.read_record_length(f)
             record = f.read(record_length)
-            seconds, days = struct.unpack("ii", record)[:8]
+            try:  # in case the record is not the time record because of metadata quirks
+                seconds, days = struct.unpack("ii", record)
+            except struct.error as e:
+                print(
+                    f"Reading observation {obs_num}... record length: {record_length} kind {kind}"
+                )
+                print("")
+                print(f"Error unpacking seconds and days: {e}")
+                raise
             time_string = f"{seconds} {days}"
             obs.append(time_string)
 
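
The skip loop above walks Fortran unformatted records: each record is framed by a 4-byte length marker before and after the payload, which is what read_record_length and check_trailing_record_length (already used elsewhere in this file) wrap. A standalone sketch of that framing, assuming native-endian 4-byte markers:

    import struct

    def read_record(f):
        # [4-byte length n][n payload bytes][4-byte length n repeated]
        head = f.read(4)
        if len(head) < 4:
            return None  # end of file
        (n,) = struct.unpack("i", head)
        payload = f.read(n)
        (tail,) = struct.unpack("i", f.read(4))
        assert tail == n, "trailing length marker mismatch"
        return payload

One caveat: struct.unpack("ii", ...) always succeeds on exactly 8 bytes, so the probe effectively treats any 8-byte obs_def record as the time record; the try/except around the final unpack is the safety net for that case.
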
@@ -882,23 +925,27 @@ class ObsSequence:
 
             yield obs
 
-    def composite_types(self, composite_types="use_default"):
+    def composite_types(self, composite_types="use_default", raise_on_duplicate=False):
         """
-        Set up and construct composite types for the DataFrame.
+        Set up and construct composite observation types for the DataFrame.
 
-        This function sets up composite types based on a provided YAML configuration or
+        This function sets up composite observation types based on a provided YAML configuration or
         a default configuration. It constructs new composite rows by combining specified
-        components and adds them to the DataFrame.
+        components and adds them to the DataFrame in place.
 
         Args:
             composite_types (str, optional): The YAML configuration for composite types.
-                If 'use_default', the default configuration is used. Otherwise, a custom YAML configuration can be provided.
+                If 'use_default', the default configuration is used. Otherwise, a custom YAML
+                configuration can be provided.
+            raise_on_duplicate (bool, optional): If True, raise an exception if there are
+                duplicates in the components. Otherwise (the default), duplicates are treated
+                as distinct observations.
 
         Returns:
             pd.DataFrame: The updated DataFrame with the new composite rows added.
 
         Raises:
-            Exception: If there are repeat values in the components.
+            Exception: If there are repeat values in the components and raise_on_duplicate is True.
         """
 
         if composite_types == "use_default":
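
A minimal usage sketch of the new flag (obs_seq as loaded in the earlier sketch; the bundled composite_types.yaml supplies the default composite definitions):

    df = obs_seq.composite_types()                         # default YAML; duplicates treated as distinct
    df = obs_seq.composite_types(raise_on_duplicate=True)  # restore the old fail-fast behavior

Note the default flips the old behavior, which always raised when a component type contained duplicate rows.
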
@@ -924,7 +971,10 @@ class ObsSequence:
         df = pd.DataFrame()
         for key in self.composite_types_dict:
             df_new = construct_composit(
-                df_comp, key, self.composite_types_dict[key]["components"]
+                df_comp,
+                key,
+                self.composite_types_dict[key]["components"],
+                raise_on_duplicate,
             )
             df = pd.concat([df, df_new], axis=0)
@@ -1045,53 +1095,49 @@ class ObsSequence:
                 if item in obs_sequences[0].qc_copie_names
             ]
 
-            combo.n_copies = len(combo.copie_names)
-            combo.n_qc = len(combo.qc_copie_names)
-            combo.n_non_qc = len(combo.non_qc_copie_names)
-
         else:
             for obs_seq in obs_sequences:
                 if not obs_sequences[0].df.columns.isin(obs_seq.df.columns).all():
                     raise ValueError(
                         "All observation sequences must have the same copies."
                     )
-            combo.n_copies = obs_sequences[0].n_copies
-            combo.n_qc = obs_sequences[0].n_qc
-            combo.n_non_qc = obs_sequences[0].n_non_qc
             combo.copie_names = obs_sequences[0].copie_names
+            combo.non_qc_copie_names = obs_sequences[0].non_qc_copie_names
+            combo.qc_copie_names = obs_sequences[0].qc_copie_names
+        combo.n_copies = len(combo.copie_names)
 
         # todo HK @todo combine synonyms for obs?
 
         # Initialize combined data
-        combined_types = []
-        combined_df = pd.DataFrame()
-        combo.all_obs = None  # set to none to force writing from the dataframe if write_obs_seq is called
+        combo.df = pd.DataFrame()
 
         # Iterate over the list of observation sequences and combine their data
         for obs_seq in obs_sequences:
             if copies:
-                combined_df = pd.concat(
-                    [combined_df, obs_seq.df[requested_columns]], ignore_index=True
+                combo.df = pd.concat(
+                    [combo.df, obs_seq.df[requested_columns]], ignore_index=True
                 )
             else:
-                combined_df = pd.concat([combined_df, obs_seq.df], ignore_index=True)
-                combined_types.extend(list(obs_seq.reverse_types.keys()))
-
-        # create dictionary of types
-        keys = set(combined_types)
-        combo.reverse_types = {item: i + 1 for i, item in enumerate(keys)}
-        combo.types = {v: k for k, v in combo.reverse_types.items()}
-
-        # create linked list for obs
-        combo.df = combined_df.sort_values(by="time").reset_index(drop=True)
-        combo.df["linked_list"] = ObsSequence.generate_linked_list_pattern(
-            len(combo.df)
-        )
-        combo.df["obs_num"] = combined_df.index + 1
-        combo.create_header(len(combo.df))
+                combo.df = pd.concat([combo.df, obs_seq.df], ignore_index=True)
+
+        # update ObsSequence attributes from the combined DataFrame
+        combo.update_attributes_from_df()
 
         return combo
 
+    @staticmethod
+    def update_linked_list(df):
+        """
+        Sorts the DataFrame by 'time', resets the index, and adds/updates the
+        'linked_list' and 'obs_num' columns in place.
+        Modifies the input DataFrame directly.
+        """
+        df.sort_values(by="time", inplace=True, kind="stable")
+        df.reset_index(drop=True, inplace=True)
+        df["linked_list"] = ObsSequence.generate_linked_list_pattern(len(df))
+        df["obs_num"] = df.index + 1
+        return None
+
     def has_assimilation_info(self):
         """
         Check if the DataFrame has prior information.
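
The new update_linked_list helper centralizes the sort-and-renumber logic that was previously inlined in write_obs_seq and in the combining path above. What it guarantees, as a small sketch with a toy DataFrame (generate_linked_list_pattern supplies the DART linked-list column):

    import pandas as pd

    df = pd.DataFrame({
        "time": pd.to_datetime(["2020-01-02", "2020-01-01"]),
        "observation": [280.0, 275.0],
    })
    ObsSequence.update_linked_list(df)
    assert df["time"].is_monotonic_increasing    # sorted by time, stable sort
    assert list(df["obs_num"]) == [1, 2]         # 1-based numbering in time order
    assert "linked_list" in df.columns
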
@@ -1134,6 +1180,100 @@ class ObsSequence:
             self.header.append(copie)
         self.header.append(f"first: 1 last: {n}")
 
+    @staticmethod
+    def replace_qc2_nan(df):
+        """
+        Replace MISSING_R8 values with NaNs in posterior columns for observations where
+        DART_quality_control = 2 (posterior forward observation operators failed).
+
+        This causes these observations to be ignored in the calculation of posterior statistics.
+        """
+        df.loc[df["DART_quality_control"] == 2.0, "posterior_ensemble_mean"] = np.nan
+        df.loc[df["DART_quality_control"] == 2.0, "posterior_ensemble_spread"] = np.nan
+        num_post_members = len(
+            df.columns[df.columns.str.startswith("posterior_ensemble_member_")]
+        )
+        for i in range(1, num_post_members + 1):
+            df.loc[
+                df["DART_quality_control"] == 2.0,
+                "posterior_ensemble_member_" + str(i),
+            ] = np.nan
+
+    @staticmethod
+    def revert_qc2_nan(df):
+        """
+        Revert NaNs back to MISSING_R8s for observations where DART_quality_control = 2
+        (posterior forward observation operators failed).
+        """
+        df.loc[df["DART_quality_control"] == 2.0, "posterior_ensemble_mean"] = (
+            -888888.000000
+        )
+        df.loc[df["DART_quality_control"] == 2.0, "posterior_ensemble_spread"] = (
+            -888888.000000
+        )
+        num_post_members = len(
+            df.columns[df.columns.str.startswith("posterior_ensemble_member_")]
+        )
+        for i in range(1, num_post_members + 1):
+            df.loc[
+                df["DART_quality_control"] == 2.0, "posterior_ensemble_member_" + str(i)
+            ] = -888888.000000
+
+    def update_attributes_from_df(self):
+        """
+        Update all internal data (fields/properties) of the ObsSequence object that
+        depend on the DataFrame (self.df).
+        Call this after self.df is replaced or its structure changes.
+
+        Important:
+
+        Assumes copies are all columns between 'obs_num' and 'linked_list' (if present).
+        """
+        # Update columns
+        self.columns = list(self.df.columns)
+
+        # Update all_obs (list of lists, each row) @todo HK do we need this?
+        self.all_obs = None
+
+        # Update copie_names, non_qc_copie_names, qc_copie_names, n_copies, n_non_qc, n_qc.
+        # Try to infer from columns if possible, else leave as is.
+        # Assume copies are all columns between 'obs_num' and 'linked_list' (if present).
+        if "obs_num" in self.df.columns and "linked_list" in self.df.columns:
+            obs_num_idx = self.df.columns.get_loc("obs_num")
+            linked_list_idx = self.df.columns.get_loc("linked_list")
+            self.copie_names = list(self.df.columns[obs_num_idx + 1 : linked_list_idx])
+        else:
+            # Fallback: use previous value or empty
+            self.copie_names = getattr(self, "copie_names", [])
+        self.n_copies = len(self.copie_names)
+
+        # Try to infer non-qc and qc copies from previous names if possible.
+        # Find qc copies first.
+        self.qc_copie_names = [c for c in self.copie_names if c in self.qc_copie_names]
+        if self.qc_copie_names == []:  # if no qc copies found, assume all are non-qc
+            self.non_qc_copie_names = self.copie_names
+        else:  # pull out the non-qc copies from copie_names
+            self.non_qc_copie_names = [
+                c for c in self.copie_names if c not in self.qc_copie_names
+            ]
+        self.n_qc = len(self.qc_copie_names)
+        self.n_non_qc = len(self.non_qc_copie_names)
+
+        # Update header, types, and reverse_types
+        self.create_header_from_dataframe()
+
+        # Update seq (generator should be empty or None if not from file)
+        self.seq = []
+        # Update loc_mod
+        if "vertical" in self.df.columns:
+            self.loc_mod = "loc3d"
+        else:
+            self.loc_mod = "loc1d"
+
+        # update linked list for obs and obs_nums
+        ObsSequence.update_linked_list(self.df)
+
 
 def load_yaml_to_dict(file_path):
     """
@@ -1164,24 +1304,31 @@ def convert_dart_time(seconds, days):
     return time
 
 
-def construct_composit(df_comp, composite, components):
+def construct_composit(df_comp, composite, components, raise_on_duplicate):
     """
-    Construct a composite DataFrame by combining rows from two components.
-
-    This function takes two DataFrames and combines rows from them based on matching
-    location and time. It creates a new row with a composite type by combining
-    specified columns using the square root of the sum of squares method.
+    Creates a new DataFrame by combining pairs of rows from two specified component
+    types in an observation DataFrame. It matches rows based on location and time,
+    and then combines certain columns using the square root of the sum of squares
+    of the components.
 
     Args:
         df_comp (pd.DataFrame): The DataFrame containing the component rows to be combined.
         composite (str): The type name for the new composite rows.
         components (list of str): A list containing the type names of the two components to be combined.
+        raise_on_duplicate (bool): If True, raise an exception if there are duplicates in the
+            components; otherwise duplicates are treated as distinct observations.
+
 
     Returns:
         merged_df (pd.DataFrame): A DataFrame containing the new composite rows.
     """
+    # select rows for the two components
+    if len(components) != 2:
+        raise ValueError("components must be a list of two component types.")
     selected_rows = df_comp[df_comp["type"] == components[0].upper()]
     selected_rows_v = df_comp[df_comp["type"] == components[1].upper()]
+    selected_rows = selected_rows.copy()
+    selected_rows_v = selected_rows_v.copy()
 
     prior_columns_to_combine = df_comp.filter(regex="prior_ensemble").columns.tolist()
     posterior_columns_to_combine = df_comp.filter(
@@ -1192,7 +1339,7 @@ def construct_composit(df_comp, composite, components):
         + posterior_columns_to_combine
         + ["observation", "obs_err_var"]
     )
-    merge_columns = ["latitude", "longitude", "vertical", "time"]
+    merge_columns = ["latitude", "longitude", "vertical", "time"]  # @todo HK 1d or 3d
     same_obs_columns = merge_columns + [
         "observation",
         "obs_err_var",
@@ -1202,15 +1349,25 @@ def construct_composit(df_comp, composite, components):
         selected_rows[same_obs_columns].duplicated().sum() > 0
         or selected_rows_v[same_obs_columns].duplicated().sum() > 0
     ):
-        print(
-            f"{selected_rows[same_obs_columns].duplicated().sum()} duplicates in {composite} component {components[0]}: "
-        )
-        print(f"{selected_rows[same_obs_columns]}")
-        print(
-            f"{selected_rows_v[same_obs_columns].duplicated().sum()} duplicates in {composite} component {components[0]}: "
-        )
-        print(f"{selected_rows_v[same_obs_columns]}")
-        raise Exception("There are duplicates in the components.")
+
+        if raise_on_duplicate:
+            print(
+                f"{selected_rows[same_obs_columns].duplicated().sum()} duplicates in {composite} component {components[0]}: "
+            )
+            print(f"{selected_rows[same_obs_columns]}")
+            print(
+                f"{selected_rows_v[same_obs_columns].duplicated().sum()} duplicates in {composite} component {components[1]}: "
+            )
+            print(f"{selected_rows_v[same_obs_columns]}")
+            raise Exception("There are duplicates in the components.")
+
+        else:
+            selected_rows["dup_num"] = selected_rows.groupby(
+                same_obs_columns
+            ).cumcount()
+            selected_rows_v["dup_num"] = selected_rows_v.groupby(
+                same_obs_columns
+            ).cumcount()
 
     # Merge the two DataFrames on location and time columns
     merged_df = pd.merge(
@@ -1227,4 +1384,7 @@ def construct_composit(df_comp, composite, components):
         columns=[col for col in merged_df.columns if col.endswith("_v")]
     )
 
+    if "dup_num" in merged_df.columns:
+        merged_df = merged_df.drop(columns=["dup_num"])
+
     return merged_df
pydartdiags-0.6.0.dist-info/METADATA → pydartdiags-0.6.1.dist-info/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pydartdiags
-Version: 0.6.0
+Version: 0.6.1
 Summary: Observation Sequence Diagnostics for DART
 Home-page: https://github.com/NCAR/pyDARTdiags.git
 Author: Helen Kershaw
pydartdiags-0.6.0.dist-info/RECORD → pydartdiags-0.6.1.dist-info/RECORD

@@ -3,13 +3,13 @@ pydartdiags/matplots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pydartdiags/matplots/matplots.py,sha256=Bo0TTz1gvsHEvTfTfLfdTi_3hNRN1okmyY5a5yYgtzk,13455
 pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pydartdiags/obs_sequence/composite_types.yaml,sha256=PVLMU6x6KcVMCwPB-U65C_e0YQUemfqUhYMpf1DhFOY,917
-pydartdiags/obs_sequence/obs_sequence.py,sha256=5HfqOPoF2DyZQrUiGrYEwLJ9Iewe5DIzq0pdxR3bsnk,48037
+pydartdiags/obs_sequence/obs_sequence.py,sha256=szxASzecTcJzP2rEqssRo9VHw26nwpZ7W9Yi6sTbbHI,55112
 pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pydartdiags/plots/plots.py,sha256=U7WQjE_qN-5a8-85D-PkkgILSFBzTJQ1mcGBa7l5DHI,6464
 pydartdiags/stats/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 pydartdiags/stats/stats.py,sha256=a88VuLoHOlhbjYjnrVPHVNnhiDx-4B3YA1jbc6FUSyU,20193
-pydartdiags-0.6.0.dist-info/licenses/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
-pydartdiags-0.6.0.dist-info/METADATA,sha256=ZeVGK6hTX2tgIiedCVcavDPn195yCh8LO9-ziliePog,2381
-pydartdiags-0.6.0.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
-pydartdiags-0.6.0.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
-pydartdiags-0.6.0.dist-info/RECORD,,
+pydartdiags-0.6.1.dist-info/licenses/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
+pydartdiags-0.6.1.dist-info/METADATA,sha256=AeuLMziCQas1vggEwAKD6CEfdadxwoSDWEu-Fgwaix0,2381
+pydartdiags-0.6.1.dist-info/WHEEL,sha256=_zCd3N1l69ArxyTb8rzEoP9TpbYXkqRFSNOD5OuxnTs,91
+pydartdiags-0.6.1.dist-info/top_level.txt,sha256=LfMoPLnSd0VhhlWev1eeX9t6AzvyASOloag0LO_ppWg,12
+pydartdiags-0.6.1.dist-info/RECORD,,