pydartdiags 0.0.3b0__py3-none-any.whl → 0.0.4__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their public registry. It is provided for informational purposes only.

Potentially problematic release: this version of pydartdiags might be problematic.

@@ -5,37 +5,38 @@ import os
  import yaml

  class obs_sequence:
- """Create an obs_sequence object from an ascii observation
- sequence file.
-
- Attributes:
-
- df : pandas Dataframe containing all the observations
- all_obs : list of all observations, each observation is a list
- header : header from the ascii file
- vert : dictionary of dart vertical units
- types : dictionary of types in the observation sequence file
- copie_names : names of copies in the observation sequence file.
- Spelled copie to avoid conflict with python built-in copy function.
- Spaces are replaced with underscores in copie_names.
- file : the input observation sequence ascii file
-
- usage:
- Read the observation sequence from file:
- obs_seq = obs_sequence('/home/data/obs_seq.final.ascii.small')
- Access the resulting pandas dataFrame:
- obs_seq.df
-
- For 3D sphere models: latitude and longitude are in degrees in the DataFrame
- sq_err = (mean-obs)**2
- bias = (mean-obs)
-
- rmse = sqrt( sum((mean-obs)**2)/n )
- bias = sum((mean-obs)/n)
- spread = sum(sd)
- totalspread = sqrt(sum(sd+obs_err_var))
-
-
+ """Create an obs_sequence object from an ascii observation sequence file.
+
+ Attributes:
+ df (pandas.DataFrame): DataFrame containing all the observations.
+ all_obs (list): List of all observations, each observation is a list.
+ header (str): Header from the ascii file.
+ vert (dict): Dictionary of dart vertical units.
+ types (dict): Dictionary of types in the observation sequence file.
+ copie_names (list): Names of copies in the observation sequence file.
+ Spelled 'copie' to avoid conflict with the Python built-in copy function.
+ Spaces are replaced with underscores in copie_names.
+
+ Parameters:
+ file : the input observation sequence ascii file
+
+ Example:
+ Read the observation sequence from file:
+ ``obs_seq = obs_sequence('/home/data/obs_seq.final.ascii.small')``
+ Access the resulting pandas DataFrame:
+ ``obs_seq.df``
+
+ For 3D sphere models: latitude and longitude are in degrees in the DataFrame
+
+ Calculations:
+
+ - sq_err = (mean-obs)**2
+ - bias = (mean-obs)
+ - rmse = sqrt( sum((mean-obs)**2)/n )
+ - bias = sum((mean-obs)/n)
+ - spread = sum(sd)
+ - totalspread = sqrt(sum(sd+obs_err_var))
+
  """
  ## static variables
  # vertrical coordinate:
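For readers skimming the new "Calculations" block, those formulas map one-to-one onto column operations on ``obs_seq.df``. A minimal sketch, assuming copy columns named `prior_ensemble_mean`, `prior_ensemble_spread`, `observation`, and `obs_err_var` (the real `copie_names` come from the file header, so these names are assumptions):

```python
import numpy as np

def summary_stats(df, mean_col='prior_ensemble_mean', sd_col='prior_ensemble_spread'):
    """Sketch of the statistics listed in the docstring above (column names assumed)."""
    diff = df[mean_col] - df['observation']
    n = len(df)
    rmse = np.sqrt((diff ** 2).sum() / n)                           # sqrt( sum((mean-obs)**2)/n )
    bias = diff.sum() / n                                           # sum((mean-obs)/n)
    spread = df[sd_col].sum()                                       # sum(sd)
    totalspread = np.sqrt((df[sd_col] + df['obs_err_var']).sum())   # sqrt(sum(sd+obs_err_var))
    return rmse, bias, spread, totalspread
```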
@@ -101,6 +102,7 @@ class obs_sequence:

  def obs_to_list(self, obs):
  """put single observation into a list
+
  discards obs_def
  """
  data = []
@@ -170,7 +172,7 @@ class obs_sequence:
  def write_obs_seq(self, file, df=None):
  """
  Write the observation sequence to a file.
-
+
  This function writes the observation sequence to disk.
  If no DataFrame is provided, it writes the obs_sequence object to a file using the
  header and all observations stored in the object.
@@ -178,19 +180,17 @@ class obs_sequence:
  then writes the DataFrame obs to an obs_sequence file. Note the DataFrame is assumed
  to have been created from obs_sequence object.

-
+
  Parameters:
- file (str): The path to the file where the observation sequence will be written.
- df (pandas.DataFrame, optional): A DataFrame containing the observation data.
- If not provided, the function uses self.header
- and self.all_obs.
-
+ file (str): The path to the file where the observation sequence will be written.
+ df (pandas.DataFrame, optional): A DataFrame containing the observation data. If not provided, the function uses self.header and self.all_obs.
+
  Returns:
- None
-
- Usage:
- obs_seq.write_obs_seq('/path/to/output/file')
- obs_seq.write_obs_seq('/path/to/output/file', df=obs_seq.df)
+ None
+
+ Examples:
+ ``obs_seq.write_obs_seq('/path/to/output/file')``
+ ``obs_seq.write_obs_seq('/path/to/output/file', df=obs_seq.df)``
  """
  with open(file, 'w') as f:

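Beyond the two calls shown in the Examples above, a common pattern is to filter the DataFrame and write the subset back out. A hedged sketch (the import path is inferred from the RECORD layout later in this diff, and the file paths are placeholders):

```python
# Assumed import path, based on pydartdiags/obs_sequence/obs_sequence.py in RECORD
from pydartdiags.obs_sequence.obs_sequence import obs_sequence

obs_seq = obs_sequence('obs_seq.final.ascii')                   # placeholder input file
passed = obs_seq.df[obs_seq.df['DART_quality_control'] == 0]    # keep obs that passed QC
obs_seq.write_obs_seq('obs_seq.final.passed', df=passed)        # placeholder output file
```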
@@ -281,14 +281,13 @@ class obs_sequence:
  """
  Extracts the names of the copies from the header of an obs_seq file.

-
  Parameters:
- header (list): A list of strings representing the lines in the header of the obs_seq file.
+ header (list): A list of strings representing the lines in the header of the obs_seq file.

  Returns:
- tuple: A tuple containing two elements:
- - copie_names (list): A list of strings representing the copy names with _ for spaces.
- - len(copie_names) (int): The number of copy names.
+ tuple: A tuple containing two elements:
+ - copie_names (list): A list of strings representing the copy names with underscores for spaces.
+ - len(copie_names) (int): The number of copy names.
  """
  for i, line in enumerate(header):
  if "num_obs:" in line and "max_num_obs:" in line:
@@ -348,15 +347,13 @@ class obs_sequence:
  components and adds them to the DataFrame.

  Parameters:
- composite_types (str, optional): The YAML configuration for composite types.
- If 'use_default', the default configuration is used.
- Otherwise, a custom YAML configuration can be provided.
+ composite_types (str, optional): The YAML configuration for composite types. If 'use_default', the default configuration is used. Otherwise, a custom YAML configuration can be provided.

  Returns:
- pd.DataFrame: The updated DataFrame with the new composite rows added.
+ pd.DataFrame: The updated DataFrame with the new composite rows added.

  Raises:
- Exception: If there are repeat values in the components.
+ Exception: If there are repeat values in the components.
  """

  if composite_types == 'use_default':
@@ -386,10 +383,10 @@ def load_yaml_to_dict(file_path):
  Load a YAML file and convert it to a dictionary.

  Parameters:
- - file_path (str): The path to the YAML file.
+ file_path (str): The path to the YAML file.

  Returns:
- - dict: The YAML file content as a dictionary.
+ dict: The YAML file content as a dictionary.
  """
  try:
  with open(file_path, 'r') as file:
@@ -402,8 +399,9 @@ def load_yaml_to_dict(file_path):
  def convert_dart_time(seconds, days):
  """covert from seconds, days after 1601 to datetime object

- base year for Gregorian calendar is 1601
- dart time is seconds, days since 1601
+ Note:
+ - base year for Gregorian calendar is 1601
+ - dart time is seconds, days since 1601
  """
  time = dt.datetime(1601,1,1) + dt.timedelta(days=days, seconds=seconds)
  return time
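The Note added above pins down the convention: DART times are (seconds, days) offsets from 1 January 1601. A quick self-contained check of that convention, using the same two-line conversion shown in the hunk:

```python
import datetime as dt

def convert_dart_time(seconds, days):
    # DART time is (seconds, days) since the Gregorian base date 1601-01-01
    return dt.datetime(1601, 1, 1) + dt.timedelta(days=days, seconds=seconds)

assert convert_dart_time(0, 0) == dt.datetime(1601, 1, 1)
assert convert_dart_time(seconds=43200, days=1) == dt.datetime(1601, 1, 2, 12, 0)
```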
@@ -413,14 +411,14 @@ def select_by_dart_qc(df, dart_qc):
  Selects rows from a DataFrame based on the DART quality control flag.

  Parameters:
- df (DataFrame): A pandas DataFrame.
- dart_qc (int): The DART quality control flag to select.
+ df (DataFrame): A pandas DataFrame.
+ dart_qc (int): The DART quality control flag to select.

  Returns:
- DataFrame: A DataFrame containing only the rows with the specified DART quality control flag.
+ DataFrame: A DataFrame containing only the rows with the specified DART quality control flag.

  Raises:
- ValueError: If the DART quality control flag is not present in the DataFrame.
+ ValueError: If the DART quality control flag is not present in the DataFrame.
  """
  if dart_qc not in df['DART_quality_control'].unique():
  raise ValueError(f"DART quality control flag '{dart_qc}' not found in DataFrame.")
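The hunk shows the validity check but not the filter itself, so the return line below is an assumption based on the docstring ("only the rows with the specified flag"). A small, self-contained usage sketch with a toy DataFrame (the flag values are arbitrary):

```python
import pandas as pd

def select_by_dart_qc(df, dart_qc):
    # validity check as shown in the hunk above
    if dart_qc not in df['DART_quality_control'].unique():
        raise ValueError(f"DART quality control flag '{dart_qc}' not found in DataFrame.")
    # assumed filter, matching what the docstring describes
    return df[df['DART_quality_control'] == dart_qc]

df = pd.DataFrame({'type': ['T', 'T', 'U'], 'DART_quality_control': [0, 7, 0]})
print(select_by_dart_qc(df, 7))   # the single row flagged 7
```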
@@ -432,10 +430,10 @@ def select_failed_qcs(df):
  Selects rows from a DataFrame where the DART quality control flag is greater than 0.

  Parameters:
- df (DataFrame): A pandas DataFrame.
+ df (DataFrame): A pandas DataFrame.

  Returns:
- DataFrame: A DataFrame containing only the rows with a DART quality control flag greater than 0.
+ DataFrame: A DataFrame containing only the rows with a DART quality control flag greater than 0.
  """
  return df[df['DART_quality_control'] > 0]

@@ -450,14 +448,14 @@ def possible_vs_used(df):
  used observations.

  Parameters:
- - df (pd.DataFrame): A DataFrame with at least two columns: 'type' for the observation type and 'observation'
- for the observation data. It may also contain other columns required by the `select_failed_qcs` function
- to determine failed quality control checks.
+ df (pd.DataFrame): A DataFrame with at least two columns: 'type' for the observation type and 'observation'
+ for the observation data. It may also contain other columns required by the `select_failed_qcs` function
+ to determine failed quality control checks.

  Returns:
- - pd.DataFrame: A DataFrame with three columns: 'type', 'possible', and 'used'. 'type' is the observation type,
- 'possible' is the count of all observations of that type, and 'used' is the count of observations of that type
- that passed quality control checks.
+ pd.DataFrame: A DataFrame with three columns: 'type', 'possible', and 'used'. 'type' is the observation type,
+ 'possible' is the count of all observations of that type, and 'used' is the count of observations of that type
+ that passed quality control checks.

  """
  possible = df.groupby('type')['observation'].count()
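The hunk above only shows the "possible" count; here is a minimal sketch of the possible-versus-used tally the docstring describes, reusing `select_failed_qcs` exactly as it appears in the previous hunk. This is an illustration, not the package's exact implementation:

```python
import pandas as pd

def select_failed_qcs(df):
    # as shown in the previous hunk
    return df[df['DART_quality_control'] > 0]

def possible_vs_used_sketch(df):
    # every observation of each type counts as "possible"
    possible = df.groupby('type')['observation'].count()
    # drop rows that failed QC, then count what remains as "used"
    used = df.drop(select_failed_qcs(df).index).groupby('type')['observation'].count()
    return pd.DataFrame({'type': possible.index,
                         'possible': possible.values,
                         'used': used.reindex(possible.index, fill_value=0).values})
```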
@@ -476,12 +474,12 @@ def construct_composit(df_comp, composite, components):
  specified columns using the square root of the sum of squares method.

  Parameters:
- df_comp (pd.DataFrame): The DataFrame containing the component rows to be combined.
- composite (str): The type name for the new composite rows.
- components (list of str): A list containing the type names of the two components to be combined.
+ df_comp (pd.DataFrame): The DataFrame containing the component rows to be combined.
+ composite (str): The type name for the new composite rows.
+ components (list of str): A list containing the type names of the two components to be combined.

  Returns:
- merged_df (pd.DataFrame): The updated DataFrame with the new composite rows added.
+ merged_df (pd.DataFrame): The updated DataFrame with the new composite rows added.
  """
  selected_rows = df_comp[df_comp['type'] == components[0].upper()]
  selected_rows_v = df_comp[df_comp['type'] == components[1].upper()]
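The square-root-of-the-sum-of-squares combination mentioned above is the usual way to build, for example, a horizontal wind speed from U and V components. A toy sketch of just that arithmetic (the type names here are illustrative, not necessarily those in composite_types.yaml):

```python
import numpy as np
import pandas as pd

u = pd.DataFrame({'type': ['U_WIND_COMPONENT'] * 2, 'observation': [3.0, 6.0]})
v = pd.DataFrame({'type': ['V_WIND_COMPONENT'] * 2, 'observation': [4.0, 8.0]})

composite = pd.DataFrame({
    'type': 'HORIZONTAL_WIND',   # illustrative composite name
    # square root of the sum of squares of the two components
    'observation': np.sqrt(u['observation']**2 + v['observation']**2),
})
print(composite['observation'].tolist())   # [5.0, 10.0]
```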
@@ -6,6 +6,7 @@ import pandas as pd
  def plot_rank_histogram(df):
  """
  Plots a rank histogram colored by observation type.
+
  All histogram bars are initalized to be hidden and can be toggled visible in the plot's legend
  """
  _, _, df_hist = calculate_rank(df)
@@ -27,12 +28,12 @@ def calculate_rank(df):
  size plus one.

  Parameters:
- - df (pd.DataFrame): A DataFrame with columns for mean, standard deviation, observed values,
- ensemble size, and observation type. The DataFrame should have one row per observation.
+ df (pd.DataFrame): A DataFrame with columns for mean, standard deviation, observed values,
+ ensemble size, and observation type. The DataFrame should have one row per observation.

  Returns:
- - tuple: A tuple containing the rank array, ensemble size, and a result DataFrame. The result
- DataFrame contains columns for 'rank' and 'obstype'.
+ tuple: A tuple containing the rank array, ensemble size, and a result DataFrame. The result
+ DataFrame contains columns for 'rank' and 'obstype'.
  """
  ensemble_values = df.filter(regex='prior_ensemble_member').to_numpy().copy()
  std_dev = np.sqrt(df['obs_err_var']).to_numpy()
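The "rank" referred to above is where the observed value falls within the sorted ensemble, from 1 to ensemble size plus one. A minimal numpy sketch of that idea; it omits the observation-error perturbation the package derives from `obs_err_var`:

```python
import numpy as np

def rank_of_obs(ensemble_values, obs):
    # rank = 1 + number of ensemble members smaller than the observation
    # ensemble_values: (n_obs, n_members), obs: (n_obs,)
    return 1 + np.sum(ensemble_values < obs[:, np.newaxis], axis=1)

ens = np.array([[1.0, 2.0, 3.0],
                [5.0, 6.0, 7.0]])
obs = np.array([2.5, 4.0])
print(rank_of_obs(ens, obs))   # [3 1]
```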
@@ -72,24 +73,24 @@ def plot_profile(df, levels):
  the vertical profile in the atmosphere correctly.

  Parameters:
- - df (pd.DataFrame): The input DataFrame containing at least the 'vertical' column for pressure levels,
- and other columns required by the `rmse_bias` function for calculating RMSE and Bias.
- - levels (array-like): The bin edges for categorizing the 'vertical' column values into pressure levels.
+ df (pd.DataFrame): The input DataFrame containing at least the 'vertical' column for pressure levels,
+ and other columns required by the `rmse_bias` function for calculating RMSE and Bias.
+ levels (array-like): The bin edges for categorizing the 'vertical' column values into pressure levels.

  Returns:
- - tuple: A tuple containing the DataFrame with RMSE and Bias calculations, the RMSE plot figure, and the
- Bias plot figure. The DataFrame includes a 'plevels' column representing the categorized pressure levels
- and 'hPa' column representing the midpoint of each pressure level bin.
+ tuple: A tuple containing the DataFrame with RMSE and Bias calculations, the RMSE plot figure, and the
+ Bias plot figure. The DataFrame includes a 'plevels' column representing the categorized pressure levels
+ and 'hPa' column representing the midpoint of each pressure level bin.

  Raises:
- - ValueError: If there are missing values in the 'vertical' column of the input DataFrame.
+ ValueError: If there are missing values in the 'vertical' column of the input DataFrame.

  Note:
- - The function modifies the input DataFrame by adding 'plevels' and 'hPa' columns.
- - The 'hPa' values are calculated as half the midpoint of each pressure level bin, which may need
- adjustment based on the specific requirements for pressure level representation.
- - The plots are generated using Plotly Express and are displayed inline. The y-axis of the plots is
- reversed to align with standard atmospheric pressure level representation.
+ - The function modifies the input DataFrame by adding 'plevels' and 'hPa' columns.
+ - The 'hPa' values are calculated as half the midpoint of each pressure level bin, which may need
+ adjustment based on the specific requirements for pressure level representation.
+ - The plots are generated using Plotly Express and are displayed inline. The y-axis of the plots is
+ reversed to align with standard atmospheric pressure level representation.
  """

  pd.options.mode.copy_on_write = True
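The 'plevels' and 'hPa' columns described above come from binning the 'vertical' column against the supplied level edges. A short pandas sketch of that binning step; the package's own midpoint convention may differ, as the Note itself cautions:

```python
import pandas as pd

levels = [100, 300, 500, 700, 850, 1000]                  # example bin edges in hPa
df = pd.DataFrame({'vertical': [925.0, 780.0, 600.0, 400.0, 200.0]})

df['plevels'] = pd.cut(df['vertical'], bins=levels)       # categorize into pressure bins
df['hPa'] = df['plevels'].apply(lambda interval: interval.mid)   # one midpoint per bin
print(df)
```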
@@ -116,14 +117,14 @@ def mean_then_sqrt(x):
  Calculates the mean of an array-like object and then takes the square root of the result.

  Parameters:
- arr (array-like): An array-like object (such as a list or a pandas Series).
- The elements should be numeric.
+ arr (array-like): An array-like object (such as a list or a pandas Series).
+ The elements should be numeric.

  Returns:
- float: The square root of the mean of the input array.
+ float: The square root of the mean of the input array.

  Raises:
- TypeError: If the input is not an array-like object containing numeric values.
+ TypeError: If the input is not an array-like object containing numeric values.
  """
  return np.sqrt(np.mean(x))

@@ -139,14 +140,14 @@ def rmse_bias_by_obs_type(df, obs_type):
  Calculate the RMSE and bias for a given observation type.

  Parameters:
- df (DataFrame): A pandas DataFrame.
- obs_type (str): The observation type for which to calculate the RMSE and bias.
+ df (DataFrame): A pandas DataFrame.
+ obs_type (str): The observation type for which to calculate the RMSE and bias.

  Returns:
- DataFrame: A DataFrame containing the RMSE and bias for the given observation type.
+ DataFrame: A DataFrame containing the RMSE and bias for the given observation type.

  Raises:
- ValueError: If the observation type is not present in the DataFrame.
+ ValueError: If the observation type is not present in the DataFrame.
  """
  if obs_type not in df['type'].unique():
  raise ValueError(f"Observation type '{obs_type}' not found in DataFrame.")
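`mean_then_sqrt` (two hunks up) is the natural building block for the per-type RMSE this function reports. A sketch of how the pieces combine, with an assumed column name for the ensemble-mean copy; this mirrors the idea, not the package's exact implementation:

```python
import numpy as np
import pandas as pd

def mean_then_sqrt(x):
    return np.sqrt(np.mean(x))

df = pd.DataFrame({
    'type': ['RADIOSONDE_TEMPERATURE'] * 3,
    'observation': [1.0, 2.0, 3.0],
    'prior_ensemble_mean': [1.5, 1.5, 3.5],   # assumed name for the ensemble-mean copy
})
df['sq_err'] = (df['prior_ensemble_mean'] - df['observation'])**2
df['bias'] = df['prior_ensemble_mean'] - df['observation']

stats = df.groupby('type').agg(rmse=('sq_err', mean_then_sqrt), bias=('bias', 'mean'))
print(stats)   # rmse = 0.5, bias ≈ 0.167
```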
@@ -1,9 +1,10 @@
  Metadata-Version: 2.3
  Name: pydartdiags
- Version: 0.0.3b0
+ Version: 0.0.4
  Summary: Observation Sequence Diagnostics for DART
  Project-URL: Homepage, https://github.com/NCAR/pyDARTdiags.git
  Project-URL: Issues, https://github.com/NCAR/pyDARTdiags/issues
+ Project-URL: Documentation, https://ncar.github.io/pyDARTdiags
  Author-email: Helen Kershaw <hkershaw@ucar.edu>
  License-File: LICENSE
  Classifier: License :: OSI Approved :: Apache Software License
@@ -0,0 +1,11 @@
+ pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pydartdiags/obs_sequence/composite_types.yaml,sha256=PVLMU6x6KcVMCwPB-U65C_e0YQUemfqUhYMpf1DhFOY,917
+ pydartdiags/obs_sequence/obs_sequence.py,sha256=-TIUBfr8WGEWvEp94wlS6twNC7TNPMXKP3jrQ9isOiE,21774
+ pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+ pydartdiags/plots/plots.py,sha256=8Tp1huRjnOPx24cFt8F92NwMKWUluGA-Ha8ixx6rCWk,6675
+ pydartdiags/plots/tests/test_rank_histogram.py,sha256=qfws9oX6Sj0BwO3aFUa74smeHfHxzSR3-TloT4C8D_4,495
+ pydartdiags-0.0.4.dist-info/METADATA,sha256=WmlqzRe1w3saI3CLlz0uIkaijJAGBl3Gl0emF16IlS4,9166
+ pydartdiags-0.0.4.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
+ pydartdiags-0.0.4.dist-info/licenses/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
+ pydartdiags-0.0.4.dist-info/RECORD,,
@@ -1,11 +0,0 @@
- pydartdiags/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pydartdiags/obs_sequence/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pydartdiags/obs_sequence/composite_types.yaml,sha256=PVLMU6x6KcVMCwPB-U65C_e0YQUemfqUhYMpf1DhFOY,917
- pydartdiags/obs_sequence/obs_sequence.py,sha256=dDGPfYxqKH9elP-Ey7QmmH3t3J7VqScdxJKA34nPddo,21716
- pydartdiags/plots/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
- pydartdiags/plots/plots.py,sha256=m5Y-wTxl3CW3l2VGGEuBgrQVyyrFNKOXrVCmD2XxiuQ,6600
- pydartdiags/plots/tests/test_rank_histogram.py,sha256=qfws9oX6Sj0BwO3aFUa74smeHfHxzSR3-TloT4C8D_4,495
- pydartdiags-0.0.3b0.dist-info/METADATA,sha256=hEQMpTQ83fjvW7eL3dKIuLcNu4OC51jCN3d_a7Y72h0,9105
- pydartdiags-0.0.3b0.dist-info/WHEEL,sha256=1yFddiXMmvYK7QYTqtRNtX66WJ0Mz8PYEiEUoOUUxRY,87
- pydartdiags-0.0.3b0.dist-info/licenses/LICENSE,sha256=ROglds_Eg_ylXp-1MHmEawDqMw_UsCB4r9sk7z9PU9M,11377
- pydartdiags-0.0.3b0.dist-info/RECORD,,