seabirdfilehandler 0.6.0__tar.gz → 0.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of seabirdfilehandler might be problematic.

Files changed (17)
  1. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/PKG-INFO +1 -1
  2. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/pyproject.toml +1 -1
  3. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/__init__.py +2 -2
  4. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/cnvfile.py +20 -25
  5. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/datafiles.py +8 -8
  6. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/file_collection.py +26 -0
  7. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/parameter.py +15 -11
  8. seabirdfilehandler-0.7.0/src/seabirdfilehandler/processing_steps.py +196 -0
  9. seabirdfilehandler-0.6.0/src/seabirdfilehandler/validation_modules.py +0 -155
  10. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/LICENSE +0 -0
  11. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/README.md +0 -0
  12. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/bottlefile.py +0 -0
  13. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/bottlelogfile.py +0 -0
  14. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/geomar_ctd_file_parser.py +0 -0
  15. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/hexfile.py +0 -0
  16. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/utils.py +0 -0
  17. {seabirdfilehandler-0.6.0 → seabirdfilehandler-0.7.0}/src/seabirdfilehandler/xmlfiles.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
  Metadata-Version: 2.3
  Name: seabirdfilehandler
- Version: 0.6.0
+ Version: 0.7.0
  Summary: Library of parsers to interact with SeaBird CTD files.
  Keywords: CTD,parser,seabird,data
  Author: Emil Michels

pyproject.toml
@@ -20,7 +20,7 @@ urls.homepage = "https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler
  urls.repository = "https://git.io-warnemuende.de/CTD-Software/SeabirdFileHandler"
  urls.documentation = "https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler"
  dynamic = []
- version = "0.6.0"
+ version = "0.7.0"

  [tool.poetry]

src/seabirdfilehandler/__init__.py
@@ -1,10 +1,10 @@
  from .datafiles import *
  from .bottlefile import *
  from .bottlelogfile import *
+ from .parameter import *
+ from .processing_steps import *
  from .cnvfile import *
  from .xmlfiles import *
  from .hexfile import *
- from .validation_modules import *
  from .geomar_ctd_file_parser import *
- from .parameter import *
  from .file_collection import *

src/seabirdfilehandler/cnvfile.py
@@ -3,9 +3,7 @@ from datetime import datetime, timedelta
  import pandas as pd
  import numpy as np
  import logging
- from seabirdfilehandler import DataFile
- from seabirdfilehandler.parameter import Parameters
- from seabirdfilehandler.validation_modules import CnvValidationList
+ from seabirdfilehandler import DataFile, Parameters, CnvProcessingSteps

  logger = logging.getLogger(__name__)

@@ -58,11 +56,11 @@ class CnvFile(DataFile):
          coordinate_columns: bool = False,
      ):
          super().__init__(path_to_file, only_header)
-         self.validation_modules = self.obtaining_validation_modules()
-         self.start_time = self.reading_start_time()
+         self.processing_steps = self.get_processing_step_infos()
          self.parameters = Parameters(
              self.data, self.data_table_description, only_header
          )
+         self.start_time = self.reading_start_time()
          if create_dataframe:
              self.df = self.create_dataframe()
          if absolute_time_calculation:
@@ -79,21 +77,15 @@ class CnvFile(DataFile):
          self.df = self.parameters.get_pandas_dataframe()
          return self.df

-     def reading_start_time(
-         self,
-         time_source: str = "System UTC",
-     ) -> datetime | None:
+     def reading_start_time(self) -> datetime | None:
          """
          Extracts the Cast start time from the metadata header.
          """
-         for line in self.sbe9_data:
-             if line.startswith(time_source):
-                 start_time = line.split("=")[1]
-                 start_time = datetime.strptime(
-                     start_time, " %b %d %Y %H:%M:%S "
-                 )
-                 return start_time
-         return None
+         start_time = (
+             self.parameters.data_table_misc["start_time"].split("[")[0].strip()
+         )
+         start_time = datetime.strptime(start_time, "%b %d %Y %H:%M:%S")
+         return start_time

      def absolute_time_calculation(self) -> bool:
          """
@@ -134,13 +126,12 @@ class CnvFile(DataFile):
              return True
          return False

-     def obtaining_validation_modules(self) -> CnvValidationList:
+     def get_processing_step_infos(self) -> CnvProcessingSteps:
          """
          Collects the individual validation modules and their respective
          information, usually present in key-value pairs.
          """
-         validation_modules = self.processing_info
-         return CnvValidationList(validation_modules)
+         return CnvProcessingSteps(self.processing_info)

      def df2cnv(self, df: pd.DataFrame | None = None) -> list:
          """
@@ -212,6 +203,7 @@ class CnvFile(DataFile):
      def _update_header(self):
          """Re-creates the cnv header."""
          self.data_table_description = self.parameters._form_data_table_info()
+         self.processing_info = self.processing_steps._form_processing_info()
          self.header = [
              *[f"* {data}" for data in self.sbe9_data[:-1]],
              *[f"** {data}" for data in self.metadata_list],
@@ -221,6 +213,8 @@ class CnvFile(DataFile):
              *[f"# {data}" for data in self.processing_info],
              "*END*\n",
          ]
+         self.data = self.array2cnv()
+         self.file_data = [*self.header, *self.data]

      def add_processing_metadata(self, addition: str | list):
          """
@@ -232,13 +226,14 @@ class CnvFile(DataFile):
              the new information line

          """
-         # TODO: use CnvprocessingList here
          if isinstance(addition, str):
              addition = [addition]
-         for line in addition:
-             self.file_data.append(line)
-             # add the new info line *before* the 'file_type = ascii' line
-             self.processing_info.insert(-1, line)
+         self.processing_steps.append(
+             self.processing_steps.create_step_instance(
+                 module=addition[0].split("_")[0], raw_info=addition
+             )
+         )
+         self._update_header()

      def add_station_and_event_column(self) -> bool:
          """

src/seabirdfilehandler/datafiles.py
@@ -77,27 +77,27 @@ class DataFile:
          file. Lists and Dictionaries are the data structures of choice. Uses
          basic prefix checking to distinguish different header information.
          """
-         past_sensors = False
+         past_bad_flag = False
          with self.path_to_file.open("r", encoding="latin-1") as file:
              for line in file:
                  self.raw_file_data.append(line)
-                 line_prefix = line[:2]
-                 if line_prefix == "* ":
+                 if line.startswith("* "):
                      self.header.append(line)
                      self.sbe9_data.append(line[2:])
-                 elif line_prefix == "**":
+                 elif line.startswith("**"):
                      self.header.append(line)
                      self.metadata_list.append(line[3:])
-                 elif line_prefix == "# ":
+                 elif line.startswith("#"):
                      self.header.append(line)
-                     if line[2:].strip()[0] == "<":
+                     if line[2:].strip().startswith("<"):
                          self.sensor_data.append(line[2:])
-                         past_sensors = True
                      else:
-                         if past_sensors:
+                         if past_bad_flag:
                              self.processing_info.append(line[2:])
                          else:
                              self.data_table_description.append(line[2:])
+                         if line.startswith("# bad_flag"):
+                             past_bad_flag = True
                  else:
                      if line.startswith("*END*"):
                          self.header.append(line)
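
For orientation, a cnv header routes through these prefixes roughly as in the sketch below (all values invented); the '# bad_flag' line is what now separates the data-table description from the later processing info:

    # Illustrative cnv header lines and where the parser sorts them:
    header_lines = [
        "* Sea-Bird SBE 9 Data File:",                 # -> sbe9_data
        "** Station: TF0271",                          # -> metadata_list
        "# name 0 = prDM: Pressure, Digiquartz [db]",  # -> data_table_description
        "# bad_flag = -9.990e-29",                     # -> flips past_bad_flag
        "# datcnv_date = Jan 07 2024 09:20:01",        # -> processing_info (after bad_flag)
        "*END*",
    ]
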

src/seabirdfilehandler/file_collection.py
@@ -3,6 +3,7 @@ from pathlib import Path
  import logging
  from collections import UserList
  from typing import Callable, Type
+ import warnings
  import pandas as pd
  import numpy as np
  from seabirdfilehandler import (
@@ -353,12 +354,15 @@ class CnvCollection(FileCollection):
          *args,
          **kwargs,
      ):
+         if len(args) < 3 and "file_suffix" not in kwargs:
+             kwargs["file_suffix"] = "cnv"
          super().__init__(*args, **kwargs)
          self.data_meta_info = self.get_data_table_meta_info()
          self.sensor_data = get_unique_sensor_data(
              [file.sensors for file in self.data]
          )
          self.array = self.get_array()
+         self.processing_steps = self.get_processing_steps()

      def get_dataframes(
          self,
@@ -430,6 +434,26 @@ class CnvCollection(FileCollection):
              [file.parameters.create_full_ndarray() for file in self.data]
          )

+     def get_processing_steps(self) -> list:
+         """
+         Checks the processing steps in the different files for consistency.
+         Returns the steps of the first file, which should be the same as for
+         all other files.
+ 
+         Returns
+         -------
+         A list of ProcessingSteps.
+         """
+         individual_processing_steps = [
+             file.processing_steps for file in self.data
+         ]
+         for index, step_info in enumerate(individual_processing_steps):
+             if step_info != individual_processing_steps[0]:
+                 message = f"The processing steps conducted on these files differ. First occurence between index 0 and {index}."
+                 warnings.warn(message)
+                 logger.warning(message)
+         return individual_processing_steps[0]
+ 

  class HexCollection(FileCollection):
      """
@@ -445,6 +469,8 @@ class HexCollection(FileCollection):
          path_to_xmlcons: Path | str = "",
          **kwargs,
      ):
+         if len(args) < 3 and "file_suffix" not in kwargs:
+             kwargs["file_suffix"] = "hex"
          # force only_metadata, as the hex data cannot be put into a DataFrame
          kwargs["only_metadata"] = True
          super().__init__(*args, **kwargs)

src/seabirdfilehandler/parameter.py
@@ -104,11 +104,19 @@ class Parameters(UserDict):
          )
          parameter_dict = {}
          list_of_metadata_shortnames = list(metadata.keys())
+         # if column number and metadata number is different, we are propably
+         # working with duplicate_columns and will drop the duplicates
          if self.full_data_array.shape[1] != len(list_of_metadata_shortnames):
-             raise AssertionError(
-                 f"Array column width {
-                     self.full_data_array.shape[1]
-                 } does not fit metadata length {len(metadata)}"
+             self.full_data_array = np.delete(
+                 self.full_data_array, self.duplicate_columns, 1
+             )
+             assert self.full_data_array.shape[1] == len(
+                 list_of_metadata_shortnames
+             )
+             # rewrite the column number in the metadata header
+             self.data_table_stats["nquan"] = str(
+                 int(self.data_table_stats["nquan"])
+                 - len(self.duplicate_columns)
              )
          for i in range(self.full_data_array.shape[1]):
              column_data = self.full_data_array[:, i]
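
The new branch drops duplicate columns with numpy's delete along axis 1 instead of raising. A standalone sketch of that operation; the array shape and the duplicate index are made up:

    import numpy as np

    data = np.arange(12).reshape(3, 4)
    duplicate_columns = [2]                       # hypothetical duplicate column index
    data = np.delete(data, duplicate_columns, 1)  # axis=1 removes whole columns
    assert data.shape == (3, 3)
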
@@ -286,14 +294,10 @@ class Parameters(UserDict):
          assert data.shape[1] == len(columns)
          df = pd.DataFrame(data=data, columns=columns)
          for column in df.columns:
-             if column.lower() not in [
-                 "latitude",
-                 "longitude",
-                 "event",
-                 "cast",
-                 "flag",
-             ]:
+             try:
                  df[column].astype("float64")
+             except (TypeError, ValueError):
+                 df[columns].astype("str")
          return df

      def with_name_type(self, name_type: str = "shortname"):

src/seabirdfilehandler/processing_steps.py (new file)
@@ -0,0 +1,196 @@
+ from __future__ import annotations
+ from collections import UserList
+ 
+ 
+ class CnvProcessingSteps(UserList):
+     """
+     A python representation of the individual processing steps conducted
+     in the process of a cnv file creation. These modules are stored in
+     a dictionary structure, together with all the variables/metadata/etc.
+     given in the header of a cnv file.
+ 
+     Parameters
+     ----------
+ 
+     Returns
+     -------
+ 
+     """
+ 
+     def __init__(self, raw_processing_info: list):
+         self.modules = self.extract_individual_modules(raw_processing_info)
+         self.data = []
+         for module in self.modules:
+             self.data.append(
+                 self.create_step_instance(module, raw_processing_info)
+             )
+ 
+     def _form_processing_info(self) -> list:
+         out_list = []
+         for module in self.data:
+             if "vars" in module.metadata and module.name != "wildedit":
+                 module.metadata["date"] = (
+                     module.metadata["date"]
+                     + f" [{module.name.lower()}_vars = {module.metadata.pop('vars')}]"
+                 )
+             if module.name == "binavg":
+                 collection_string = module.metadata["binavg_surface_bin"][
+                     "surface_bin"
+                 ]
+                 for k, v in module.metadata["binavg_surface_bin"].items():
+                     if k != "surface_bin":
+                         collection_string += f", {k} = {v}"
+                 module.metadata["binavg_surface_bin"] = collection_string
+             for key, value in module.metadata.items():
+                 if module.name == "wfilter" and key == "action":
+                     for action_key, action_value in value.items():
+                         out_list.append(
+                             f"wfilter_action {action_key} = {action_value}\n"
+                         )
+                 else:
+                     out_list.append(f"{module.name}_{key} = {value}\n")
+         out_list.append("file_type = ascii\n")
+         return out_list
+ 
+     def get_names(self) -> list[str]:
+         return [step.name for step in self.data]
+ 
+     def extract_individual_modules(self, raw_info: list[str]) -> list:
+         """ """
+         module_list = []
+         for line in raw_info:
+             module = line.split("_")[0]
+             if (module not in module_list) and (
+                 line.split()[0] != "file_type"
+             ):
+                 module_list.append(module)
+         return module_list
+ 
+     def create_step_instance(
+         self,
+         module: str,
+         raw_info: list[str],
+     ) -> ProcessingStep:
+         """
+ 
+         Parameters
+         ----------
+         module :
+ 
+ 
+         Returns
+         -------
+ 
+         """
+         # TODO: probably need to split this into smaller bits
+         out_dict = {}
+         inner_action_dict = {}
+         # extract lines corresponding to the module
+         for line in raw_info:
+             if module == line.split("_")[0]:
+                 # removing the module names from the lines
+                 shifting_index = len(module) + 1
+                 line_content = line[shifting_index:]
+                 # handle the case of the validation methods keyword being
+                 # 'action', which corresponds to an entire dict of values
+                 if line_content[:6] == "action":
+                     inner_action_dict = self._module_dict_feeder(
+                         line_content[6:], inner_action_dict
+                     )
+                 else:
+                     # handle the cases where after some date value, another value
+                     # is printed inside of [] brackets
+                     double_value_list = line_content.split("[")
+                     if len(double_value_list) > 1:
+                         out_dict = self._module_dict_feeder(
+                             double_value_list[1][shifting_index:-2], out_dict
+                         )
+                         line_content = double_value_list[0]
+                     if line_content[:11] == "surface_bin":
+                         surface_bin_dict = {}
+                         for line in line_content.split(","):
+                             self._module_dict_feeder(line, surface_bin_dict)
+                         out_dict["surface_bin"] = surface_bin_dict
+                         continue
+                     # usual behavior, for 99% cases:
+                     # assigning key and value to the module dict
+                     out_dict = self._module_dict_feeder(line_content, out_dict)
+         if inner_action_dict:
+             out_dict["action"] = inner_action_dict
+         return ProcessingStep(module, out_dict)
+ 
+     def _module_dict_feeder(
+         self,
+         line: str,
+         dictionary: dict,
+         split_value: str = "=",
+     ):
+         """
+ 
+         Parameters
+         ----------
+         line: str :
+ 
+         dictionary: dict :
+ 
+         split_value: str :
+             (Default value = '=')
+ 
+         Returns
+         -------
+ 
+         """
+         # adds the values of a specific header line into a dictionary
+         try:
+             key, value = line.split(split_value)
+         except ValueError:
+             pass
+         else:
+             dictionary[key.strip()] = value.strip()
+         finally:
+             return dictionary
+ 
+     def get_step(self, step: str) -> ProcessingStep | None:
+         """
+ 
+         Parameters
+         ----------
+         module: str :
+ 
+ 
+         Returns
+         -------
+ 
+         """
+         for index, element in enumerate(self.data):
+             if str(element) == step:
+                 return self.data[index]
+         return None
+ 
+ 
+ class ProcessingStep:
+     """
+     Class that is meant to represent one individual processing step, that lead
+     to the current status of the cnv file. Can be a custom processing step or
+     one of the original Sea-Bird ones.
+ 
+     Parameters
+     ----------
+ 
+     Returns
+     -------
+ 
+     """
+ 
+     def __init__(self, name: str, metadata: dict):
+         self.name = name
+         self.metadata = metadata
+ 
+     def __str__(self) -> str:
+         return self.name
+ 
+     def __repr__(self) -> str:
+         return self.__str__()
+ 
+     def __eq__(self, other) -> bool:
+         return self.metadata == other.metadata
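
A short usage sketch of the new classes, feeding in a few invented processing-header lines of the kind CnvFile hands over as processing_info (the line values are illustrative only):

    from seabirdfilehandler.processing_steps import CnvProcessingSteps

    raw_info = [
        "datcnv_date = Jan 07 2024 09:20:01, 7.26.7.129\n",   # invented values
        "datcnv_in = cast001.hex cast001.xmlcon\n",
        "binavg_date = Jan 07 2024 09:21:44, 7.26.7.129\n",
        "binavg_binsize = 1\n",
        "file_type = ascii\n",
    ]
    steps = CnvProcessingSteps(raw_info)
    print(steps.get_names())                    # ['datcnv', 'binavg']
    print(steps.get_step("binavg").metadata)    # {'date': '...', 'binsize': '1'}
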

src/seabirdfilehandler/validation_modules.py (deleted)
@@ -1,155 +0,0 @@
- from collections import UserDict
- 
- 
- class CnvValidationList(UserDict):
-     """A python representation of the individual validation steps conducted
-     in the process of a cnv file creation. These modules are stored in
-     a dictionary structure, together with all the variables/metadata/etc.
-     given in the header of a cnv file.
- 
-     Parameters
-     ----------
- 
-     Returns
-     -------
- 
-     """
- 
-     def __init__(self, cnv_header_val_modules: list):
-         self.cnv_header_val_modules = cnv_header_val_modules
-         self.data = {}
-         self.modules = self.extract_individual_modules()
-         for module in self.modules:
-             module_data = self.create_dict_for_module(module)
-             self.data[module] = module_data
- 
-     def extract_individual_modules(self) -> list:
-         """ """
-         module_list = []
-         for line in self.cnv_header_val_modules:
-             module = line.split("_")[0]
-             if (module not in module_list) and (
-                 line.split()[0] != "file_type"
-             ):
-                 module_list.append(module)
-         return module_list
- 
-     def create_dict_for_module(self, module) -> dict:
-         """
- 
-         Parameters
-         ----------
-         module :
- 
- 
-         Returns
-         -------
- 
-         """
-         # TODO: probably need to split this into smaller bits
-         out_dict = {}
-         inner_action_dict = {}
-         action_dict_present = False
-         # extract lines corresponding to the module
-         for line in self.cnv_header_val_modules:
-             if module == line.split("_")[0]:
-                 # removing the module names from the lines
-                 shifting_index = len(module) + 1
-                 line_content = line[shifting_index:]
-                 # handle the case of the validation methods keyword being
-                 # 'action', which corresponds to an entire dict of values
-                 if line_content[:6] == "action":
-                     action_dict_present = True
-                     inner_action_dict = self.module_dict_feeder(
-                         line_content[6:], inner_action_dict
-                     )
-                 else:
-                     # handle the cases where after some date value, another value
-                     # is printed inside of [] brackets
-                     double_value_list = line_content.split("[")
-                     if len(double_value_list) > 1:
-                         out_dict = self.module_dict_feeder(
-                             double_value_list[1][shifting_index:-2], out_dict
-                         )
-                         line_content = double_value_list[0]
-                     if line_content[:11] == "surface_bin":
-                         surface_bin_dict = {}
-                         for line in line_content.split(","):
-                             self.module_dict_feeder(line, surface_bin_dict)
-                         out_dict["surface_bin"] = surface_bin_dict
-                         continue
-                     # usual behavior, for 99% cases:
-                     # assigning key and value to the module dict
-                     out_dict = self.module_dict_feeder(line_content, out_dict)
-         if action_dict_present:
-             out_dict["action"] = inner_action_dict
-         return out_dict
- 
-     def module_dict_feeder(
-         self, line: str, dictionary: dict, split_value: str = "="
-     ):
-         """
- 
-         Parameters
-         ----------
-         line: str :
- 
-         dictionary: dict :
- 
-         split_value: str :
-             (Default value = '=')
- 
-         Returns
-         -------
- 
-         """
-         # adds the values of a specific header line into a dictionary
-         try:
-             key, value = line.split(split_value)
-         except ValueError:
-             pass
-         else:
-             dictionary[key.strip()] = value.strip()
-         finally:
-             return dictionary
- 
-     def get(self, module: str) -> dict:
-         """
- 
-         Parameters
-         ----------
-         module: str :
- 
- 
-         Returns
-         -------
- 
-         """
-         for element in self.data:
-             if str(element) == module:
-                 return self.data[element]
-         else:
-             return {}
- 
- 
- class ValidationModule:
-     """Class that is meant to represent the individual validation modules of
-     the SeaSoft software. This includes all the input parameters and settins,
-     as well as a description of the output.
-     The idea is to inherit from this class for each individual module. But I
-     am not sure if its worth the effort.
- 
-     Parameters
-     ----------
- 
-     Returns
-     -------
- 
-     """
- 
-     def __init__(self, name):
-         self.name = name
- 
-     def extract_information(self):
-         """ """
-         pass