seabirdfilehandler 0.6.1__py3-none-any.whl → 0.7.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of seabirdfilehandler might be problematic. Click here for more details.

@@ -1,10 +1,10 @@
1
1
  from .datafiles import *
2
2
  from .bottlefile import *
3
3
  from .bottlelogfile import *
4
+ from .parameter import *
5
+ from .processing_steps import *
4
6
  from .cnvfile import *
5
7
  from .xmlfiles import *
6
8
  from .hexfile import *
7
- from .validation_modules import *
8
9
  from .geomar_ctd_file_parser import *
9
- from .parameter import *
10
10
  from .file_collection import *
@@ -3,9 +3,7 @@ from datetime import datetime, timedelta
3
3
  import pandas as pd
4
4
  import numpy as np
5
5
  import logging
6
- from seabirdfilehandler import DataFile
7
- from seabirdfilehandler.parameter import Parameters
8
- from seabirdfilehandler.validation_modules import CnvValidationList
6
+ from seabirdfilehandler import DataFile, Parameters, CnvProcessingSteps
9
7
 
10
8
  logger = logging.getLogger(__name__)
11
9
 
@@ -58,7 +56,7 @@ class CnvFile(DataFile):
58
56
  coordinate_columns: bool = False,
59
57
  ):
60
58
  super().__init__(path_to_file, only_header)
61
- self.validation_modules = self.obtaining_validation_modules()
59
+ self.processing_steps = self.get_processing_step_infos()
62
60
  self.parameters = Parameters(
63
61
  self.data, self.data_table_description, only_header
64
62
  )
@@ -128,13 +126,12 @@ class CnvFile(DataFile):
128
126
  return True
129
127
  return False
130
128
 
131
- def obtaining_validation_modules(self) -> CnvValidationList:
129
+ def get_processing_step_infos(self) -> CnvProcessingSteps:
132
130
  """
133
131
  Collects the individual validation modules and their respective
134
132
  information, usually present in key-value pairs.
135
133
  """
136
- validation_modules = self.processing_info
137
- return CnvValidationList(validation_modules)
134
+ return CnvProcessingSteps(self.processing_info)
138
135
 
139
136
  def df2cnv(self, df: pd.DataFrame | None = None) -> list:
140
137
  """
@@ -206,6 +203,7 @@ class CnvFile(DataFile):
206
203
  def _update_header(self):
207
204
  """Re-creates the cnv header."""
208
205
  self.data_table_description = self.parameters._form_data_table_info()
206
+ self.processing_info = self.processing_steps._form_processing_info()
209
207
  self.header = [
210
208
  *[f"* {data}" for data in self.sbe9_data[:-1]],
211
209
  *[f"** {data}" for data in self.metadata_list],
@@ -215,6 +213,8 @@ class CnvFile(DataFile):
215
213
  *[f"# {data}" for data in self.processing_info],
216
214
  "*END*\n",
217
215
  ]
216
+ self.data = self.array2cnv()
217
+ self.file_data = [*self.header, *self.data]
218
218
 
219
219
  def add_processing_metadata(self, addition: str | list):
220
220
  """
@@ -226,13 +226,14 @@ class CnvFile(DataFile):
226
226
  the new information line
227
227
 
228
228
  """
229
- # TODO: use CnvprocessingList here
230
229
  if isinstance(addition, str):
231
230
  addition = [addition]
232
- for line in addition:
233
- self.file_data.append(line)
234
- # add the new info line *before* the 'file_type = ascii' line
235
- self.processing_info.insert(-1, line)
231
+ self.processing_steps.append(
232
+ self.processing_steps.create_step_instance(
233
+ module=addition[0].split("_")[0], raw_info=addition
234
+ )
235
+ )
236
+ self._update_header()
236
237
 
237
238
  def add_station_and_event_column(self) -> bool:
238
239
  """
@@ -77,27 +77,27 @@ class DataFile:
77
77
  file. Lists and Dictionaries are the data structures of choice. Uses
78
78
  basic prefix checking to distinguish different header information.
79
79
  """
80
- past_sensors = False
80
+ past_bad_flag = False
81
81
  with self.path_to_file.open("r", encoding="latin-1") as file:
82
82
  for line in file:
83
83
  self.raw_file_data.append(line)
84
- line_prefix = line[:2]
85
- if line_prefix == "* ":
84
+ if line.startswith("* "):
86
85
  self.header.append(line)
87
86
  self.sbe9_data.append(line[2:])
88
- elif line_prefix == "**":
87
+ elif line.startswith("**"):
89
88
  self.header.append(line)
90
89
  self.metadata_list.append(line[3:])
91
- elif line_prefix == "# ":
90
+ elif line.startswith("#"):
92
91
  self.header.append(line)
93
- if line[2:].strip()[0] == "<":
92
+ if line[2:].strip().startswith("<"):
94
93
  self.sensor_data.append(line[2:])
95
- past_sensors = True
96
94
  else:
97
- if past_sensors:
95
+ if past_bad_flag:
98
96
  self.processing_info.append(line[2:])
99
97
  else:
100
98
  self.data_table_description.append(line[2:])
99
+ if line.startswith("# bad_flag"):
100
+ past_bad_flag = True
101
101
  else:
102
102
  if line.startswith("*END*"):
103
103
  self.header.append(line)
@@ -3,6 +3,7 @@ from pathlib import Path
3
3
  import logging
4
4
  from collections import UserList
5
5
  from typing import Callable, Type
6
+ import warnings
6
7
  import pandas as pd
7
8
  import numpy as np
8
9
  from seabirdfilehandler import (
@@ -353,12 +354,15 @@ class CnvCollection(FileCollection):
353
354
  *args,
354
355
  **kwargs,
355
356
  ):
357
+ if len(args) < 3 and "file_suffix" not in kwargs:
358
+ kwargs["file_suffix"] = "cnv"
356
359
  super().__init__(*args, **kwargs)
357
360
  self.data_meta_info = self.get_data_table_meta_info()
358
361
  self.sensor_data = get_unique_sensor_data(
359
362
  [file.sensors for file in self.data]
360
363
  )
361
364
  self.array = self.get_array()
365
+ self.processing_steps = self.get_processing_steps()
362
366
 
363
367
  def get_dataframes(
364
368
  self,
@@ -430,6 +434,26 @@ class CnvCollection(FileCollection):
430
434
  [file.parameters.create_full_ndarray() for file in self.data]
431
435
  )
432
436
 
437
def get_processing_steps(self) -> list:
    """
    Checks the processing steps in the different files for consistency.

    The steps of every file are compared against those of the first file.
    At the first mismatch a warning is emitted (both via ``warnings`` and
    the module logger) and the comparison stops, so that a collection
    with many deviating files produces one warning instead of one per
    file.

    Returns
    -------
    The processing steps of the first file, which should be the same as
    for all other files. An empty list if the collection holds no files.
    """
    individual_processing_steps = [
        file.processing_steps for file in self.data
    ]
    # an empty collection has nothing to compare and no "first file"
    if not individual_processing_steps:
        return []
    reference = individual_processing_steps[0]
    # start at 1: comparing the first file with itself is pointless
    for index, step_info in enumerate(
        individual_processing_steps[1:], start=1
    ):
        if step_info != reference:
            message = f"The processing steps conducted on these files differ. First occurence between index 0 and {index}."
            warnings.warn(message)
            logger.warning(message)
            # the message reports the *first* mismatch only
            break
    return reference
456
+
433
457
 
434
458
  class HexCollection(FileCollection):
435
459
  """
@@ -445,6 +469,8 @@ class HexCollection(FileCollection):
445
469
  path_to_xmlcons: Path | str = "",
446
470
  **kwargs,
447
471
  ):
472
+ if len(args) < 3 and "file_suffix" not in kwargs:
473
+ kwargs["file_suffix"] = "hex"
448
474
  # force only_metadata, as the hex data cannot be put into a DataFrame
449
475
  kwargs["only_metadata"] = True
450
476
  super().__init__(*args, **kwargs)
@@ -296,7 +296,7 @@ class Parameters(UserDict):
296
296
  for column in df.columns:
297
297
  try:
298
298
  df[column].astype("float64")
299
- except ValueError:
299
+ except (TypeError, ValueError):
300
300
  df[columns].astype("str")
301
301
  return df
302
302
 
@@ -0,0 +1,196 @@
1
+ from __future__ import annotations
2
+ from collections import UserList
3
+
4
+
5
class CnvProcessingSteps(UserList):
    """
    A python representation of the individual processing steps conducted
    in the process of a cnv file creation.

    The list elements are ProcessingStep instances, one per module name
    found in the header lines, each carrying the key-value information
    given for that module in the header of a cnv file.

    Parameters
    ----------
    raw_processing_info: list :
        The processing lines of a cnv header (without the leading '# ').
        ProcessingStep instances are accepted as well and are taken over
        unchanged; this is what UserList passes in when slicing or adding.

    """

    def __init__(self, raw_processing_info: list):
        super().__init__()
        items = list(raw_processing_info or [])
        # UserList.__getitem__(slice), __add__ and __mul__ re-create the
        # class from a list of already built steps, so keep those as is
        prebuilt = [item for item in items if isinstance(item, ProcessingStep)]
        raw_lines = [item for item in items if isinstance(item, str)]
        parsed_modules = self.extract_individual_modules(raw_lines)
        self.modules = [step.name for step in prebuilt] + parsed_modules
        self.data = prebuilt + [
            self.create_step_instance(module, raw_lines)
            for module in parsed_modules
        ]

    def _form_processing_info(self) -> list:
        """
        Renders the processing steps back into cnv header lines.

        The metadata of the individual steps is not modified, so this
        method can be called repeatedly with the same result.

        Returns
        -------
        A list of header lines (each ending in a newline), closed by the
        'file_type = ascii' line.

        """
        out_list = []
        for module in self.data:
            # work on a copy: rendering must not alter the parsed state
            metadata = dict(module.metadata)
            # the 'vars' value is printed in brackets behind the date
            if (
                "vars" in metadata
                and "date" in metadata
                and module.name != "wildedit"
            ):
                vars_value = metadata.pop("vars")
                metadata["date"] = (
                    f"{metadata['date']}"
                    f" [{module.name.lower()}_vars = {vars_value}]"
                )
            if module.name == "binavg":
                # create_step_instance stores the values under
                # 'surface_bin'; 'binavg_surface_bin' is only kept for
                # manually built steps that used the former key
                for key in ("surface_bin", "binavg_surface_bin"):
                    if isinstance(metadata.get(key), dict):
                        metadata[key] = self._flatten_surface_bin(
                            metadata[key]
                        )
            for key, value in metadata.items():
                if key == "action" and isinstance(value, dict):
                    # mirrors the parsing of '<module>_action k = v' lines
                    for action_key, action_value in value.items():
                        out_list.append(
                            f"{module.name}_action "
                            f"{action_key} = {action_value}\n"
                        )
                else:
                    out_list.append(f"{module.name}_{key} = {value}\n")
        out_list.append("file_type = ascii\n")
        return out_list

    @staticmethod
    def _flatten_surface_bin(surface_bin: dict) -> str:
        """Joins the surface bin values into their one-line header form."""
        parts = []
        if "surface_bin" in surface_bin:
            parts.append(str(surface_bin["surface_bin"]))
        parts.extend(
            f"{key} = {value}"
            for key, value in surface_bin.items()
            if key != "surface_bin"
        )
        return ", ".join(parts)

    def get_names(self) -> list[str]:
        """Returns the names of all processing steps, in list order."""
        return [step.name for step in self.data]

    def extract_individual_modules(self, raw_info: list[str]) -> list:
        """
        Collects the distinct module names of the given header lines.

        Parameters
        ----------
        raw_info: list[str] :
            The processing lines of a cnv header.

        Returns
        -------
        The module names (the text in front of the first underscore) in
        order of first appearance. Blank lines and the 'file_type' line
        are ignored.

        """
        module_list = []
        for line in raw_info:
            tokens = line.split()
            # blank lines carry no module; file_type is not a module
            if not tokens or tokens[0] == "file_type":
                continue
            module = line.split("_")[0]
            if module not in module_list:
                module_list.append(module)
        return module_list

    def create_step_instance(
        self,
        module: str,
        raw_info: list[str],
    ) -> "ProcessingStep":
        """
        Builds the ProcessingStep of one module from the header lines.

        Parameters
        ----------
        module: str :
            The module name, as found in front of the first underscore.
        raw_info: list[str] :
            The processing lines of a cnv header. Only the lines belonging
            to the given module are used.

        Returns
        -------
        A ProcessingStep with the collected key-value pairs as metadata.

        """
        out_dict = {}
        inner_action_dict = {}
        module_prefix = f"{module}_"
        # extract lines corresponding to the module
        for line in raw_info:
            if module != line.split("_")[0]:
                continue
            # removing the module name from the line
            line_content = line[len(module_prefix):]
            # handle the case of the keyword being 'action', which
            # corresponds to an entire dict of values
            if line_content[:6] == "action":
                inner_action_dict = self._module_dict_feeder(
                    line_content[6:], inner_action_dict
                )
                continue
            # handle the cases where after some date value, another value
            # is printed inside of [] brackets
            double_value_list = line_content.split("[")
            if len(double_value_list) > 1:
                # cut at the closing bracket instead of a fixed offset, so
                # that lines without a trailing newline are parsed as well
                bracket = double_value_list[1].split("]")[0]
                # the module name inside the brackets can be written in
                # lower case (the output side uses module.name.lower()),
                # hence the case-insensitive prefix check
                if bracket.lower().startswith(module_prefix.lower()):
                    bracket = bracket[len(module_prefix):]
                out_dict = self._module_dict_feeder(bracket, out_dict)
                line_content = double_value_list[0]
            if line_content[:11] == "surface_bin":
                # several comma separated key-value pairs on one line
                surface_bin_dict = {}
                for pair in line_content.split(","):
                    self._module_dict_feeder(pair, surface_bin_dict)
                out_dict["surface_bin"] = surface_bin_dict
                continue
            # usual behavior, for 99% cases:
            # assigning key and value to the module dict
            out_dict = self._module_dict_feeder(line_content, out_dict)
        if inner_action_dict:
            out_dict["action"] = inner_action_dict
        return ProcessingStep(module, out_dict)

    def _module_dict_feeder(
        self,
        line: str,
        dictionary: dict,
        split_value: str = "=",
    ):
        """
        Adds the key and value of a specific header line to a dictionary.

        Parameters
        ----------
        line: str :
            The text to split into key and value.
        dictionary: dict :
            The dictionary to update in place.
        split_value: str :
            The separator between key and value (Default value = '=')

        Returns
        -------
        The given dictionary. It is unchanged when the line does not
        split into exactly one key and one value.

        """
        try:
            key, value = line.split(split_value)
        except ValueError:
            # not exactly one separator: nothing to add
            return dictionary
        dictionary[key.strip()] = value.strip()
        return dictionary

    def get_step(self, step: str) -> "ProcessingStep | None":
        """
        Looks up a processing step by its name.

        Parameters
        ----------
        step: str :
            The name of the processing step.

        Returns
        -------
        The first ProcessingStep with that name, or None.

        """
        for element in self.data:
            if str(element) == step:
                return element
        return None


class ProcessingStep:
    """
    Class that is meant to represent one individual processing step, that led
    to the current status of the cnv file. Can be a custom processing step or
    one of the original Sea-Bird ones.

    Parameters
    ----------
    name: str :
        The name of the processing step.
    metadata: dict :
        The key-value information belonging to the step.

    """

    def __init__(self, name: str, metadata: dict):
        self.name = name
        self.metadata = metadata

    def __str__(self) -> str:
        return self.name

    def __repr__(self) -> str:
        return self.__str__()

    def __eq__(self, other) -> bool:
        # only the metadata is compared, the name is not part of equality
        if not isinstance(other, ProcessingStep):
            # let python fall back to its default handling instead of
            # failing on the missing metadata attribute
            return NotImplemented
        return self.metadata == other.metadata
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: seabirdfilehandler
3
- Version: 0.6.1
3
+ Version: 0.7.0
4
4
  Summary: Library of parsers to interact with SeaBird CTD files.
5
5
  Keywords: CTD,parser,seabird,data
6
6
  Author: Emil Michels
@@ -0,0 +1,16 @@
1
+ seabirdfilehandler/__init__.py,sha256=PSokwgSgsmpFh-2Xv2T2d3yxmmahLnIq58WLZ51l86I,276
2
+ seabirdfilehandler/bottlefile.py,sha256=qCh506J3MWZXM11243aw_oJRocVB0ZIipXQLEgkD5M0,6046
3
+ seabirdfilehandler/bottlelogfile.py,sha256=MtMmEebdAktO3mk6KbmJC7dfx9sRLbV5qqDQt2qtpJE,4310
4
+ seabirdfilehandler/cnvfile.py,sha256=NyoePjbwJ7ZVLq-NXMXQWz4GlI9IXVu2aBcZPSBLFE0,10051
5
+ seabirdfilehandler/datafiles.py,sha256=HQungz24lRw4OiTwWs_curAGhzkI1rPiCmMXXcdQFNE,9423
6
+ seabirdfilehandler/file_collection.py,sha256=rHYrrFcX-bB8CrWny-aVI-WbFRPHX_-dCMBzPiL4nGw,16103
7
+ seabirdfilehandler/geomar_ctd_file_parser.py,sha256=4eCnkE0mvPKC8Dic8sXP4xpfwnk3K2MQcGFBf6loT8k,2655
8
+ seabirdfilehandler/hexfile.py,sha256=TBplwbWHrTuJzv2qlx6xYNtoX43I2YUabDmaGZuBEDQ,2144
9
+ seabirdfilehandler/parameter.py,sha256=re91TLqfYWLU64hY4caSlpVENqF_wFxur25OkkBBTDE,14890
10
+ seabirdfilehandler/processing_steps.py,sha256=TYqzzVjFxz3EbLZcNTlZIFpMguyJcT6S2bLCFG35NDY,6185
11
+ seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
12
+ seabirdfilehandler/xmlfiles.py,sha256=XqqbVNjyINySoe2ZC_qJglkAqshavZxT2-jorDOSj7Y,5084
13
+ seabirdfilehandler-0.7.0.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
14
+ seabirdfilehandler-0.7.0.dist-info/METADATA,sha256=vZzgju7TvAO_6VEkj0dYKjsRC7eOCnyuLf9QWcpyZGM,2307
15
+ seabirdfilehandler-0.7.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
16
+ seabirdfilehandler-0.7.0.dist-info/RECORD,,
@@ -1,155 +0,0 @@
1
- from collections import UserDict
2
-
3
-
4
- class CnvValidationList(UserDict):
5
- """A python representation of the individual validation steps conducted
6
- in the process of a cnv file creation. These modules are stored in
7
- a dictionary structure, together with all the variables/metadata/etc.
8
- given in the header of a cnv file.
9
-
10
- Parameters
11
- ----------
12
-
13
- Returns
14
- -------
15
-
16
- """
17
-
18
- def __init__(self, cnv_header_val_modules: list):
19
- self.cnv_header_val_modules = cnv_header_val_modules
20
- self.data = {}
21
- self.modules = self.extract_individual_modules()
22
- for module in self.modules:
23
- module_data = self.create_dict_for_module(module)
24
- self.data[module] = module_data
25
-
26
- def extract_individual_modules(self) -> list:
27
- """ """
28
- module_list = []
29
- for line in self.cnv_header_val_modules:
30
- module = line.split("_")[0]
31
- if (module not in module_list) and (
32
- line.split()[0] != "file_type"
33
- ):
34
- module_list.append(module)
35
- return module_list
36
-
37
- def create_dict_for_module(self, module) -> dict:
38
- """
39
-
40
- Parameters
41
- ----------
42
- module :
43
-
44
-
45
- Returns
46
- -------
47
-
48
- """
49
- # TODO: probably need to split this into smaller bits
50
- out_dict = {}
51
- inner_action_dict = {}
52
- action_dict_present = False
53
- # extract lines corresponding to the module
54
- for line in self.cnv_header_val_modules:
55
- if module == line.split("_")[0]:
56
- # removing the module names from the lines
57
- shifting_index = len(module) + 1
58
- line_content = line[shifting_index:]
59
- # handle the case of the validation methods keyword being
60
- # 'action', which corresponds to an entire dict of values
61
- if line_content[:6] == "action":
62
- action_dict_present = True
63
- inner_action_dict = self.module_dict_feeder(
64
- line_content[6:], inner_action_dict
65
- )
66
- else:
67
- # handle the cases where after some date value, another value
68
- # is printed inside of [] brackets
69
- double_value_list = line_content.split("[")
70
- if len(double_value_list) > 1:
71
- out_dict = self.module_dict_feeder(
72
- double_value_list[1][shifting_index:-2], out_dict
73
- )
74
- line_content = double_value_list[0]
75
- if line_content[:11] == "surface_bin":
76
- surface_bin_dict = {}
77
- for line in line_content.split(","):
78
- self.module_dict_feeder(line, surface_bin_dict)
79
- out_dict["surface_bin"] = surface_bin_dict
80
- continue
81
- # usual behavior, for 99% cases:
82
- # assigning key and value to the module dict
83
- out_dict = self.module_dict_feeder(line_content, out_dict)
84
- if action_dict_present:
85
- out_dict["action"] = inner_action_dict
86
- return out_dict
87
-
88
- def module_dict_feeder(
89
- self, line: str, dictionary: dict, split_value: str = "="
90
- ):
91
- """
92
-
93
- Parameters
94
- ----------
95
- line: str :
96
-
97
- dictionary: dict :
98
-
99
- split_value: str :
100
- (Default value = '=')
101
-
102
- Returns
103
- -------
104
-
105
- """
106
- # adds the values of a specific header line into a dictionary
107
- try:
108
- key, value = line.split(split_value)
109
- except ValueError:
110
- pass
111
- else:
112
- dictionary[key.strip()] = value.strip()
113
- finally:
114
- return dictionary
115
-
116
- def get(self, module: str) -> dict:
117
- """
118
-
119
- Parameters
120
- ----------
121
- module: str :
122
-
123
-
124
- Returns
125
- -------
126
-
127
- """
128
- for element in self.data:
129
- if str(element) == module:
130
- return self.data[element]
131
- else:
132
- return {}
133
-
134
-
135
- class ValidationModule:
136
- """Class that is meant to represent the individual validation modules of
137
- the SeaSoft software. This includes all the input parameters and settins,
138
- as well as a description of the output.
139
- The idea is to inherit from this class for each individual module. But I
140
- am not sure if its worth the effort.
141
-
142
- Parameters
143
- ----------
144
-
145
- Returns
146
- -------
147
-
148
- """
149
-
150
- def __init__(self, name):
151
- self.name = name
152
-
153
- def extract_information(self):
154
- """ """
155
- pass
@@ -1,16 +0,0 @@
1
- seabirdfilehandler/__init__.py,sha256=XMmhfwZ3YPzR3sLTi8AHN6mSlrQA1tLEpw02_fI7Rr0,278
2
- seabirdfilehandler/bottlefile.py,sha256=qCh506J3MWZXM11243aw_oJRocVB0ZIipXQLEgkD5M0,6046
3
- seabirdfilehandler/bottlelogfile.py,sha256=MtMmEebdAktO3mk6KbmJC7dfx9sRLbV5qqDQt2qtpJE,4310
4
- seabirdfilehandler/cnvfile.py,sha256=IvCHjcgI-mgtJodSzlQ5bfyJWZ-D-3Lk5Z5Hh3BLfps,10048
5
- seabirdfilehandler/datafiles.py,sha256=9r0Mh3zPYJJ3CoybgOBH4Dsq43kLDnca9m8s_V0cYU8,9378
6
- seabirdfilehandler/file_collection.py,sha256=8uLoN55h00_EmiEa4mbyaSJZst7w8NhODv7DrtzEAwc,15021
7
- seabirdfilehandler/geomar_ctd_file_parser.py,sha256=4eCnkE0mvPKC8Dic8sXP4xpfwnk3K2MQcGFBf6loT8k,2655
8
- seabirdfilehandler/hexfile.py,sha256=TBplwbWHrTuJzv2qlx6xYNtoX43I2YUabDmaGZuBEDQ,2144
9
- seabirdfilehandler/parameter.py,sha256=MDVNGC1PZPf6IMN7r6Y3A3cg52AKXoyau1foEDQ1uoI,14877
10
- seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
11
- seabirdfilehandler/validation_modules.py,sha256=eZ6x0giftUtlxnRMOnK_vCkgccdwUXPrDjajFa-E6n0,4698
12
- seabirdfilehandler/xmlfiles.py,sha256=XqqbVNjyINySoe2ZC_qJglkAqshavZxT2-jorDOSj7Y,5084
13
- seabirdfilehandler-0.6.1.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
14
- seabirdfilehandler-0.6.1.dist-info/METADATA,sha256=4BNXjQ4Xgm5OQCyLkPK3C4l0OkFiD_e--dfjcYedS04,2307
15
- seabirdfilehandler-0.6.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
16
- seabirdfilehandler-0.6.1.dist-info/RECORD,,