seabirdfilehandler 0.4.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of seabirdfilehandler might be problematic. Click here for more details.

@@ -0,0 +1,200 @@
1
+ from pathlib import Path
2
+ from dataclasses import dataclass
3
+ from datetime import datetime, timezone
4
+ import xmltodict
5
+ import logging
6
+
7
+ logger = logging.getLogger(__name__)
8
+
9
+
10
+ @dataclass
11
+ class SeaBirdFile:
12
+ """Base class to describe any kind of file generated by the Seasoft
13
+ software. Such a file should be given as input to this class and the
14
+ information it contains should subsequently be extracted and structured
15
+ automatically. Various classes inherit from this one for a more file
16
+ specific behaviour
17
+
18
+ Parameters
19
+ ----------
20
+
21
+ Returns
22
+ -------
23
+
24
+ """
25
+
26
+ def __init__(
27
+ self,
28
+ path_to_file: Path | str,
29
+ only_header: bool = False,
30
+ ):
31
+ self.path_to_file = Path(path_to_file)
32
+ self.file_name = self.path_to_file.stem
33
+ self.file_dir = self.path_to_file.parents[0]
34
+ self.timestamp = datetime.now(timezone.utc)
35
+ self.raw_file_data = [] # the text file input
36
+ self.header = [] # the full file header
37
+ self.sbe9_data = [] # device specific information
38
+ self.metadata = {} # non-SeaBird metadata
39
+ self.metadata_list = [] # unstructured metadata for easier export
40
+ self.data_table_description = [] # the column names and other info
41
+ self.data_table_stats = {}
42
+ self.data_table_names_and_spans = []
43
+ self.data_table_misc = {}
44
+ self.sensor_data = []
45
+ self.sensors = {} # xml-parsed sensor data
46
+ self.processing_info = [] # everything after the sensor data
47
+ self.data = [] # the data table
48
+ self.file_data = self.raw_file_data # variable file information
49
+ with self.path_to_file.open("r", encoding="latin-1") as file:
50
+ for line in file:
51
+ self.raw_file_data.append(line)
52
+ if only_header and line.startswith("*END*"):
53
+ break
54
+ self.extract_file_information(only_header)
55
+ if len(self.sensor_data) > 0:
56
+ self.sensors = self.sensor_xml_to_flattened_dict("".join(self.sensor_data))
57
+
58
+ def __str__(self) -> str:
59
+ return "/n".join(self.file_data)
60
+
61
+ def __repr__(self) -> str:
62
+ return str(self.path_to_file.absolute())
63
+
64
+ def __eq__(self, other) -> bool:
65
+ return self.file_data == other.file_data
66
+
67
+ def extract_file_information(self, only_header: bool = False):
68
+ """Reads and structures all the different information present in the
69
+ file. Lists and Dictionaries are the data structures of choice. Uses
70
+ basic prefix checking to distinguish different header information.
71
+
72
+ Parameters
73
+ ----------
74
+
75
+ Returns
76
+ -------
77
+
78
+ """
79
+ self.metadata_list = []
80
+ past_sensors = False
81
+ for line in self.raw_file_data:
82
+ line_prefix = line[:2]
83
+ if line_prefix == "* ":
84
+ self.header.append(line)
85
+ self.sbe9_data.append(line[2:])
86
+ elif line_prefix == "**":
87
+ self.header.append(line)
88
+ self.metadata_list.append(line[3:])
89
+ elif line_prefix == "# ":
90
+ self.header.append(line)
91
+ if line[2:].strip()[0] == "<":
92
+ self.sensor_data.append(line[2:])
93
+ past_sensors = True
94
+ else:
95
+ if past_sensors:
96
+ self.processing_info.append(line[2:])
97
+ else:
98
+ self.data_table_description.append(line[2:])
99
+ elif line_prefix == "*E":
100
+ self.header.append(line)
101
+ if only_header:
102
+ break
103
+ else:
104
+ self.data.append(line)
105
+
106
+ self.metadata = self.structure_metadata(self.metadata_list)
107
+ self.differentiate_table_description()
108
+
109
+ def differentiate_table_description(self):
110
+ past_spans = False
111
+ pre = []
112
+ column_names = []
113
+ column_value_spans = []
114
+ post = []
115
+ for line in self.data_table_description:
116
+ if line.startswith("name"):
117
+ column_names.append(line.split("=")[1].strip())
118
+ elif line.startswith("span"):
119
+ past_spans = True
120
+ column_value_spans.append(line.split("=")[1].strip())
121
+ else:
122
+ if not past_spans:
123
+ pre.append(line)
124
+ else:
125
+ post.append(line)
126
+ assert len(column_names) == len(column_value_spans)
127
+ self.data_table_stats = {
128
+ line.split("=")[0].strip(): line.split("=")[1].strip() for line in pre
129
+ }
130
+ self.data_table_names_and_spans = [
131
+ (name, span) for name, span in zip(column_names, column_value_spans)
132
+ ]
133
+ self.data_table_misc = {line.split("=")[0].strip(): line.split("=")[1].strip() for line in post}
134
+
135
+ def sensor_xml_to_flattened_dict(self, sensor_data: str) -> list[dict] | dict:
136
+ """Reads the pure xml sensor input and creates a multilevel dictionary,
137
+ dropping the first two dictionaries, as they are single entry only
138
+
139
+ Parameters
140
+ ----------
141
+
142
+ Returns
143
+ -------
144
+
145
+ """
146
+ full_sensor_dict = xmltodict.parse(sensor_data, process_comments=True)
147
+ try:
148
+ sensors = full_sensor_dict["Sensors"]["sensor"]
149
+ except KeyError as error:
150
+ logger.error(f"XML is not formatted as expected: {error}")
151
+ return full_sensor_dict
152
+ else:
153
+ # create a tidied version of the xml-parsed sensor dict
154
+ tidied_sensor_list = []
155
+ for entry in sensors:
156
+ # use comment value as type descriptor
157
+ comment = entry["#comment"]
158
+ split_comment = comment.split(",")
159
+ new_entry = split_comment[1].strip()
160
+ if split_comment[-1] == " 2":
161
+ new_entry += " 2"
162
+ # remove second-level dict
163
+ calibration_info = list(entry.values())[-1]
164
+ try:
165
+ new_dict = {
166
+ "Channel": entry["@Channel"],
167
+ "SensorName": new_entry,
168
+ **calibration_info,
169
+ }
170
+ except TypeError:
171
+ new_dict = {
172
+ "Channel": entry["@Channel"],
173
+ "SensorName": new_entry,
174
+ "Info": calibration_info,
175
+ }
176
+ tidied_sensor_list.append(new_dict)
177
+ return tidied_sensor_list
178
+
179
+ def structure_metadata(self, metadata_list: list) -> dict:
180
+ """Creates a dictionary to store the metadata that is added by using
181
+ werums dship API.
182
+
183
+ Parameters
184
+ ----------
185
+ metadata_list: list :
186
+ a list of the individual lines of metadata found in the file
187
+
188
+ Returns
189
+ -------
190
+ a dictionary of the lines of metadata divided into key-value pairs
191
+ """
192
+ out_dict = {}
193
+ for line in metadata_list:
194
+ try:
195
+ (key, val) = line.split("=")
196
+ except ValueError:
197
+ out_dict["text"] = line
198
+ else:
199
+ out_dict[key.strip()] = val.strip()
200
+ return out_dict
@@ -0,0 +1,152 @@
1
+ from collections import UserDict
2
+
3
+
4
class CnvValidationList(UserDict):
    """A python representation of the individual validation steps
    conducted in the process of a cnv file creation.

    Each module name found in the header lines maps to a dict of that
    module's variables/metadata as printed in the cnv header. The
    mapping is exposed through the UserDict interface.

    Parameters
    ----------
    cnv_header_val_modules: list :
        the cnv header lines that describe the processing modules
    """

    def __init__(self, cnv_header_val_modules: list):
        self.cnv_header_val_modules = cnv_header_val_modules
        # UserDict keeps its mapping in `data`; it is filled manually
        # below, so super().__init__() is deliberately not called
        self.data = {}
        self.modules = self.extract_individual_modules()
        for module in self.modules:
            module_data = self.create_dict_for_module(module)
            self.data[module] = module_data

    def extract_individual_modules(self) -> list:
        """Collect the distinct module names (the text before the first
        '_') from the header lines, excluding the 'file_type' line.

        Returns
        -------
        a list of unique module name strings, in order of appearance
        """
        module_list = []
        for line in self.cnv_header_val_modules:
            module = line.split('_')[0]
            if ((module not in module_list) and (line.split()[0] != 'file_type')):
                module_list.append(module)
        return module_list

    def create_dict_for_module(self, module) -> dict:
        """Build the parameter dict for one module from its header lines.

        Parameters
        ----------
        module :
            the module name whose header lines shall be collected

        Returns
        -------
        a dict of the module's key-value pairs; 'action' and
        'surface_bin' entries hold nested dicts of their own
        """
        # TODO: probably need to split this into smaller bits
        out_dict = {}
        inner_action_dict = {}
        action_dict_present = False
        # extract lines corresponding to the module
        for line in self.cnv_header_val_modules:
            if module == line.split('_')[0]:
                # removing the module names from the lines
                # (+1 skips the '_' separator as well)
                shifting_index = len(module) + 1
                line_content = line[shifting_index:]
                # handle the case of the validation methods keyword being
                # 'action', which corresponds to an entire dict of values
                if line_content[:6] == 'action':
                    action_dict_present = True
                    inner_action_dict = self.module_dict_feeder(
                        line_content[6:], inner_action_dict)
                else:
                    # handle the cases where after some date value, another value
                    # is printed inside of [] brackets
                    double_value_list = line_content.split('[')
                    if len(double_value_list) > 1:
                        # NOTE(review): re-uses shifting_index to drop a
                        # repeated prefix inside the brackets and -2 to
                        # trim the closing ']' plus newline — confirm
                        # against a real cnv header
                        out_dict = self.module_dict_feeder(
                            double_value_list[1][shifting_index:-2], out_dict)
                        line_content = double_value_list[0]
                    if line_content[:11] == 'surface_bin':
                        # surface_bin carries several comma-separated
                        # 'key = value' pairs that form their own dict
                        surface_bin_dict = {}
                        for line in line_content.split(','):
                            self.module_dict_feeder(line, surface_bin_dict)
                        out_dict['surface_bin'] = surface_bin_dict
                        continue
                    # usual behavior, for 99% cases:
                    # assigning key and value to the module dict
                    out_dict = self.module_dict_feeder(line_content, out_dict)
        if action_dict_present:
            out_dict['action'] = inner_action_dict
        return out_dict

    def module_dict_feeder(self,
                           line: str,
                           dictionary: dict,
                           split_value: str = '='):
        """Split ``line`` at ``split_value`` and store the stripped
        key/value pair in ``dictionary``.

        Lines that do not split into exactly two parts are silently
        ignored.

        Parameters
        ----------
        line: str :
            a single 'key = value' header line
        dictionary: dict :
            the dict to feed; mutated in place
        split_value: str :
            the separator between key and value
            (Default value = '=')

        Returns
        -------
        the (possibly updated) dictionary
        """
        # adds the values of a specific header line into a dictionary
        try:
            key, value = line.split(split_value)
        except ValueError:
            # zero or more than one separator -> leave the dict untouched
            pass
        else:
            dictionary[key.strip()] = value.strip()
        finally:
            # NOTE(review): returning from finally would also swallow
            # any unexpected exception raised above
            return dictionary

    def get(self, module: str) -> dict:
        """Return the parameter dict for ``module``.

        NOTE(review): shadows dict.get and drops its ``default``
        parameter; missing modules yield an empty dict instead.

        Parameters
        ----------
        module: str :
            the module name to look up

        Returns
        -------
        the module's parameter dict, or {} if the module is unknown
        """
        for element in self.data:
            if str(element) == module:
                return self.data[element]
        else:
            return {}
130
+
131
+
132
class ValidationModule:
    """A single SeaSoft validation/processing module.

    Holds the module's name and serves as a base class: the intent is
    that each concrete module (with its input parameters, settings and
    output description) subclasses this one.

    Parameters
    ----------
    name :
        the module's name
    """

    def __init__(self, name):
        self.name = name

    def extract_information(self):
        """Hook for subclasses; the base implementation does nothing."""
        return None
@@ -0,0 +1,87 @@
1
+ from pathlib import Path
2
+ from collections import UserDict
3
+ import xml.etree.ElementTree as ET
4
+ import json
5
+ import xmltodict
6
+
7
+
8
+ class XMLFile(UserDict):
9
+ """
10
+ Parent class for XML and psa representation that loads XML as a
11
+ python-internal tree and as a dict.
12
+
13
+ Parameters
14
+ ----------
15
+ path_to_file : Path | str :
16
+ the path to the xml file
17
+
18
+ Returns
19
+ -------
20
+
21
+ """
22
+
23
+ def __init__(self, path_to_file: Path | str):
24
+ self.path_to_file = Path(path_to_file)
25
+ self.file_name = self.path_to_file.name
26
+ self.file_dir = self.path_to_file.parents[0]
27
+ self.input = ""
28
+ with open(self.path_to_file, "r") as file:
29
+ for line in file:
30
+ self.input += line
31
+ self.xml_tree = ET.fromstring(self.input)
32
+ self.data = xmltodict.parse(self.input)
33
+
34
+ def to_xml(self, file_name=None, file_path=None):
35
+ """
36
+ Writes the dictionary to xml.
37
+
38
+ Parameters
39
+ ----------
40
+ file_name : str :
41
+ the original files name (Default value = self.file_name)
42
+ file_path : pathlib.Path :
43
+ the directory of the file (Default value = self.file_dir)
44
+
45
+ Returns
46
+ -------
47
+
48
+ """
49
+ file_path = self.file_dir if file_path is None else file_path
50
+ file_name = self.file_name if file_name is None else file_name
51
+ with open(Path(file_path).joinpath(file_name), "w") as file:
52
+ file.write(xmltodict.unparse(self.data, pretty=True))
53
+
54
+ def to_json(self, file_name=None, file_path=None):
55
+ """
56
+ Writes the dictionary representation of the XML input to a json
57
+ file.
58
+
59
+ Parameters
60
+ ----------
61
+ file_name : str :
62
+ the original files name (Default value = self.file_name)
63
+ file_path : pathlib.Path :
64
+ the directory of the file (Default value = self.file_dir)
65
+
66
+ Returns
67
+ -------
68
+
69
+ """
70
+ file_path = self.file_dir if file_path is None else file_path
71
+ file_name = self.file_name if file_name is None else file_name
72
+ with open(Path(file_path).joinpath(file_name + ".json"), "w") as file:
73
+ json.dump(self.data, file, indent=4)
74
+
75
+
76
class XMLCONFile(XMLFile):
    """A .XMLCON instrument configuration file.

    Behaves exactly like the generic XMLFile; the subclass exists so
    configuration files carry their own type.
    """
81
+
82
+
83
class PsaFile(XMLFile):
    """A Seasoft .psa program setup file.

    Behaves exactly like the generic XMLFile; the subclass exists so
    psa files carry their own type.
    """