seabirdfilehandler 0.4.3__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of seabirdfilehandler might be problematic. Click here for more details.

@@ -1,23 +0,0 @@
1
# Logging configuration in the dictionary schema understood by Python's
# logging.config.dictConfig (schema version 1).
version: 1

formatters:
  # Single shared layout: timestamp - logger name - level - message.
  simple:
    format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'

handlers:
  # Writes every record from DEBUG upwards to standard output.
  console:
    class: logging.StreamHandler
    level: DEBUG
    formatter: simple
    stream: ext://sys.stdout
  # NOTE(review): no logger below references this handler, so nothing is
  # routed to seabirdfiles.log. If this file is loaded with dictConfig the
  # handler is still instantiated, which would create the (empty) log file
  # in the working directory - confirm whether that is intended.
  file:
    class: logging.FileHandler
    filename: seabirdfiles.log
    level: DEBUG
    formatter: simple

loggers:
  # NOTE(review): 'simpleExample' matches no logger name visible in this
  # package (modules use logging.getLogger(__name__)); it looks like a
  # leftover from a template - confirm before relying on it.
  simpleExample:
    level: DEBUG
    handlers: [console]
    # YAML 1.1 loaders read a bare 'no' as boolean false.
    propagate: no

# The root logger sends everything from DEBUG upwards to the console.
root:
  level: DEBUG
  handlers: [console]
@@ -1,210 +0,0 @@
1
- from pathlib import Path
2
- from dataclasses import dataclass
3
- from datetime import datetime, timezone
4
- import xmltodict
5
- import logging
6
-
7
- logger = logging.getLogger(__name__)
8
-
9
-
10
- @dataclass
11
- class SeaBirdFile:
12
- """Base class to describe any kind of file generated by the Seasoft
13
- software. Such a file should be given as input to this class and the
14
- information it contains should subsequently be extracted and structured
15
- automatically. Various classes inherit from this one for a more file
16
- specific behaviour
17
-
18
- Parameters
19
- ----------
20
-
21
- Returns
22
- -------
23
-
24
- """
25
-
26
- def __init__(
27
- self,
28
- path_to_file: Path | str,
29
- only_header: bool = False,
30
- ):
31
- self.path_to_file = Path(path_to_file)
32
- self.file_name = self.path_to_file.stem
33
- self.file_dir = self.path_to_file.parent
34
- self.timestamp = datetime.now(timezone.utc)
35
- self.raw_file_data = [] # the text file input
36
- self.header = [] # the full file header
37
- self.sbe9_data = [] # device specific information
38
- self.metadata = {} # non-SeaBird metadata
39
- self.metadata_list = [] # unstructured metadata for easier export
40
- self.data_table_description = [] # the column names and other info
41
- self.data_table_stats = {}
42
- self.data_table_names_and_spans = []
43
- self.data_table_misc = {}
44
- self.sensor_data = []
45
- self.sensors = {} # xml-parsed sensor data
46
- self.processing_info = [] # everything after the sensor data
47
- self.data = [] # the data table
48
- self.file_data = self.raw_file_data # variable file information
49
- with self.path_to_file.open("r", encoding="latin-1") as file:
50
- for line in file:
51
- self.raw_file_data.append(line)
52
- if only_header and line.startswith("*END*"):
53
- break
54
- self.extract_file_information(only_header)
55
- if len(self.sensor_data) > 0:
56
- self.sensors = self.sensor_xml_to_flattened_dict(
57
- "".join(self.sensor_data)
58
- )
59
-
60
- def __str__(self) -> str:
61
- return "/n".join(self.file_data)
62
-
63
- def __repr__(self) -> str:
64
- return str(self.path_to_file.absolute())
65
-
66
- def __eq__(self, other) -> bool:
67
- return self.file_data == other.file_data
68
-
69
- def extract_file_information(self, only_header: bool = False):
70
- """Reads and structures all the different information present in the
71
- file. Lists and Dictionaries are the data structures of choice. Uses
72
- basic prefix checking to distinguish different header information.
73
-
74
- Parameters
75
- ----------
76
-
77
- Returns
78
- -------
79
-
80
- """
81
- self.metadata_list = []
82
- past_sensors = False
83
- for line in self.raw_file_data:
84
- line_prefix = line[:2]
85
- if line_prefix == "* ":
86
- self.header.append(line)
87
- self.sbe9_data.append(line[2:])
88
- elif line_prefix == "**":
89
- self.header.append(line)
90
- self.metadata_list.append(line[3:])
91
- elif line_prefix == "# ":
92
- self.header.append(line)
93
- if line[2:].strip()[0] == "<":
94
- self.sensor_data.append(line[2:])
95
- past_sensors = True
96
- else:
97
- if past_sensors:
98
- self.processing_info.append(line[2:])
99
- else:
100
- self.data_table_description.append(line[2:])
101
- elif line_prefix == "*E":
102
- self.header.append(line)
103
- if only_header:
104
- break
105
- else:
106
- self.data.append(line)
107
-
108
- self.metadata = self.structure_metadata(self.metadata_list)
109
- self.differentiate_table_description()
110
-
111
- def differentiate_table_description(self):
112
- past_spans = False
113
- pre = []
114
- column_names = []
115
- column_value_spans = []
116
- post = []
117
- for line in self.data_table_description:
118
- if line.startswith("name"):
119
- # TODO: cuts off lines containing multiple '=' symbols
120
- column_names.append(line.split("=")[1].strip())
121
- elif line.startswith("span"):
122
- past_spans = True
123
- column_value_spans.append(line.split("=")[1].strip())
124
- else:
125
- if not past_spans:
126
- pre.append(line)
127
- else:
128
- post.append(line)
129
- assert len(column_names) == len(column_value_spans)
130
- self.data_table_stats = {
131
- line.split("=")[0].strip(): line.split("=")[1].strip()
132
- for line in pre
133
- }
134
- self.data_table_names_and_spans = [
135
- (name, span)
136
- for name, span in zip(column_names, column_value_spans)
137
- ]
138
- self.data_table_misc = {
139
- line.split("=")[0].strip(): line.split("=")[1].strip()
140
- for line in post
141
- }
142
-
143
- def sensor_xml_to_flattened_dict(
144
- self, sensor_data: str
145
- ) -> list[dict] | dict:
146
- """Reads the pure xml sensor input and creates a multilevel dictionary,
147
- dropping the first two dictionaries, as they are single entry only
148
-
149
- Parameters
150
- ----------
151
-
152
- Returns
153
- -------
154
-
155
- """
156
- full_sensor_dict = xmltodict.parse(sensor_data, process_comments=True)
157
- try:
158
- sensors = full_sensor_dict["Sensors"]["sensor"]
159
- except KeyError as error:
160
- logger.error(f"XML is not formatted as expected: {error}")
161
- return full_sensor_dict
162
- else:
163
- # create a tidied version of the xml-parsed sensor dict
164
- tidied_sensor_list = []
165
- for entry in sensors:
166
- # use comment value as type descriptor
167
- comment = entry["#comment"]
168
- split_comment = comment.split(",")
169
- new_entry = split_comment[1].strip()
170
- if split_comment[-1] == " 2":
171
- new_entry += " 2"
172
- # remove second-level dict
173
- calibration_info = list(entry.values())[-1]
174
- try:
175
- new_dict = {
176
- "Channel": entry["@Channel"],
177
- "SensorName": new_entry,
178
- **calibration_info,
179
- }
180
- except TypeError:
181
- new_dict = {
182
- "Channel": entry["@Channel"],
183
- "SensorName": new_entry,
184
- "Info": calibration_info,
185
- }
186
- tidied_sensor_list.append(new_dict)
187
- return tidied_sensor_list
188
-
189
- def structure_metadata(self, metadata_list: list) -> dict:
190
- """Creates a dictionary to store the metadata that is added by using
191
- werums dship API.
192
-
193
- Parameters
194
- ----------
195
- metadata_list: list :
196
- a list of the individual lines of metadata found in the file
197
-
198
- Returns
199
- -------
200
- a dictionary of the lines of metadata divided into key-value pairs
201
- """
202
- out_dict = {}
203
- for line in metadata_list:
204
- try:
205
- (key, val) = line.split("=")
206
- except ValueError:
207
- out_dict["text"] = line
208
- else:
209
- out_dict[key.strip()] = val.strip()
210
- return out_dict
@@ -1,14 +0,0 @@
1
- seabirdfilehandler/__init__.py,sha256=8Vk2TURWXv2_NG_U_fR0fIVDFymkFkBipvmlS5ucB3M,147
2
- seabirdfilehandler/dataframe_meta_accessor.py,sha256=x4mSEN49us6Ezzjdt41fl5Ry8IJR09ORrZ1roOIJbyc,6439
3
- seabirdfilehandler/datatablefiles.py,sha256=yzTAzsWcdrHYaa2QaR6OpdEs-cu2py8V5o79s2Uz7MM,31646
4
- seabirdfilehandler/file_collection.py,sha256=nWyi5FToCV9-e_zcaLhRb4oOt8KAmyHC-SBGLJO9KQ4,6909
5
- seabirdfilehandler/logging.yaml,sha256=mXxbhJPio3OGaukTpc3rLGA8Ywq1DNqp0Vn5YCbH6jY,459
6
- seabirdfilehandler/parameter.py,sha256=UyKb_HGQ57pETdhSfR5FbJ60aOj8_d3_Tgw_akth0TY,13283
7
- seabirdfilehandler/seabirdfiles.py,sha256=BKLyk5gUMkt1CG4ljDXlCqcr5zej0-9PjPS0sX2E4n8,7449
8
- seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
9
- seabirdfilehandler/validation_modules.py,sha256=eZ6x0giftUtlxnRMOnK_vCkgccdwUXPrDjajFa-E6n0,4698
10
- seabirdfilehandler/xmlfiles.py,sha256=L_puQf8eg0ojv85AyEMID4jnwkOlV_fgZP3W5yeSUBY,4668
11
- seabirdfilehandler-0.4.3.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
12
- seabirdfilehandler-0.4.3.dist-info/METADATA,sha256=ODlAzixojiAr_KtwyXVmYlWDn0UJTzFlp_RBAWzLxug,1289
13
- seabirdfilehandler-0.4.3.dist-info/WHEEL,sha256=XbeZDeTWKc1w7CSIyre5aMDU_-PohRwTQceYnisIYYY,88
14
- seabirdfilehandler-0.4.3.dist-info/RECORD,,