seabirdfilehandler 0.7.7__tar.gz → 0.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of seabirdfilehandler might be problematic. Click here for more details.
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/PKG-INFO +1 -1
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/pyproject.toml +1 -1
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/file_collection.py +18 -6
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/parameter.py +56 -54
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/LICENSE +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/README.md +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/__init__.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlefile.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlelogfile.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/cnvfile.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/datafiles.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/geomar_ctd_file_parser.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/hexfile.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/processing_steps.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/utils.py +0 -0
- {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/xmlfiles.py +0 -0
|
@@ -23,7 +23,7 @@ urls.documentation = "https://ctd-software.pages.io-warnemuende.de/seabirdfileha
|
|
|
23
23
|
dynamic = []
|
|
24
24
|
requires-python = ">=3.12"
|
|
25
25
|
dependencies = ["pandas>=2.2.1", "xmltodict>=0.13.0"]
|
|
26
|
-
version = "0.
|
|
26
|
+
version = "0.8.0"
|
|
27
27
|
|
|
28
28
|
[project.optional-dependencies]
|
|
29
29
|
test = ["pytest>=8.3.0", "parameterized>=0.9.0", "pre-commit>=3.6.2"]
|
{seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/file_collection.py
RENAMED
|
@@ -417,13 +417,25 @@ class CnvCollection(FileCollection):
|
|
|
417
417
|
A list of dictionaries that represent the data column information.
|
|
418
418
|
"""
|
|
419
419
|
all_column_descriptions = [
|
|
420
|
-
file.parameters.
|
|
420
|
+
file.parameters.get_metadata() for file in self.data
|
|
421
421
|
]
|
|
422
|
-
for info in all_column_descriptions:
|
|
422
|
+
for index, info in enumerate(all_column_descriptions):
|
|
423
423
|
if all_column_descriptions[0] != info:
|
|
424
|
-
|
|
425
|
-
|
|
426
|
-
)
|
|
424
|
+
for expected, real in zip(
|
|
425
|
+
all_column_descriptions[0].items(), info.items()
|
|
426
|
+
):
|
|
427
|
+
# allow difference in latitude inside depth
|
|
428
|
+
if expected[0] == "depSM":
|
|
429
|
+
if real[0] != "depSM":
|
|
430
|
+
raise AssertionError(
|
|
431
|
+
f"Data files {self.data[0].path_to_file} and {self.data[index].path_to_file} differ in:\n{expected} and {real}"
|
|
432
|
+
)
|
|
433
|
+
|
|
434
|
+
elif expected != real:
|
|
435
|
+
raise AssertionError(
|
|
436
|
+
f"Data files {self.data[0].path_to_file} and {self.data[index].path_to_file} differ in:\n{expected} and {real}"
|
|
437
|
+
)
|
|
438
|
+
|
|
427
439
|
return all_column_descriptions[0]
|
|
428
440
|
|
|
429
441
|
def get_array(self) -> np.ndarray:
|
|
@@ -435,7 +447,7 @@ class CnvCollection(FileCollection):
|
|
|
435
447
|
A numpy array, representing the data of all input files.
|
|
436
448
|
"""
|
|
437
449
|
return np.concatenate(
|
|
438
|
-
[file.parameters.
|
|
450
|
+
[file.parameters.get_full_data_array() for file in self.data]
|
|
439
451
|
)
|
|
440
452
|
|
|
441
453
|
def get_processing_steps(self) -> list:
|
|
@@ -36,18 +36,30 @@ class Parameters(UserDict):
|
|
|
36
36
|
metadata: list,
|
|
37
37
|
only_header: bool = False,
|
|
38
38
|
):
|
|
39
|
-
self.
|
|
40
|
-
self.
|
|
41
|
-
self.
|
|
42
|
-
self.metadata, self.duplicate_columns = self.reading_data_header(
|
|
39
|
+
self.data = {}
|
|
40
|
+
self.differentiate_table_description(metadata)
|
|
41
|
+
parsed_metadata, self.duplicate_columns = self.reading_data_header(
|
|
43
42
|
metadata
|
|
44
43
|
)
|
|
45
44
|
if not only_header:
|
|
46
|
-
self.full_data_array = self.create_full_ndarray()
|
|
47
|
-
self.
|
|
45
|
+
self.full_data_array = self.create_full_ndarray(data)
|
|
46
|
+
self.create_parameter_instances(
|
|
47
|
+
self.full_data_array, parsed_metadata
|
|
48
|
+
)
|
|
49
|
+
|
|
50
|
+
def get_full_data_array(self) -> np.ndarray:
|
|
51
|
+
return np.array(
|
|
52
|
+
[parameter.data for parameter in self.data.values()], dtype=float
|
|
53
|
+
).T
|
|
48
54
|
|
|
49
|
-
def
|
|
50
|
-
return [parameter
|
|
55
|
+
def get_names(self) -> list[str]:
|
|
56
|
+
return [parameter.name for parameter in self.data.values()]
|
|
57
|
+
|
|
58
|
+
def get_metadata(self) -> dict[str, dict]:
|
|
59
|
+
return {
|
|
60
|
+
parameter.name: parameter.metadata
|
|
61
|
+
for parameter in self.data.values()
|
|
62
|
+
}
|
|
51
63
|
|
|
52
64
|
def get_parameter_list(self) -> list[Parameter]:
|
|
53
65
|
""" """
|
|
@@ -68,9 +80,6 @@ class Parameters(UserDict):
|
|
|
68
80
|
A numpy array of the same shape as the cnv files data table
|
|
69
81
|
|
|
70
82
|
"""
|
|
71
|
-
data_table = (
|
|
72
|
-
self.raw_input_data if len(data_table) == 0 else data_table
|
|
73
|
-
)
|
|
74
83
|
n = 11
|
|
75
84
|
row_list = []
|
|
76
85
|
for line in data_table:
|
|
@@ -84,7 +93,8 @@ class Parameters(UserDict):
|
|
|
84
93
|
|
|
85
94
|
def create_parameter_instances(
|
|
86
95
|
self,
|
|
87
|
-
|
|
96
|
+
array_data: np.ndarray,
|
|
97
|
+
metadata: dict[str, dict],
|
|
88
98
|
) -> dict[str, Parameter]:
|
|
89
99
|
"""
|
|
90
100
|
Differentiates the individual parameter columns into separate parameter
|
|
@@ -101,30 +111,24 @@ class Parameters(UserDict):
|
|
|
101
111
|
A dictionary of parameter instances
|
|
102
112
|
|
|
103
113
|
"""
|
|
104
|
-
metadata = (
|
|
105
|
-
self.metadata if len(list(metadata.keys())) == 0 else metadata
|
|
106
|
-
)
|
|
107
114
|
parameter_dict = {}
|
|
108
115
|
list_of_metadata_shortnames = list(metadata.keys())
|
|
109
116
|
# if column number and metadata number is different, we are propably
|
|
110
117
|
# working with duplicate_columns and will drop the duplicates
|
|
111
|
-
if
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
)
|
|
115
|
-
assert self.full_data_array.shape[1] == len(
|
|
116
|
-
list_of_metadata_shortnames
|
|
117
|
-
)
|
|
118
|
+
if array_data.shape[1] != len(list_of_metadata_shortnames):
|
|
119
|
+
array_data = np.delete(array_data, self.duplicate_columns, 1)
|
|
120
|
+
assert array_data.shape[1] == len(list_of_metadata_shortnames)
|
|
118
121
|
# rewrite the column number in the metadata header
|
|
119
122
|
self.data_table_stats["nquan"] = str(
|
|
120
123
|
int(self.data_table_stats["nquan"])
|
|
121
124
|
- len(self.duplicate_columns)
|
|
122
125
|
)
|
|
123
|
-
for i in range(
|
|
124
|
-
column_data = self.full_data_array[:, i]
|
|
126
|
+
for i in range(array_data.shape[1]):
|
|
125
127
|
key = list_of_metadata_shortnames[i]
|
|
126
|
-
parameter_dict[key] =
|
|
127
|
-
data=
|
|
128
|
+
parameter_dict[key] = self.create_parameter(
|
|
129
|
+
data=array_data[:, i],
|
|
130
|
+
metadata=metadata[key],
|
|
131
|
+
name=key,
|
|
128
132
|
)
|
|
129
133
|
return parameter_dict
|
|
130
134
|
|
|
@@ -132,17 +136,25 @@ class Parameters(UserDict):
|
|
|
132
136
|
"""Recreates the data table descriptions, like column names and spans
|
|
133
137
|
from the structured dictionaries these values were stored in."""
|
|
134
138
|
new_table_info = []
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
139
|
+
# 'data table stats'
|
|
140
|
+
data_array = self.get_full_data_array()
|
|
141
|
+
new_table_info.append(f"nquan = {data_array.shape[1]}")
|
|
142
|
+
new_table_info.append(f"nvalues = {data_array.shape[0]}")
|
|
143
|
+
new_table_info.append(f"units = {self.data_table_stats['units']}\n")
|
|
144
|
+
# 'data tables names'
|
|
145
|
+
for index, metadata in enumerate(self.get_metadata().values()):
|
|
146
|
+
new_table_info.append(
|
|
147
|
+
f"name {index} = {metadata['shortname']}: {metadata['longinfo']}\n"
|
|
148
|
+
)
|
|
149
|
+
# 'data table spans'
|
|
150
|
+
for index, (minimum, maximum) in enumerate(self.get_spans()):
|
|
151
|
+
new_table_info.append(f"span {index} = {minimum}, {maximum}\n")
|
|
152
|
+
# 'data table misc'
|
|
141
153
|
for key, value in self.data_table_misc.items():
|
|
142
154
|
new_table_info.append(f"{key} = {value}\n")
|
|
143
155
|
return new_table_info
|
|
144
156
|
|
|
145
|
-
def differentiate_table_description(self):
|
|
157
|
+
def differentiate_table_description(self, metadata: list):
|
|
146
158
|
"""
|
|
147
159
|
The original method that structures data table metadata.
|
|
148
160
|
|
|
@@ -153,7 +165,7 @@ class Parameters(UserDict):
|
|
|
153
165
|
column_names = []
|
|
154
166
|
column_value_spans = []
|
|
155
167
|
post = []
|
|
156
|
-
for line in
|
|
168
|
+
for line in metadata:
|
|
157
169
|
if line.startswith("name"):
|
|
158
170
|
column_names.append(line.split("=", 1)[1].strip())
|
|
159
171
|
elif line.startswith("span"):
|
|
@@ -188,31 +200,18 @@ class Parameters(UserDict):
|
|
|
188
200
|
The new parameter
|
|
189
201
|
|
|
190
202
|
"""
|
|
191
|
-
|
|
192
|
-
# add to parameter dict at given
|
|
203
|
+
# add to parameter dict at given position
|
|
193
204
|
if position:
|
|
194
205
|
new_dict = {}
|
|
195
|
-
for
|
|
206
|
+
for key, value in self.data.items():
|
|
196
207
|
new_dict[key] = value
|
|
197
208
|
if key == position:
|
|
198
209
|
new_dict[parameter.name] = parameter
|
|
199
|
-
position_index = index + 1
|
|
200
210
|
self.data = new_dict
|
|
201
211
|
|
|
202
212
|
else:
|
|
203
213
|
self.data[parameter.name] = parameter
|
|
204
214
|
|
|
205
|
-
# update metadata dict
|
|
206
|
-
self.metadata = {
|
|
207
|
-
parameter.name: parameter.metadata
|
|
208
|
-
for parameter in self.data.values()
|
|
209
|
-
}
|
|
210
|
-
# add to the data array if data
|
|
211
|
-
if parameter.type == "data":
|
|
212
|
-
self.full_data_array = np.insert(
|
|
213
|
-
self.full_data_array, position_index, parameter.data, axis=1
|
|
214
|
-
)
|
|
215
|
-
|
|
216
215
|
def create_parameter(
|
|
217
216
|
self,
|
|
218
217
|
data: np.ndarray | int | float | str | None,
|
|
@@ -357,17 +356,17 @@ class Parameters(UserDict):
|
|
|
357
356
|
|
|
358
357
|
|
|
359
358
|
"""
|
|
360
|
-
if len(header_info) == 0:
|
|
361
|
-
header_info = self.raw_metadata
|
|
362
359
|
table_header = {}
|
|
363
360
|
duplicate_columns = []
|
|
364
361
|
for line in header_info:
|
|
365
362
|
if line.startswith("name"):
|
|
366
363
|
header_meta_info = {}
|
|
367
364
|
# get basic shortname and the full, non-differentiated info
|
|
368
|
-
shortname = longinfo = line_info = line.split("=")[
|
|
365
|
+
shortname = longinfo = line_info = line.split("=", 1)[
|
|
366
|
+
1
|
|
367
|
+
].strip()
|
|
369
368
|
try:
|
|
370
|
-
shortname, longinfo = line_info.split(":")
|
|
369
|
+
shortname, longinfo = line_info.split(":", 1)
|
|
371
370
|
except IndexError:
|
|
372
371
|
pass
|
|
373
372
|
finally:
|
|
@@ -375,7 +374,7 @@ class Parameters(UserDict):
|
|
|
375
374
|
if shortname in list(table_header.keys()):
|
|
376
375
|
try:
|
|
377
376
|
duplicate_columns.append(
|
|
378
|
-
int(line.split("=")[0].strip().split()[1])
|
|
377
|
+
int(line.split("=", 1)[0].strip().split()[1])
|
|
379
378
|
)
|
|
380
379
|
except IndexError as error:
|
|
381
380
|
logger.error(
|
|
@@ -452,6 +451,9 @@ class Parameter:
|
|
|
452
451
|
self.data = data
|
|
453
452
|
self.metadata = metadata
|
|
454
453
|
self.name = metadata["shortname"]
|
|
454
|
+
self.param = re.split(r"[,\s]", metadata["name"])[0]
|
|
455
|
+
self.sensor_number = 2 if metadata["name"][-1] == "2" else 1
|
|
456
|
+
self.unit = metadata["unit"]
|
|
455
457
|
self.type = "data" if self.data.dtype in ["float", "int"] else "meta"
|
|
456
458
|
self.parse_to_float()
|
|
457
459
|
self.update_span()
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlelogfile.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
{seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/processing_steps.py
RENAMED
|
File without changes
|
|
File without changes
|
|
File without changes
|