seabirdfilehandler 0.7.6 (tar.gz) → 0.8.0 (tar.gz)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

Files changed (16)
  1. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/PKG-INFO +13 -1
  2. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/pyproject.toml +16 -31
  3. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlefile.py +5 -3
  4. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlelogfile.py +4 -2
  5. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/cnvfile.py +6 -4
  6. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/datafiles.py +4 -3
  7. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/file_collection.py +27 -14
  8. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/geomar_ctd_file_parser.py +1 -0
  9. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/hexfile.py +1 -0
  10. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/parameter.py +64 -60
  11. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/processing_steps.py +3 -3
  12. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/xmlfiles.py +4 -3
  13. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/LICENSE +0 -0
  14. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/README.md +0 -0
  15. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/__init__.py +0 -0
  16. {seabirdfilehandler-0.7.6 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/utils.py +0 -0
PKG-INFO
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: seabirdfilehandler
-Version: 0.7.6
+Version: 0.8.0
 Summary: Library of parsers to interact with SeaBird CTD files.
 License-File: LICENSE
 Keywords: CTD,parser,seabird,data
@@ -15,7 +15,19 @@ Classifier: Topic :: Scientific/Engineering :: Oceanography
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
+Classifier: Programming Language :: Python :: 3.14
+Provides-Extra: docs
+Provides-Extra: test
+Requires-Dist: myst-parser (>=4.0.1) ; extra == "docs"
 Requires-Dist: pandas (>=2.2.1)
+Requires-Dist: parameterized (>=0.9.0) ; extra == "test"
+Requires-Dist: pre-commit (>=3.6.2) ; extra == "docs"
+Requires-Dist: pre-commit (>=3.6.2) ; extra == "test"
+Requires-Dist: pyment (>=0.3.3) ; extra == "docs"
+Requires-Dist: pytest (>=8.3.0) ; extra == "test"
+Requires-Dist: sphinx (>=8.2.3) ; extra == "docs"
+Requires-Dist: sphinx-autodoc-typehints (>=1.24.1) ; extra == "docs"
+Requires-Dist: sphinx-rtd-theme (>=1.3.0) ; extra == "docs"
 Requires-Dist: xmltodict (>=0.13.0)
 Project-URL: Documentation, https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler
 Project-URL: Homepage, https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler
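Because the development tooling now ships as standard extras rather than Poetry dependency groups, the published metadata advertises it and it can be installed straight from the registry, e.g. pip install "seabirdfilehandler[test]" or pip install "seabirdfilehandler[docs]".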

pyproject.toml
@@ -15,49 +15,35 @@ classifiers = [
     "Programming Language :: Python :: 3 :: Only",
     "Programming Language :: Python :: 3.12",
     "Programming Language :: Python :: 3.13",
+    "Programming Language :: Python :: 3.14",
 ]
 urls.homepage = "https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler"
 urls.repository = "https://git.io-warnemuende.de/CTD-Software/SeabirdFileHandler"
 urls.documentation = "https://ctd-software.pages.io-warnemuende.de/seabirdfilehandler"
 dynamic = []
-version = "0.7.6"
+requires-python = ">=3.12"
+dependencies = ["pandas>=2.2.1", "xmltodict>=0.13.0"]
+version = "0.8.0"
+
+[project.optional-dependencies]
+test = ["pytest>=8.3.0", "parameterized>=0.9.0", "pre-commit>=3.6.2"]
+docs = [
+    "pre-commit>=3.6.2",
+    "sphinx>=8.2.3",
+    "sphinx-rtd-theme>=1.3.0",
+    "sphinx-autodoc-typehints>=1.24.1",
+    "myst-parser>=4.0.1",
+    "pyment>=0.3.3",
+]

 [tool.poetry]

 [tool.poetry.requires-plugins]
 poetry-dynamic-versioning = { version = ">=1.0.0,<2.0.0", extras = ["plugin"] }

-[tool.poetry.dependencies]
-python = ">=3.12"
-pandas = ">=2.2.1"
-xmltodict = ">=0.13.0"
-
-[tool.poetry.group.dev.dependencies]
-pytest = ">=8.3.0"
-pytest-xdist = ">=3.6.1"
-pytest-cov = ">=6.0.0"
-parameterized = ">=0.9.0"
-sphinx = ">=8.2.3"
-sphinx-rtd-theme = ">=1.3.0"
-sphinx-autodoc-typehints = ">=1.24.1"
-pyment = ">=0.3.3"
-pylint = ">=3.0.2"
-pre-commit = ">=3.6.2"
-tomlkit = ">=0.13.2"
-myst-parser = "^4.0.1"
-
 [tool.pytest.ini_options]
 pythonpath = [".", "src", "src/seabirdfilehandler"]
-filterwarnings = [
-    "error",
-    "ignore::UserWarning",
-    'ignore::DeprecationWarning',
-    # note the use of single quote below to denote "raw" strings in TOML
-    'ignore: datetime.datetime.utcfromtimestamp\(\) is deprecated:DeprecationWarning',
-]
-
-[tool.coverage.run]
-omit = ["tests/*"]
+filterwarnings = ["error", "ignore::FutureWarning"]

 [tool.ruff]
 line-length = 79
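Two changes of note in this hunk beyond the version bump: the project metadata moves from Poetry-specific tables to standard PEP 621 fields (requires-python, dependencies, [project.optional-dependencies]), and the pared-down filterwarnings setting still escalates warnings to test failures via "error" but now exempts only FutureWarning, dropping the former UserWarning and DeprecationWarning exemptions along with the coverage configuration.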
@@ -65,7 +51,6 @@ line-length = 79
 [tool.ruff.lint]
 ignore = ["F403", "F821"]

-
 [tool.poetry-dynamic-versioning]
 enable = false


src/seabirdfilehandler/bottlefile.py
@@ -1,9 +1,11 @@
+import logging
+from datetime import datetime, time
 from pathlib import Path
 from typing import Union
-from datetime import datetime, time
-import pandas as pd
+
 import numpy as np
-import logging
+import pandas as pd
+
 from seabirdfilehandler import DataFile

 logger = logging.getLogger(__name__)
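This and the following import hunks are pure reordering into the conventional grouping (standard library, then third-party, then first-party, blank-line separated and alphabetized), matching the isort convention; no behavior changes.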

src/seabirdfilehandler/bottlelogfile.py
@@ -1,7 +1,9 @@
-from datetime import datetime
-import re
 import logging
+import re
+from datetime import datetime
+
 import pandas as pd
+
 from seabirdfilehandler import DataFile

 logger = logging.getLogger(__name__)

src/seabirdfilehandler/cnvfile.py
@@ -1,9 +1,11 @@
-from pathlib import Path
+import logging
 from datetime import datetime, timedelta
-import pandas as pd
+from pathlib import Path
+
 import numpy as np
-import logging
-from seabirdfilehandler import DataFile, Parameters, CnvProcessingSteps
+import pandas as pd
+
+from seabirdfilehandler import CnvProcessingSteps, DataFile, Parameters

 logger = logging.getLogger(__name__)

src/seabirdfilehandler/datafiles.py
@@ -1,8 +1,9 @@
+import logging
 from pathlib import Path
-import xmltodict
-import pandas as pd
+
 import numpy as np
-import logging
+import pandas as pd
+import xmltodict

 logger = logging.getLogger(__name__)

src/seabirdfilehandler/file_collection.py
@@ -1,15 +1,18 @@
 from __future__ import annotations
-from pathlib import Path
+
 import logging
+import warnings
 from collections import UserList
+from pathlib import Path
 from typing import Callable, Type
-import warnings
-import pandas as pd
+
 import numpy as np
+import pandas as pd
+
 from seabirdfilehandler import (
-    CnvFile,
     BottleFile,
     BottleLogFile,
+    CnvFile,
     DataFile,
     HexFile,
 )
@@ -307,10 +310,8 @@ class FileCollection(UserList):
         for parameter in df.columns:
             if parameter in ["datetime"]:
                 continue
-            try:
-                df[parameter] = df[parameter].astype("float")
-            finally:
-                continue
+            df[parameter] = df[parameter].astype("float")
+            continue
         return df

     def select_real_scan_data(self, df: pd.DataFrame) -> pd.DataFrame:
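The replaced try/finally was a real bug, not just style: by Python's semantics, a continue (like break or return) executed in a finally block discards any exception raised in the try body, so a column that failed the float cast slipped through unconverted and unreported. A standalone sketch of the swallowing behavior (illustration only, not code from this package):

results = []
for value in ["1.5", "not-a-number"]:
    try:
        results.append(float(value))  # raises ValueError on the second item
    finally:
        continue  # discards the in-flight ValueError; the loop silently moves on

print(results)  # [1.5] -- the bad value vanished without any error

After the fix, a value that cannot be cast raises immediately; the trailing continue is now redundant but harmless.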
@@ -416,13 +417,25 @@ class CnvCollection(FileCollection):
         A list of dictionaries that represent the data column information.
         """
         all_column_descriptions = [
-            file.parameters.metadata for file in self.data
+            file.parameters.get_metadata() for file in self.data
         ]
-        for info in all_column_descriptions:
+        for index, info in enumerate(all_column_descriptions):
             if all_column_descriptions[0] != info:
-                raise AssertionError(
-                    "Acting on differently formed data files, aborting"
-                )
+                for expected, real in zip(
+                    all_column_descriptions[0].items(), info.items()
+                ):
+                    # allow difference in latitude inside depth
+                    if expected[0] == "depSM":
+                        if real[0] != "depSM":
+                            raise AssertionError(
+                                f"Data files {self.data[0].path_to_file} and {self.data[index].path_to_file} differ in:\n{expected} and {real}"
+                            )
+
+                    elif expected != real:
+                        raise AssertionError(
+                            f"Data files {self.data[0].path_to_file} and {self.data[index].path_to_file} differ in:\n{expected} and {real}"
+                        )
+
         return all_column_descriptions[0]

     def get_array(self) -> np.ndarray:
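The previous all-or-nothing equality check is relaxed for one known case: per the inline comment, the depth column's description embeds the latitude used for the pressure-to-depth conversion (a Sea-Bird name line along the lines of "depSM: Depth [salt water, m], lat = 54.0"; exact format assumed here), which legitimately differs between stations. The loop therefore only demands that the column in the depSM slot is still named depSM, while every other column description must match exactly, and the error message now names the two offending files.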
@@ -434,7 +447,7 @@ class CnvCollection(FileCollection):
         A numpy array, representing the data of all input files.
         """
         return np.concatenate(
-            [file.parameters.create_full_ndarray() for file in self.data]
+            [file.parameters.get_full_data_array() for file in self.data]
         )

     def get_processing_steps(self) -> list:
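At the collection level the change is just the rename: get_array() still concatenates one 2-D array per file along the scan axis, which requires identical column layouts across files, exactly what get_column_descriptions() above enforces.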

src/seabirdfilehandler/geomar_ctd_file_parser.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+
 import pandas as pd


src/seabirdfilehandler/hexfile.py
@@ -1,4 +1,5 @@
 from pathlib import Path
+
 from seabirdfilehandler import DataFile, XMLCONFile


src/seabirdfilehandler/parameter.py
@@ -1,10 +1,12 @@
 from __future__ import annotations
-from typing import Tuple
+
+import logging
 import re
 from collections import UserDict
+from typing import Tuple
+
 import numpy as np
 import pandas as pd
-import logging

 logger = logging.getLogger(__name__)

@@ -34,18 +36,30 @@ class Parameters(UserDict):
         metadata: list,
         only_header: bool = False,
     ):
-        self.raw_input_data = data
-        self.raw_metadata = metadata
-        self.differentiate_table_description()
-        self.metadata, self.duplicate_columns = self.reading_data_header(
+        self.data = {}
+        self.differentiate_table_description(metadata)
+        parsed_metadata, self.duplicate_columns = self.reading_data_header(
             metadata
         )
         if not only_header:
-            self.full_data_array = self.create_full_ndarray()
-            self.data = self.create_parameter_instances()
+            self.full_data_array = self.create_full_ndarray(data)
+            self.create_parameter_instances(
+                self.full_data_array, parsed_metadata
+            )
+
+    def get_full_data_array(self) -> np.ndarray:
+        return np.array(
+            [parameter.data for parameter in self.data.values()], dtype=float
+        ).T

-    def get_parameter_names(self) -> list[str]:
-        return [parameter["name"] for parameter in self.metadata.values()]
+    def get_names(self) -> list[str]:
+        return [parameter.name for parameter in self.data.values()]
+
+    def get_metadata(self) -> dict[str, dict]:
+        return {
+            parameter.name: parameter.metadata
+            for parameter in self.data.values()
+        }

     def get_parameter_list(self) -> list[Parameter]:
         """ """
@@ -66,9 +80,6 @@ class Parameters(UserDict):
         A numpy array of the same shape as the cnv files data table

         """
-        data_table = (
-            self.raw_input_data if len(data_table) == 0 else data_table
-        )
         n = 11
         row_list = []
         for line in data_table:
@@ -82,7 +93,8 @@ class Parameters(UserDict):

     def create_parameter_instances(
         self,
-        metadata: dict[str, dict] = {},
+        array_data: np.ndarray,
+        metadata: dict[str, dict],
     ) -> dict[str, Parameter]:
         """
         Differentiates the individual parameter columns into separate parameter
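Making both arguments required also removes the metadata: dict[str, dict] = {} mutable default argument, a classic Python pitfall, together with the fallback branches (in create_full_ndarray above and in this method's body below) that reached back into instance state when the argument was empty.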
@@ -99,30 +111,24 @@ class Parameters(UserDict):
         A dictionary of parameter instances

         """
-        metadata = (
-            self.metadata if len(list(metadata.keys())) == 0 else metadata
-        )
         parameter_dict = {}
         list_of_metadata_shortnames = list(metadata.keys())
         # if column number and metadata number is different, we are propably
         # working with duplicate_columns and will drop the duplicates
-        if self.full_data_array.shape[1] != len(list_of_metadata_shortnames):
-            self.full_data_array = np.delete(
-                self.full_data_array, self.duplicate_columns, 1
-            )
-            assert self.full_data_array.shape[1] == len(
-                list_of_metadata_shortnames
-            )
+        if array_data.shape[1] != len(list_of_metadata_shortnames):
+            array_data = np.delete(array_data, self.duplicate_columns, 1)
+            assert array_data.shape[1] == len(list_of_metadata_shortnames)
             # rewrite the column number in the metadata header
             self.data_table_stats["nquan"] = str(
                 int(self.data_table_stats["nquan"])
                 - len(self.duplicate_columns)
             )
-        for i in range(self.full_data_array.shape[1]):
-            column_data = self.full_data_array[:, i]
+        for i in range(array_data.shape[1]):
             key = list_of_metadata_shortnames[i]
-            parameter_dict[key] = Parameter(
-                data=column_data, metadata=metadata[key]
+            parameter_dict[key] = self.create_parameter(
+                data=array_data[:, i],
+                metadata=metadata[key],
+                name=key,
             )
         return parameter_dict

@@ -130,17 +136,25 @@ class Parameters(UserDict):
         """Recreates the data table descriptions, like column names and spans
         from the structured dictionaries these values were stored in."""
         new_table_info = []
-        for key, value in self.data_table_stats.items():
-            new_table_info.append(f"{key} = {value}\n")
-        for index, (name, _) in enumerate(self.data_table_names_and_spans):
-            new_table_info.append(f"name {index} = {name}\n")
-        for index, (_, span) in enumerate(self.data_table_names_and_spans):
-            new_table_info.append(f"span {index} = {span}\n")
+        # 'data table stats'
+        data_array = self.get_full_data_array()
+        new_table_info.append(f"nquan = {data_array.shape[1]}")
+        new_table_info.append(f"nvalues = {data_array.shape[0]}")
+        new_table_info.append(f"units = {self.data_table_stats['units']}\n")
+        # 'data tables names'
+        for index, metadata in enumerate(self.get_metadata().values()):
+            new_table_info.append(
+                f"name {index} = {metadata['shortname']}: {metadata['longinfo']}\n"
+            )
+        # 'data table spans'
+        for index, (minimum, maximum) in enumerate(self.get_spans()):
+            new_table_info.append(f"span {index} = {minimum}, {maximum}\n")
+        # 'data table misc'
         for key, value in self.data_table_misc.items():
             new_table_info.append(f"{key} = {value}\n")
         return new_table_info

-    def differentiate_table_description(self):
+    def differentiate_table_description(self, metadata: list):
         """
         The original method that structures data table metadata.

@@ -151,12 +165,12 @@
         column_names = []
         column_value_spans = []
         post = []
-        for line in self.raw_metadata:
+        for line in metadata:
             if line.startswith("name"):
-                column_names.append(line.split("=")[1].strip())
+                column_names.append(line.split("=", 1)[1].strip())
             elif line.startswith("span"):
                 past_spans = True
-                column_value_spans.append(line.split("=")[1].strip())
+                column_value_spans.append(line.split("=", 1)[1].strip())
             else:
                 if not past_spans:
                     pre.append(line)
@@ -164,7 +178,7 @@
                     post.append(line)
         assert len(column_names) == len(column_value_spans)
         self.data_table_stats = {
-            line.split("=")[0].strip(): line.split("=")[1].strip()
+            line.split("=")[0].strip(): line.split("=", 1)[1].strip()
             for line in pre
         }
         self.data_table_names_and_spans = [
@@ -172,7 +186,7 @@
             for name, span in zip(column_names, column_value_spans)
         ]
         self.data_table_misc = {
-            line.split("=")[0].strip(): line.split("=")[1].strip()
+            line.split("=")[0].strip(): line.split("=", 1)[1].strip()
             for line in post
         }

@@ -186,31 +200,18 @@ class Parameters(UserDict):
         The new parameter

         """
-        position_index = -1
-        # add to parameter dict at given
+        # add to parameter dict at given position
         if position:
             new_dict = {}
-            for index, (key, value) in enumerate(self.data.items()):
+            for key, value in self.data.items():
                 new_dict[key] = value
                 if key == position:
                     new_dict[parameter.name] = parameter
-                    position_index = index + 1
             self.data = new_dict

         else:
             self.data[parameter.name] = parameter

-        # update metadata dict
-        self.metadata = {
-            parameter.name: parameter.metadata
-            for parameter in self.data.values()
-        }
-        # add to the data array if data
-        if parameter.type == "data":
-            self.full_data_array = np.insert(
-                self.full_data_array, position_index, parameter.data, axis=1
-            )
-
     def create_parameter(
         self,
         data: np.ndarray | int | float | str | None,
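With the metadata dict and full data array no longer cached on the instance, add_parameter shrinks to a pure dict splice: get_metadata() and get_full_data_array() pick up the inserted column automatically on their next call, so the three-way synchronization (data dict, metadata dict, ndarray) and the position_index bookkeeping disappear.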
@@ -355,17 +356,17 @@ class Parameters(UserDict):


         """
-        if len(header_info) == 0:
-            header_info = self.raw_metadata
         table_header = {}
         duplicate_columns = []
         for line in header_info:
             if line.startswith("name"):
                 header_meta_info = {}
                 # get basic shortname and the full, non-differentiated info
-                shortname = longinfo = line_info = line.split("=")[1].strip()
+                shortname = longinfo = line_info = line.split("=", 1)[
+                    1
+                ].strip()
                 try:
-                    shortname, longinfo = line_info.split(":")
+                    shortname, longinfo = line_info.split(":", 1)
                 except IndexError:
                     pass
                 finally:
@@ -373,7 +374,7 @@
                 if shortname in list(table_header.keys()):
                     try:
                         duplicate_columns.append(
-                            int(line.split("=")[0].strip().split()[1])
+                            int(line.split("=", 1)[0].strip().split()[1])
                         )
                     except IndexError as error:
                         logger.error(
@@ -450,6 +451,9 @@ class Parameter:
         self.data = data
         self.metadata = metadata
         self.name = metadata["shortname"]
+        self.param = re.split(r"[,\s]", metadata["name"])[0]
+        self.sensor_number = 2 if metadata["name"][-1] == "2" else 1
+        self.unit = metadata["unit"]
         self.type = "data" if self.data.dtype in ["float", "int"] else "meta"
         self.parse_to_float()
         self.update_span()
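A worked example of the three new attributes, assuming a metadata dict shaped like the ones reading_data_header builds (field values are illustrative):

import re

metadata = {"shortname": "t190C", "name": "Temperature, 2", "unit": "ITS-90, deg C"}
param = re.split(r"[,\s]", metadata["name"])[0]          # 'Temperature'
sensor_number = 2 if metadata["name"][-1] == "2" else 1  # 2, i.e. the secondary sensor
unit = metadata["unit"]                                  # 'ITS-90, deg C'

For a primary sensor the name carries no suffix (e.g. "Temperature"), so sensor_number falls back to 1.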

src/seabirdfilehandler/processing_steps.py
@@ -1,6 +1,7 @@
 from __future__ import annotations
-from collections import UserList
+
 import copy
+from collections import UserList


 class CnvProcessingSteps(UserList):
@@ -149,8 +150,7 @@ class CnvProcessingSteps(UserList):
                     pass
                 else:
                     dictionary[key.strip()] = value.strip()
-        finally:
-            return dictionary
+        return dictionary

     def get_step(self, step: str) -> ProcessingStep | None:
         """

src/seabirdfilehandler/xmlfiles.py
@@ -1,7 +1,8 @@
-from pathlib import Path
-from collections import UserDict
-import xml.etree.ElementTree as ET
 import json
+import xml.etree.ElementTree as ET
+from collections import UserDict
+from pathlib import Path
+
 import xmltodict

 from seabirdfilehandler.utils import UnexpectedFileFormat