seabirdfilehandler 0.7.7__tar.gz → 0.8.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of seabirdfilehandler might be problematic. Click here for more details.

Files changed (16)
  1. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/PKG-INFO +1 -1
  2. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/pyproject.toml +1 -1
  3. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/file_collection.py +18 -6
  4. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/parameter.py +56 -54
  5. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/LICENSE +0 -0
  6. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/README.md +0 -0
  7. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/__init__.py +0 -0
  8. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlefile.py +0 -0
  9. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/bottlelogfile.py +0 -0
  10. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/cnvfile.py +0 -0
  11. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/datafiles.py +0 -0
  12. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/geomar_ctd_file_parser.py +0 -0
  13. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/hexfile.py +0 -0
  14. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/processing_steps.py +0 -0
  15. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/utils.py +0 -0
  16. {seabirdfilehandler-0.7.7 → seabirdfilehandler-0.8.0}/src/seabirdfilehandler/xmlfiles.py +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: seabirdfilehandler
3
- Version: 0.7.7
3
+ Version: 0.8.0
4
4
  Summary: Library of parsers to interact with SeaBird CTD files.
5
5
  License-File: LICENSE
6
6
  Keywords: CTD,parser,seabird,data
@@ -23,7 +23,7 @@ urls.documentation = "https://ctd-software.pages.io-warnemuende.de/seabirdfileha
23
23
  dynamic = []
24
24
  requires-python = ">=3.12"
25
25
  dependencies = ["pandas>=2.2.1", "xmltodict>=0.13.0"]
26
- version = "0.7.7"
26
+ version = "0.8.0"
27
27
 
28
28
  [project.optional-dependencies]
29
29
  test = ["pytest>=8.3.0", "parameterized>=0.9.0", "pre-commit>=3.6.2"]
@@ -417,13 +417,25 @@ class CnvCollection(FileCollection):
417
417
  A list of dictionaries that represent the data column information.
418
418
  """
419
419
  all_column_descriptions = [
420
- file.parameters.metadata for file in self.data
420
+ file.parameters.get_metadata() for file in self.data
421
421
  ]
422
- for info in all_column_descriptions:
422
+ for index, info in enumerate(all_column_descriptions):
423
423
  if all_column_descriptions[0] != info:
424
- raise AssertionError(
425
- "Acting on differently formed data files, aborting"
426
- )
424
+ for expected, real in zip(
425
+ all_column_descriptions[0].items(), info.items()
426
+ ):
427
+ # allow difference in latitude inside depth
428
+ if expected[0] == "depSM":
429
+ if real[0] != "depSM":
430
+ raise AssertionError(
431
+ f"Data files {self.data[0].path_to_file} and {self.data[index].path_to_file} differ in:\n{expected} and {real}"
432
+ )
433
+
434
+ elif expected != real:
435
+ raise AssertionError(
436
+ f"Data files {self.data[0].path_to_file} and {self.data[index].path_to_file} differ in:\n{expected} and {real}"
437
+ )
438
+
427
439
  return all_column_descriptions[0]
428
440
 
429
441
  def get_array(self) -> np.ndarray:
@@ -435,7 +447,7 @@ class CnvCollection(FileCollection):
435
447
  A numpy array, representing the data of all input files.
436
448
  """
437
449
  return np.concatenate(
438
- [file.parameters.create_full_ndarray() for file in self.data]
450
+ [file.parameters.get_full_data_array() for file in self.data]
439
451
  )
440
452
 
441
453
  def get_processing_steps(self) -> list:
@@ -36,18 +36,30 @@ class Parameters(UserDict):
36
36
  metadata: list,
37
37
  only_header: bool = False,
38
38
  ):
39
- self.raw_input_data = data
40
- self.raw_metadata = metadata
41
- self.differentiate_table_description()
42
- self.metadata, self.duplicate_columns = self.reading_data_header(
39
+ self.data = {}
40
+ self.differentiate_table_description(metadata)
41
+ parsed_metadata, self.duplicate_columns = self.reading_data_header(
43
42
  metadata
44
43
  )
45
44
  if not only_header:
46
- self.full_data_array = self.create_full_ndarray()
47
- self.data = self.create_parameter_instances()
45
+ self.full_data_array = self.create_full_ndarray(data)
46
+ self.create_parameter_instances(
47
+ self.full_data_array, parsed_metadata
48
+ )
49
+
50
+ def get_full_data_array(self) -> np.ndarray:
51
+ return np.array(
52
+ [parameter.data for parameter in self.data.values()], dtype=float
53
+ ).T
48
54
 
49
- def get_parameter_names(self) -> list[str]:
50
- return [parameter["name"] for parameter in self.metadata.values()]
55
+ def get_names(self) -> list[str]:
56
+ return [parameter.name for parameter in self.data.values()]
57
+
58
+ def get_metadata(self) -> dict[str, dict]:
59
+ return {
60
+ parameter.name: parameter.metadata
61
+ for parameter in self.data.values()
62
+ }
51
63
 
52
64
  def get_parameter_list(self) -> list[Parameter]:
53
65
  """ """
@@ -68,9 +80,6 @@ class Parameters(UserDict):
68
80
  A numpy array of the same shape as the cnv files data table
69
81
 
70
82
  """
71
- data_table = (
72
- self.raw_input_data if len(data_table) == 0 else data_table
73
- )
74
83
  n = 11
75
84
  row_list = []
76
85
  for line in data_table:
@@ -84,7 +93,8 @@ class Parameters(UserDict):
84
93
 
85
94
  def create_parameter_instances(
86
95
  self,
87
- metadata: dict[str, dict] = {},
96
+ array_data: np.ndarray,
97
+ metadata: dict[str, dict],
88
98
  ) -> dict[str, Parameter]:
89
99
  """
90
100
  Differentiates the individual parameter columns into separate parameter
@@ -101,30 +111,24 @@ class Parameters(UserDict):
101
111
  A dictionary of parameter instances
102
112
 
103
113
  """
104
- metadata = (
105
- self.metadata if len(list(metadata.keys())) == 0 else metadata
106
- )
107
114
  parameter_dict = {}
108
115
  list_of_metadata_shortnames = list(metadata.keys())
109
116
  # if column number and metadata number is different, we are propably
110
117
  # working with duplicate_columns and will drop the duplicates
111
- if self.full_data_array.shape[1] != len(list_of_metadata_shortnames):
112
- self.full_data_array = np.delete(
113
- self.full_data_array, self.duplicate_columns, 1
114
- )
115
- assert self.full_data_array.shape[1] == len(
116
- list_of_metadata_shortnames
117
- )
118
+ if array_data.shape[1] != len(list_of_metadata_shortnames):
119
+ array_data = np.delete(array_data, self.duplicate_columns, 1)
120
+ assert array_data.shape[1] == len(list_of_metadata_shortnames)
118
121
  # rewrite the column number in the metadata header
119
122
  self.data_table_stats["nquan"] = str(
120
123
  int(self.data_table_stats["nquan"])
121
124
  - len(self.duplicate_columns)
122
125
  )
123
- for i in range(self.full_data_array.shape[1]):
124
- column_data = self.full_data_array[:, i]
126
+ for i in range(array_data.shape[1]):
125
127
  key = list_of_metadata_shortnames[i]
126
- parameter_dict[key] = Parameter(
127
- data=column_data, metadata=metadata[key]
128
+ parameter_dict[key] = self.create_parameter(
129
+ data=array_data[:, i],
130
+ metadata=metadata[key],
131
+ name=key,
128
132
  )
129
133
  return parameter_dict
130
134
 
@@ -132,17 +136,25 @@ class Parameters(UserDict):
132
136
  """Recreates the data table descriptions, like column names and spans
133
137
  from the structured dictionaries these values were stored in."""
134
138
  new_table_info = []
135
- for key, value in self.data_table_stats.items():
136
- new_table_info.append(f"{key} = {value}\n")
137
- for index, (name, _) in enumerate(self.data_table_names_and_spans):
138
- new_table_info.append(f"name {index} = {name}\n")
139
- for index, (_, span) in enumerate(self.data_table_names_and_spans):
140
- new_table_info.append(f"span {index} = {span}\n")
139
+ # 'data table stats'
140
+ data_array = self.get_full_data_array()
141
+ new_table_info.append(f"nquan = {data_array.shape[1]}")
142
+ new_table_info.append(f"nvalues = {data_array.shape[0]}")
143
+ new_table_info.append(f"units = {self.data_table_stats['units']}\n")
144
+ # 'data tables names'
145
+ for index, metadata in enumerate(self.get_metadata().values()):
146
+ new_table_info.append(
147
+ f"name {index} = {metadata['shortname']}: {metadata['longinfo']}\n"
148
+ )
149
+ # 'data table spans'
150
+ for index, (minimum, maximum) in enumerate(self.get_spans()):
151
+ new_table_info.append(f"span {index} = {minimum}, {maximum}\n")
152
+ # 'data table misc'
141
153
  for key, value in self.data_table_misc.items():
142
154
  new_table_info.append(f"{key} = {value}\n")
143
155
  return new_table_info
144
156
 
145
- def differentiate_table_description(self):
157
+ def differentiate_table_description(self, metadata: list):
146
158
  """
147
159
  The original method that structures data table metadata.
148
160
 
@@ -153,7 +165,7 @@ class Parameters(UserDict):
153
165
  column_names = []
154
166
  column_value_spans = []
155
167
  post = []
156
- for line in self.raw_metadata:
168
+ for line in metadata:
157
169
  if line.startswith("name"):
158
170
  column_names.append(line.split("=", 1)[1].strip())
159
171
  elif line.startswith("span"):
@@ -188,31 +200,18 @@ class Parameters(UserDict):
188
200
  The new parameter
189
201
 
190
202
  """
191
- position_index = -1
192
- # add to parameter dict at given
203
+ # add to parameter dict at given position
193
204
  if position:
194
205
  new_dict = {}
195
- for index, (key, value) in enumerate(self.data.items()):
206
+ for key, value in self.data.items():
196
207
  new_dict[key] = value
197
208
  if key == position:
198
209
  new_dict[parameter.name] = parameter
199
- position_index = index + 1
200
210
  self.data = new_dict
201
211
 
202
212
  else:
203
213
  self.data[parameter.name] = parameter
204
214
 
205
- # update metadata dict
206
- self.metadata = {
207
- parameter.name: parameter.metadata
208
- for parameter in self.data.values()
209
- }
210
- # add to the data array if data
211
- if parameter.type == "data":
212
- self.full_data_array = np.insert(
213
- self.full_data_array, position_index, parameter.data, axis=1
214
- )
215
-
216
215
  def create_parameter(
217
216
  self,
218
217
  data: np.ndarray | int | float | str | None,
@@ -357,17 +356,17 @@ class Parameters(UserDict):
357
356
 
358
357
 
359
358
  """
360
- if len(header_info) == 0:
361
- header_info = self.raw_metadata
362
359
  table_header = {}
363
360
  duplicate_columns = []
364
361
  for line in header_info:
365
362
  if line.startswith("name"):
366
363
  header_meta_info = {}
367
364
  # get basic shortname and the full, non-differentiated info
368
- shortname = longinfo = line_info = line.split("=")[1].strip()
365
+ shortname = longinfo = line_info = line.split("=", 1)[
366
+ 1
367
+ ].strip()
369
368
  try:
370
- shortname, longinfo = line_info.split(":")
369
+ shortname, longinfo = line_info.split(":", 1)
371
370
  except IndexError:
372
371
  pass
373
372
  finally:
@@ -375,7 +374,7 @@ class Parameters(UserDict):
375
374
  if shortname in list(table_header.keys()):
376
375
  try:
377
376
  duplicate_columns.append(
378
- int(line.split("=")[0].strip().split()[1])
377
+ int(line.split("=", 1)[0].strip().split()[1])
379
378
  )
380
379
  except IndexError as error:
381
380
  logger.error(
@@ -452,6 +451,9 @@ class Parameter:
452
451
  self.data = data
453
452
  self.metadata = metadata
454
453
  self.name = metadata["shortname"]
454
+ self.param = re.split(r"[,\s]", metadata["name"])[0]
455
+ self.sensor_number = 2 if metadata["name"][-1] == "2" else 1
456
+ self.unit = metadata["unit"]
455
457
  self.type = "data" if self.data.dtype in ["float", "int"] else "meta"
456
458
  self.parse_to_float()
457
459
  self.update_span()