seabirdfilehandler 0.5.0__py3-none-any.whl → 0.5.2__py3-none-any.whl

This diff shows the changes between two publicly released versions of this package, as they appear in their respective public registries. It is provided for informational purposes only.

This release has been flagged as potentially problematic.


This version of seabirdfilehandler (0.5.2) might be problematic; see the associated advisory for more details.

@@ -5,3 +5,4 @@ from .cnvfile import *
5
5
  from .xmlfiles import *
6
6
  from .validation_modules import *
7
7
  from .file_collection import *
8
+ from .geomar_ctd_file_parser import *
@@ -4,10 +4,6 @@ import pandas as pd
4
4
  import numpy as np
5
5
  import logging
6
6
  from seabirdfilehandler import DataFile
7
- from seabirdfilehandler.dataframe_meta_accessor import (
8
- SeriesMetaAccessor, # noqa: F401
9
- DataFrameMetaAccessor, # noqa: F401
10
- )
11
7
 
12
8
  logger = logging.getLogger(__name__)
13
9
 
@@ -3,10 +3,6 @@ import re
3
3
  import logging
4
4
  import pandas as pd
5
5
  from seabirdfilehandler import DataFile
6
- from seabirdfilehandler.dataframe_meta_accessor import (
7
- SeriesMetaAccessor, # noqa: F401
8
- DataFrameMetaAccessor, # noqa: F401
9
- )
10
6
 
11
7
  logger = logging.getLogger(__name__)
12
8
 
@@ -46,23 +46,24 @@ class CnvFile(DataFile):
46
46
  def __init__(
47
47
  self,
48
48
  path_to_file: Path | str,
49
+ only_header: bool = False,
49
50
  create_dataframe: bool = False,
50
51
  absolute_time_calculation: bool = False,
51
52
  event_log_column: bool = False,
52
53
  coordinate_columns: bool = False,
53
54
  ):
54
- super().__init__(path_to_file)
55
+ super().__init__(path_to_file, only_header)
55
56
  self.validation_modules = self.obtaining_validation_modules()
56
57
  self.start_time = self.reading_start_time()
57
58
  self.parameters = Parameters(self.data, self.data_table_description)
58
59
  if create_dataframe:
59
60
  self.df = self.parameters.get_pandas_dataframe()
60
- if absolute_time_calculation:
61
- self.absolute_time_calculation()
62
- if event_log_column:
63
- self.add_station_and_event_column()
64
- if coordinate_columns:
65
- self.add_position_columns()
61
+ if absolute_time_calculation:
62
+ self.absolute_time_calculation()
63
+ if event_log_column:
64
+ self.add_station_and_event_column()
65
+ if coordinate_columns:
66
+ self.add_position_columns()
66
67
 
67
68
  def reading_start_time(
68
69
  self,
@@ -1,8 +1,19 @@
1
1
  from pathlib import Path
2
2
  import xmltodict
3
3
  import pandas as pd
4
+ import numpy as np
4
5
  import logging
5
6
 
7
+ logging.basicConfig(
8
+ level=logging.INFO,
9
+ format="%(asctime)s - %(name)s - [%(levelname)s] - %(message)s",
10
+ datefmt="%Y-%m-%d %H:%M:%S",
11
+ handlers=[
12
+ logging.FileHandler("filehandler.log"),
13
+ logging.StreamHandler(),
14
+ ],
15
+ )
16
+
6
17
  logger = logging.getLogger(__name__)
7
18
 
8
19
 
@@ -193,7 +204,7 @@ class DataFile:
193
204
 
194
205
  def to_csv(
195
206
  self,
196
- selected_columns: list | None = None,
207
+ data: pd.DataFrame | np.ndarray,
197
208
  with_header: bool = True,
198
209
  output_file_path: Path | str | None = None,
199
210
  output_file_name: str | None = None,
@@ -218,10 +229,6 @@ class DataFile:
218
229
  -------
219
230
 
220
231
  """
221
- selected_columns = (
222
- self.df.columns if selected_columns is None else selected_columns
223
- )
224
- df = self.df[selected_columns].reset_index(drop=True)
225
232
  new_file_path = self.define_output_path(
226
233
  output_file_path, output_file_name
227
234
  )
@@ -229,15 +236,15 @@ class DataFile:
229
236
  with open(new_file_path, "w") as file:
230
237
  for line in self.header:
231
238
  file.write(line)
232
- df.to_csv(new_file_path, index=False, mode="a")
239
+ if isinstance(data, pd.DataFrame):
240
+ data.to_csv(new_file_path, index=False, mode="a")
233
241
  else:
234
- df.to_csv(new_file_path, index=False, mode="w")
235
- logger.info(f"Wrote file {self.path_to_file} to {new_file_path}.")
242
+ np.savetxt(new_file_path, data, delimiter=",")
236
243
 
237
244
  def selecting_columns(
238
245
  self,
239
246
  list_of_columns: list | str,
240
- df: pd.DataFrame | None = None,
247
+ df: pd.DataFrame,
241
248
  ):
242
249
  """Alters the dataframe to only hold the given columns.
243
250
 
@@ -251,7 +258,6 @@ class DataFile:
251
258
  -------
252
259
 
253
260
  """
254
- df = self.df if df is None else df
255
261
  # ensure that the input is a list, so that isin() can do its job
256
262
  if isinstance(list_of_columns, str):
257
263
  list_of_columns = [list_of_columns]
@@ -74,7 +74,7 @@ class FileCollection(UserList):
74
74
  def collect_files(
75
75
  self,
76
76
  sorting_key: Callable | None = lambda file: int(
77
- file.stem.split("_")[4]
77
+ file.stem.split("_")[3]
78
78
  ),
79
79
  ):
80
80
  """ """
@@ -0,0 +1,80 @@
1
+ from pathlib import Path
2
+ import pandas as pd
3
+
4
+
5
+ class GEOMARCTDFile:
6
+ """
7
+ A parser to read .ctd files created by the GEOMAR processing software.
8
+
9
+ Goes through the file line by line and sorts the individual lines in
10
+ corresponding lists. That way, data and different types of metadata are
11
+ structured on a basic level.
12
+ In general, this parser is meant to stick close to the way the Seabird-
13
+ Parsers are written.
14
+ """
15
+
16
+ def __init__(
17
+ self,
18
+ path_to_file: Path | str,
19
+ only_header: bool = False,
20
+ create_dataframe: bool = True,
21
+ ):
22
+ self.path_to_file = Path(path_to_file)
23
+ self.only_header = only_header
24
+ self.raw_input = []
25
+ self.metadata = {}
26
+ self.history = []
27
+ self.comment = []
28
+ self.data_header = []
29
+ self.raw_data = []
30
+ self.read_file()
31
+ if create_dataframe:
32
+ self.create_dataframe()
33
+
34
+ def __str__(self) -> str:
35
+ return "/n".join(self.raw_data)
36
+
37
+ def __repr__(self) -> str:
38
+ return str(self.path_to_file.absolute())
39
+
40
+ def __eq__(self, other) -> bool:
41
+ return self.raw_data == other.raw_data
42
+
43
+ def read_file(self):
44
+ with open(self.path_to_file, "r") as file:
45
+ past_header = False
46
+ for line in file:
47
+ self.raw_input.append(line)
48
+ if line.startswith("History"):
49
+ self.history.append(
50
+ line.removeprefix("History = # GEOMAR").strip()
51
+ )
52
+ elif line.startswith("Comment"):
53
+ self.comment.append(
54
+ line.removeprefix("Comment =").strip()
55
+ )
56
+ elif line.startswith("Columns"):
57
+ self.data_header = [
58
+ column.removeprefix("Columns =").strip()
59
+ for column in line.split(":")
60
+ ]
61
+ past_header = True
62
+ if self.only_header:
63
+ break
64
+ else:
65
+ if not past_header:
66
+ try:
67
+ key, value = line.split("=")
68
+ except ValueError:
69
+ key = line
70
+ value = ""
71
+ self.metadata[key.strip()] = value.strip()
72
+ else:
73
+ self.raw_data.append(line)
74
+
75
+ def create_dataframe(self):
76
+ self.df = pd.DataFrame(
77
+ [row.split() for row in self.raw_data],
78
+ dtype=float,
79
+ columns=self.data_header,
80
+ )
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.3
2
2
  Name: seabirdfilehandler
3
- Version: 0.5.0
3
+ Version: 0.5.2
4
4
  Summary: Library of parsers to interact with SeaBird CTD files.
5
5
  Keywords: CTD,parser,seabird,data
6
6
  Author: Emil Michels
@@ -0,0 +1,15 @@
1
+ seabirdfilehandler/__init__.py,sha256=rS1NfxKVWgOntk5NL-ndZyDt3LHW_tKr_F3iW_QbYvg,230
2
+ seabirdfilehandler/bottlefile.py,sha256=nnfoDczPMG_ge40dT2rHNhifR7-NRgnZNFrfPM_9OSQ,5925
3
+ seabirdfilehandler/bottlelogfile.py,sha256=MtMmEebdAktO3mk6KbmJC7dfx9sRLbV5qqDQt2qtpJE,4310
4
+ seabirdfilehandler/cnvfile.py,sha256=LXpJcC3ukiD-2b5vy4aKESCbIvwV12TwQy1G6Y25_GE,9709
5
+ seabirdfilehandler/datafiles.py,sha256=lqENvdGSwRKT6PyNFN2etaWKMA-4OONG0x-up1W5ezo,8991
6
+ seabirdfilehandler/file_collection.py,sha256=b5iJaP4F34Vq7-FiJOlPvfS4IePGWsYx20XwWbZQw1A,6882
7
+ seabirdfilehandler/geomar_ctd_file_parser.py,sha256=4eCnkE0mvPKC8Dic8sXP4xpfwnk3K2MQcGFBf6loT8k,2655
8
+ seabirdfilehandler/parameter.py,sha256=UuwFzege94sqPt0kOjEqtMGGol4hjuFjj2_EH7o0pzA,14374
9
+ seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
10
+ seabirdfilehandler/validation_modules.py,sha256=eZ6x0giftUtlxnRMOnK_vCkgccdwUXPrDjajFa-E6n0,4698
11
+ seabirdfilehandler/xmlfiles.py,sha256=L_puQf8eg0ojv85AyEMID4jnwkOlV_fgZP3W5yeSUBY,4668
12
+ seabirdfilehandler-0.5.2.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
13
+ seabirdfilehandler-0.5.2.dist-info/METADATA,sha256=ER8rZi5Ei76THx_JdYv7JuXjNjIyErAzS_RMHd-Pspc,1289
14
+ seabirdfilehandler-0.5.2.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
15
+ seabirdfilehandler-0.5.2.dist-info/RECORD,,
@@ -1,184 +0,0 @@
1
- import pandas as pd
2
- import logging
3
- from pandas.api.extensions import register_series_accessor
4
- from pandas.api.extensions import register_dataframe_accessor
5
- import warnings
6
-
7
-
8
- logger = logging.getLogger(__name__)
9
-
10
-
11
- class MetadataHandler:
12
- """
13
- The base class for the pandas series and dataframe accessors.
14
- Offers a very basic metadata handling, by using a dictionary as metadata
15
- store. The accessors then allow to access this metadata store and
16
- corresponding methods by calling 'df.meta' or 'series.meta', respectively.
17
- Mainly targeted for usage with dataframes featuring data from CNV files,
18
- it for example allows the attachement of parameter metadata found in the
19
- CNV header to individual dataframe columns.
20
-
21
- This approach was chosen over others, like directly subclassing the pandas
22
- dataframe or series class, or a seperate metadata storage, due to its
23
- simplicity and ability to keep using the full powerfull pandas library
24
- without the need to implement each and every transformation. Of course,
25
- the 'attrs' attribute does offer a similar metadata storage. But at the
26
- time of writing this, it is still in a very experimental condition and does
27
- not propagate reliably.
28
- """
29
-
30
- def __init__(self, pandas_obj):
31
- self._obj = pandas_obj
32
- if not hasattr(self._obj, "_metadata_store"):
33
- with warnings.catch_warnings():
34
- warnings.simplefilter("ignore")
35
- self._obj._metadata_store = {}
36
-
37
- @property
38
- def metadata(self):
39
- return self._obj._metadata_store
40
-
41
- @metadata.setter
42
- def metadata(self, value):
43
- self._obj._metadata_store = value
44
-
45
- def get(self, key, default=None):
46
- return self._obj._metadata_store.get(key, default)
47
-
48
- def set(self, key, value):
49
- self._obj._metadata_store[key] = value
50
-
51
- def clear(self):
52
- self._obj._metadata_store.clear()
53
-
54
-
55
- @register_series_accessor("meta")
56
- class SeriesMetaAccessor(MetadataHandler):
57
- """
58
- Series implementation of the Metadata Accessor.
59
- Does not offer anything more than the base class at the moment.
60
- """
61
-
62
- def __init__(self, pandas_obj):
63
- super().__init__(pandas_obj)
64
-
65
-
66
- @register_dataframe_accessor("meta")
67
- class DataFrameMetaAccessor(MetadataHandler):
68
- """
69
- DataFrame implementation of the Metadata Accessor.
70
- Introduces another attribute, '_header_level_detail', that stores the
71
- currently displayed metadata as column names. Additionally offers methods
72
- to sync metadata between the dataframe and its series, and the handling of
73
- common operations, like renaming or the addition of new columns.
74
- """
75
-
76
- def __init__(self, pandas_obj):
77
- super().__init__(pandas_obj)
78
- if not hasattr(self._obj, "_header_level_detail"):
79
- self._obj._header_level_detail = "shortname"
80
- # Initialize DataFrame metadata
81
- self.aggregate_series_metadata()
82
-
83
- @property
84
- def header_detail(self):
85
- return self._obj._header_level_detail
86
-
87
- @header_detail.setter
88
- def header_detail(self, value):
89
- self._obj._header_level_detail = value
90
-
91
- @property
92
- def metadata(self):
93
- return self._obj._metadata_store
94
-
95
- @metadata.setter
96
- def metadata(self, value):
97
- meta_dict = {
98
- shortname: self.add_default_metadata(shortname, metainfo)
99
- for shortname, metainfo in value.items()
100
- }
101
- self._obj._metadata_store = meta_dict
102
- self.propagate_metadata_to_series()
103
-
104
- def aggregate_series_metadata(self):
105
- """Aggregate metadata from Series within the DataFrame."""
106
- for column in self._obj.columns:
107
- if isinstance(self._obj[column], pd.Series) and hasattr(
108
- self._obj[column], "meta"
109
- ):
110
- self.metadata[column] = self._obj[column].meta.metadata
111
-
112
- def propagate_metadata_to_series(self):
113
- """Propagate DataFrame-level metadata back to Series."""
114
- for column in self._obj.columns:
115
- if isinstance(self._obj[column], pd.Series) and hasattr(
116
- self._obj[column], "meta"
117
- ):
118
- for key, value in self.metadata.items():
119
- if key == column:
120
- try:
121
- self._obj[column].meta.metadata = value
122
- except TypeError:
123
- logger.error(f"{column}: {value}")
124
-
125
- def update_metadata_on_rename(self, rename_dict):
126
- """Update metadata when columns are renamed."""
127
- new_metadata = {}
128
- for old_name, new_name in rename_dict.items():
129
- for key, value in self.metadata.items():
130
- if key == old_name:
131
- new_metadata[new_name] = value
132
- self.metadata = new_metadata
133
- self.propagate_metadata_to_series()
134
-
135
- def rename(self, rename_key):
136
- """Rename the column names by using a metadata point."""
137
- rename_dict = {
138
- column: (
139
- self._obj[column].meta.get(rename_key)
140
- if rename_key in list(self._obj[column].meta.metadata.keys())
141
- else column
142
- )
143
- for column in self._obj.columns
144
- }
145
- self._obj.rename(columns=rename_dict, inplace=True)
146
- self.header_detail = rename_key
147
- self.update_metadata_on_rename(rename_dict)
148
-
149
- def add_column(
150
- self,
151
- name: str,
152
- data: pd.Series | list,
153
- location: int | None = None,
154
- metadata: dict = {},
155
- ):
156
- """Add a column and use or generate metadata for it."""
157
- location = len(self._obj.columns) if location is None else location
158
- self._obj.insert(
159
- loc=location,
160
- column=name,
161
- value=data,
162
- allow_duplicates=False,
163
- )
164
- self.metadata[name] = self.add_default_metadata(name, metadata)
165
- self.propagate_metadata_to_series()
166
-
167
- def add_default_metadata(
168
- self,
169
- name: str,
170
- metadata: dict = {},
171
- list_of_keys: list = [
172
- "shortname",
173
- "longinfo",
174
- "name",
175
- "metainfo",
176
- "unit",
177
- ],
178
- ) -> dict:
179
- """Fill up missing metadata points with a default value."""
180
- default = {}
181
- for key in list_of_keys:
182
- if key not in list(metadata.keys()):
183
- default[key] = name
184
- return {**metadata, **default}
@@ -1,23 +0,0 @@
1
- version: 1
2
- formatters:
3
- simple:
4
- format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
5
- handlers:
6
- console:
7
- class: logging.StreamHandler
8
- level: DEBUG
9
- formatter: simple
10
- stream: ext://sys.stdout
11
- file:
12
- class: logging.FileHandler
13
- filename: seabirdfiles.log
14
- level: DEBUG
15
- formatter: simple
16
- loggers:
17
- simpleExample:
18
- level: DEBUG
19
- handlers: [console]
20
- propagate: no
21
- root:
22
- level: DEBUG
23
- handlers: [console]
@@ -1,16 +0,0 @@
1
- seabirdfilehandler/__init__.py,sha256=5JTzYE3oRdrxkC9_etAnFQ1cy10PHtpmesdR6n5PoPQ,192
2
- seabirdfilehandler/bottlefile.py,sha256=QBUqtKhD-dUPbgc_sz8NOGEFFkAjL3g5r1oAsqQnUcQ,6063
3
- seabirdfilehandler/bottlelogfile.py,sha256=CjBeITQS27Ar80bfxguoAnVkYxI1ioAiVTnlvwevw7E,4448
4
- seabirdfilehandler/cnvfile.py,sha256=-mCuQX76uuWIETljem1DjzDbZ5eNIUucjoHejTQu_sU,9685
5
- seabirdfilehandler/datafiles.py,sha256=z27PZJBvEbl1aFXpDQ0QTM_yR5NTaNK-HC9_z3t7zuM,8991
6
- seabirdfilehandler/dataframe_meta_accessor.py,sha256=x4mSEN49us6Ezzjdt41fl5Ry8IJR09ORrZ1roOIJbyc,6439
7
- seabirdfilehandler/file_collection.py,sha256=qkEdlI-hcoyuOdmgTr8wdAr1mXXkVuKkF9J4j2-v3kY,6882
8
- seabirdfilehandler/logging.yaml,sha256=mXxbhJPio3OGaukTpc3rLGA8Ywq1DNqp0Vn5YCbH6jY,459
9
- seabirdfilehandler/parameter.py,sha256=UuwFzege94sqPt0kOjEqtMGGol4hjuFjj2_EH7o0pzA,14374
10
- seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
11
- seabirdfilehandler/validation_modules.py,sha256=eZ6x0giftUtlxnRMOnK_vCkgccdwUXPrDjajFa-E6n0,4698
12
- seabirdfilehandler/xmlfiles.py,sha256=L_puQf8eg0ojv85AyEMID4jnwkOlV_fgZP3W5yeSUBY,4668
13
- seabirdfilehandler-0.5.0.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
14
- seabirdfilehandler-0.5.0.dist-info/METADATA,sha256=jPHzHpxz9OY48QHifHT9crI9a5pxYae1aCw6jAhqtYM,1289
15
- seabirdfilehandler-0.5.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
16
- seabirdfilehandler-0.5.0.dist-info/RECORD,,