seabirdfilehandler 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of seabirdfilehandler might be problematic. Click here for more details.
- seabirdfilehandler/bottlefile.py +0 -4
- seabirdfilehandler/bottlelogfile.py +0 -4
- seabirdfilehandler/cnvfile.py +8 -7
- seabirdfilehandler/datafiles.py +16 -10
- seabirdfilehandler/file_collection.py +1 -1
- {seabirdfilehandler-0.5.0.dist-info → seabirdfilehandler-0.5.1.dist-info}/METADATA +1 -1
- seabirdfilehandler-0.5.1.dist-info/RECORD +14 -0
- seabirdfilehandler/dataframe_meta_accessor.py +0 -184
- seabirdfilehandler/logging.yaml +0 -23
- seabirdfilehandler-0.5.0.dist-info/RECORD +0 -16
- {seabirdfilehandler-0.5.0.dist-info → seabirdfilehandler-0.5.1.dist-info}/LICENSE +0 -0
- {seabirdfilehandler-0.5.0.dist-info → seabirdfilehandler-0.5.1.dist-info}/WHEEL +0 -0
seabirdfilehandler/bottlefile.py
CHANGED
|
@@ -4,10 +4,6 @@ import pandas as pd
|
|
|
4
4
|
import numpy as np
|
|
5
5
|
import logging
|
|
6
6
|
from seabirdfilehandler import DataFile
|
|
7
|
-
from seabirdfilehandler.dataframe_meta_accessor import (
|
|
8
|
-
SeriesMetaAccessor, # noqa: F401
|
|
9
|
-
DataFrameMetaAccessor, # noqa: F401
|
|
10
|
-
)
|
|
11
7
|
|
|
12
8
|
logger = logging.getLogger(__name__)
|
|
13
9
|
|
|
@@ -3,10 +3,6 @@ import re
|
|
|
3
3
|
import logging
|
|
4
4
|
import pandas as pd
|
|
5
5
|
from seabirdfilehandler import DataFile
|
|
6
|
-
from seabirdfilehandler.dataframe_meta_accessor import (
|
|
7
|
-
SeriesMetaAccessor, # noqa: F401
|
|
8
|
-
DataFrameMetaAccessor, # noqa: F401
|
|
9
|
-
)
|
|
10
6
|
|
|
11
7
|
logger = logging.getLogger(__name__)
|
|
12
8
|
|
seabirdfilehandler/cnvfile.py
CHANGED
|
@@ -46,23 +46,24 @@ class CnvFile(DataFile):
|
|
|
46
46
|
def __init__(
|
|
47
47
|
self,
|
|
48
48
|
path_to_file: Path | str,
|
|
49
|
+
only_header: bool = False,
|
|
49
50
|
create_dataframe: bool = False,
|
|
50
51
|
absolute_time_calculation: bool = False,
|
|
51
52
|
event_log_column: bool = False,
|
|
52
53
|
coordinate_columns: bool = False,
|
|
53
54
|
):
|
|
54
|
-
super().__init__(path_to_file)
|
|
55
|
+
super().__init__(path_to_file, only_header)
|
|
55
56
|
self.validation_modules = self.obtaining_validation_modules()
|
|
56
57
|
self.start_time = self.reading_start_time()
|
|
57
58
|
self.parameters = Parameters(self.data, self.data_table_description)
|
|
58
59
|
if create_dataframe:
|
|
59
60
|
self.df = self.parameters.get_pandas_dataframe()
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
61
|
+
if absolute_time_calculation:
|
|
62
|
+
self.absolute_time_calculation()
|
|
63
|
+
if event_log_column:
|
|
64
|
+
self.add_station_and_event_column()
|
|
65
|
+
if coordinate_columns:
|
|
66
|
+
self.add_position_columns()
|
|
66
67
|
|
|
67
68
|
def reading_start_time(
|
|
68
69
|
self,
|
seabirdfilehandler/datafiles.py
CHANGED
|
@@ -1,8 +1,19 @@
|
|
|
1
1
|
from pathlib import Path
|
|
2
2
|
import xmltodict
|
|
3
3
|
import pandas as pd
|
|
4
|
+
import numpy as np
|
|
4
5
|
import logging
|
|
5
6
|
|
|
7
|
+
logging.basicConfig(
|
|
8
|
+
level=logging.INFO,
|
|
9
|
+
format="%(asctime)s - %(name)s - [%(levelname)s] - %(message)s",
|
|
10
|
+
datefmt="%Y-%m-%d %H:%M:%S",
|
|
11
|
+
handlers=[
|
|
12
|
+
logging.FileHandler("filehandler.log"),
|
|
13
|
+
logging.StreamHandler(),
|
|
14
|
+
],
|
|
15
|
+
)
|
|
16
|
+
|
|
6
17
|
logger = logging.getLogger(__name__)
|
|
7
18
|
|
|
8
19
|
|
|
@@ -193,7 +204,7 @@ class DataFile:
|
|
|
193
204
|
|
|
194
205
|
def to_csv(
|
|
195
206
|
self,
|
|
196
|
-
|
|
207
|
+
data: pd.DataFrame | np.ndarray,
|
|
197
208
|
with_header: bool = True,
|
|
198
209
|
output_file_path: Path | str | None = None,
|
|
199
210
|
output_file_name: str | None = None,
|
|
@@ -218,10 +229,6 @@ class DataFile:
|
|
|
218
229
|
-------
|
|
219
230
|
|
|
220
231
|
"""
|
|
221
|
-
selected_columns = (
|
|
222
|
-
self.df.columns if selected_columns is None else selected_columns
|
|
223
|
-
)
|
|
224
|
-
df = self.df[selected_columns].reset_index(drop=True)
|
|
225
232
|
new_file_path = self.define_output_path(
|
|
226
233
|
output_file_path, output_file_name
|
|
227
234
|
)
|
|
@@ -229,15 +236,15 @@ class DataFile:
|
|
|
229
236
|
with open(new_file_path, "w") as file:
|
|
230
237
|
for line in self.header:
|
|
231
238
|
file.write(line)
|
|
232
|
-
|
|
239
|
+
if isinstance(data, pd.DataFrame):
|
|
240
|
+
data.to_csv(new_file_path, index=False, mode="a")
|
|
233
241
|
else:
|
|
234
|
-
|
|
235
|
-
logger.info(f"Wrote file {self.path_to_file} to {new_file_path}.")
|
|
242
|
+
np.savetxt(new_file_path, data, delimiter=",")
|
|
236
243
|
|
|
237
244
|
def selecting_columns(
|
|
238
245
|
self,
|
|
239
246
|
list_of_columns: list | str,
|
|
240
|
-
df: pd.DataFrame
|
|
247
|
+
df: pd.DataFrame,
|
|
241
248
|
):
|
|
242
249
|
"""Alters the dataframe to only hold the given columns.
|
|
243
250
|
|
|
@@ -251,7 +258,6 @@ class DataFile:
|
|
|
251
258
|
-------
|
|
252
259
|
|
|
253
260
|
"""
|
|
254
|
-
df = self.df if df is None else df
|
|
255
261
|
# ensure that the input is a list, so that isin() can do its job
|
|
256
262
|
if isinstance(list_of_columns, str):
|
|
257
263
|
list_of_columns = [list_of_columns]
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
seabirdfilehandler/__init__.py,sha256=5JTzYE3oRdrxkC9_etAnFQ1cy10PHtpmesdR6n5PoPQ,192
|
|
2
|
+
seabirdfilehandler/bottlefile.py,sha256=nnfoDczPMG_ge40dT2rHNhifR7-NRgnZNFrfPM_9OSQ,5925
|
|
3
|
+
seabirdfilehandler/bottlelogfile.py,sha256=MtMmEebdAktO3mk6KbmJC7dfx9sRLbV5qqDQt2qtpJE,4310
|
|
4
|
+
seabirdfilehandler/cnvfile.py,sha256=LXpJcC3ukiD-2b5vy4aKESCbIvwV12TwQy1G6Y25_GE,9709
|
|
5
|
+
seabirdfilehandler/datafiles.py,sha256=lqENvdGSwRKT6PyNFN2etaWKMA-4OONG0x-up1W5ezo,8991
|
|
6
|
+
seabirdfilehandler/file_collection.py,sha256=b5iJaP4F34Vq7-FiJOlPvfS4IePGWsYx20XwWbZQw1A,6882
|
|
7
|
+
seabirdfilehandler/parameter.py,sha256=UuwFzege94sqPt0kOjEqtMGGol4hjuFjj2_EH7o0pzA,14374
|
|
8
|
+
seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
|
|
9
|
+
seabirdfilehandler/validation_modules.py,sha256=eZ6x0giftUtlxnRMOnK_vCkgccdwUXPrDjajFa-E6n0,4698
|
|
10
|
+
seabirdfilehandler/xmlfiles.py,sha256=L_puQf8eg0ojv85AyEMID4jnwkOlV_fgZP3W5yeSUBY,4668
|
|
11
|
+
seabirdfilehandler-0.5.1.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
|
|
12
|
+
seabirdfilehandler-0.5.1.dist-info/METADATA,sha256=2VrJmgeRr-Par2zU5A--xDS5r_7VsKzi-HLi8SlPUX4,1289
|
|
13
|
+
seabirdfilehandler-0.5.1.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
14
|
+
seabirdfilehandler-0.5.1.dist-info/RECORD,,
|
|
@@ -1,184 +0,0 @@
|
|
|
1
|
-
import pandas as pd
|
|
2
|
-
import logging
|
|
3
|
-
from pandas.api.extensions import register_series_accessor
|
|
4
|
-
from pandas.api.extensions import register_dataframe_accessor
|
|
5
|
-
import warnings
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
logger = logging.getLogger(__name__)
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
class MetadataHandler:
|
|
12
|
-
"""
|
|
13
|
-
The base class for the pandas series and dataframe accessors.
|
|
14
|
-
Offers a very basic metadata handling, by using a dictionary as metadata
|
|
15
|
-
store. The accessors then allow to access this metadata store and
|
|
16
|
-
corresponding methods by calling 'df.meta' or 'series.meta', respectively.
|
|
17
|
-
Mainly targeted for usage with dataframes featuring data from CNV files,
|
|
18
|
-
it for example allows the attachment of parameter metadata found in the
|
|
19
|
-
CNV header to individual dataframe columns.
|
|
20
|
-
|
|
21
|
-
This approach was chosen over others, like directly subclassing the pandas
|
|
22
|
-
dataframe or series class, or a separate metadata storage, due to its
|
|
23
|
-
simplicity and ability to keep using the full powerful pandas library
|
|
24
|
-
without the need to implement each and every transformation. Of course,
|
|
25
|
-
the 'attrs' attribute does offer a similar metadata storage. But at the
|
|
26
|
-
time of writing this, it is still in a very experimental condition and does
|
|
27
|
-
not propagate reliably.
|
|
28
|
-
"""
|
|
29
|
-
|
|
30
|
-
def __init__(self, pandas_obj):
|
|
31
|
-
self._obj = pandas_obj
|
|
32
|
-
if not hasattr(self._obj, "_metadata_store"):
|
|
33
|
-
with warnings.catch_warnings():
|
|
34
|
-
warnings.simplefilter("ignore")
|
|
35
|
-
self._obj._metadata_store = {}
|
|
36
|
-
|
|
37
|
-
@property
|
|
38
|
-
def metadata(self):
|
|
39
|
-
return self._obj._metadata_store
|
|
40
|
-
|
|
41
|
-
@metadata.setter
|
|
42
|
-
def metadata(self, value):
|
|
43
|
-
self._obj._metadata_store = value
|
|
44
|
-
|
|
45
|
-
def get(self, key, default=None):
|
|
46
|
-
return self._obj._metadata_store.get(key, default)
|
|
47
|
-
|
|
48
|
-
def set(self, key, value):
|
|
49
|
-
self._obj._metadata_store[key] = value
|
|
50
|
-
|
|
51
|
-
def clear(self):
|
|
52
|
-
self._obj._metadata_store.clear()
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
@register_series_accessor("meta")
|
|
56
|
-
class SeriesMetaAccessor(MetadataHandler):
|
|
57
|
-
"""
|
|
58
|
-
Series implementation of the Metadata Accessor.
|
|
59
|
-
Does not offer anything more than the base class at the moment.
|
|
60
|
-
"""
|
|
61
|
-
|
|
62
|
-
def __init__(self, pandas_obj):
|
|
63
|
-
super().__init__(pandas_obj)
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
@register_dataframe_accessor("meta")
|
|
67
|
-
class DataFrameMetaAccessor(MetadataHandler):
|
|
68
|
-
"""
|
|
69
|
-
DataFrame implementation of the Metadata Accessor.
|
|
70
|
-
Introduces another attribute, '_header_level_detail', that stores the
|
|
71
|
-
currently displayed metadata as column names. Additionally offers methods
|
|
72
|
-
to sync metadata between the dataframe and its series, and the handling of
|
|
73
|
-
common operations, like renaming or the addition of new columns.
|
|
74
|
-
"""
|
|
75
|
-
|
|
76
|
-
def __init__(self, pandas_obj):
|
|
77
|
-
super().__init__(pandas_obj)
|
|
78
|
-
if not hasattr(self._obj, "_header_level_detail"):
|
|
79
|
-
self._obj._header_level_detail = "shortname"
|
|
80
|
-
# Initialize DataFrame metadata
|
|
81
|
-
self.aggregate_series_metadata()
|
|
82
|
-
|
|
83
|
-
@property
|
|
84
|
-
def header_detail(self):
|
|
85
|
-
return self._obj._header_level_detail
|
|
86
|
-
|
|
87
|
-
@header_detail.setter
|
|
88
|
-
def header_detail(self, value):
|
|
89
|
-
self._obj._header_level_detail = value
|
|
90
|
-
|
|
91
|
-
@property
|
|
92
|
-
def metadata(self):
|
|
93
|
-
return self._obj._metadata_store
|
|
94
|
-
|
|
95
|
-
@metadata.setter
|
|
96
|
-
def metadata(self, value):
|
|
97
|
-
meta_dict = {
|
|
98
|
-
shortname: self.add_default_metadata(shortname, metainfo)
|
|
99
|
-
for shortname, metainfo in value.items()
|
|
100
|
-
}
|
|
101
|
-
self._obj._metadata_store = meta_dict
|
|
102
|
-
self.propagate_metadata_to_series()
|
|
103
|
-
|
|
104
|
-
def aggregate_series_metadata(self):
|
|
105
|
-
"""Aggregate metadata from Series within the DataFrame."""
|
|
106
|
-
for column in self._obj.columns:
|
|
107
|
-
if isinstance(self._obj[column], pd.Series) and hasattr(
|
|
108
|
-
self._obj[column], "meta"
|
|
109
|
-
):
|
|
110
|
-
self.metadata[column] = self._obj[column].meta.metadata
|
|
111
|
-
|
|
112
|
-
def propagate_metadata_to_series(self):
|
|
113
|
-
"""Propagate DataFrame-level metadata back to Series."""
|
|
114
|
-
for column in self._obj.columns:
|
|
115
|
-
if isinstance(self._obj[column], pd.Series) and hasattr(
|
|
116
|
-
self._obj[column], "meta"
|
|
117
|
-
):
|
|
118
|
-
for key, value in self.metadata.items():
|
|
119
|
-
if key == column:
|
|
120
|
-
try:
|
|
121
|
-
self._obj[column].meta.metadata = value
|
|
122
|
-
except TypeError:
|
|
123
|
-
logger.error(f"{column}: {value}")
|
|
124
|
-
|
|
125
|
-
def update_metadata_on_rename(self, rename_dict):
|
|
126
|
-
"""Update metadata when columns are renamed."""
|
|
127
|
-
new_metadata = {}
|
|
128
|
-
for old_name, new_name in rename_dict.items():
|
|
129
|
-
for key, value in self.metadata.items():
|
|
130
|
-
if key == old_name:
|
|
131
|
-
new_metadata[new_name] = value
|
|
132
|
-
self.metadata = new_metadata
|
|
133
|
-
self.propagate_metadata_to_series()
|
|
134
|
-
|
|
135
|
-
def rename(self, rename_key):
|
|
136
|
-
"""Rename the column names by using a metadata point."""
|
|
137
|
-
rename_dict = {
|
|
138
|
-
column: (
|
|
139
|
-
self._obj[column].meta.get(rename_key)
|
|
140
|
-
if rename_key in list(self._obj[column].meta.metadata.keys())
|
|
141
|
-
else column
|
|
142
|
-
)
|
|
143
|
-
for column in self._obj.columns
|
|
144
|
-
}
|
|
145
|
-
self._obj.rename(columns=rename_dict, inplace=True)
|
|
146
|
-
self.header_detail = rename_key
|
|
147
|
-
self.update_metadata_on_rename(rename_dict)
|
|
148
|
-
|
|
149
|
-
def add_column(
|
|
150
|
-
self,
|
|
151
|
-
name: str,
|
|
152
|
-
data: pd.Series | list,
|
|
153
|
-
location: int | None = None,
|
|
154
|
-
metadata: dict = {},
|
|
155
|
-
):
|
|
156
|
-
"""Add a column and use or generate metadata for it."""
|
|
157
|
-
location = len(self._obj.columns) if location is None else location
|
|
158
|
-
self._obj.insert(
|
|
159
|
-
loc=location,
|
|
160
|
-
column=name,
|
|
161
|
-
value=data,
|
|
162
|
-
allow_duplicates=False,
|
|
163
|
-
)
|
|
164
|
-
self.metadata[name] = self.add_default_metadata(name, metadata)
|
|
165
|
-
self.propagate_metadata_to_series()
|
|
166
|
-
|
|
167
|
-
def add_default_metadata(
|
|
168
|
-
self,
|
|
169
|
-
name: str,
|
|
170
|
-
metadata: dict = {},
|
|
171
|
-
list_of_keys: list = [
|
|
172
|
-
"shortname",
|
|
173
|
-
"longinfo",
|
|
174
|
-
"name",
|
|
175
|
-
"metainfo",
|
|
176
|
-
"unit",
|
|
177
|
-
],
|
|
178
|
-
) -> dict:
|
|
179
|
-
"""Fill up missing metadata points with a default value."""
|
|
180
|
-
default = {}
|
|
181
|
-
for key in list_of_keys:
|
|
182
|
-
if key not in list(metadata.keys()):
|
|
183
|
-
default[key] = name
|
|
184
|
-
return {**metadata, **default}
|
seabirdfilehandler/logging.yaml
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
version: 1
|
|
2
|
-
formatters:
|
|
3
|
-
simple:
|
|
4
|
-
format: '%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
5
|
-
handlers:
|
|
6
|
-
console:
|
|
7
|
-
class: logging.StreamHandler
|
|
8
|
-
level: DEBUG
|
|
9
|
-
formatter: simple
|
|
10
|
-
stream: ext://sys.stdout
|
|
11
|
-
file:
|
|
12
|
-
class: logging.FileHandler
|
|
13
|
-
filename: seabirdfiles.log
|
|
14
|
-
level: DEBUG
|
|
15
|
-
formatter: simple
|
|
16
|
-
loggers:
|
|
17
|
-
simpleExample:
|
|
18
|
-
level: DEBUG
|
|
19
|
-
handlers: [console]
|
|
20
|
-
propagate: no
|
|
21
|
-
root:
|
|
22
|
-
level: DEBUG
|
|
23
|
-
handlers: [console]
|
|
@@ -1,16 +0,0 @@
|
|
|
1
|
-
seabirdfilehandler/__init__.py,sha256=5JTzYE3oRdrxkC9_etAnFQ1cy10PHtpmesdR6n5PoPQ,192
|
|
2
|
-
seabirdfilehandler/bottlefile.py,sha256=QBUqtKhD-dUPbgc_sz8NOGEFFkAjL3g5r1oAsqQnUcQ,6063
|
|
3
|
-
seabirdfilehandler/bottlelogfile.py,sha256=CjBeITQS27Ar80bfxguoAnVkYxI1ioAiVTnlvwevw7E,4448
|
|
4
|
-
seabirdfilehandler/cnvfile.py,sha256=-mCuQX76uuWIETljem1DjzDbZ5eNIUucjoHejTQu_sU,9685
|
|
5
|
-
seabirdfilehandler/datafiles.py,sha256=z27PZJBvEbl1aFXpDQ0QTM_yR5NTaNK-HC9_z3t7zuM,8991
|
|
6
|
-
seabirdfilehandler/dataframe_meta_accessor.py,sha256=x4mSEN49us6Ezzjdt41fl5Ry8IJR09ORrZ1roOIJbyc,6439
|
|
7
|
-
seabirdfilehandler/file_collection.py,sha256=qkEdlI-hcoyuOdmgTr8wdAr1mXXkVuKkF9J4j2-v3kY,6882
|
|
8
|
-
seabirdfilehandler/logging.yaml,sha256=mXxbhJPio3OGaukTpc3rLGA8Ywq1DNqp0Vn5YCbH6jY,459
|
|
9
|
-
seabirdfilehandler/parameter.py,sha256=UuwFzege94sqPt0kOjEqtMGGol4hjuFjj2_EH7o0pzA,14374
|
|
10
|
-
seabirdfilehandler/utils.py,sha256=5KXdB8Hdv65dv5tPyXxNMct1mCEOyA3S8XP54AFAnx0,1745
|
|
11
|
-
seabirdfilehandler/validation_modules.py,sha256=eZ6x0giftUtlxnRMOnK_vCkgccdwUXPrDjajFa-E6n0,4698
|
|
12
|
-
seabirdfilehandler/xmlfiles.py,sha256=L_puQf8eg0ojv85AyEMID4jnwkOlV_fgZP3W5yeSUBY,4668
|
|
13
|
-
seabirdfilehandler-0.5.0.dist-info/LICENSE,sha256=Ifd1VPmYv32oJd2QVh3wIQP9X05vYJlcY6kONz360ws,34603
|
|
14
|
-
seabirdfilehandler-0.5.0.dist-info/METADATA,sha256=jPHzHpxz9OY48QHifHT9crI9a5pxYae1aCw6jAhqtYM,1289
|
|
15
|
-
seabirdfilehandler-0.5.0.dist-info/WHEEL,sha256=b4K_helf-jlQoXBBETfwnf4B04YC67LOev0jo4fX5m8,88
|
|
16
|
-
seabirdfilehandler-0.5.0.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|