seabirdfilehandler 0.4.0__py3-none-any.whl → 0.4.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of seabirdfilehandler might be problematic. Click here for more details.
- seabirdfilehandler/datatablefiles.py +178 -134
- seabirdfilehandler/file_collection.py +4 -3
- seabirdfilehandler/parameter.py +37 -13
- seabirdfilehandler/seabirdfiles.py +16 -6
- seabirdfilehandler/validation_modules.py +18 -15
- seabirdfilehandler-0.4.2.dist-info/LICENSE +229 -0
- {seabirdfilehandler-0.4.0.dist-info → seabirdfilehandler-0.4.2.dist-info}/METADATA +12 -13
- seabirdfilehandler-0.4.2.dist-info/RECORD +13 -0
- {seabirdfilehandler-0.4.0.dist-info → seabirdfilehandler-0.4.2.dist-info}/WHEEL +1 -1
- seabirdfilehandler-0.4.0.dist-info/LICENSE +0 -373
- seabirdfilehandler-0.4.0.dist-info/RECORD +0 -13
|
@@ -9,7 +9,10 @@ import warnings
|
|
|
9
9
|
from seabirdfilehandler.parameter import Parameters
|
|
10
10
|
from seabirdfilehandler.validation_modules import CnvValidationList
|
|
11
11
|
from seabirdfilehandler.seabirdfiles import SeaBirdFile
|
|
12
|
-
from seabirdfilehandler.dataframe_meta_accessor import
|
|
12
|
+
from seabirdfilehandler.dataframe_meta_accessor import (
|
|
13
|
+
SeriesMetaAccessor,
|
|
14
|
+
DataFrameMetaAccessor,
|
|
15
|
+
)
|
|
13
16
|
|
|
14
17
|
logger = logging.getLogger(__name__)
|
|
15
18
|
|
|
@@ -54,8 +57,8 @@ class DataTableFile(SeaBirdFile):
|
|
|
54
57
|
"""
|
|
55
58
|
file_path = self.file_dir if file_path is None else file_path
|
|
56
59
|
file_name = self.file_name if file_name is None else file_name
|
|
57
|
-
if file_type[0] !=
|
|
58
|
-
file_type =
|
|
60
|
+
if file_type[0] != ".":
|
|
61
|
+
file_type = "." + file_type
|
|
59
62
|
return Path(file_path).joinpath(file_name).with_suffix(file_type)
|
|
60
63
|
|
|
61
64
|
def to_csv(
|
|
@@ -85,18 +88,21 @@ class DataTableFile(SeaBirdFile):
|
|
|
85
88
|
-------
|
|
86
89
|
|
|
87
90
|
"""
|
|
88
|
-
selected_columns =
|
|
91
|
+
selected_columns = (
|
|
92
|
+
self.df.columns if selected_columns is None else selected_columns
|
|
93
|
+
)
|
|
89
94
|
df = self.df[selected_columns].reset_index(drop=True)
|
|
90
95
|
new_file_path = self.define_output_path(
|
|
91
|
-
output_file_path, output_file_name
|
|
96
|
+
output_file_path, output_file_name
|
|
97
|
+
)
|
|
92
98
|
if with_header:
|
|
93
|
-
with open(new_file_path,
|
|
99
|
+
with open(new_file_path, "w") as file:
|
|
94
100
|
for line in self.header:
|
|
95
101
|
file.write(line)
|
|
96
|
-
df.to_csv(new_file_path, index=False, mode=
|
|
102
|
+
df.to_csv(new_file_path, index=False, mode="a")
|
|
97
103
|
else:
|
|
98
|
-
df.to_csv(new_file_path, index=False, mode=
|
|
99
|
-
logger.info(f
|
|
104
|
+
df.to_csv(new_file_path, index=False, mode="w")
|
|
105
|
+
logger.info(f"Wrote file {self.path_to_file} to {new_file_path}.")
|
|
100
106
|
|
|
101
107
|
def selecting_columns(
|
|
102
108
|
self,
|
|
@@ -159,12 +165,17 @@ class BottleFile(DataTableFile):
|
|
|
159
165
|
top_names, bottom_names = self.reading_data_header()
|
|
160
166
|
# creating statistics column to store the row type information:
|
|
161
167
|
# 4 rows per bottle, average, standard deviation, max value, min value
|
|
162
|
-
top_names.append(
|
|
168
|
+
top_names.append("Statistic")
|
|
163
169
|
# TODO: sexier way to construct dataframe than opening the file a
|
|
164
170
|
# second time
|
|
165
171
|
# # df = pd.DataFrame(self.data, index=None, columns=top_names)
|
|
166
|
-
df: pd.DataFrame = pd.read_fwf(
|
|
167
|
-
self.
|
|
172
|
+
df: pd.DataFrame = pd.read_fwf(
|
|
173
|
+
self.path_to_file,
|
|
174
|
+
index_col=False,
|
|
175
|
+
skiprows=len(self.header) + 2,
|
|
176
|
+
header=None,
|
|
177
|
+
names=top_names,
|
|
178
|
+
)
|
|
168
179
|
|
|
169
180
|
# handling the double row header
|
|
170
181
|
rowtypes = df[df.columns[-1]].unique()
|
|
@@ -184,22 +195,25 @@ class BottleFile(DataTableFile):
|
|
|
184
195
|
|
|
185
196
|
"""
|
|
186
197
|
column_idx = df.columns.get_loc(column)
|
|
187
|
-
old_column = df.iloc[::
|
|
188
|
-
new_column = df.iloc[1::
|
|
198
|
+
old_column = df.iloc[::length, column_idx].reset_index(drop=True)
|
|
199
|
+
new_column = df.iloc[1::length, column_idx].reset_index(drop=True)
|
|
189
200
|
old_column_expanded = pd.Series(
|
|
190
|
-
np.repeat(old_column, length)
|
|
201
|
+
np.repeat(old_column, length)
|
|
202
|
+
).reset_index(drop=True)
|
|
191
203
|
new_column_expanded = pd.Series(
|
|
192
|
-
np.repeat(new_column, length)
|
|
204
|
+
np.repeat(new_column, length)
|
|
205
|
+
).reset_index(drop=True)
|
|
193
206
|
df[column] = old_column_expanded
|
|
194
|
-
df.insert(
|
|
195
|
-
|
|
207
|
+
df.insert(
|
|
208
|
+
column_idx + 1, bottom_names[column_idx], new_column_expanded
|
|
209
|
+
)
|
|
196
210
|
return df
|
|
197
211
|
|
|
198
|
-
df = separate_double_header_row(df,
|
|
212
|
+
df = separate_double_header_row(df, "Date", len(rowtypes))
|
|
199
213
|
df = separate_double_header_row(df, top_names[0], len(rowtypes))
|
|
200
214
|
# remove brackets around statistics values
|
|
201
|
-
df["Statistic"] = df["Statistic"].str.strip(
|
|
202
|
-
df = df.rename(mapper={
|
|
215
|
+
df["Statistic"] = df["Statistic"].str.strip("()")
|
|
216
|
+
df = df.rename(mapper={"Btl_ID": "Bottle_ID"}, axis=1)
|
|
203
217
|
return df
|
|
204
218
|
|
|
205
219
|
def adding_timestamp_column(self):
|
|
@@ -216,9 +230,10 @@ class BottleFile(DataTableFile):
|
|
|
216
230
|
# constructing timestamp column
|
|
217
231
|
timestamp = []
|
|
218
232
|
for datepoint, timepoint in zip(self.df.Date, self.df.Time):
|
|
219
|
-
timestamp.append(
|
|
220
|
-
datepoint, time.fromisoformat(str(timepoint)))
|
|
221
|
-
|
|
233
|
+
timestamp.append(
|
|
234
|
+
datetime.combine(datepoint, time.fromisoformat(str(timepoint)))
|
|
235
|
+
)
|
|
236
|
+
self.df.insert(2, "Timestamp", timestamp)
|
|
222
237
|
self.df.Timestamp = pd.to_datetime(self.df.Timestamp)
|
|
223
238
|
|
|
224
239
|
def setting_dataframe_dtypes(self):
|
|
@@ -228,9 +243,9 @@ class BottleFile(DataTableFile):
|
|
|
228
243
|
self.df.Date = pd.to_datetime(self.df.Date)
|
|
229
244
|
self.df.Bottle_ID = self.df.Bottle_ID.astype(int)
|
|
230
245
|
|
|
231
|
-
def selecting_rows(
|
|
232
|
-
|
|
233
|
-
|
|
246
|
+
def selecting_rows(
|
|
247
|
+
self, df=None, statistic_of_interest: Union[list, str] = ["avg"]
|
|
248
|
+
):
|
|
234
249
|
"""Creates a dataframe with the given row identifier, using the
|
|
235
250
|
statistics column. A single string or a list of strings can be
|
|
236
251
|
processed.
|
|
@@ -251,7 +266,7 @@ class BottleFile(DataTableFile):
|
|
|
251
266
|
# ensure that the input is a list, so that isin() can do its job
|
|
252
267
|
if isinstance(statistic_of_interest, str):
|
|
253
268
|
statistic_of_interest = [statistic_of_interest]
|
|
254
|
-
self.df = df.loc[df[
|
|
269
|
+
self.df = df.loc[df["Statistic"].isin(statistic_of_interest)]
|
|
255
270
|
|
|
256
271
|
def reading_data_header(self):
|
|
257
272
|
"""Identifies and separatly collects the rows that specify the data
|
|
@@ -267,21 +282,24 @@ class BottleFile(DataTableFile):
|
|
|
267
282
|
n = 11 # fix column width of a seabird btl file
|
|
268
283
|
top_line = self.data[0]
|
|
269
284
|
second_line = self.data[1]
|
|
270
|
-
top_names = [
|
|
271
|
-
|
|
272
|
-
|
|
273
|
-
|
|
285
|
+
top_names = [
|
|
286
|
+
top_line[i : i + n].split()[0]
|
|
287
|
+
for i in range(0, len(top_line) - n, n)
|
|
288
|
+
]
|
|
289
|
+
bottom_names = [
|
|
290
|
+
second_line[i : i + n].split()[0] for i in range(0, 2 * n, n)
|
|
291
|
+
]
|
|
274
292
|
return top_names, bottom_names
|
|
275
293
|
|
|
276
294
|
def add_station_and_event_column(self):
|
|
277
|
-
event_list = [self.metadata[
|
|
278
|
-
self.df.insert(0,
|
|
295
|
+
event_list = [self.metadata["Station"] for _ in self.data]
|
|
296
|
+
self.df.insert(0, "Event", pd.Series(event_list))
|
|
279
297
|
|
|
280
298
|
def add_position_columns(self):
|
|
281
|
-
latitude_list = [self.metadata[
|
|
282
|
-
self.df.insert(1,
|
|
283
|
-
longitude_list = [self.metadata[
|
|
284
|
-
self.df.insert(2,
|
|
299
|
+
latitude_list = [self.metadata["GPS_Lat"] for _ in self.data]
|
|
300
|
+
self.df.insert(1, "Latitude", pd.Series(latitude_list))
|
|
301
|
+
longitude_list = [self.metadata["GPS_Lon"] for _ in self.data]
|
|
302
|
+
self.df.insert(2, "Longitude", pd.Series(longitude_list))
|
|
285
303
|
|
|
286
304
|
|
|
287
305
|
class CnvFile(DataTableFile):
|
|
@@ -331,12 +349,13 @@ class CnvFile(DataTableFile):
|
|
|
331
349
|
self.start_time = self.reading_start_time()
|
|
332
350
|
if create_dataframe:
|
|
333
351
|
warnings.warn(
|
|
334
|
-
|
|
352
|
+
"The default of constructing a pandas Dataframe will soon be replaced by using the Parameters class that works on numpy arrays.",
|
|
335
353
|
DeprecationWarning,
|
|
336
|
-
stacklevel=2 # Ensures the warning points to the caller's line
|
|
354
|
+
stacklevel=2, # Ensures the warning points to the caller's line
|
|
355
|
+
)
|
|
356
|
+
self.data_header_meta_info, self.duplicate_columns = (
|
|
357
|
+
self.reading_data_header(self.data_table_description)
|
|
337
358
|
)
|
|
338
|
-
self.data_header_meta_info, self.duplicate_columns = self.reading_data_header(
|
|
339
|
-
self.data_table_description)
|
|
340
359
|
self.original_df = self.create_dataframe(data_table_info_level)
|
|
341
360
|
self.df = self.original_df
|
|
342
361
|
if absolute_time_calculation:
|
|
@@ -347,9 +366,12 @@ class CnvFile(DataTableFile):
|
|
|
347
366
|
self.add_position_columns()
|
|
348
367
|
else:
|
|
349
368
|
self.parameters = Parameters(
|
|
350
|
-
self.data, self.data_table_description
|
|
369
|
+
self.data, self.data_table_description
|
|
370
|
+
)
|
|
351
371
|
|
|
352
|
-
def reading_data_header(
|
|
372
|
+
def reading_data_header(
|
|
373
|
+
self, header_info: list = []
|
|
374
|
+
) -> Tuple[dict[str, dict], list[int]]:
|
|
353
375
|
"""Reads the tables header data from the header.
|
|
354
376
|
|
|
355
377
|
Parameters
|
|
@@ -380,16 +402,20 @@ class CnvFile(DataTableFile):
|
|
|
380
402
|
if shortname in list(table_header.keys()):
|
|
381
403
|
try:
|
|
382
404
|
duplicate_columns.append(
|
|
383
|
-
int(line.split("=")[0].strip().split()[1])
|
|
384
|
-
|
|
405
|
+
int(line.split("=")[0].strip().split()[1])
|
|
406
|
+
)
|
|
407
|
+
except IndexError as error:
|
|
385
408
|
logger.error(
|
|
386
409
|
f"Could not resolve duplicate column: {
|
|
387
|
-
shortname
|
|
410
|
+
shortname
|
|
411
|
+
}, {error}"
|
|
412
|
+
)
|
|
388
413
|
else:
|
|
389
414
|
header_meta_info["shortname"] = shortname
|
|
390
415
|
header_meta_info["longinfo"] = longinfo.strip()
|
|
391
416
|
metainfo = self._extract_data_header_meta_info(
|
|
392
|
-
longinfo.strip()
|
|
417
|
+
longinfo.strip()
|
|
418
|
+
)
|
|
393
419
|
header_meta_info = {**header_meta_info, **metainfo}
|
|
394
420
|
table_header[shortname.strip()] = header_meta_info
|
|
395
421
|
return table_header, duplicate_columns
|
|
@@ -408,22 +434,24 @@ class CnvFile(DataTableFile):
|
|
|
408
434
|
|
|
409
435
|
"""
|
|
410
436
|
regex_string = r"(?:(?P<name0>.+),\s(?P<metainfo0>.+)\s\[(?P<unit0>.+)\]|(?P<name2>.+)\s\[(?P<unit2>.+)\]|(?P<name3>.+),\s(?P<metainfo2>.[^\s]+)|(?P<name4>.+))"
|
|
411
|
-
regex_check = re.search(
|
|
412
|
-
regex_string, line, flags=re.IGNORECASE)
|
|
437
|
+
regex_check = re.search(regex_string, line, flags=re.IGNORECASE)
|
|
413
438
|
if regex_check:
|
|
414
439
|
regex_info = dict(regex_check.groupdict())
|
|
415
|
-
regex_info = {
|
|
416
|
-
|
|
440
|
+
regex_info = {
|
|
441
|
+
key[:-1]: value
|
|
442
|
+
for key, value in regex_info.items()
|
|
443
|
+
if value is not None
|
|
444
|
+
}
|
|
417
445
|
if len(regex_info) > 2:
|
|
418
446
|
# check for second sensors and adjust their names
|
|
419
|
-
if regex_info["metainfo"][-1] ==
|
|
447
|
+
if regex_info["metainfo"][-1] == "2":
|
|
420
448
|
regex_info["name"] = regex_info["name"] + " 2"
|
|
421
|
-
regex_info["metainfo"] = regex_info[
|
|
449
|
+
regex_info["metainfo"] = regex_info["metainfo"][:-1]
|
|
422
450
|
if len(regex_info["metainfo"]) == 0:
|
|
423
451
|
regex_info.pop("metainfo")
|
|
424
|
-
if regex_info[
|
|
425
|
-
regex_info["metainfo"] = regex_info[
|
|
426
|
-
regex_info["unit"] = regex_info[
|
|
452
|
+
if regex_info["name"] == "flag":
|
|
453
|
+
regex_info["metainfo"] = regex_info["name"]
|
|
454
|
+
regex_info["unit"] = regex_info["name"]
|
|
427
455
|
return regex_info
|
|
428
456
|
return {}
|
|
429
457
|
|
|
@@ -449,10 +477,13 @@ class CnvFile(DataTableFile):
|
|
|
449
477
|
n = 11
|
|
450
478
|
row_list = []
|
|
451
479
|
for line in self.data:
|
|
452
|
-
row_list.append(
|
|
453
|
-
|
|
454
|
-
|
|
455
|
-
|
|
480
|
+
row_list.append(
|
|
481
|
+
[
|
|
482
|
+
line[i : i + n].split()[0]
|
|
483
|
+
for i in range(0, len(line) - n, n)
|
|
484
|
+
]
|
|
485
|
+
)
|
|
486
|
+
df = pd.DataFrame(row_list, dtype=float)
|
|
456
487
|
header_names = [
|
|
457
488
|
metainfo[header_info_detail_level]
|
|
458
489
|
for metainfo in list(self.data_header_meta_info.values())
|
|
@@ -463,8 +494,9 @@ class CnvFile(DataTableFile):
|
|
|
463
494
|
try:
|
|
464
495
|
df.columns = header_names
|
|
465
496
|
except ValueError as error:
|
|
466
|
-
logger.error(
|
|
467
|
-
|
|
497
|
+
logger.error(
|
|
498
|
+
f"Could not set dataframe header for {self.file_name}: {error}"
|
|
499
|
+
)
|
|
468
500
|
logger.error(header_names)
|
|
469
501
|
else:
|
|
470
502
|
df.meta.metadata = self.data_header_meta_info
|
|
@@ -472,26 +504,27 @@ class CnvFile(DataTableFile):
|
|
|
472
504
|
return df
|
|
473
505
|
|
|
474
506
|
def rename_dataframe_header(
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
507
|
+
self,
|
|
508
|
+
df: pd.DataFrame | None = None,
|
|
509
|
+
header_detail_level: str = "shortname",
|
|
478
510
|
) -> list:
|
|
479
511
|
df = self.df if df is None else df
|
|
480
512
|
df.meta.rename(header_detail_level)
|
|
481
513
|
return [column for column in df.columns]
|
|
482
514
|
|
|
483
515
|
def reading_start_time(
|
|
484
|
-
|
|
485
|
-
|
|
516
|
+
self,
|
|
517
|
+
time_source: str = "System UTC",
|
|
486
518
|
) -> datetime | None:
|
|
487
519
|
"""
|
|
488
520
|
Extracts the Cast start time from the metadata header.
|
|
489
521
|
"""
|
|
490
522
|
for line in self.sbe9_data:
|
|
491
523
|
if line.startswith(time_source):
|
|
492
|
-
start_time = line.split(
|
|
524
|
+
start_time = line.split("=")[1]
|
|
493
525
|
start_time = datetime.strptime(
|
|
494
|
-
start_time,
|
|
526
|
+
start_time, " %b %d %Y %H:%M:%S "
|
|
527
|
+
)
|
|
495
528
|
return start_time
|
|
496
529
|
return None
|
|
497
530
|
|
|
@@ -504,11 +537,11 @@ class CnvFile(DataTableFile):
|
|
|
504
537
|
"""
|
|
505
538
|
time_parameter = None
|
|
506
539
|
for parameter in self.df.columns:
|
|
507
|
-
if parameter.lower().startswith(
|
|
540
|
+
if parameter.lower().startswith("time"):
|
|
508
541
|
time_parameter = parameter
|
|
509
542
|
if time_parameter and self.start_time:
|
|
510
543
|
self.df.meta.add_column(
|
|
511
|
-
name=
|
|
544
|
+
name="datetime",
|
|
512
545
|
data=[
|
|
513
546
|
timedelta(days=float(time)) + self.start_time
|
|
514
547
|
if time_parameter == "timeJ"
|
|
@@ -526,7 +559,7 @@ class CnvFile(DataTableFile):
|
|
|
526
559
|
"""
|
|
527
560
|
if self.start_time:
|
|
528
561
|
self.df.meta.add_column(
|
|
529
|
-
name=
|
|
562
|
+
name="start_time",
|
|
530
563
|
data=pd.Series([self.start_time for _ in self.data]),
|
|
531
564
|
)
|
|
532
565
|
return True
|
|
@@ -541,9 +574,9 @@ class CnvFile(DataTableFile):
|
|
|
541
574
|
return CnvValidationList(validation_modules)
|
|
542
575
|
|
|
543
576
|
def df2cnv(
|
|
544
|
-
|
|
545
|
-
|
|
546
|
-
|
|
577
|
+
self,
|
|
578
|
+
header_names: list | None = None,
|
|
579
|
+
header_detail_level: str | None = None,
|
|
547
580
|
) -> list:
|
|
548
581
|
"""
|
|
549
582
|
Parses a pandas dataframe into a list that represents the lines inside
|
|
@@ -577,16 +610,23 @@ class CnvFile(DataTableFile):
|
|
|
577
610
|
)
|
|
578
611
|
cnv_out = []
|
|
579
612
|
for _, row in df.iterrows():
|
|
580
|
-
cnv_like_row =
|
|
581
|
-
(lambda column: f"{str(column):>11}")(value) for value in row
|
|
613
|
+
cnv_like_row = "".join(
|
|
614
|
+
(lambda column: f"{str(column):>11}")(value) for value in row
|
|
615
|
+
)
|
|
582
616
|
cnv_out.append(cnv_like_row + "\n")
|
|
583
617
|
return cnv_out
|
|
584
618
|
|
|
619
|
+
def array2cnv(self) -> list:
|
|
620
|
+
result = []
|
|
621
|
+
for row in self.parameters.full_data_array:
|
|
622
|
+
formatted_row = "".join(f"{elem:11}" for elem in row)
|
|
623
|
+
result.append(formatted_row + "\n")
|
|
624
|
+
return result
|
|
625
|
+
|
|
585
626
|
def to_cnv(
|
|
586
627
|
self,
|
|
587
628
|
file_name: Path | str | None = None,
|
|
588
|
-
|
|
589
|
-
use_current_processing_header: bool = False,
|
|
629
|
+
use_dataframe: bool = True,
|
|
590
630
|
header_list: list | None = None,
|
|
591
631
|
):
|
|
592
632
|
"""
|
|
@@ -606,32 +646,32 @@ class CnvFile(DataTableFile):
|
|
|
606
646
|
"""
|
|
607
647
|
file_name = self.path_to_file if file_name is None else file_name
|
|
608
648
|
# content construction
|
|
609
|
-
if
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
self.
|
|
613
|
-
self.
|
|
649
|
+
if use_dataframe:
|
|
650
|
+
data = self.df2cnv(header_list)
|
|
651
|
+
else:
|
|
652
|
+
data = self.array2cnv()
|
|
653
|
+
self._update_header()
|
|
654
|
+
self.file_data = [*self.header, *data]
|
|
614
655
|
# writing content out
|
|
615
656
|
try:
|
|
616
|
-
with open(file_name,
|
|
657
|
+
with open(file_name, "w", encoding="latin-1") as file:
|
|
617
658
|
for line in self.file_data:
|
|
618
659
|
file.write(line)
|
|
619
|
-
logger.info(f'Wrote cnv {self.path_to_file} to {file_name}.')
|
|
620
660
|
|
|
621
661
|
except IOError as error:
|
|
622
|
-
logger.error(f
|
|
662
|
+
logger.error(f"Could not write cnv file: {error}")
|
|
623
663
|
|
|
624
664
|
def _update_header(self):
|
|
625
665
|
"""Re-creates the cnv header."""
|
|
626
666
|
self.data_table_description = self._form_data_table_info()
|
|
627
667
|
self.header = [
|
|
628
|
-
*[f
|
|
629
|
-
*[f
|
|
630
|
-
f
|
|
631
|
-
*[f
|
|
632
|
-
*[f
|
|
633
|
-
*[f
|
|
634
|
-
|
|
668
|
+
*[f"* {data}" for data in self.sbe9_data[:-1]],
|
|
669
|
+
*[f"** {data}" for data in self.metadata_list],
|
|
670
|
+
f"* {self.sbe9_data[-1]}",
|
|
671
|
+
*[f"# {data}" for data in self.data_table_description],
|
|
672
|
+
*[f"# {data}" for data in self.sensor_data],
|
|
673
|
+
*[f"# {data}" for data in self.processing_info],
|
|
674
|
+
"*END*\n",
|
|
635
675
|
]
|
|
636
676
|
|
|
637
677
|
def _form_data_table_info(self) -> list:
|
|
@@ -644,7 +684,8 @@ class CnvFile(DataTableFile):
|
|
|
644
684
|
new_table_info.append(f"name {index} = {name}\n")
|
|
645
685
|
for index, (_, span) in enumerate(self.data_table_names_and_spans):
|
|
646
686
|
new_table_info.append(f"span {index} = {span}\n")
|
|
647
|
-
|
|
687
|
+
for key, value in self.data_table_misc.items():
|
|
688
|
+
new_table_info.append(f"{key} = {value}\n")
|
|
648
689
|
return new_table_info
|
|
649
690
|
|
|
650
691
|
def add_processing_metadata(self, addition: str | list):
|
|
@@ -672,14 +713,12 @@ class CnvFile(DataTableFile):
|
|
|
672
713
|
|
|
673
714
|
"""
|
|
674
715
|
try:
|
|
675
|
-
event_list = [self.metadata[
|
|
716
|
+
event_list = [self.metadata["Station"] for _ in self.data]
|
|
676
717
|
except KeyError:
|
|
677
718
|
return False
|
|
678
719
|
else:
|
|
679
720
|
self.df.meta.add_column(
|
|
680
|
-
name=
|
|
681
|
-
data=pd.Series(event_list),
|
|
682
|
-
location=0
|
|
721
|
+
name="Event", data=pd.Series(event_list), location=0
|
|
683
722
|
)
|
|
684
723
|
return True
|
|
685
724
|
|
|
@@ -689,26 +728,22 @@ class CnvFile(DataTableFile):
|
|
|
689
728
|
These must be present inside the extra metadata header.
|
|
690
729
|
|
|
691
730
|
"""
|
|
692
|
-
if (
|
|
693
|
-
column[
|
|
731
|
+
if ("latitude" or "longitude") in [
|
|
732
|
+
column["shortname"]
|
|
694
733
|
for column in list(self.df.meta.metadata.values())
|
|
695
734
|
]:
|
|
696
735
|
return True
|
|
697
736
|
try:
|
|
698
|
-
latitude_list = [self.metadata[
|
|
699
|
-
longitude_list = [self.metadata[
|
|
737
|
+
latitude_list = [self.metadata["GPS_Lat"] for _ in self.data]
|
|
738
|
+
longitude_list = [self.metadata["GPS_Lon"] for _ in self.data]
|
|
700
739
|
except KeyError:
|
|
701
740
|
return False
|
|
702
741
|
else:
|
|
703
742
|
self.df.meta.add_column(
|
|
704
|
-
name=
|
|
705
|
-
data=pd.Series(latitude_list),
|
|
706
|
-
location=1
|
|
743
|
+
name="Latitude", data=pd.Series(latitude_list), location=1
|
|
707
744
|
)
|
|
708
745
|
self.df.meta.add_column(
|
|
709
|
-
name=
|
|
710
|
-
data=pd.Series(longitude_list),
|
|
711
|
-
location=2
|
|
746
|
+
name="Longitude", data=pd.Series(longitude_list), location=2
|
|
712
747
|
)
|
|
713
748
|
return True
|
|
714
749
|
|
|
@@ -722,13 +757,13 @@ class CnvFile(DataTableFile):
|
|
|
722
757
|
the cast number of this files cast
|
|
723
758
|
|
|
724
759
|
"""
|
|
725
|
-
if (
|
|
726
|
-
number = int(self.metadata[
|
|
760
|
+
if ("Cast" in self.metadata.keys()) and (not number):
|
|
761
|
+
number = int(self.metadata["Cast"])
|
|
727
762
|
try:
|
|
728
763
|
self.df.meta.add_column(
|
|
729
|
-
name=
|
|
764
|
+
name="Cast",
|
|
730
765
|
data=pd.Series([number for _ in self.data]),
|
|
731
|
-
location=0
|
|
766
|
+
location=0,
|
|
732
767
|
)
|
|
733
768
|
except ValueError:
|
|
734
769
|
# Cast is already part of the dataframe, so nothing left to do
|
|
@@ -750,7 +785,7 @@ class BottleLogFile(DataTableFile):
|
|
|
750
785
|
|
|
751
786
|
"""
|
|
752
787
|
|
|
753
|
-
def __init__(self, path_to_file, create_dataframe
|
|
788
|
+
def __init__(self, path_to_file, create_dataframe=False):
|
|
754
789
|
super().__init__(path_to_file)
|
|
755
790
|
self.reset_time = self.obtaining_reset_time()
|
|
756
791
|
self.origin_cnv = self.raw_file_data[0].strip()
|
|
@@ -761,8 +796,6 @@ class BottleLogFile(DataTableFile):
|
|
|
761
796
|
self.df = self.original_df
|
|
762
797
|
else:
|
|
763
798
|
self.data_list = self.create_list()
|
|
764
|
-
|
|
765
|
-
|
|
766
799
|
|
|
767
800
|
def data_whitespace_removal(self) -> list:
|
|
768
801
|
"""Strips the input from whitespace characters, in this case especially
|
|
@@ -793,13 +826,15 @@ class BottleLogFile(DataTableFile):
|
|
|
793
826
|
|
|
794
827
|
"""
|
|
795
828
|
|
|
796
|
-
regex_check = re.search(
|
|
797
|
-
|
|
829
|
+
regex_check = re.search(
|
|
830
|
+
r"RESET\s(\w{3}\s\d+\s\d{4}\s\d\d:\d\d:\d\d)",
|
|
831
|
+
self.raw_file_data[1],
|
|
832
|
+
)
|
|
798
833
|
if regex_check:
|
|
799
|
-
return datetime.strptime(regex_check.group(1),
|
|
834
|
+
return datetime.strptime(regex_check.group(1), "%b %d %Y %H:%M:%S")
|
|
800
835
|
else:
|
|
801
|
-
error_message =
|
|
802
|
-
Reset time could not be extracted.
|
|
836
|
+
error_message = """BottleLogFile is not formatted as expected:
|
|
837
|
+
Reset time could not be extracted."""
|
|
803
838
|
logger.error(error_message)
|
|
804
839
|
raise IOError(error_message)
|
|
805
840
|
|
|
@@ -820,13 +855,12 @@ class BottleLogFile(DataTableFile):
|
|
|
820
855
|
bottles = [int(x) for x in self.data[i].split(",")[:2]]
|
|
821
856
|
date = self.convert_date(self.data[i].split(",")[2])
|
|
822
857
|
lines = tuple([int(x) for x in self.data[i].split(",")[3:]])
|
|
823
|
-
|
|
858
|
+
|
|
824
859
|
content_array.append([bottles, date, lines])
|
|
825
|
-
|
|
860
|
+
|
|
826
861
|
return content_array
|
|
827
|
-
|
|
828
862
|
|
|
829
|
-
def convert_date(self, date
|
|
863
|
+
def convert_date(self, date: str):
|
|
830
864
|
"""Converts the Dates of the .bl files to an ISO 8601 standard
|
|
831
865
|
|
|
832
866
|
Parameters
|
|
@@ -837,8 +871,21 @@ class BottleLogFile(DataTableFile):
|
|
|
837
871
|
a string with the date in the form of "yymmddThhmmss"
|
|
838
872
|
"""
|
|
839
873
|
date = date.strip()
|
|
840
|
-
month_list = [
|
|
841
|
-
|
|
874
|
+
month_list = [
|
|
875
|
+
"Jan",
|
|
876
|
+
"Feb",
|
|
877
|
+
"Mar",
|
|
878
|
+
"Apr",
|
|
879
|
+
"May",
|
|
880
|
+
"Jun",
|
|
881
|
+
"Jul",
|
|
882
|
+
"Aug",
|
|
883
|
+
"Sep",
|
|
884
|
+
"Oct",
|
|
885
|
+
"Nov",
|
|
886
|
+
"Dec",
|
|
887
|
+
]
|
|
888
|
+
|
|
842
889
|
month_ind = month_list.index(date.split(" ")[0]) + 1
|
|
843
890
|
if month_ind < 10:
|
|
844
891
|
month = "0" + str(month_ind)
|
|
@@ -848,9 +895,7 @@ class BottleLogFile(DataTableFile):
|
|
|
848
895
|
year = (date.split(" ")[2])[2:]
|
|
849
896
|
time = date.split(" ")[3].replace(":", "")
|
|
850
897
|
return year + month + day + "T" + time
|
|
851
|
-
|
|
852
898
|
|
|
853
|
-
|
|
854
899
|
def create_dataframe(self) -> pd.DataFrame:
|
|
855
900
|
"""Creates a dataframe from the list specified in self.data.
|
|
856
901
|
|
|
@@ -863,16 +908,15 @@ class BottleLogFile(DataTableFile):
|
|
|
863
908
|
"""
|
|
864
909
|
data_lists = []
|
|
865
910
|
for line in self.data:
|
|
866
|
-
inner_list = line.split(
|
|
911
|
+
inner_list = line.split(",")
|
|
867
912
|
# dropping first column as its the index
|
|
868
913
|
data_lists.append(inner_list[1:])
|
|
869
914
|
df = pd.DataFrame(data_lists)
|
|
870
|
-
df.columns = [
|
|
915
|
+
df.columns = ["Bottle ID", "Datetime", "start_range", "end_range"]
|
|
871
916
|
return df
|
|
872
917
|
|
|
873
918
|
|
|
874
919
|
class FieldCalibrationFile(DataTableFile):
|
|
875
|
-
|
|
876
920
|
def __init__(self, path_to_file):
|
|
877
921
|
super().__init__(path_to_file)
|
|
878
922
|
self.original_df = self.create_dataframe()
|
|
@@ -882,5 +926,5 @@ class FieldCalibrationFile(DataTableFile):
|
|
|
882
926
|
try:
|
|
883
927
|
return pd.read_csv(self.path_to_file, skiprows=len(self.header))
|
|
884
928
|
except IOError as error:
|
|
885
|
-
logger.error(f
|
|
929
|
+
logger.error(f"Could not read field calibration file: {error}.")
|
|
886
930
|
return pd.DataFrame()
|
|
@@ -81,8 +81,7 @@ class FileCollection(UserList):
|
|
|
81
81
|
self.data.append(self.file_type(file))
|
|
82
82
|
except TypeError:
|
|
83
83
|
logger.error(
|
|
84
|
-
f"Could not open file {
|
|
85
|
-
file} with the type "
|
|
84
|
+
f"Could not open file {file} with the type "
|
|
86
85
|
f"{self.file_type}."
|
|
87
86
|
)
|
|
88
87
|
continue
|
|
@@ -260,7 +259,9 @@ class FileCollection(UserList):
|
|
|
260
259
|
else:
|
|
261
260
|
differing_dicts = [
|
|
262
261
|
current_dict
|
|
263
|
-
for last_dict, current_dict in zip(
|
|
262
|
+
for last_dict, current_dict in zip(
|
|
263
|
+
last_unique, cast_sensors
|
|
264
|
+
)
|
|
264
265
|
if current_dict != last_dict
|
|
265
266
|
]
|
|
266
267
|
if differing_dicts:
|