seabirdfilehandler-0.4.0-py3-none-any.whl → seabirdfilehandler-0.4.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of seabirdfilehandler might be problematic. Click here for more details.

@@ -9,7 +9,10 @@ import warnings
9
9
  from seabirdfilehandler.parameter import Parameters
10
10
  from seabirdfilehandler.validation_modules import CnvValidationList
11
11
  from seabirdfilehandler.seabirdfiles import SeaBirdFile
12
- from seabirdfilehandler.dataframe_meta_accessor import SeriesMetaAccessor, DataFrameMetaAccessor
12
+ from seabirdfilehandler.dataframe_meta_accessor import (
13
+ SeriesMetaAccessor,
14
+ DataFrameMetaAccessor,
15
+ )
13
16
 
14
17
  logger = logging.getLogger(__name__)
15
18
 
@@ -54,8 +57,8 @@ class DataTableFile(SeaBirdFile):
54
57
  """
55
58
  file_path = self.file_dir if file_path is None else file_path
56
59
  file_name = self.file_name if file_name is None else file_name
57
- if file_type[0] != '.':
58
- file_type = '.' + file_type
60
+ if file_type[0] != ".":
61
+ file_type = "." + file_type
59
62
  return Path(file_path).joinpath(file_name).with_suffix(file_type)
60
63
 
61
64
  def to_csv(
@@ -85,18 +88,21 @@ class DataTableFile(SeaBirdFile):
85
88
  -------
86
89
 
87
90
  """
88
- selected_columns = self.df.columns if selected_columns is None else selected_columns
91
+ selected_columns = (
92
+ self.df.columns if selected_columns is None else selected_columns
93
+ )
89
94
  df = self.df[selected_columns].reset_index(drop=True)
90
95
  new_file_path = self.define_output_path(
91
- output_file_path, output_file_name)
96
+ output_file_path, output_file_name
97
+ )
92
98
  if with_header:
93
- with open(new_file_path, 'w') as file:
99
+ with open(new_file_path, "w") as file:
94
100
  for line in self.header:
95
101
  file.write(line)
96
- df.to_csv(new_file_path, index=False, mode='a')
102
+ df.to_csv(new_file_path, index=False, mode="a")
97
103
  else:
98
- df.to_csv(new_file_path, index=False, mode='w')
99
- logger.info(f'Wrote file {self.path_to_file} to {new_file_path}.')
104
+ df.to_csv(new_file_path, index=False, mode="w")
105
+ logger.info(f"Wrote file {self.path_to_file} to {new_file_path}.")
100
106
 
101
107
  def selecting_columns(
102
108
  self,
@@ -159,12 +165,17 @@ class BottleFile(DataTableFile):
159
165
  top_names, bottom_names = self.reading_data_header()
160
166
  # creating statistics column to store the row type information:
161
167
  # 4 rows per bottle, average, standard deviation, max value, min value
162
- top_names.append('Statistic')
168
+ top_names.append("Statistic")
163
169
  # TODO: sexier way to construct dataframe than opening the file a
164
170
  # second time
165
171
  # # df = pd.DataFrame(self.data, index=None, columns=top_names)
166
- df: pd.DataFrame = pd.read_fwf(self.path_to_file, index_col=False, skiprows=len(
167
- self.header)+2, header=None, names=top_names)
172
+ df: pd.DataFrame = pd.read_fwf(
173
+ self.path_to_file,
174
+ index_col=False,
175
+ skiprows=len(self.header) + 2,
176
+ header=None,
177
+ names=top_names,
178
+ )
168
179
 
169
180
  # handling the double row header
170
181
  rowtypes = df[df.columns[-1]].unique()
@@ -184,22 +195,25 @@ class BottleFile(DataTableFile):
184
195
 
185
196
  """
186
197
  column_idx = df.columns.get_loc(column)
187
- old_column = df.iloc[:: length, column_idx].reset_index(drop=True)
188
- new_column = df.iloc[1:: length, column_idx].reset_index(drop=True)
198
+ old_column = df.iloc[::length, column_idx].reset_index(drop=True)
199
+ new_column = df.iloc[1::length, column_idx].reset_index(drop=True)
189
200
  old_column_expanded = pd.Series(
190
- np.repeat(old_column, length)).reset_index(drop=True)
201
+ np.repeat(old_column, length)
202
+ ).reset_index(drop=True)
191
203
  new_column_expanded = pd.Series(
192
- np.repeat(new_column, length)).reset_index(drop=True)
204
+ np.repeat(new_column, length)
205
+ ).reset_index(drop=True)
193
206
  df[column] = old_column_expanded
194
- df.insert(column_idx + 1,
195
- bottom_names[column_idx], new_column_expanded)
207
+ df.insert(
208
+ column_idx + 1, bottom_names[column_idx], new_column_expanded
209
+ )
196
210
  return df
197
211
 
198
- df = separate_double_header_row(df, 'Date', len(rowtypes))
212
+ df = separate_double_header_row(df, "Date", len(rowtypes))
199
213
  df = separate_double_header_row(df, top_names[0], len(rowtypes))
200
214
  # remove brackets around statistics values
201
- df["Statistic"] = df["Statistic"].str.strip('()')
202
- df = df.rename(mapper={'Btl_ID': 'Bottle_ID'}, axis=1)
215
+ df["Statistic"] = df["Statistic"].str.strip("()")
216
+ df = df.rename(mapper={"Btl_ID": "Bottle_ID"}, axis=1)
203
217
  return df
204
218
 
205
219
  def adding_timestamp_column(self):
@@ -216,9 +230,10 @@ class BottleFile(DataTableFile):
216
230
  # constructing timestamp column
217
231
  timestamp = []
218
232
  for datepoint, timepoint in zip(self.df.Date, self.df.Time):
219
- timestamp.append(datetime.combine(
220
- datepoint, time.fromisoformat(str(timepoint))))
221
- self.df.insert(2, 'Timestamp', timestamp)
233
+ timestamp.append(
234
+ datetime.combine(datepoint, time.fromisoformat(str(timepoint)))
235
+ )
236
+ self.df.insert(2, "Timestamp", timestamp)
222
237
  self.df.Timestamp = pd.to_datetime(self.df.Timestamp)
223
238
 
224
239
  def setting_dataframe_dtypes(self):
@@ -228,9 +243,9 @@ class BottleFile(DataTableFile):
228
243
  self.df.Date = pd.to_datetime(self.df.Date)
229
244
  self.df.Bottle_ID = self.df.Bottle_ID.astype(int)
230
245
 
231
- def selecting_rows(self,
232
- df=None,
233
- statistic_of_interest: Union[list, str] = ['avg']):
246
+ def selecting_rows(
247
+ self, df=None, statistic_of_interest: Union[list, str] = ["avg"]
248
+ ):
234
249
  """Creates a dataframe with the given row identifier, using the
235
250
  statistics column. A single string or a list of strings can be
236
251
  processed.
@@ -251,7 +266,7 @@ class BottleFile(DataTableFile):
251
266
  # ensure that the input is a list, so that isin() can do its job
252
267
  if isinstance(statistic_of_interest, str):
253
268
  statistic_of_interest = [statistic_of_interest]
254
- self.df = df.loc[df['Statistic'].isin(statistic_of_interest)]
269
+ self.df = df.loc[df["Statistic"].isin(statistic_of_interest)]
255
270
 
256
271
  def reading_data_header(self):
257
272
  """Identifies and separatly collects the rows that specify the data
@@ -267,21 +282,24 @@ class BottleFile(DataTableFile):
267
282
  n = 11 # fix column width of a seabird btl file
268
283
  top_line = self.data[0]
269
284
  second_line = self.data[1]
270
- top_names = [top_line[i:i+n].split()[0]
271
- for i in range(0, len(top_line)-n, n)]
272
- bottom_names = [second_line[i:i+n].split()[0]
273
- for i in range(0, 2*n, n)]
285
+ top_names = [
286
+ top_line[i : i + n].split()[0]
287
+ for i in range(0, len(top_line) - n, n)
288
+ ]
289
+ bottom_names = [
290
+ second_line[i : i + n].split()[0] for i in range(0, 2 * n, n)
291
+ ]
274
292
  return top_names, bottom_names
275
293
 
276
294
  def add_station_and_event_column(self):
277
- event_list = [self.metadata['Station'] for _ in self.data]
278
- self.df.insert(0, 'Event', pd.Series(event_list))
295
+ event_list = [self.metadata["Station"] for _ in self.data]
296
+ self.df.insert(0, "Event", pd.Series(event_list))
279
297
 
280
298
  def add_position_columns(self):
281
- latitude_list = [self.metadata['GPS_Lat'] for _ in self.data]
282
- self.df.insert(1, 'Latitude', pd.Series(latitude_list))
283
- longitude_list = [self.metadata['GPS_Lon'] for _ in self.data]
284
- self.df.insert(2, 'Longitude', pd.Series(longitude_list))
299
+ latitude_list = [self.metadata["GPS_Lat"] for _ in self.data]
300
+ self.df.insert(1, "Latitude", pd.Series(latitude_list))
301
+ longitude_list = [self.metadata["GPS_Lon"] for _ in self.data]
302
+ self.df.insert(2, "Longitude", pd.Series(longitude_list))
285
303
 
286
304
 
287
305
  class CnvFile(DataTableFile):
@@ -331,12 +349,13 @@ class CnvFile(DataTableFile):
331
349
  self.start_time = self.reading_start_time()
332
350
  if create_dataframe:
333
351
  warnings.warn(
334
- 'The default of constructing a pandas Dataframe will soon be replaced by using the Parameters class that works on numpy arrays.',
352
+ "The default of constructing a pandas Dataframe will soon be replaced by using the Parameters class that works on numpy arrays.",
335
353
  DeprecationWarning,
336
- stacklevel=2 # Ensures the warning points to the caller's line
354
+ stacklevel=2, # Ensures the warning points to the caller's line
355
+ )
356
+ self.data_header_meta_info, self.duplicate_columns = (
357
+ self.reading_data_header(self.data_table_description)
337
358
  )
338
- self.data_header_meta_info, self.duplicate_columns = self.reading_data_header(
339
- self.data_table_description)
340
359
  self.original_df = self.create_dataframe(data_table_info_level)
341
360
  self.df = self.original_df
342
361
  if absolute_time_calculation:
@@ -347,9 +366,12 @@ class CnvFile(DataTableFile):
347
366
  self.add_position_columns()
348
367
  else:
349
368
  self.parameters = Parameters(
350
- self.data, self.data_table_description)
369
+ self.data, self.data_table_description
370
+ )
351
371
 
352
- def reading_data_header(self, header_info: list = []) -> Tuple[dict[str, dict], list[int]]:
372
+ def reading_data_header(
373
+ self, header_info: list = []
374
+ ) -> Tuple[dict[str, dict], list[int]]:
353
375
  """Reads the tables header data from the header.
354
376
 
355
377
  Parameters
@@ -380,16 +402,20 @@ class CnvFile(DataTableFile):
380
402
  if shortname in list(table_header.keys()):
381
403
  try:
382
404
  duplicate_columns.append(
383
- int(line.split("=")[0].strip().split()[1]))
384
- except (IndexError) as error:
405
+ int(line.split("=")[0].strip().split()[1])
406
+ )
407
+ except IndexError as error:
385
408
  logger.error(
386
409
  f"Could not resolve duplicate column: {
387
- shortname}, {error}")
410
+ shortname
411
+ }, {error}"
412
+ )
388
413
  else:
389
414
  header_meta_info["shortname"] = shortname
390
415
  header_meta_info["longinfo"] = longinfo.strip()
391
416
  metainfo = self._extract_data_header_meta_info(
392
- longinfo.strip())
417
+ longinfo.strip()
418
+ )
393
419
  header_meta_info = {**header_meta_info, **metainfo}
394
420
  table_header[shortname.strip()] = header_meta_info
395
421
  return table_header, duplicate_columns
@@ -408,22 +434,24 @@ class CnvFile(DataTableFile):
408
434
 
409
435
  """
410
436
  regex_string = r"(?:(?P<name0>.+),\s(?P<metainfo0>.+)\s\[(?P<unit0>.+)\]|(?P<name2>.+)\s\[(?P<unit2>.+)\]|(?P<name3>.+),\s(?P<metainfo2>.[^\s]+)|(?P<name4>.+))"
411
- regex_check = re.search(
412
- regex_string, line, flags=re.IGNORECASE)
437
+ regex_check = re.search(regex_string, line, flags=re.IGNORECASE)
413
438
  if regex_check:
414
439
  regex_info = dict(regex_check.groupdict())
415
- regex_info = {key[:-1]: value for key,
416
- value in regex_info.items() if value is not None}
440
+ regex_info = {
441
+ key[:-1]: value
442
+ for key, value in regex_info.items()
443
+ if value is not None
444
+ }
417
445
  if len(regex_info) > 2:
418
446
  # check for second sensors and adjust their names
419
- if regex_info["metainfo"][-1] == '2':
447
+ if regex_info["metainfo"][-1] == "2":
420
448
  regex_info["name"] = regex_info["name"] + " 2"
421
- regex_info["metainfo"] = regex_info['metainfo'][:-1]
449
+ regex_info["metainfo"] = regex_info["metainfo"][:-1]
422
450
  if len(regex_info["metainfo"]) == 0:
423
451
  regex_info.pop("metainfo")
424
- if regex_info['name'] == 'flag':
425
- regex_info["metainfo"] = regex_info['name']
426
- regex_info["unit"] = regex_info['name']
452
+ if regex_info["name"] == "flag":
453
+ regex_info["metainfo"] = regex_info["name"]
454
+ regex_info["unit"] = regex_info["name"]
427
455
  return regex_info
428
456
  return {}
429
457
 
@@ -449,10 +477,13 @@ class CnvFile(DataTableFile):
449
477
  n = 11
450
478
  row_list = []
451
479
  for line in self.data:
452
- row_list.append([line[i:i+n].split()[0]
453
- for i in range(0, len(line)-n, n)])
454
- # TODO: force float dtype here and handle that in rest of code
455
- df = pd.DataFrame(row_list)
480
+ row_list.append(
481
+ [
482
+ line[i : i + n].split()[0]
483
+ for i in range(0, len(line) - n, n)
484
+ ]
485
+ )
486
+ df = pd.DataFrame(row_list, dtype=float)
456
487
  header_names = [
457
488
  metainfo[header_info_detail_level]
458
489
  for metainfo in list(self.data_header_meta_info.values())
@@ -463,8 +494,9 @@ class CnvFile(DataTableFile):
463
494
  try:
464
495
  df.columns = header_names
465
496
  except ValueError as error:
466
- logger.error(f"Could not set dataframe header for {
467
- self.file_name}: {error}")
497
+ logger.error(
498
+ f"Could not set dataframe header for {self.file_name}: {error}"
499
+ )
468
500
  logger.error(header_names)
469
501
  else:
470
502
  df.meta.metadata = self.data_header_meta_info
@@ -472,26 +504,27 @@ class CnvFile(DataTableFile):
472
504
  return df
473
505
 
474
506
  def rename_dataframe_header(
475
- self,
476
- df: pd.DataFrame | None = None,
477
- header_detail_level: str = "shortname",
507
+ self,
508
+ df: pd.DataFrame | None = None,
509
+ header_detail_level: str = "shortname",
478
510
  ) -> list:
479
511
  df = self.df if df is None else df
480
512
  df.meta.rename(header_detail_level)
481
513
  return [column for column in df.columns]
482
514
 
483
515
  def reading_start_time(
484
- self,
485
- time_source: str = "System UTC",
516
+ self,
517
+ time_source: str = "System UTC",
486
518
  ) -> datetime | None:
487
519
  """
488
520
  Extracts the Cast start time from the metadata header.
489
521
  """
490
522
  for line in self.sbe9_data:
491
523
  if line.startswith(time_source):
492
- start_time = line.split('=')[1]
524
+ start_time = line.split("=")[1]
493
525
  start_time = datetime.strptime(
494
- start_time, ' %b %d %Y %H:%M:%S ')
526
+ start_time, " %b %d %Y %H:%M:%S "
527
+ )
495
528
  return start_time
496
529
  return None
497
530
 
@@ -504,11 +537,11 @@ class CnvFile(DataTableFile):
504
537
  """
505
538
  time_parameter = None
506
539
  for parameter in self.df.columns:
507
- if parameter.lower().startswith('time'):
540
+ if parameter.lower().startswith("time"):
508
541
  time_parameter = parameter
509
542
  if time_parameter and self.start_time:
510
543
  self.df.meta.add_column(
511
- name='datetime',
544
+ name="datetime",
512
545
  data=[
513
546
  timedelta(days=float(time)) + self.start_time
514
547
  if time_parameter == "timeJ"
@@ -526,7 +559,7 @@ class CnvFile(DataTableFile):
526
559
  """
527
560
  if self.start_time:
528
561
  self.df.meta.add_column(
529
- name='start_time',
562
+ name="start_time",
530
563
  data=pd.Series([self.start_time for _ in self.data]),
531
564
  )
532
565
  return True
@@ -541,9 +574,9 @@ class CnvFile(DataTableFile):
541
574
  return CnvValidationList(validation_modules)
542
575
 
543
576
  def df2cnv(
544
- self,
545
- header_names: list | None = None,
546
- header_detail_level: str | None = None,
577
+ self,
578
+ header_names: list | None = None,
579
+ header_detail_level: str | None = None,
547
580
  ) -> list:
548
581
  """
549
582
  Parses a pandas dataframe into a list that represents the lines inside
@@ -577,16 +610,23 @@ class CnvFile(DataTableFile):
577
610
  )
578
611
  cnv_out = []
579
612
  for _, row in df.iterrows():
580
- cnv_like_row = ''.join(
581
- (lambda column: f"{str(column):>11}")(value) for value in row)
613
+ cnv_like_row = "".join(
614
+ (lambda column: f"{str(column):>11}")(value) for value in row
615
+ )
582
616
  cnv_out.append(cnv_like_row + "\n")
583
617
  return cnv_out
584
618
 
619
+ def array2cnv(self) -> list:
620
+ result = []
621
+ for row in self.parameters.full_data_array:
622
+ formatted_row = "".join(f"{elem:11}" for elem in row)
623
+ result.append(formatted_row + "\n")
624
+ return result
625
+
585
626
  def to_cnv(
586
627
  self,
587
628
  file_name: Path | str | None = None,
588
- use_current_df: bool = True,
589
- use_current_processing_header: bool = False,
629
+ use_dataframe: bool = True,
590
630
  header_list: list | None = None,
591
631
  ):
592
632
  """
@@ -606,32 +646,32 @@ class CnvFile(DataTableFile):
606
646
  """
607
647
  file_name = self.path_to_file if file_name is None else file_name
608
648
  # content construction
609
- if use_current_df:
610
- self.data = self.df2cnv(header_list)
611
- if use_current_processing_header:
612
- self._update_header()
613
- self.file_data = [*self.header, *self.data]
649
+ if use_dataframe:
650
+ data = self.df2cnv(header_list)
651
+ else:
652
+ data = self.array2cnv()
653
+ self._update_header()
654
+ self.file_data = [*self.header, *data]
614
655
  # writing content out
615
656
  try:
616
- with open(file_name, 'w', encoding='latin-1') as file:
657
+ with open(file_name, "w", encoding="latin-1") as file:
617
658
  for line in self.file_data:
618
659
  file.write(line)
619
- logger.info(f'Wrote cnv {self.path_to_file} to {file_name}.')
620
660
 
621
661
  except IOError as error:
622
- logger.error(f'Could not write cnv file: {error}')
662
+ logger.error(f"Could not write cnv file: {error}")
623
663
 
624
664
  def _update_header(self):
625
665
  """Re-creates the cnv header."""
626
666
  self.data_table_description = self._form_data_table_info()
627
667
  self.header = [
628
- *[f'* {data}' for data in self.sbe9_data[:-1]],
629
- *[f'** {data}' for data in self.metadata_list],
630
- f'* {self.sbe9_data[-1]}',
631
- *[f'# {data}' for data in self.data_table_description],
632
- *[f'# {data}' for data in self.sensor_data],
633
- *[f'# {data}' for data in self.processing_info],
634
- '*END*\n'
668
+ *[f"* {data}" for data in self.sbe9_data[:-1]],
669
+ *[f"** {data}" for data in self.metadata_list],
670
+ f"* {self.sbe9_data[-1]}",
671
+ *[f"# {data}" for data in self.data_table_description],
672
+ *[f"# {data}" for data in self.sensor_data],
673
+ *[f"# {data}" for data in self.processing_info],
674
+ "*END*\n",
635
675
  ]
636
676
 
637
677
  def _form_data_table_info(self) -> list:
@@ -644,7 +684,8 @@ class CnvFile(DataTableFile):
644
684
  new_table_info.append(f"name {index} = {name}\n")
645
685
  for index, (_, span) in enumerate(self.data_table_names_and_spans):
646
686
  new_table_info.append(f"span {index} = {span}\n")
647
- new_table_info = [*new_table_info, *self.data_table_misc]
687
+ for key, value in self.data_table_misc.items():
688
+ new_table_info.append(f"{key} = {value}\n")
648
689
  return new_table_info
649
690
 
650
691
  def add_processing_metadata(self, addition: str | list):
@@ -672,14 +713,12 @@ class CnvFile(DataTableFile):
672
713
 
673
714
  """
674
715
  try:
675
- event_list = [self.metadata['Station'] for _ in self.data]
716
+ event_list = [self.metadata["Station"] for _ in self.data]
676
717
  except KeyError:
677
718
  return False
678
719
  else:
679
720
  self.df.meta.add_column(
680
- name='Event',
681
- data=pd.Series(event_list),
682
- location=0
721
+ name="Event", data=pd.Series(event_list), location=0
683
722
  )
684
723
  return True
685
724
 
@@ -689,26 +728,22 @@ class CnvFile(DataTableFile):
689
728
  These must be present inside the extra metadata header.
690
729
 
691
730
  """
692
- if ('latitude' or 'longitude') in [
693
- column['shortname']
731
+ if ("latitude" or "longitude") in [
732
+ column["shortname"]
694
733
  for column in list(self.df.meta.metadata.values())
695
734
  ]:
696
735
  return True
697
736
  try:
698
- latitude_list = [self.metadata['GPS_Lat'] for _ in self.data]
699
- longitude_list = [self.metadata['GPS_Lon'] for _ in self.data]
737
+ latitude_list = [self.metadata["GPS_Lat"] for _ in self.data]
738
+ longitude_list = [self.metadata["GPS_Lon"] for _ in self.data]
700
739
  except KeyError:
701
740
  return False
702
741
  else:
703
742
  self.df.meta.add_column(
704
- name='Latitude',
705
- data=pd.Series(latitude_list),
706
- location=1
743
+ name="Latitude", data=pd.Series(latitude_list), location=1
707
744
  )
708
745
  self.df.meta.add_column(
709
- name='Longitude',
710
- data=pd.Series(longitude_list),
711
- location=2
746
+ name="Longitude", data=pd.Series(longitude_list), location=2
712
747
  )
713
748
  return True
714
749
 
@@ -722,13 +757,13 @@ class CnvFile(DataTableFile):
722
757
  the cast number of this files cast
723
758
 
724
759
  """
725
- if ('Cast' in self.metadata.keys()) and (not number):
726
- number = int(self.metadata['Cast'])
760
+ if ("Cast" in self.metadata.keys()) and (not number):
761
+ number = int(self.metadata["Cast"])
727
762
  try:
728
763
  self.df.meta.add_column(
729
- name='Cast',
764
+ name="Cast",
730
765
  data=pd.Series([number for _ in self.data]),
731
- location=0
766
+ location=0,
732
767
  )
733
768
  except ValueError:
734
769
  # Cast is already part of the dataframe, so nothing left to do
@@ -750,7 +785,7 @@ class BottleLogFile(DataTableFile):
750
785
 
751
786
  """
752
787
 
753
- def __init__(self, path_to_file, create_dataframe = False):
788
+ def __init__(self, path_to_file, create_dataframe=False):
754
789
  super().__init__(path_to_file)
755
790
  self.reset_time = self.obtaining_reset_time()
756
791
  self.origin_cnv = self.raw_file_data[0].strip()
@@ -761,8 +796,6 @@ class BottleLogFile(DataTableFile):
761
796
  self.df = self.original_df
762
797
  else:
763
798
  self.data_list = self.create_list()
764
-
765
-
766
799
 
767
800
  def data_whitespace_removal(self) -> list:
768
801
  """Strips the input from whitespace characters, in this case especially
@@ -793,13 +826,15 @@ class BottleLogFile(DataTableFile):
793
826
 
794
827
  """
795
828
 
796
- regex_check = re.search(r'RESET\s(\w{3}\s\d+\s\d{4}\s\d\d:\d\d:\d\d)',
797
- self.raw_file_data[1])
829
+ regex_check = re.search(
830
+ r"RESET\s(\w{3}\s\d+\s\d{4}\s\d\d:\d\d:\d\d)",
831
+ self.raw_file_data[1],
832
+ )
798
833
  if regex_check:
799
- return datetime.strptime(regex_check.group(1), '%b %d %Y %H:%M:%S')
834
+ return datetime.strptime(regex_check.group(1), "%b %d %Y %H:%M:%S")
800
835
  else:
801
- error_message = '''BottleLogFile is not formatted as expected:
802
- Reset time could not be extracted.'''
836
+ error_message = """BottleLogFile is not formatted as expected:
837
+ Reset time could not be extracted."""
803
838
  logger.error(error_message)
804
839
  raise IOError(error_message)
805
840
 
@@ -820,13 +855,12 @@ class BottleLogFile(DataTableFile):
820
855
  bottles = [int(x) for x in self.data[i].split(",")[:2]]
821
856
  date = self.convert_date(self.data[i].split(",")[2])
822
857
  lines = tuple([int(x) for x in self.data[i].split(",")[3:]])
823
-
858
+
824
859
  content_array.append([bottles, date, lines])
825
-
860
+
826
861
  return content_array
827
-
828
862
 
829
- def convert_date(self, date : str):
863
+ def convert_date(self, date: str):
830
864
  """Converts the Dates of the .bl files to an ISO 8601 standard
831
865
 
832
866
  Parameters
@@ -837,8 +871,21 @@ class BottleLogFile(DataTableFile):
837
871
  a string with the date in the form of "yymmddThhmmss"
838
872
  """
839
873
  date = date.strip()
840
- month_list = ["Jan", "Feb", "Mar", "Apr", "May", "Jun", "Jul", "Aug", "Sep", "Oct", "Nov", "Dec"]
841
-
874
+ month_list = [
875
+ "Jan",
876
+ "Feb",
877
+ "Mar",
878
+ "Apr",
879
+ "May",
880
+ "Jun",
881
+ "Jul",
882
+ "Aug",
883
+ "Sep",
884
+ "Oct",
885
+ "Nov",
886
+ "Dec",
887
+ ]
888
+
842
889
  month_ind = month_list.index(date.split(" ")[0]) + 1
843
890
  if month_ind < 10:
844
891
  month = "0" + str(month_ind)
@@ -848,9 +895,7 @@ class BottleLogFile(DataTableFile):
848
895
  year = (date.split(" ")[2])[2:]
849
896
  time = date.split(" ")[3].replace(":", "")
850
897
  return year + month + day + "T" + time
851
-
852
898
 
853
-
854
899
  def create_dataframe(self) -> pd.DataFrame:
855
900
  """Creates a dataframe from the list specified in self.data.
856
901
 
@@ -863,16 +908,15 @@ class BottleLogFile(DataTableFile):
863
908
  """
864
909
  data_lists = []
865
910
  for line in self.data:
866
- inner_list = line.split(',')
911
+ inner_list = line.split(",")
867
912
  # dropping first column as its the index
868
913
  data_lists.append(inner_list[1:])
869
914
  df = pd.DataFrame(data_lists)
870
- df.columns = ['Bottle ID', 'Datetime', 'start_range', 'end_range']
915
+ df.columns = ["Bottle ID", "Datetime", "start_range", "end_range"]
871
916
  return df
872
917
 
873
918
 
874
919
  class FieldCalibrationFile(DataTableFile):
875
-
876
920
  def __init__(self, path_to_file):
877
921
  super().__init__(path_to_file)
878
922
  self.original_df = self.create_dataframe()
@@ -882,5 +926,5 @@ class FieldCalibrationFile(DataTableFile):
882
926
  try:
883
927
  return pd.read_csv(self.path_to_file, skiprows=len(self.header))
884
928
  except IOError as error:
885
- logger.error(f'Could not read field calibration file: {error}.')
929
+ logger.error(f"Could not read field calibration file: {error}.")
886
930
  return pd.DataFrame()
@@ -81,8 +81,7 @@ class FileCollection(UserList):
81
81
  self.data.append(self.file_type(file))
82
82
  except TypeError:
83
83
  logger.error(
84
- f"Could not open file {
85
- file} with the type "
84
+ f"Could not open file {file} with the type "
86
85
  f"{self.file_type}."
87
86
  )
88
87
  continue
@@ -260,7 +259,9 @@ class FileCollection(UserList):
260
259
  else:
261
260
  differing_dicts = [
262
261
  current_dict
263
- for last_dict, current_dict in zip(last_unique, cast_sensors)
262
+ for last_dict, current_dict in zip(
263
+ last_unique, cast_sensors
264
+ )
264
265
  if current_dict != last_dict
265
266
  ]
266
267
  if differing_dicts: