pymast 1.0.0__tar.gz → 1.0.2__tar.gz
This diff shows the content of publicly available package versions as released to a supported public registry. It is provided for informational purposes only and reflects the changes between the two versions as they appear in that registry.

Files changed:
- {pymast-1.0.0 → pymast-1.0.2}/PKG-INFO +1 -1
- {pymast-1.0.0 → pymast-1.0.2}/pymast/__init__.py +1 -1
- {pymast-1.0.0 → pymast-1.0.2}/pymast/parsers.py +197 -186
- {pymast-1.0.0 → pymast-1.0.2}/pymast/radio_project.py +164 -66
- {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/PKG-INFO +1 -1
- {pymast-1.0.0 → pymast-1.0.2}/pyproject.toml +19 -19
- {pymast-1.0.0 → pymast-1.0.2}/LICENSE.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/README.md +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/fish_history.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/formatter.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/logger.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/naive_bayes.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/overlap_removal.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/predictors.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast/validation.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/SOURCES.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/dependency_links.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/requires.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/top_level.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/setup.cfg +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/setup.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_basic.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_csv_pit.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_formatter_tte.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_initial_state_release.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_hdf5_integration.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_loading.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_small.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_unit.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_parsers_basic.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_pit_multiple_parser.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_pit_parser.py +0 -0
- {pymast-1.0.0 → pymast-1.0.2}/tests/test_unified_pit.py +0 -0
{pymast-1.0.0 → pymast-1.0.2}/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pymast
-Version: 1.0.0
+Version: 1.0.2
 Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
 Author: Theodore Castro-Santos
 Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>
```

(The removed line is truncated to "Version: 1.0." in the source view; the old value is 1.0.0 per the diff title.)
{pymast-1.0.0 → pymast-1.0.2}/pymast/parsers.py

In this file and the ones that follow, most hunks remove and re-add blocks whose visible content is identical; that churn is consistent with whitespace or line-ending normalization. Each such block is shown once below, on the `+` side. Leading indentation was lost in the source view and has been reconstructed.

```diff
@@ -80,32 +80,32 @@ predictors.noise_ratio : Miscoded detection ratio calculation
 import pandas as pd
 import numpy as np
 import datetime
+import os
+import pymast.predictors as predictors
+import sys
+
+def _append_raw_data(db_dir, telem_dat, data_columns=None):
+    with pd.HDFStore(db_dir, mode='a') as store:
+        append_kwargs = {
+            'key': 'raw_data',
+            'value': telem_dat,
+            'format': 'table',
+            'index': False,
+            'min_itemsize': {
+                'freq_code': 20,
+                'rec_type': 20,
+                'rec_id': 20,
+            },
+            'append': True,
+            'chunksize': 1000000,
+        }
+        if data_columns is not None:
+            append_kwargs['data_columns'] = data_columns
+        store.append(**append_kwargs)
+
+def ares(file_name,
+         db_dir,
+         rec_id,
          study_tags,
          scan_time = 1,
          channels = 1,
```
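The new `_append_raw_data` helper centralizes what were previously repeated `HDFStore.append` calls across the parsers. A minimal sketch of how a parser might call it, assuming a DataFrame that already matches the `raw_data` schema (the frame and file path below are illustrative, not from the package):

```python
import pandas as pd
from pymast.parsers import _append_raw_data  # private helper added in this refactor

# Hypothetical frame matching the raw_data schema used by the parsers
telem_dat = pd.DataFrame({
    'freq_code': ['164.480 25'],
    'time_stamp': pd.to_datetime(['2024-05-01 12:00:00']),
    'epoch': pd.Series([1714564800], dtype='int64'),
    'power': pd.Series([-72.5], dtype='float32'),
    'noise_ratio': pd.Series([0.1], dtype='float32'),
    'scan_time': pd.Series([1.0], dtype='float32'),
    'channels': pd.Series([1], dtype='int32'),
    'rec_id': ['R01'],
    'rec_type': ['orion'],
})

# Appends to the /raw_data table, creating it on first write; min_itemsize
# reserves 20 bytes for the string columns so later appends don't overflow.
_append_raw_data('project.h5', telem_dat, data_columns=True)
```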
```diff
@@ -229,26 +229,26 @@ def ares(file_name,
                                               telem_dat.epoch.values,
                                               study_tags)
 
+    telem_dat = telem_dat.astype({'power':'float32',
+                                  'freq_code':'object',
+                                  'time_stamp':'datetime64[ns]',
+                                  'scan_time':'float32',
+                                  'channels':'int32',
+                                  'rec_type':'object',
+                                  'epoch':'int64',
+                                  'noise_ratio':'float32',
+                                  'rec_id':'object'})
+
+    _append_raw_data(db_dir, telem_dat)
+
+
+def orion_import(file_name,
+                 db_dir,
+                 rec_id,
+                 study_tags,
+                 scan_time = 1.,
+                 channels = 1,
+                 ant_to_rec_dict = None):
    """
    Import Sigma Eight Orion receiver data into MAST HDF5 database.
 
```
```diff
@@ -334,33 +334,33 @@ def orion_import(file_name,
     telem_dat['Freq'] = telem_dat['Freq'].apply(lambda x: f"{x:.3f}")
 
 
+    def _write_orion_subset(df, receiver_id, epoch_dtype):
+        df = df.copy()
+        df['rec_id'] = np.repeat(receiver_id, len(df))
+        df.drop(['Ant'], axis = 1, inplace = True)
+        df = df.astype({'power':'float32',
+                        'freq_code':'object',
+                        'time_stamp':'datetime64[ns]',
+                        'scan_time':'float32',
+                        'channels':'int32',
+                        'rec_type':'object',
+                        'epoch': epoch_dtype,
+                        'noise_ratio':'float32',
+                        'rec_id':'object'})
+
+        df = df[['power',
+                 'time_stamp',
+                 'epoch',
+                 'freq_code',
+                 'noise_ratio',
+                 'scan_time',
+                 'channels',
+                 'rec_id',
+                 'rec_type']]
+
+        _append_raw_data(db_dir, df, data_columns=True)
+
+    if len(telem_dat) > 0:
         # add file name to data
         #['fileName'] = np.repeat(file_name,len(telem_dat)) #Note I'm going back here to the actual file name without the path. Is that OK? I prefer it, but it's a potential source of confusion
 
```
```diff
@@ -389,18 +389,18 @@ def orion_import(file_name,
                                                   telem_dat.epoch.values,
                                                   study_tags)
 
+        # if there is no antenna to receiver dictionary
+        if ant_to_rec_dict == None:
+            _write_orion_subset(telem_dat, rec_id, 'int64')
+        # if there is an antenna to receiver dictionary
+        else:
+            for i in ant_to_rec_dict.keys():
+                # get site from dictionary
+                site = ant_to_rec_dict[i]
+
+                # get telemetryt data associated with this site
+                telem_dat_sub = telem_dat[telem_dat.Ant == 1]
+                _write_orion_subset(telem_dat_sub, site, 'float32')
     else:
         raise ValueError("Invalid import parameters, no data returned")
         sys.exit()
```
```diff
@@ -492,7 +492,7 @@ def vr2_import(file_name,db_dir,study_tags, rec_id):
                               'noise_ratio':'float32',
                               'rec_id':'object'})
 
+    _append_raw_data(db_dir, telem_dat)
 
 def srx1200(file_name,
             db_dir,
```
```diff
@@ -785,7 +785,7 @@ def srx1200(file_name,
                              'rec_id',
                              'rec_type']]
 
+        _append_raw_data(db_dir, telem_dat, data_columns=True)
 
     # if the data doesn't have a header
     else:
```
```diff
@@ -857,7 +857,7 @@ def srx1200(file_name,
                              'rec_id',
                              'rec_type']]
 
+        _append_raw_data(db_dir, telem_dat, data_columns=True)
 
 def srx800(file_name,
            db_dir,
```
```diff
@@ -1146,16 +1146,16 @@ def srx800(file_name,
             telem_dat_sub['epoch'] = np.round((telem_dat_sub.time_stamp - pd.Timestamp("1970-01-01")) / pd.Timedelta('1s'),6)
 
             # get setup number for every row
+            try:
+                telem_dat_sub['setup'] = get_setup(
+                    telem_dat_sub.epoch.values,
+                    setup_df.epoch.values
+                )
+            except (ValueError, TypeError, IndexError) as e:
+                raise ValueError(
+                    f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
+                    "Check setup table epoch alignment and input data integrity."
+                ) from e
 
             # get frequency from channel
             telem_dat_sub['Frequency'] = get_frequency(telem_dat_sub.setup.values,
```
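The `get_setup` call maps each detection epoch onto the receiver setup that was active at that time, and the new `except` clause re-raises with context via `from e`. The implementation of `get_setup` is not shown in this diff; a plausible sketch of that kind of epoch-interval mapping using `np.searchsorted` follows (the name and call signature come from the call site above, the body is an assumption):

```python
import numpy as np

def get_setup(detection_epochs, setup_epochs):
    """For each detection epoch, return the index of the most recent
    setup whose start epoch is <= the detection epoch (assumed behavior)."""
    setup_epochs = np.sort(np.asarray(setup_epochs))
    # side='right' so a detection exactly at a setup boundary maps to that setup
    idx = np.searchsorted(setup_epochs, np.asarray(detection_epochs), side='right') - 1
    if (idx < 0).any():
        raise ValueError("Detection precedes the first receiver setup")
    return idx
```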
```diff
@@ -1488,7 +1488,7 @@ def srx600(file_name,
                                   'noise_ratio':'float32',
                                   'rec_id':'object'})
 
+            _append_raw_data(db_dir, telem_dat_sub, data_columns=True)
     else:
         telem_dat = pd.read_fwf(file_name,
                                 colspecs = [(0,9),(9,19),(19,29),(29,36),(36,44),(44,52)],
```
```diff
@@ -1553,7 +1553,7 @@ def srx600(file_name,
                                   'noise_ratio':'float32',
                                   'rec_id':'object'})
 
+            _append_raw_data(db_dir, telem_dat_sub)
 
 
 
```
```diff
@@ -1644,13 +1644,13 @@ def PIT(file_name,
     # First, analyze the file to determine format
     def analyze_file_format(file_name):
         """Dynamically determine PIT file format and header structure"""
+        with open(file_name, 'r') as file:
+            lines = []
+            for _ in range(20):  # Read first 20 lines to analyze format
+                line = file.readline()
+                if not line:
+                    break
+                lines.append(line.rstrip('\n'))
 
         # Check if CSV format (look for commas in sample lines)
         csv_indicators = 0
```
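`analyze_file_format` samples the first 20 lines and then counts CSV indicators; the counting logic itself is outside this hunk. A minimal sketch of a comma-based heuristic consistent with the surrounding code (the function name and threshold are assumptions):

```python
def looks_like_csv(lines, threshold=0.5):
    """Guess CSV vs fixed-width: count sampled lines containing commas."""
    non_empty = [ln for ln in lines if ln.strip()]
    if not non_empty:
        return False
    csv_indicators = sum(1 for ln in non_empty if ',' in ln)
    return csv_indicators / len(non_empty) >= threshold
```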
```diff
@@ -1711,10 +1711,10 @@ def PIT(file_name,
             telem_dat = pd.read_csv(file_name, dtype=str)
             print(f"Auto-detected columns: {list(telem_dat.columns)}")
 
+        except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
+            raise ValueError(
+                f"CSV auto-detection failed for PIT file '{file_name}': {e}"
+            ) from e
 
         # Find timestamp column dynamically
         timestamp_col = find_column_by_patterns(telem_dat, ['timestamp', 'time stamp', 'date', 'scan date', 'detected'])
```
```diff
@@ -1732,8 +1732,8 @@ def PIT(file_name,
                 if not telem_dat["time_stamp"].isna().all():
                     print(f"Successfully parsed timestamps using format: {fmt or 'auto-detect'}")
                     break
+            except (ValueError, TypeError) as e:
+                continue
         else:
             raise ValueError("Could not find timestamp column")
 
```
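The surrounding for/else loop tries a series of candidate timestamp formats and keeps the first that parses, raising only if every candidate fails. A self-contained sketch of that pattern (the format list is illustrative; the package's actual candidates are not visible in this hunk):

```python
import pandas as pd

def parse_timestamps(series):
    """Try explicit formats first, then fall back to pandas auto-detection."""
    candidate_formats = ['%m/%d/%Y %H:%M:%S', '%Y-%m-%d %H:%M:%S', None]  # None = auto
    for fmt in candidate_formats:
        try:
            parsed = pd.to_datetime(series, format=fmt, errors='coerce')
            if not parsed.isna().all():
                print(f"Parsed timestamps using format: {fmt or 'auto-detect'}")
                return parsed
        except (ValueError, TypeError):
            continue
    raise ValueError("No candidate timestamp format matched")
```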
```diff
@@ -1773,14 +1773,14 @@ def PIT(file_name,
         # Fixed-Width Format Parsing (original logic)
 
         # Read header information for format detection
+        with open(file_name, 'r') as file:
+            header_lines = []
+            for _ in range(max(skiprows, 10)):
+                line = file.readline()
+                if not line:
+                    break
+                header_lines.append(line.rstrip('\n'))
+        header_text = " ".join(header_lines).lower()
 
         # Define colspecs for different fixed-width formats
         if 'latitude' in header_text or 'longitude' in header_text:
```
A substantive change: the antenna/reader column detection now matches a broader set of header names. (The `-` line is cut off in the source view; only its common prefix with the replacement is visible.)

```diff
@@ -1842,7 +1842,15 @@ def PIT(file_name,
         antenna_col = None
         for col in telem_dat.columns:
             col_lower = str(col).lower().strip()
-            if col_lower in (
+            if col_lower in (
+                'antenna id',
+                'antenna',
+                'ant',
+                'antennae',
+                'antennae id',
+                'reader id',
+                'readerid',
+            ):
                 antenna_col = col
                 break
 
```
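Both this loop and the earlier `find_column_by_patterns` call normalize header names before matching. A sketch of a pattern-based column finder consistent with how it is called above (only the name and call signature appear in the diff; the body is an assumption):

```python
def find_column_by_patterns(df, patterns):
    """Return the first column whose normalized name contains any pattern."""
    for col in df.columns:
        col_lower = str(col).lower().strip()
        for pattern in patterns:
            if pattern in col_lower:
                return col
    return None
```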
```diff
@@ -1854,12 +1862,12 @@ def PIT(file_name,
             telem_dat['antenna_num'] = pd.to_numeric(telem_dat['antenna_num'], errors='coerce')
 
             # Prepare mapping dict keys as strings and ints for robust lookup
+            ant_map = {}
+            for k, v in ant_to_rec_dict.items():
+                key_str = str(k).strip()
+                if key_str.isdigit():
+                    ant_map[int(key_str)] = v
+                ant_map[key_str] = v
 
             # Map by numeric antenna if possible, else by raw string
             telem_dat['rec_id'] = telem_dat['antenna_num'].map(ant_map)
```
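Registering each antenna key under both its `int` and `str` forms lets the same dictionary serve numeric lookups (from `pd.to_numeric`) and raw-string lookups. A quick illustration with a hypothetical mapping:

```python
ant_to_rec_dict = {'1': 'R01', '02': 'R02'}  # hypothetical user-supplied mapping

ant_map = {}
for k, v in ant_to_rec_dict.items():
    key_str = str(k).strip()
    if key_str.isdigit():
        ant_map[int(key_str)] = v   # matches pd.to_numeric output (1, 2)
    ant_map[key_str] = v            # matches the raw string ('1', '02')

assert ant_map[1] == 'R01' and ant_map['02'] == 'R02' and ant_map[2] == 'R02'
```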
Another substantive change: the bare `ValueError` gains an explanatory message. (As above, the `-` line is cut off in the source view.)

```diff
@@ -1877,7 +1885,10 @@ def PIT(file_name,
             # drop detections that do not map to a known receiver
             telem_dat = telem_dat.dropna(subset=['rec_id'])
         else:
-            raise ValueError(
+            raise ValueError(
+                'Multi-antenna fixed-width PIT file requires an antenna/reader column '
+                '(e.g., "Antenna ID" or "Reader ID"), but none was found'
+            )
 
         # Data cleaning - remove invalid entries
         print(f"\nCleaning data - original records: {len(telem_dat)}")
```
```diff
@@ -1899,49 +1910,49 @@ def PIT(file_name,
     telem_dat = telem_dat[telem_dat['freq_code'].str.len() > 3]
     telem_dat = telem_dat[~telem_dat['freq_code'].isna()]
 
+    # Finalize fields and append to HDF5 /raw_data
+    if len(telem_dat) == 0:
+        print('No valid PIT rows after cleaning; nothing to append')
+        return
+
+    if 'power' not in telem_dat.columns:
+        telem_dat['power'] = np.nan
+
+    # compute epoch as int64 seconds and other derived fields
+    telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
     telem_dat['channels'] = np.repeat(channels, len(telem_dat))
     telem_dat['scan_time'] = np.repeat(scan_time, len(telem_dat))
     telem_dat['rec_type'] = np.repeat(rec_type, len(telem_dat))
 
     # compute noise ratio if study_tags provided
+    try:
+        telem_dat['noise_ratio'] = predictors.noise_ratio(
+            5.0,
+            telem_dat.freq_code.values,
+            telem_dat.epoch.values,
+            study_tags
+        )
+    except (ValueError, TypeError, KeyError, IndexError) as e:
+        raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
+
+    # ensure dtypes
+    telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
+                                  'epoch': 'int64',
+                                  'freq_code': 'object',
+                                  'power': 'float32',
+                                  'rec_id': 'object',
+                                  'rec_type': 'object',
+                                  'scan_time': 'float32',
+                                  'channels': 'int32',
+                                  'noise_ratio': 'float32'})
 
     # reorder columns to match expected schema
     cols = ['time_stamp', 'epoch', 'freq_code', 'power', 'noise_ratio', 'scan_time', 'channels', 'rec_id', 'rec_type']
     cols_existing = [c for c in cols if c in telem_dat.columns]
 
+    _append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
+    with pd.HDFStore(db_dir, mode='a') as store:
+        print('Store keys after append:', store.keys())
 
 
 def PIT_Multiple(
```
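The epoch computation above converts datetimes to integer seconds by flooring the nanosecond representation. A standalone illustration:

```python
import pandas as pd

ts = pd.Series(pd.to_datetime(['2024-05-01 12:00:00.750']))
# .astype('int64') yields nanoseconds since 1970-01-01; // 10**9 floors to seconds
epoch = (ts.astype('int64') // 10**9).astype('int64')
print(epoch.iloc[0])  # 1714564800 — sub-second precision is truncated, not rounded
```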
```diff
@@ -2019,29 +2030,29 @@ def PIT_Multiple(
         "LocationDetail", "Type", "Recapture", "Sex", "GeneticSampleID", "Comments"
     ]
 
+    # Read the CSV into a DataFrame, skipping rows if needed
+    telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
+
+    mode_str = "multi-antenna"
+    if ant_to_rec_dict is None:
+        raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
+
+    # Convert "TimeStamp" to datetime with explicit format
+    telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
+
+    # Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
+    telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
+    telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
+
+    telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
+    antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
+    antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
+    rec_id = antenna_num.map(ant_to_rec_dict)
+    if rec_id.isna().any():
+        rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
+    telem_dat["rec_id"] = rec_id
+    telem_dat = telem_dat.dropna(subset=["rec_id"])
+
     # if after_cleanup == 0:
     #     raise ValueError(f"No valid records found in {file_name}")
 
```
```diff
@@ -2101,4 +2112,4 @@ def PIT_Multiple(
```

(Trailing blank lines only; whitespace/line-ending normalization.)
{pymast-1.0.0 → pymast-1.0.2}/pymast/radio_project.py

```diff
@@ -95,21 +95,21 @@ import pymast.predictors as predictors
 import matplotlib.pyplot as plt
 from matplotlib import rcParams
 from scipy import interpolate
+try:
+    from tqdm import tqdm
+except ImportError:
+    def tqdm(iterable, **kwargs):
+        return iterable
 import shutil
 import warnings
 import dask.dataframe as dd
 import dask.array as da
+try:
+    from dask_ml.cluster import KMeans
+    _KMEANS_IMPL = 'dask'
+except ImportError:
+    from sklearn.cluster import KMeans
+    _KMEANS_IMPL = 'sklearn'
 
 # Initialize logger
 logger = logging.getLogger('pymast.radio_project')
```
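Both import blocks use the same optional-dependency pattern: prefer the heavier backend, fall back to a lighter stand-in, and record which one won so downstream code can branch on it. A generic sketch of the pattern outside this module:

```python
# Prefer an optional backend, fall back to a stub, and record the choice.
try:
    from tqdm import tqdm
    _HAS_TQDM = True
except ImportError:
    def tqdm(iterable, **kwargs):
        # No-op progress bar: accept and ignore tqdm's keyword arguments
        return iterable
    _HAS_TQDM = False

for _ in tqdm(range(3), desc="works either way"):
    pass
```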
```diff
@@ -415,12 +415,12 @@ class radio_project():
         if self.non_interactive:
             logger.debug(f"Non-interactive mode: auto-answering '{prompt_text}' with '{default}'")
             return default
+        try:
+            return input(prompt_text)
+        except (EOFError, OSError) as exc:
+            raise RuntimeError(
+                "Input prompt failed. Set project.non_interactive = True to use defaults."
+            ) from exc
 
     def telem_data_import(self,
                           rec_id,
```
A substantive addition: per-file detection counting for import statistics.

```diff
@@ -496,9 +496,19 @@ class radio_project():
 
         logger.info(f" Found {len(tFiles)} file(s) to import")
 
+        # Track detections per file for statistics
+        detections_per_file = []
+
         # for every file call the correct text parser and import
         for i, f in enumerate(tqdm(tFiles, desc=f"Importing {rec_id}", unit="file"), 1):
             logger.debug(f" Processing file {i}/{len(tFiles)}: {f}")
+
+            # Count detections before import
+            try:
+                pre_count = len(pd.read_hdf(self.db, key='raw_data', where=f'rec_id = "{rec_id}"'))
+            except (KeyError, FileNotFoundError):
+                pre_count = 0
+
             # get the complete file directory
             f_dir = os.path.join(file_dir,f)
 
```
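The per-file counts rely on PyTables query support in `pd.read_hdf`: the `where` clause filters rows in the store, which only works because `_append_raw_data` writes with `format='table'` (and `rec_id` must be a data column to be queryable). A hedged sketch with a hypothetical store path:

```python
import pandas as pd

# Hypothetical project database written with format='table', data_columns=True
rec_id = 'R01'
before = len(pd.read_hdf('project.h5', key='raw_data', where=f'rec_id = "{rec_id}"'))
# ... run one parser, e.g. orion_import(...), for a single file ...
after = len(pd.read_hdf('project.h5', key='raw_data', where=f'rec_id = "{rec_id}"'))
print(f'{after - before} detections imported for {rec_id}')
```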
The largest addition in this release: a post-import statistics report per receiver.

```diff
@@ -533,8 +543,91 @@ class radio_project():
             else:
                 logger.error(f"No import routine for receiver type: {rec_type}")
                 raise ValueError(f"No import routine available for receiver type: {rec_type}")
+
+            # Count detections after import
+            try:
+                post_count = len(pd.read_hdf(self.db, key='raw_data', where=f'rec_id = "{rec_id}"'))
+                detections_this_file = post_count - pre_count
+                detections_per_file.append(detections_this_file)
+            except (KeyError, FileNotFoundError):
+                detections_per_file.append(0)
 
         logger.info(f"✓ Import complete for receiver {rec_id}: {len(tFiles)} file(s) processed")
+
+        # Calculate and display import statistics
+        try:
+            raw_data = pd.read_hdf(self.db, key='raw_data', where=f'rec_id = "{rec_id}"')
+
+            # Total Detection Count
+            total_detections = len(raw_data)
+            logger.info(f"\n{'='*60}")
+            logger.info(f"IMPORT STATISTICS FOR {rec_id}")
+            logger.info(f"{'='*60}")
+            logger.info(f"Total Detection Count: {total_detections:,}")
+
+            if total_detections > 0:
+                # Detection count summary statistics
+                logger.info(f"\nDetection Summary Statistics:")
+                logger.info(f" Mean detections per file: {total_detections / len(tFiles):.1f}")
+                logger.info(f" Files processed: {len(tFiles)}")
+
+                # 5-number summary for detections per file
+                if len(detections_per_file) > 0:
+                    det_array = np.array(detections_per_file)
+                    logger.info(f"\nDetections Per File (5-number summary):")
+                    logger.info(f" Min: {np.min(det_array):,.0f}")
+                    logger.info(f" Q1: {np.percentile(det_array, 25):,.0f}")
+                    logger.info(f" Median: {np.median(det_array):,.0f}")
+                    logger.info(f" Q3: {np.percentile(det_array, 75):,.0f}")
+                    logger.info(f" Max: {np.max(det_array):,.0f}")
+
+                # Unique Tag Count
+                unique_tags = raw_data['freq_code'].nunique()
+                logger.info(f"\nUnique Tag Count: {unique_tags}")
+
+                # Duplicate Tag Count and IDs
+                # Check for detections at the exact same timestamp (true duplicates)
+                if 'time_stamp' in raw_data.columns:
+                    dup_mask = raw_data.duplicated(subset=['freq_code', 'time_stamp'], keep=False)
+                    duplicate_count = dup_mask.sum()
+
+                    if duplicate_count > 0:
+                        duplicate_tags = raw_data.loc[dup_mask, 'freq_code'].unique()
+                        logger.info(f"\nDuplicate Detection Count (same timestamp): {duplicate_count:,}")
+                        logger.info(f"Duplicate Tag IDs ({len(duplicate_tags)} tags):")
+                        for tag in sorted(duplicate_tags)[:10]:  # Show first 10
+                            tag_dups = dup_mask & (raw_data['freq_code'] == tag)
+                            logger.info(f" {tag}: {tag_dups.sum()} duplicate(s)")
+                        if len(duplicate_tags) > 10:
+                            logger.info(f" ... and {len(duplicate_tags) - 10} more")
+                    else:
+                        logger.info(f"\nDuplicate Detection Count: 0 (no exact timestamp duplicates)")
+
+                # Time Coverage
+                if 'time_stamp' in raw_data.columns:
+                    raw_data['time_stamp'] = pd.to_datetime(raw_data['time_stamp'])
+                    start_time = raw_data['time_stamp'].min()
+                    end_time = raw_data['time_stamp'].max()
+                    duration = end_time - start_time
+
+                    logger.info(f"\nTime Coverage:")
+                    logger.info(f" Start: {start_time}")
+                    logger.info(f" End: {end_time}")
+                    logger.info(f" Duration: {duration.days} days, {duration.seconds // 3600} hours")
+
+                    # Detection rate
+                    if duration.total_seconds() > 0:
+                        det_per_hour = total_detections / (duration.total_seconds() / 3600)
+                        logger.info(f" Detection rate: {det_per_hour:.1f} detections/hour")
+
+                logger.info(f"{'='*60}\n")
+            else:
+                logger.warning(f"No detections found for receiver {rec_id}")
+
+        except KeyError:
+            logger.warning(f"Could not retrieve statistics - raw_data table not found in database")
+        except Exception as e:
+            logger.warning(f"Error calculating import statistics: {e}")
 
     def get_fish(self, rec_id, train = True, reclass_iter = None):
         logger.info(f"Getting fish for receiver {rec_id}")
```
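The 5-number summary gives a quick skew check on per-file yields; a long right tail usually means one download dominated the import. The same computation in isolation, with illustrative counts:

```python
import numpy as np

detections_per_file = [120, 98, 3, 5400, 87]  # illustrative counts
det_array = np.array(detections_per_file)
summary = {
    'min': np.min(det_array),
    'q1': np.percentile(det_array, 25),
    'median': np.median(det_array),
    'q3': np.percentile(det_array, 75),
    'max': np.max(det_array),
}
print(summary)  # the max dwarfing the median flags an outlier file
```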
```diff
@@ -1576,16 +1669,16 @@ class radio_project():
                 node_path = node._v_pathname
                 print(f" Copying {node_path}...")
 
+                try:
+                    # Use recursive=True to copy entire subtree (Groups, Tables, Arrays, etc.)
+                    h5in.copy_node(
+                        where=node_path,
+                        newparent=h5out.root,
+                        recursive=True,
+                        filters=filters
+                    )
+                except (tables.NodeError, tables.HDF5ExtError, OSError, ValueError) as e:
+                    raise RuntimeError(f"Failed to copy HDF5 node {node_path}: {e}") from e
 
         # Get new size
         new_size = os.path.getsize(output_path)
```
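This copy loop is the core of an HDF5 "repack": copying every top-level node into a fresh file reclaims the space that plain row deletions leave behind. A minimal standalone sketch with PyTables (file paths and compression settings are illustrative):

```python
import tables

filters = tables.Filters(complevel=9, complib='blosc')  # illustrative settings
with tables.open_file('project.h5', mode='r') as h5in, \
     tables.open_file('project_repacked.h5', mode='w') as h5out:
    for node in h5in.root._f_iter_nodes():
        # recursive=True brings each group's tables and arrays along
        h5in.copy_node(node._v_pathname, newparent=h5out.root,
                       recursive=True, filters=filters)
```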
```diff
@@ -1603,26 +1696,29 @@ class radio_project():
     def make_recaptures_table(self, export=True, pit_study=False):
         '''Creates a recaptures key in the HDF5 file, iterating over receivers to manage memory.'''
         logger.info("Creating recaptures table")
+        logger.info(f" PIT study mode: {pit_study}")
         logger.info(f" Processing {len(self.receivers)} receiver(s)")
         # prepare a heartbeat log so long runs can be monitored (one-line per receiver)
         heartbeat_dir = os.path.join(self.project_dir, 'build')
+        try:
+            os.makedirs(heartbeat_dir, exist_ok=True)
+        except OSError as e:
+            raise RuntimeError(
+                f"Failed to create heartbeat directory '{heartbeat_dir}': {e}"
+            ) from e
         heartbeat_path = os.path.join(heartbeat_dir, 'recaptures_heartbeat.log')
         print(f"Starting recaptures: {len(self.receivers)} receivers. Heartbeat -> {heartbeat_path}")
+        try:
+            with open(heartbeat_path, 'a') as _hb:
+                _hb.write(f"START {datetime.datetime.now().isoformat()} receivers={len(self.receivers)}\n")
+        except OSError as e:
+            raise RuntimeError(
+                f"Failed to write heartbeat start to '{heartbeat_path}': {e}"
+            ) from e
+
-        if pit_study
+        if not pit_study:
+            # RADIO STUDY PATH
+            logger.info(" Using RADIO study processing path")
             # Convert release dates to datetime if not already done
             self.tags['rel_date'] = pd.to_datetime(self.tags['rel_date'])
             tags_copy = self.tags.copy()
```
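This is the substantive fix in the 1.0.0 → 1.0.2 range: the old `if pit_study` line lacks its colon as rendered here (the colon may simply be lost to extraction), and per the new branch comments the radio-study path must run when `pit_study` is False, so the condition is also inverted. The corrected control flow, reduced to a skeleton with hypothetical stand-ins for the real loops:

```python
def make_recaptures_table(export=True, pit_study=False):
    # Skeleton of the corrected branching, not the full method body
    if not pit_study:
        # RADIO STUDY PATH: join detections against radio release dates
        process_radio_receivers()  # hypothetical stand-in
    else:
        # PIT STUDY PATH: iterate receivers over PIT detection tables
        process_pit_receivers()   # hypothetical stand-in
```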
```diff
@@ -1787,15 +1883,17 @@ class radio_project():
                 logger.info(f" ✓ Recaps for {rec} compiled and written to HDF5")
                 print(f"[recaptures] ✓ {rec} written to database", flush=True)
                 # append heartbeat line
+                try:
+                    with open(heartbeat_path, 'a') as _hb:
+                        _hb.write(f"{datetime.datetime.now().isoformat()} rec={rec} rows={len(rec_dat)}\n")
+                except OSError as e:
+                    raise RuntimeError(
+                        f"Failed to write heartbeat for receiver {rec} to '{heartbeat_path}': {e}"
+                    ) from e
 
         else:
+            # PIT STUDY PATH
+            logger.info(" Using PIT study processing path")
             # Loop over each receiver in self.receivers
             for rec in tqdm(self.receivers.index, desc="Processing PIT receivers", unit="receiver"):
                 logger.info(f" Processing {rec} (PIT study)...")
```
```diff
@@ -1917,13 +2015,13 @@ class radio_project():
 
                 logger.info(f" ✓ PIT recaps for {rec} compiled and written to HDF5")
                 print(f"[recaptures] ✓ {rec} PIT data written to database", flush=True)
+                try:
+                    with open(heartbeat_path, 'a') as _hb:
+                        _hb.write(f"{datetime.datetime.now().isoformat()} pit_rec={rec} rows={len(pit_data)}\n")
+                except OSError as e:
+                    raise RuntimeError(
+                        f"Failed to write PIT heartbeat for receiver {rec} to '{heartbeat_path}': {e}"
+                    ) from e
 
 
         if export:
```
```diff
@@ -1933,16 +2031,16 @@ class radio_project():
             rec_data.to_csv(os.path.join(self.output_dir,'recaptures.csv'), index=False)
             logger.info(f" ✓ Export complete: {os.path.join(self.output_dir,'recaptures.csv')}")
             print(f"[recaptures] ✓ Export complete: {os.path.join(self.output_dir,'recaptures.csv')}", flush=True)
+            try:
+                with open(heartbeat_path, 'a') as _hb:
+                    _hb.write(
+                        f"DONE {datetime.datetime.now().isoformat()} export="
+                        f"{os.path.join(self.output_dir, 'recaptures.csv')}\n"
+                    )
+            except OSError as e:
+                raise RuntimeError(
+                    f"Failed to write heartbeat completion to '{heartbeat_path}': {e}"
+                ) from e
 
 
     def undo_recaptures(self):
```
{pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/PKG-INFO

```diff
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pymast
-Version: 1.0.0
+Version: 1.0.2
 Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
 Author: Theodore Castro-Santos
 Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>
```
{pymast-1.0.0 → pymast-1.0.2}/pyproject.toml

```diff
@@ -1,23 +1,23 @@
+[build-system]
+requires = ["setuptools>=61", "wheel"]
+build-backend = "setuptools.build_meta"
 
 [project]
 name = "pymast"
-version = "1.0.0"
+version = "1.0.2"
 description = "Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data"
 readme = "README.md"
 authors = [
     {name = "Kevin P. Nebiolo", email = "kevin.nebiolo@kleinschmidtgroup.com"},
     {name = "Theodore Castro-Santos"}
 ]
+license = "MIT"
+classifiers = [
+    "Development Status :: 4 - Beta",
+    "Intended Audience :: Science/Research",
+    "Programming Language :: Python :: 3",
+    "Programming Language :: Python :: 3.8",
+    "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
     "Topic :: Scientific/Engineering :: Bio-Informatics",
```

```diff
@@ -30,14 +30,14 @@ dependencies = [
     "matplotlib>=3.4.0",
     "statsmodels>=0.12.0",
     "networkx>=2.5",
+    "scipy>=1.7.1",
+    "scikit-learn>=0.24.0",
+    "h5py>=3.0.0",
+    "dask>=2021.3.0",
+    "dask-ml>=1.9.0",
+    "distributed>=2021.3.0",
+    "numba>=0.53.0",
+    "tables>=3.8.0",
     "intervaltree>=3.1.0",
 ]
 
```
All remaining files listed above with +0 -0 are unchanged between 1.0.0 and 1.0.2.