pymast 1.0.0__tar.gz → 1.0.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {pymast-1.0.0 → pymast-1.0.1}/PKG-INFO +1 -1
- {pymast-1.0.0 → pymast-1.0.1}/pymast/parsers.py +202 -191
- {pymast-1.0.0 → pymast-1.0.1}/pymast.egg-info/PKG-INFO +1 -1
- {pymast-1.0.0 → pymast-1.0.1}/pyproject.toml +1 -1
- {pymast-1.0.0 → pymast-1.0.1}/LICENSE.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/README.md +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/__init__.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/fish_history.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/formatter.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/logger.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/naive_bayes.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/overlap_removal.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/predictors.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/radio_project.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast/validation.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast.egg-info/SOURCES.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast.egg-info/dependency_links.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast.egg-info/requires.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/pymast.egg-info/top_level.txt +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/setup.cfg +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/setup.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_basic.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_csv_pit.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_formatter_tte.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_initial_state_release.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_overlap_hdf5_integration.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_overlap_loading.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_overlap_small.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_overlap_unit.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_parsers_basic.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_pit_multiple_parser.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_pit_parser.py +0 -0
- {pymast-1.0.0 → pymast-1.0.1}/tests/test_unified_pit.py +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pymast
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
|
|
5
5
|
Author: Theodore Castro-Santos
|
|
6
6
|
Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>
|
|
@@ -80,32 +80,32 @@ predictors.noise_ratio : Miscoded detection ratio calculation
|
|
|
80
80
|
import pandas as pd
|
|
81
81
|
import numpy as np
|
|
82
82
|
import datetime
|
|
83
|
-
import os
|
|
84
|
-
import pymast.predictors as predictors
|
|
85
|
-
import sys
|
|
86
|
-
|
|
87
|
-
def _append_raw_data(db_dir, telem_dat, data_columns=None):
|
|
88
|
-
with pd.HDFStore(db_dir, mode='a') as store:
|
|
89
|
-
append_kwargs = {
|
|
90
|
-
'key': 'raw_data',
|
|
91
|
-
'value': telem_dat,
|
|
92
|
-
'format': 'table',
|
|
93
|
-
'index': False,
|
|
94
|
-
'min_itemsize': {
|
|
95
|
-
'freq_code': 20,
|
|
96
|
-
'rec_type': 20,
|
|
97
|
-
'rec_id': 20,
|
|
98
|
-
},
|
|
99
|
-
'append': True,
|
|
100
|
-
'chunksize': 1000000,
|
|
101
|
-
}
|
|
102
|
-
if data_columns is not None:
|
|
103
|
-
append_kwargs['data_columns'] = data_columns
|
|
104
|
-
store.append(**append_kwargs)
|
|
105
|
-
|
|
106
|
-
def ares(file_name,
|
|
107
|
-
db_dir,
|
|
108
|
-
rec_id,
|
|
83
|
+
import os
|
|
84
|
+
import pymast.predictors as predictors
|
|
85
|
+
import sys
|
|
86
|
+
|
|
87
|
+
def _append_raw_data(db_dir, telem_dat, data_columns=None):
|
|
88
|
+
with pd.HDFStore(db_dir, mode='a') as store:
|
|
89
|
+
append_kwargs = {
|
|
90
|
+
'key': 'raw_data',
|
|
91
|
+
'value': telem_dat,
|
|
92
|
+
'format': 'table',
|
|
93
|
+
'index': False,
|
|
94
|
+
'min_itemsize': {
|
|
95
|
+
'freq_code': 20,
|
|
96
|
+
'rec_type': 20,
|
|
97
|
+
'rec_id': 20,
|
|
98
|
+
},
|
|
99
|
+
'append': True,
|
|
100
|
+
'chunksize': 1000000,
|
|
101
|
+
}
|
|
102
|
+
if data_columns is not None:
|
|
103
|
+
append_kwargs['data_columns'] = data_columns
|
|
104
|
+
store.append(**append_kwargs)
|
|
105
|
+
|
|
106
|
+
def ares(file_name,
|
|
107
|
+
db_dir,
|
|
108
|
+
rec_id,
|
|
109
109
|
study_tags,
|
|
110
110
|
scan_time = 1,
|
|
111
111
|
channels = 1,
|
|
@@ -229,26 +229,26 @@ def ares(file_name,
|
|
|
229
229
|
telem_dat.epoch.values,
|
|
230
230
|
study_tags)
|
|
231
231
|
|
|
232
|
-
telem_dat = telem_dat.astype({'power':'float32',
|
|
233
|
-
'freq_code':'object',
|
|
234
|
-
'time_stamp':'datetime64[ns]',
|
|
235
|
-
'scan_time':'float32',
|
|
236
|
-
'channels':'int32',
|
|
237
|
-
'rec_type':'object',
|
|
238
|
-
'epoch':'int64',
|
|
239
|
-
'noise_ratio':'float32',
|
|
240
|
-
'rec_id':'object'})
|
|
241
|
-
|
|
242
|
-
_append_raw_data(db_dir, telem_dat)
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
def orion_import(file_name,
|
|
246
|
-
db_dir,
|
|
247
|
-
rec_id,
|
|
248
|
-
study_tags,
|
|
249
|
-
scan_time = 1.,
|
|
250
|
-
channels = 1,
|
|
251
|
-
ant_to_rec_dict = None):
|
|
232
|
+
telem_dat = telem_dat.astype({'power':'float32',
|
|
233
|
+
'freq_code':'object',
|
|
234
|
+
'time_stamp':'datetime64[ns]',
|
|
235
|
+
'scan_time':'float32',
|
|
236
|
+
'channels':'int32',
|
|
237
|
+
'rec_type':'object',
|
|
238
|
+
'epoch':'int64',
|
|
239
|
+
'noise_ratio':'float32',
|
|
240
|
+
'rec_id':'object'})
|
|
241
|
+
|
|
242
|
+
_append_raw_data(db_dir, telem_dat)
|
|
243
|
+
|
|
244
|
+
|
|
245
|
+
def orion_import(file_name,
|
|
246
|
+
db_dir,
|
|
247
|
+
rec_id,
|
|
248
|
+
study_tags,
|
|
249
|
+
scan_time = 1.,
|
|
250
|
+
channels = 1,
|
|
251
|
+
ant_to_rec_dict = None):
|
|
252
252
|
"""
|
|
253
253
|
Import Sigma Eight Orion receiver data into MAST HDF5 database.
|
|
254
254
|
|
|
@@ -334,33 +334,33 @@ def orion_import(file_name,
|
|
|
334
334
|
telem_dat['Freq'] = telem_dat['Freq'].apply(lambda x: f"{x:.3f}")
|
|
335
335
|
|
|
336
336
|
|
|
337
|
-
def _write_orion_subset(df, receiver_id, epoch_dtype):
|
|
338
|
-
df = df.copy()
|
|
339
|
-
df['rec_id'] = np.repeat(receiver_id, len(df))
|
|
340
|
-
df.drop(['Ant'], axis = 1, inplace = True)
|
|
341
|
-
df = df.astype({'power':'float32',
|
|
342
|
-
'freq_code':'object',
|
|
343
|
-
'time_stamp':'datetime64[ns]',
|
|
344
|
-
'scan_time':'float32',
|
|
345
|
-
'channels':'int32',
|
|
346
|
-
'rec_type':'object',
|
|
347
|
-
'epoch': epoch_dtype,
|
|
348
|
-
'noise_ratio':'float32',
|
|
349
|
-
'rec_id':'object'})
|
|
350
|
-
|
|
351
|
-
df = df[['power',
|
|
352
|
-
'time_stamp',
|
|
353
|
-
'epoch',
|
|
354
|
-
'freq_code',
|
|
355
|
-
'noise_ratio',
|
|
356
|
-
'scan_time',
|
|
357
|
-
'channels',
|
|
358
|
-
'rec_id',
|
|
359
|
-
'rec_type']]
|
|
360
|
-
|
|
361
|
-
_append_raw_data(db_dir, df, data_columns=True)
|
|
362
|
-
|
|
363
|
-
if len(telem_dat) > 0:
|
|
337
|
+
def _write_orion_subset(df, receiver_id, epoch_dtype):
|
|
338
|
+
df = df.copy()
|
|
339
|
+
df['rec_id'] = np.repeat(receiver_id, len(df))
|
|
340
|
+
df.drop(['Ant'], axis = 1, inplace = True)
|
|
341
|
+
df = df.astype({'power':'float32',
|
|
342
|
+
'freq_code':'object',
|
|
343
|
+
'time_stamp':'datetime64[ns]',
|
|
344
|
+
'scan_time':'float32',
|
|
345
|
+
'channels':'int32',
|
|
346
|
+
'rec_type':'object',
|
|
347
|
+
'epoch': epoch_dtype,
|
|
348
|
+
'noise_ratio':'float32',
|
|
349
|
+
'rec_id':'object'})
|
|
350
|
+
|
|
351
|
+
df = df[['power',
|
|
352
|
+
'time_stamp',
|
|
353
|
+
'epoch',
|
|
354
|
+
'freq_code',
|
|
355
|
+
'noise_ratio',
|
|
356
|
+
'scan_time',
|
|
357
|
+
'channels',
|
|
358
|
+
'rec_id',
|
|
359
|
+
'rec_type']]
|
|
360
|
+
|
|
361
|
+
_append_raw_data(db_dir, df, data_columns=True)
|
|
362
|
+
|
|
363
|
+
if len(telem_dat) > 0:
|
|
364
364
|
# add file name to data
|
|
365
365
|
#['fileName'] = np.repeat(file_name,len(telem_dat)) #Note I'm going back here to the actual file name without the path. Is that OK? I prefer it, but it's a potential source of confusion
|
|
366
366
|
|
|
@@ -389,18 +389,18 @@ def orion_import(file_name,
|
|
|
389
389
|
telem_dat.epoch.values,
|
|
390
390
|
study_tags)
|
|
391
391
|
|
|
392
|
-
# if there is no antenna to receiver dictionary
|
|
393
|
-
if ant_to_rec_dict == None:
|
|
394
|
-
_write_orion_subset(telem_dat, rec_id, 'int64')
|
|
395
|
-
# if there is an antenna to receiver dictionary
|
|
396
|
-
else:
|
|
397
|
-
for i in ant_to_rec_dict.keys():
|
|
398
|
-
# get site from dictionary
|
|
399
|
-
site = ant_to_rec_dict[i]
|
|
400
|
-
|
|
401
|
-
# get telemetryt data associated with this site
|
|
402
|
-
telem_dat_sub = telem_dat[telem_dat.Ant == 1]
|
|
403
|
-
_write_orion_subset(telem_dat_sub, site, 'float32')
|
|
392
|
+
# if there is no antenna to receiver dictionary
|
|
393
|
+
if ant_to_rec_dict == None:
|
|
394
|
+
_write_orion_subset(telem_dat, rec_id, 'int64')
|
|
395
|
+
# if there is an antenna to receiver dictionary
|
|
396
|
+
else:
|
|
397
|
+
for i in ant_to_rec_dict.keys():
|
|
398
|
+
# get site from dictionary
|
|
399
|
+
site = ant_to_rec_dict[i]
|
|
400
|
+
|
|
401
|
+
# get telemetryt data associated with this site
|
|
402
|
+
telem_dat_sub = telem_dat[telem_dat.Ant == 1]
|
|
403
|
+
_write_orion_subset(telem_dat_sub, site, 'float32')
|
|
404
404
|
else:
|
|
405
405
|
raise ValueError("Invalid import parameters, no data returned")
|
|
406
406
|
sys.exit()
|
|
@@ -492,7 +492,7 @@ def vr2_import(file_name,db_dir,study_tags, rec_id):
|
|
|
492
492
|
'noise_ratio':'float32',
|
|
493
493
|
'rec_id':'object'})
|
|
494
494
|
|
|
495
|
-
_append_raw_data(db_dir, telem_dat)
|
|
495
|
+
_append_raw_data(db_dir, telem_dat)
|
|
496
496
|
|
|
497
497
|
def srx1200(file_name,
|
|
498
498
|
db_dir,
|
|
@@ -785,7 +785,7 @@ def srx1200(file_name,
|
|
|
785
785
|
'rec_id',
|
|
786
786
|
'rec_type']]
|
|
787
787
|
|
|
788
|
-
_append_raw_data(db_dir, telem_dat, data_columns=True)
|
|
788
|
+
_append_raw_data(db_dir, telem_dat, data_columns=True)
|
|
789
789
|
|
|
790
790
|
# if the data doesn't have a header
|
|
791
791
|
else:
|
|
@@ -857,7 +857,7 @@ def srx1200(file_name,
|
|
|
857
857
|
'rec_id',
|
|
858
858
|
'rec_type']]
|
|
859
859
|
|
|
860
|
-
_append_raw_data(db_dir, telem_dat, data_columns=True)
|
|
860
|
+
_append_raw_data(db_dir, telem_dat, data_columns=True)
|
|
861
861
|
|
|
862
862
|
def srx800(file_name,
|
|
863
863
|
db_dir,
|
|
@@ -1146,16 +1146,16 @@ def srx800(file_name,
|
|
|
1146
1146
|
telem_dat_sub['epoch'] = np.round((telem_dat_sub.time_stamp - pd.Timestamp("1970-01-01")) / pd.Timedelta('1s'),6)
|
|
1147
1147
|
|
|
1148
1148
|
# get setup number for every row
|
|
1149
|
-
try:
|
|
1150
|
-
telem_dat_sub['setup'] = get_setup(
|
|
1151
|
-
telem_dat_sub.epoch.values,
|
|
1152
|
-
setup_df.epoch.values
|
|
1153
|
-
)
|
|
1154
|
-
except (ValueError, TypeError, IndexError) as e:
|
|
1155
|
-
raise ValueError(
|
|
1156
|
-
f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
|
|
1157
|
-
"Check setup table epoch alignment and input data integrity."
|
|
1158
|
-
) from e
|
|
1149
|
+
try:
|
|
1150
|
+
telem_dat_sub['setup'] = get_setup(
|
|
1151
|
+
telem_dat_sub.epoch.values,
|
|
1152
|
+
setup_df.epoch.values
|
|
1153
|
+
)
|
|
1154
|
+
except (ValueError, TypeError, IndexError) as e:
|
|
1155
|
+
raise ValueError(
|
|
1156
|
+
f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
|
|
1157
|
+
"Check setup table epoch alignment and input data integrity."
|
|
1158
|
+
) from e
|
|
1159
1159
|
|
|
1160
1160
|
# get frequency from channel
|
|
1161
1161
|
telem_dat_sub['Frequency'] = get_frequency(telem_dat_sub.setup.values,
|
|
@@ -1488,7 +1488,7 @@ def srx600(file_name,
|
|
|
1488
1488
|
'noise_ratio':'float32',
|
|
1489
1489
|
'rec_id':'object'})
|
|
1490
1490
|
|
|
1491
|
-
_append_raw_data(db_dir, telem_dat_sub, data_columns=True)
|
|
1491
|
+
_append_raw_data(db_dir, telem_dat_sub, data_columns=True)
|
|
1492
1492
|
else:
|
|
1493
1493
|
telem_dat = pd.read_fwf(file_name,
|
|
1494
1494
|
colspecs = [(0,9),(9,19),(19,29),(29,36),(36,44),(44,52)],
|
|
@@ -1553,7 +1553,7 @@ def srx600(file_name,
|
|
|
1553
1553
|
'noise_ratio':'float32',
|
|
1554
1554
|
'rec_id':'object'})
|
|
1555
1555
|
|
|
1556
|
-
_append_raw_data(db_dir, telem_dat_sub)
|
|
1556
|
+
_append_raw_data(db_dir, telem_dat_sub)
|
|
1557
1557
|
|
|
1558
1558
|
|
|
1559
1559
|
|
|
@@ -1644,13 +1644,13 @@ def PIT(file_name,
|
|
|
1644
1644
|
# First, analyze the file to determine format
|
|
1645
1645
|
def analyze_file_format(file_name):
|
|
1646
1646
|
"""Dynamically determine PIT file format and header structure"""
|
|
1647
|
-
with open(file_name, 'r') as file:
|
|
1648
|
-
lines = []
|
|
1649
|
-
for _ in range(20): # Read first 20 lines to analyze format
|
|
1650
|
-
line = file.readline()
|
|
1651
|
-
if not line:
|
|
1652
|
-
break
|
|
1653
|
-
lines.append(line.rstrip('\n'))
|
|
1647
|
+
with open(file_name, 'r') as file:
|
|
1648
|
+
lines = []
|
|
1649
|
+
for _ in range(20): # Read first 20 lines to analyze format
|
|
1650
|
+
line = file.readline()
|
|
1651
|
+
if not line:
|
|
1652
|
+
break
|
|
1653
|
+
lines.append(line.rstrip('\n'))
|
|
1654
1654
|
|
|
1655
1655
|
# Check if CSV format (look for commas in sample lines)
|
|
1656
1656
|
csv_indicators = 0
|
|
@@ -1711,10 +1711,10 @@ def PIT(file_name,
|
|
|
1711
1711
|
telem_dat = pd.read_csv(file_name, dtype=str)
|
|
1712
1712
|
print(f"Auto-detected columns: {list(telem_dat.columns)}")
|
|
1713
1713
|
|
|
1714
|
-
except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
|
|
1715
|
-
raise ValueError(
|
|
1716
|
-
f"CSV auto-detection failed for PIT file '{file_name}': {e}"
|
|
1717
|
-
) from e
|
|
1714
|
+
except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
|
|
1715
|
+
raise ValueError(
|
|
1716
|
+
f"CSV auto-detection failed for PIT file '{file_name}': {e}"
|
|
1717
|
+
) from e
|
|
1718
1718
|
|
|
1719
1719
|
# Find timestamp column dynamically
|
|
1720
1720
|
timestamp_col = find_column_by_patterns(telem_dat, ['timestamp', 'time stamp', 'date', 'scan date', 'detected'])
|
|
@@ -1732,8 +1732,8 @@ def PIT(file_name,
|
|
|
1732
1732
|
if not telem_dat["time_stamp"].isna().all():
|
|
1733
1733
|
print(f"Successfully parsed timestamps using format: {fmt or 'auto-detect'}")
|
|
1734
1734
|
break
|
|
1735
|
-
except (ValueError, TypeError) as e:
|
|
1736
|
-
continue
|
|
1735
|
+
except (ValueError, TypeError) as e:
|
|
1736
|
+
continue
|
|
1737
1737
|
else:
|
|
1738
1738
|
raise ValueError("Could not find timestamp column")
|
|
1739
1739
|
|
|
@@ -1773,14 +1773,14 @@ def PIT(file_name,
|
|
|
1773
1773
|
# Fixed-Width Format Parsing (original logic)
|
|
1774
1774
|
|
|
1775
1775
|
# Read header information for format detection
|
|
1776
|
-
with open(file_name, 'r') as file:
|
|
1777
|
-
header_lines = []
|
|
1778
|
-
for _ in range(max(skiprows, 10)):
|
|
1779
|
-
line = file.readline()
|
|
1780
|
-
if not line:
|
|
1781
|
-
break
|
|
1782
|
-
header_lines.append(line.rstrip('\n'))
|
|
1783
|
-
header_text = " ".join(header_lines).lower()
|
|
1776
|
+
with open(file_name, 'r') as file:
|
|
1777
|
+
header_lines = []
|
|
1778
|
+
for _ in range(max(skiprows, 10)):
|
|
1779
|
+
line = file.readline()
|
|
1780
|
+
if not line:
|
|
1781
|
+
break
|
|
1782
|
+
header_lines.append(line.rstrip('\n'))
|
|
1783
|
+
header_text = " ".join(header_lines).lower()
|
|
1784
1784
|
|
|
1785
1785
|
# Define colspecs for different fixed-width formats
|
|
1786
1786
|
if 'latitude' in header_text or 'longitude' in header_text:
|
|
@@ -1840,11 +1840,19 @@ def PIT(file_name,
|
|
|
1840
1840
|
else:
|
|
1841
1841
|
# try to find an antenna column in the fixed-width frame
|
|
1842
1842
|
antenna_col = None
|
|
1843
|
-
for col in telem_dat.columns:
|
|
1844
|
-
col_lower = str(col).lower().strip()
|
|
1845
|
-
if col_lower in (
|
|
1846
|
-
|
|
1847
|
-
|
|
1843
|
+
for col in telem_dat.columns:
|
|
1844
|
+
col_lower = str(col).lower().strip()
|
|
1845
|
+
if col_lower in (
|
|
1846
|
+
'antenna id',
|
|
1847
|
+
'antenna',
|
|
1848
|
+
'ant',
|
|
1849
|
+
'antennae',
|
|
1850
|
+
'antennae id',
|
|
1851
|
+
'reader id',
|
|
1852
|
+
'readerid',
|
|
1853
|
+
):
|
|
1854
|
+
antenna_col = col
|
|
1855
|
+
break
|
|
1848
1856
|
|
|
1849
1857
|
if antenna_col is not None:
|
|
1850
1858
|
# extract numeric antenna identifier and map using provided dictionary
|
|
@@ -1854,12 +1862,12 @@ def PIT(file_name,
|
|
|
1854
1862
|
telem_dat['antenna_num'] = pd.to_numeric(telem_dat['antenna_num'], errors='coerce')
|
|
1855
1863
|
|
|
1856
1864
|
# Prepare mapping dict keys as strings and ints for robust lookup
|
|
1857
|
-
ant_map = {}
|
|
1858
|
-
for k, v in ant_to_rec_dict.items():
|
|
1859
|
-
key_str = str(k).strip()
|
|
1860
|
-
if key_str.isdigit():
|
|
1861
|
-
ant_map[int(key_str)] = v
|
|
1862
|
-
ant_map[key_str] = v
|
|
1865
|
+
ant_map = {}
|
|
1866
|
+
for k, v in ant_to_rec_dict.items():
|
|
1867
|
+
key_str = str(k).strip()
|
|
1868
|
+
if key_str.isdigit():
|
|
1869
|
+
ant_map[int(key_str)] = v
|
|
1870
|
+
ant_map[key_str] = v
|
|
1863
1871
|
|
|
1864
1872
|
# Map by numeric antenna if possible, else by raw string
|
|
1865
1873
|
telem_dat['rec_id'] = telem_dat['antenna_num'].map(ant_map)
|
|
@@ -1876,8 +1884,11 @@ def PIT(file_name,
|
|
|
1876
1884
|
|
|
1877
1885
|
# drop detections that do not map to a known receiver
|
|
1878
1886
|
telem_dat = telem_dat.dropna(subset=['rec_id'])
|
|
1879
|
-
else:
|
|
1880
|
-
raise ValueError(
|
|
1887
|
+
else:
|
|
1888
|
+
raise ValueError(
|
|
1889
|
+
'Multi-antenna fixed-width PIT file requires an antenna/reader column '
|
|
1890
|
+
'(e.g., "Antenna ID" or "Reader ID"), but none was found'
|
|
1891
|
+
)
|
|
1881
1892
|
|
|
1882
1893
|
# Data cleaning - remove invalid entries
|
|
1883
1894
|
print(f"\nCleaning data - original records: {len(telem_dat)}")
|
|
@@ -1899,49 +1910,49 @@ def PIT(file_name,
|
|
|
1899
1910
|
telem_dat = telem_dat[telem_dat['freq_code'].str.len() > 3]
|
|
1900
1911
|
telem_dat = telem_dat[~telem_dat['freq_code'].isna()]
|
|
1901
1912
|
|
|
1902
|
-
# Finalize fields and append to HDF5 /raw_data
|
|
1903
|
-
if len(telem_dat) == 0:
|
|
1904
|
-
print('No valid PIT rows after cleaning; nothing to append')
|
|
1905
|
-
return
|
|
1906
|
-
|
|
1907
|
-
if 'power' not in telem_dat.columns:
|
|
1908
|
-
telem_dat['power'] = np.nan
|
|
1909
|
-
|
|
1910
|
-
# compute epoch as int64 seconds and other derived fields
|
|
1911
|
-
telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
|
|
1913
|
+
# Finalize fields and append to HDF5 /raw_data
|
|
1914
|
+
if len(telem_dat) == 0:
|
|
1915
|
+
print('No valid PIT rows after cleaning; nothing to append')
|
|
1916
|
+
return
|
|
1917
|
+
|
|
1918
|
+
if 'power' not in telem_dat.columns:
|
|
1919
|
+
telem_dat['power'] = np.nan
|
|
1920
|
+
|
|
1921
|
+
# compute epoch as int64 seconds and other derived fields
|
|
1922
|
+
telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
|
|
1912
1923
|
telem_dat['channels'] = np.repeat(channels, len(telem_dat))
|
|
1913
1924
|
telem_dat['scan_time'] = np.repeat(scan_time, len(telem_dat))
|
|
1914
1925
|
telem_dat['rec_type'] = np.repeat(rec_type, len(telem_dat))
|
|
1915
1926
|
|
|
1916
1927
|
# compute noise ratio if study_tags provided
|
|
1917
|
-
try:
|
|
1918
|
-
telem_dat['noise_ratio'] = predictors.noise_ratio(
|
|
1919
|
-
5.0,
|
|
1920
|
-
telem_dat.freq_code.values,
|
|
1921
|
-
telem_dat.epoch.values,
|
|
1922
|
-
study_tags
|
|
1923
|
-
)
|
|
1924
|
-
except (ValueError, TypeError, KeyError, IndexError) as e:
|
|
1925
|
-
raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
|
|
1926
|
-
|
|
1927
|
-
# ensure dtypes
|
|
1928
|
-
telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
|
|
1929
|
-
'epoch': 'int64',
|
|
1930
|
-
'freq_code': 'object',
|
|
1931
|
-
'power': 'float32',
|
|
1932
|
-
'rec_id': 'object',
|
|
1933
|
-
'rec_type': 'object',
|
|
1934
|
-
'scan_time': 'float32',
|
|
1935
|
-
'channels': 'int32',
|
|
1936
|
-
'noise_ratio': 'float32'})
|
|
1928
|
+
try:
|
|
1929
|
+
telem_dat['noise_ratio'] = predictors.noise_ratio(
|
|
1930
|
+
5.0,
|
|
1931
|
+
telem_dat.freq_code.values,
|
|
1932
|
+
telem_dat.epoch.values,
|
|
1933
|
+
study_tags
|
|
1934
|
+
)
|
|
1935
|
+
except (ValueError, TypeError, KeyError, IndexError) as e:
|
|
1936
|
+
raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
|
|
1937
|
+
|
|
1938
|
+
# ensure dtypes
|
|
1939
|
+
telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
|
|
1940
|
+
'epoch': 'int64',
|
|
1941
|
+
'freq_code': 'object',
|
|
1942
|
+
'power': 'float32',
|
|
1943
|
+
'rec_id': 'object',
|
|
1944
|
+
'rec_type': 'object',
|
|
1945
|
+
'scan_time': 'float32',
|
|
1946
|
+
'channels': 'int32',
|
|
1947
|
+
'noise_ratio': 'float32'})
|
|
1937
1948
|
|
|
1938
1949
|
# reorder columns to match expected schema
|
|
1939
1950
|
cols = ['time_stamp', 'epoch', 'freq_code', 'power', 'noise_ratio', 'scan_time', 'channels', 'rec_id', 'rec_type']
|
|
1940
1951
|
cols_existing = [c for c in cols if c in telem_dat.columns]
|
|
1941
1952
|
|
|
1942
|
-
_append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
|
|
1943
|
-
with pd.HDFStore(db_dir, mode='a') as store:
|
|
1944
|
-
print('Store keys after append:', store.keys())
|
|
1953
|
+
_append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
|
|
1954
|
+
with pd.HDFStore(db_dir, mode='a') as store:
|
|
1955
|
+
print('Store keys after append:', store.keys())
|
|
1945
1956
|
|
|
1946
1957
|
|
|
1947
1958
|
def PIT_Multiple(
|
|
@@ -2019,29 +2030,29 @@ def PIT_Multiple(
|
|
|
2019
2030
|
"LocationDetail", "Type", "Recapture", "Sex", "GeneticSampleID", "Comments"
|
|
2020
2031
|
]
|
|
2021
2032
|
|
|
2022
|
-
# Read the CSV into a DataFrame, skipping rows if needed
|
|
2023
|
-
telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
|
|
2024
|
-
|
|
2025
|
-
mode_str = "multi-antenna"
|
|
2026
|
-
if ant_to_rec_dict is None:
|
|
2027
|
-
raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
|
|
2028
|
-
|
|
2029
|
-
# Convert "TimeStamp" to datetime with explicit format
|
|
2030
|
-
telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
|
|
2031
|
-
|
|
2032
|
-
# Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
|
|
2033
|
-
telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
|
|
2034
|
-
telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
|
|
2035
|
-
|
|
2036
|
-
telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
|
|
2037
|
-
antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
|
|
2038
|
-
antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
|
|
2039
|
-
rec_id = antenna_num.map(ant_to_rec_dict)
|
|
2040
|
-
if rec_id.isna().any():
|
|
2041
|
-
rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
|
|
2042
|
-
telem_dat["rec_id"] = rec_id
|
|
2043
|
-
telem_dat = telem_dat.dropna(subset=["rec_id"])
|
|
2044
|
-
|
|
2033
|
+
# Read the CSV into a DataFrame, skipping rows if needed
|
|
2034
|
+
telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
|
|
2035
|
+
|
|
2036
|
+
mode_str = "multi-antenna"
|
|
2037
|
+
if ant_to_rec_dict is None:
|
|
2038
|
+
raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
|
|
2039
|
+
|
|
2040
|
+
# Convert "TimeStamp" to datetime with explicit format
|
|
2041
|
+
telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
|
|
2042
|
+
|
|
2043
|
+
# Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
|
|
2044
|
+
telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
|
|
2045
|
+
telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
|
|
2046
|
+
|
|
2047
|
+
telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
|
|
2048
|
+
antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
|
|
2049
|
+
antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
|
|
2050
|
+
rec_id = antenna_num.map(ant_to_rec_dict)
|
|
2051
|
+
if rec_id.isna().any():
|
|
2052
|
+
rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
|
|
2053
|
+
telem_dat["rec_id"] = rec_id
|
|
2054
|
+
telem_dat = telem_dat.dropna(subset=["rec_id"])
|
|
2055
|
+
|
|
2045
2056
|
# if after_cleanup == 0:
|
|
2046
2057
|
# raise ValueError(f"No valid records found in {file_name}")
|
|
2047
2058
|
|
|
@@ -2101,4 +2112,4 @@ def PIT_Multiple(
|
|
|
2101
2112
|
|
|
2102
2113
|
|
|
2103
2114
|
|
|
2104
|
-
|
|
2115
|
+
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: pymast
|
|
3
|
-
Version: 1.0.0
|
|
3
|
+
Version: 1.0.1
|
|
4
4
|
Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
|
|
5
5
|
Author: Theodore Castro-Santos
|
|
6
6
|
Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>
|
|
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
|
|
|
4
4
|
|
|
5
5
|
[project]
|
|
6
6
|
name = "pymast"
|
|
7
|
-
version = "1.0.0"
|
|
7
|
+
version = "1.0.1"
|
|
8
8
|
description = "Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data"
|
|
9
9
|
readme = "README.md"
|
|
10
10
|
authors = [
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|