pymast 0.0.6__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymast/__init__.py +31 -2
- pymast/fish_history.py +59 -6
- pymast/formatter.py +886 -548
- pymast/logger.py +58 -0
- pymast/naive_bayes.py +116 -9
- pymast/overlap_removal.py +2327 -490
- pymast/parsers.py +1091 -208
- pymast/predictors.py +302 -116
- pymast/radio_project.py +1382 -512
- pymast/validation.py +224 -0
- pymast-1.0.1.dist-info/METADATA +636 -0
- pymast-1.0.1.dist-info/RECORD +15 -0
- {pymast-0.0.6.dist-info → pymast-1.0.1.dist-info}/WHEEL +1 -1
- pymast/table_merge.py +0 -154
- pymast-0.0.6.dist-info/METADATA +0 -19
- pymast-0.0.6.dist-info/RECORD +0 -14
- {pymast-0.0.6.dist-info → pymast-1.0.1.dist-info/licenses}/LICENSE.txt +0 -0
- {pymast-0.0.6.dist-info → pymast-1.0.1.dist-info}/top_level.txt +0 -0
pymast/parsers.py
CHANGED
|
@@ -1,10 +1,107 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
|
+
"""
|
|
3
|
+
Data parsers for radio telemetry receiver file formats.
|
|
4
|
+
|
|
5
|
+
This module provides parser functions to import raw detection data from various
|
|
6
|
+
radio telemetry receiver manufacturers into the MAST HDF5 database. Each parser
|
|
7
|
+
handles manufacturer-specific file formats and standardizes the data into a common
|
|
8
|
+
schema for downstream processing.
|
|
9
|
+
|
|
10
|
+
Supported Receiver Types
|
|
11
|
+
------------------------
|
|
12
|
+
- **ARES**: Lotek Advanced Radio Telemetry Systems
|
|
13
|
+
- **Orion**: Sigma Eight Orion receivers
|
|
14
|
+
- **SRX-1200**: Lotek SRX 1200 receivers (fixed-width format)
|
|
15
|
+
- **SRX-800**: Lotek SRX 800 receivers (fixed-width format)
|
|
16
|
+
- **SRX-600**: Lotek SRX 600 receivers (fixed-width format)
|
|
17
|
+
- **VR2**: Vemco VR2 acoustic receivers (CSV format)
|
|
18
|
+
- **PIT**: Passive Integrated Transponder readers
|
|
19
|
+
|
|
20
|
+
Common Data Pipeline
|
|
21
|
+
--------------------
|
|
22
|
+
All parsers follow this workflow:
|
|
23
|
+
1. Read raw receiver file (CSV, fixed-width, or vendor format)
|
|
24
|
+
2. Parse timestamps, frequencies, codes, power, antenna information
|
|
25
|
+
3. Calculate derived fields: epoch, noise_ratio
|
|
26
|
+
4. Standardize column names and data types
|
|
27
|
+
5. Append to HDF5 `/raw_data` table
|
|
28
|
+
|
|
29
|
+
Standardized Output Schema
|
|
30
|
+
--------------------------
|
|
31
|
+
All parsers produce these columns:
|
|
32
|
+
- `time_stamp` : datetime64 - Detection timestamp
|
|
33
|
+
- `epoch` : int64 - Whole seconds since 1970-01-01
|
|
34
|
+
- `freq_code` : object - Frequency + code (e.g., "166.380 7")
|
|
35
|
+
- `power` : float32 - Signal power (dB or raw)
|
|
36
|
+
- `rec_id` : object - Receiver identifier
|
|
37
|
+
- `rec_type` : object - Receiver type (ares, orion, srx1200, etc.)
|
|
38
|
+
- `channels` : int32 - Number of receiver channels
|
|
39
|
+
- `scan_time` : float32 - Scan duration per channel (seconds)
|
|
40
|
+
- `noise_ratio` : float32 - Ratio of miscoded to total detections
|
|
41
|
+
|
|
42
|
+
Typical Usage
|
|
43
|
+
-------------
|
|
44
|
+
>>> import pymast.parsers as parsers
|
|
45
|
+
>>>
|
|
46
|
+
>>> # Import ARES data
|
|
47
|
+
>>> parsers.ares(
|
|
48
|
+
... file_name='receiver_001.csv',
|
|
49
|
+
... db_dir='project.h5',
|
|
50
|
+
... rec_id='REC001',
|
|
51
|
+
... study_tags=['166.380 7', '166.380 12'],
|
|
52
|
+
... scan_time=1.0,
|
|
53
|
+
... channels=1
|
|
54
|
+
... )
|
|
55
|
+
>>>
|
|
56
|
+
>>> # Import SRX-1200 data
|
|
57
|
+
>>> parsers.srx1200(
|
|
58
|
+
... file_name='srx_detections.txt',
|
|
59
|
+
... db_dir='project.h5',
|
|
60
|
+
... rec_id='SRX123',
|
|
61
|
+
... study_tags=['166.380 7'],
|
|
62
|
+
... scan_time=2.5,
|
|
63
|
+
... channels=1
|
|
64
|
+
... )
|
|
65
|
+
|
|
66
|
+
Notes
|
|
67
|
+
-----
|
|
68
|
+
- Frequency values are rounded to nearest 5 kHz then converted to MHz with 3 decimal precision
|
|
69
|
+
- Noise ratio calculated using 5-minute moving window (see `predictors.noise_ratio`)
|
|
70
|
+
- All parsers append to existing HDF5 `/raw_data` table (mode='a')
|
|
71
|
+
- Timestamps assumed to be in UTC or project-specific timezone
|
|
72
|
+
- PIT readers have different schemas due to antenna-based detection logic
|
|
73
|
+
|
|
74
|
+
See Also
|
|
75
|
+
--------
|
|
76
|
+
radio_project.import_data : High-level batch import interface
|
|
77
|
+
predictors.noise_ratio : Miscoded detection ratio calculation
|
|
78
|
+
"""
|
|
2
79
|
|
|
3
80
|
import pandas as pd
|
|
4
81
|
import numpy as np
|
|
5
82
|
import datetime
|
|
6
83
|
import os
|
|
7
84
|
import pymast.predictors as predictors
|
|
85
|
+
import sys
|
|
86
|
+
|
|
87
|
+
def _append_raw_data(db_dir, telem_dat, data_columns=None):
    """
    Append a detections DataFrame to the `raw_data` table of an HDF5 store.

    Parameters
    ----------
    db_dir : str
        Path of the HDF5 file; it is opened in append mode (``mode='a'``).
    telem_dat : pandas.DataFrame
        Rows to append to the `raw_data` key.
    data_columns : optional
        Forwarded to ``HDFStore.append`` as ``data_columns`` only when it is
        not None; otherwise the argument is omitted entirely.

    Returns
    -------
    None
    """
    # reserve 20 characters for each of the string columns in the table
    string_widths = dict.fromkeys(('freq_code', 'rec_type', 'rec_id'), 20)

    options = dict(key='raw_data',
                   value=telem_dat,
                   format='table',
                   index=False,
                   min_itemsize=string_widths,
                   append=True,
                   chunksize=1000000)

    # only pass data_columns through when the caller supplied a value
    if data_columns is not None:
        options['data_columns'] = data_columns

    with pd.HDFStore(db_dir, mode='a') as store:
        store.append(**options)
|
|
8
105
|
|
|
9
106
|
def ares(file_name,
|
|
10
107
|
db_dir,
|
|
@@ -13,6 +110,60 @@ def ares(file_name,
|
|
|
13
110
|
scan_time = 1,
|
|
14
111
|
channels = 1,
|
|
15
112
|
ant_to_rec_dict = None):
|
|
113
|
+
"""
|
|
114
|
+
Import Lotek ARES receiver data into MAST HDF5 database.
|
|
115
|
+
|
|
116
|
+
Parses CSV format detection files from Lotek Advanced Radio Telemetry Systems
|
|
117
|
+
(ARES) receivers. Automatically detects file format variant based on header row
|
|
118
|
+
and standardizes data into common schema.
|
|
119
|
+
|
|
120
|
+
Parameters
|
|
121
|
+
----------
|
|
122
|
+
file_name : str
|
|
123
|
+
Absolute path to ARES CSV file
|
|
124
|
+
db_dir : str
|
|
125
|
+
Absolute path to project HDF5 database
|
|
126
|
+
rec_id : str
|
|
127
|
+
Unique receiver identifier (e.g., 'REC001', 'SITE_A')
|
|
128
|
+
study_tags : list of str
|
|
129
|
+
List of valid freq_code tags deployed in study (e.g., ['166.380 7', '166.380 12'])
|
|
130
|
+
Used to calculate noise_ratio
|
|
131
|
+
scan_time : float, optional
|
|
132
|
+
Scan duration per channel in seconds (default: 1.0)
|
|
133
|
+
channels : int, optional
|
|
134
|
+
Number of receiver channels (default: 1)
|
|
135
|
+
ant_to_rec_dict : dict, optional
|
|
136
|
+
Mapping of antenna IDs to receiver IDs (not currently used)
|
|
137
|
+
|
|
138
|
+
Returns
|
|
139
|
+
-------
|
|
140
|
+
None
|
|
141
|
+
Data appended directly to HDF5 `/raw_data` table
|
|
142
|
+
|
|
143
|
+
Notes
|
|
144
|
+
-----
|
|
145
|
+
- Handles two ARES file format variants (detected via header row)
|
|
146
|
+
- Frequencies rounded to nearest 5 kHz, formatted as 3-decimal MHz
|
|
147
|
+
- Calculates noise_ratio using 5-minute moving window
|
|
148
|
+
- All timestamps converted to epoch (seconds since 1970-01-01)
|
|
149
|
+
|
|
150
|
+
Examples
|
|
151
|
+
--------
|
|
152
|
+
>>> import pymast.parsers as parsers
|
|
153
|
+
>>> parsers.ares(
|
|
154
|
+
... file_name='C:/data/ares_001.csv',
|
|
155
|
+
... db_dir='C:/project/study.h5',
|
|
156
|
+
... rec_id='ARES001',
|
|
157
|
+
... study_tags=['166.380 7', '166.380 12', '166.380 19'],
|
|
158
|
+
... scan_time=1.0,
|
|
159
|
+
... channels=1
|
|
160
|
+
... )
|
|
161
|
+
|
|
162
|
+
See Also
|
|
163
|
+
--------
|
|
164
|
+
radio_project.import_data : High-level batch import
|
|
165
|
+
predictors.noise_ratio : Noise ratio calculation
|
|
166
|
+
"""
|
|
16
167
|
# identify the receiver type
|
|
17
168
|
rec_type = 'ares'
|
|
18
169
|
|
|
@@ -67,7 +218,8 @@ def ares(file_name,
|
|
|
67
218
|
inplace = True)
|
|
68
219
|
|
|
69
220
|
# now do this stuff to files regardless of type
|
|
70
|
-
|
|
221
|
+
# compute epoch as integer seconds (int64) to avoid floating precision loss
|
|
222
|
+
telem_dat['epoch'] = (telem_dat.time_stamp.astype('int64') // 10**9).astype('int64')
|
|
71
223
|
telem_dat['rec_type'] = np.repeat(rec_type,len(telem_dat))
|
|
72
224
|
telem_dat['rec_id'] = np.repeat(rec_id,len(telem_dat))
|
|
73
225
|
telem_dat['channels'] = np.repeat(channels,len(telem_dat))
|
|
@@ -83,34 +235,72 @@ def ares(file_name,
|
|
|
83
235
|
'scan_time':'float32',
|
|
84
236
|
'channels':'int32',
|
|
85
237
|
'rec_type':'object',
|
|
86
|
-
'epoch':'
|
|
238
|
+
'epoch':'int64',
|
|
87
239
|
'noise_ratio':'float32',
|
|
88
240
|
'rec_id':'object'})
|
|
89
241
|
|
|
90
|
-
|
|
91
|
-
store.append(key = 'raw_data',
|
|
92
|
-
value = telem_dat,
|
|
93
|
-
format = 'table',
|
|
94
|
-
index = False,
|
|
95
|
-
min_itemsize = {'freq_code':20,
|
|
96
|
-
'rec_type':20,
|
|
97
|
-
'rec_id':20},
|
|
98
|
-
append = True,
|
|
99
|
-
chunksize = 1000000)
|
|
242
|
+
_append_raw_data(db_dir, telem_dat)
|
|
100
243
|
|
|
101
244
|
|
|
102
245
|
def orion_import(file_name,
|
|
103
246
|
db_dir,
|
|
104
247
|
rec_id,
|
|
105
248
|
study_tags,
|
|
106
|
-
scan_time = 1
|
|
249
|
+
scan_time = 1.,
|
|
107
250
|
channels = 1,
|
|
108
251
|
ant_to_rec_dict = None):
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
252
|
+
"""
|
|
253
|
+
Import Sigma Eight Orion receiver data into MAST HDF5 database.
|
|
254
|
+
|
|
255
|
+
Parses fixed-width format detection files from Sigma Eight Orion receivers.
|
|
256
|
+
Automatically detects firmware version based on header row and adjusts
|
|
257
|
+
column parsing accordingly.
|
|
258
|
+
|
|
259
|
+
Parameters
|
|
260
|
+
----------
|
|
261
|
+
file_name : str
|
|
262
|
+
Absolute path to Orion fixed-width text file
|
|
263
|
+
db_dir : str
|
|
264
|
+
Absolute path to project HDF5 database
|
|
265
|
+
rec_id : str
|
|
266
|
+
Unique receiver identifier (e.g., 'ORION_01')
|
|
267
|
+
study_tags : list of str
|
|
268
|
+
List of valid freq_code tags deployed in study
|
|
269
|
+
scan_time : float, optional
|
|
270
|
+
Scan duration per channel in seconds (default: 1.0)
|
|
271
|
+
channels : int, optional
|
|
272
|
+
Number of receiver channels (default: 1)
|
|
273
|
+
ant_to_rec_dict : dict, optional
|
|
274
|
+
Mapping of antenna IDs to receiver IDs; when provided, detections are written once per mapped receiver ID
|
|
275
|
+
|
|
276
|
+
Returns
|
|
277
|
+
-------
|
|
278
|
+
None
|
|
279
|
+
Data appended directly to HDF5 `/raw_data` table
|
|
280
|
+
|
|
281
|
+
Notes
|
|
282
|
+
-----
|
|
283
|
+
- Handles two Orion firmware variants: with/without 'Type' column
|
|
284
|
+
- Fixed-width column parsing using pandas read_fwf
|
|
285
|
+
- Filters out 'STATUS' messages (firmware-specific)
|
|
286
|
+
- Frequencies formatted as 3-decimal MHz
|
|
287
|
+
|
|
288
|
+
Examples
|
|
289
|
+
--------
|
|
290
|
+
>>> parsers.orion_import(
|
|
291
|
+
... file_name='C:/data/orion_site1.txt',
|
|
292
|
+
... db_dir='C:/project/study.h5',
|
|
293
|
+
... rec_id='ORION_SITE1',
|
|
294
|
+
... study_tags=['166.380 7'],
|
|
295
|
+
... scan_time=1.0,
|
|
296
|
+
... channels=1
|
|
297
|
+
... )
|
|
298
|
+
|
|
299
|
+
See Also
|
|
300
|
+
--------
|
|
301
|
+
ares : Similar parser for Lotek ARES receivers
|
|
302
|
+
srx1200 : Parser for Lotek SRX 1200 receivers
|
|
303
|
+
"""
|
|
114
304
|
# identify the receiver type
|
|
115
305
|
rec_type = 'orion'
|
|
116
306
|
|
|
@@ -124,17 +314,51 @@ def orion_import(file_name,
|
|
|
124
314
|
# with our data row, extract information using pandas fwf import procedure
|
|
125
315
|
telem_dat = pd.read_fwf(file_name,colspecs = [(0,12),(13,23),(24,30),(31,35),(36,45),(46,54),(55,60),(61,65)],
|
|
126
316
|
names = ['Date','Time','Site','Ant','Freq','Type','Code','power'],
|
|
127
|
-
skiprows = 1
|
|
128
|
-
dtype = {'Date':str,'Time':str,'Site':np.int32,'Ant':str,'Freq':str,'Type':str,'Code':str,'power':np.float64})
|
|
317
|
+
skiprows = 1)#,
|
|
318
|
+
#dtype = {'Date':str,'Time':str,'Site':np.int32,'Ant':str,'Freq':str,'Type':str,'Code':str,'power':np.float64})
|
|
129
319
|
telem_dat = telem_dat[telem_dat.Type != 'STATUS']
|
|
320
|
+
telem_dat['Freq'] = telem_dat.Freq.astype('float32')
|
|
321
|
+
|
|
322
|
+
telem_dat['Freq'] = telem_dat['Freq'].apply(lambda x: f"{x:.3f}")
|
|
323
|
+
telem_dat['Ant'] = telem_dat.Ant.astype('object')
|
|
130
324
|
telem_dat.drop(['Type'], axis = 1, inplace = True)
|
|
131
325
|
|
|
132
326
|
else:
|
|
133
327
|
# with our data row, extract information using pandas fwf import procedure
|
|
134
328
|
telem_dat = pd.read_fwf(file_name,colspecs = [(0,11),(11,20),(20,26),(26,30),(30,37),(37,42),(42,48)],
|
|
135
329
|
names = ['Date','Time','Site','Ant','Freq','Code','power'],
|
|
136
|
-
skiprows = 1
|
|
137
|
-
dtype = {'Date':str,'Time':str,'Site':str,'Ant':str,'Freq':str,'Code':str,'power':str})
|
|
330
|
+
skiprows = 1)#,
|
|
331
|
+
#dtype = {'Date':str,'Time':str,'Site':str,'Ant':str,'Freq':str,'Code':str,'power':str})
|
|
332
|
+
telem_dat['Ant'] = telem_dat.Ant.astype('object')
|
|
333
|
+
telem_dat['Freq'] = telem_dat.Freq.astype('float32')
|
|
334
|
+
telem_dat['Freq'] = telem_dat['Freq'].apply(lambda x: f"{x:.3f}")
|
|
335
|
+
|
|
336
|
+
|
|
337
|
+
def _write_orion_subset(df, receiver_id, epoch_dtype):
    """
    Finalize a frame of Orion detections and append it to the database.

    Works on a copy of ``df``: stamps every row with ``receiver_id`` in the
    ``rec_id`` column, drops the ``Ant`` column, casts the columns to their
    storage dtypes, puts the columns in the output order, and appends the
    result through ``_append_raw_data`` with ``data_columns=True``. The
    database path ``db_dir`` is taken from the enclosing scope.

    Parameters
    ----------
    df : pandas.DataFrame
        Detections to write; the caller's frame is not modified.
    receiver_id
        Value written to ``rec_id`` for every row.
    epoch_dtype
        dtype the ``epoch`` column is cast to.

    Returns
    -------
    None
    """
    subset = df.copy()
    subset['rec_id'] = np.repeat(receiver_id, len(subset))
    subset = subset.drop(columns = ['Ant'])

    # storage dtype for each column; only epoch is chosen by the caller
    storage_dtypes = {'power':'float32',
                      'freq_code':'object',
                      'time_stamp':'datetime64[ns]',
                      'scan_time':'float32',
                      'channels':'int32',
                      'rec_type':'object',
                      'epoch': epoch_dtype,
                      'noise_ratio':'float32',
                      'rec_id':'object'}
    subset = subset.astype(storage_dtypes)

    # column order of the rows handed to the raw_data table
    output_order = ['power',
                    'time_stamp',
                    'epoch',
                    'freq_code',
                    'noise_ratio',
                    'scan_time',
                    'channels',
                    'rec_id',
                    'rec_type']
    subset = subset[output_order]

    _append_raw_data(db_dir, subset, data_columns=True)
|
|
138
362
|
|
|
139
363
|
if len(telem_dat) > 0:
|
|
140
364
|
# add file name to data
|
|
@@ -153,8 +377,8 @@ def orion_import(file_name,
|
|
|
153
377
|
if len(telem_dat) == 0:
|
|
154
378
|
print ("Invalid timestamps in raw data, cannot import")
|
|
155
379
|
else:
|
|
156
|
-
# create epoch
|
|
157
|
-
telem_dat['epoch'] =
|
|
380
|
+
# create epoch as int64 seconds
|
|
381
|
+
telem_dat['epoch'] = (telem_dat.time_stamp.astype('int64') // 10**9).astype('int64')
|
|
158
382
|
|
|
159
383
|
# drop unnecessary columns
|
|
160
384
|
telem_dat.drop (['Date','Time','Freq','Code','Site'],axis = 1, inplace = True)
|
|
@@ -167,97 +391,64 @@ def orion_import(file_name,
|
|
|
167
391
|
|
|
168
392
|
# if there is no antenna to receiver dictionary
|
|
169
393
|
if ant_to_rec_dict == None:
|
|
170
|
-
|
|
171
|
-
telem_dat.drop(['Ant'], axis = 1, inplace = True)
|
|
172
|
-
|
|
173
|
-
# add receiver id
|
|
174
|
-
telem_dat['rec_id'] = np.repeat(rec_id,len(telem_dat))
|
|
175
|
-
|
|
176
|
-
telem_dat = telem_dat.astype({'power':'float32',
|
|
177
|
-
'freq_code':'object',
|
|
178
|
-
'time_stamp':'datetime64[ns]',
|
|
179
|
-
'scan_time':'float32',
|
|
180
|
-
'channels':'int32',
|
|
181
|
-
'rec_type':'object',
|
|
182
|
-
'epoch':'float32',
|
|
183
|
-
'noise_ratio':'float32',
|
|
184
|
-
'rec_id':'object'})
|
|
185
|
-
|
|
186
|
-
telem_dat = telem_dat[['power',
|
|
187
|
-
'time_stamp',
|
|
188
|
-
'epoch',
|
|
189
|
-
'freq_code',
|
|
190
|
-
'noise_ratio',
|
|
191
|
-
'scan_time',
|
|
192
|
-
'channels',
|
|
193
|
-
'rec_id',
|
|
194
|
-
'rec_type']]
|
|
195
|
-
|
|
196
|
-
with pd.HDFStore(db_dir, mode='a') as store:
|
|
197
|
-
store.append(key = 'raw_data',
|
|
198
|
-
value = telem_dat,
|
|
199
|
-
format = 'table',
|
|
200
|
-
index = False,
|
|
201
|
-
min_itemsize = {'freq_code':20,
|
|
202
|
-
'rec_type':20,
|
|
203
|
-
'rec_id':20},
|
|
204
|
-
append = True,
|
|
205
|
-
chunksize = 1000000,
|
|
206
|
-
data_columns = True)
|
|
207
|
-
|
|
394
|
+
_write_orion_subset(telem_dat, rec_id, 'int64')
|
|
208
395
|
# if there is an antenna to receiver dictionary
|
|
209
396
|
else:
|
|
210
|
-
for i in ant_to_rec_dict:
|
|
397
|
+
for i in ant_to_rec_dict.keys():
|
|
211
398
|
# get site from dictionary
|
|
212
399
|
site = ant_to_rec_dict[i]
|
|
213
400
|
|
|
214
401
|
# get telemetry data associated with this site
|
|
215
|
-
telem_dat_sub = telem_dat[telem_dat.Ant ==
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
telem_dat_sub.drop(['Ant'], axis = 1, inplace = True)
|
|
222
|
-
|
|
223
|
-
telem_dat_sub = telem_dat_sub.astype({'power':'float32',
|
|
224
|
-
'freq_code':'object',
|
|
225
|
-
'time_stamp':'datetime64[ns]',
|
|
226
|
-
'scan_time':'float32',
|
|
227
|
-
'channels':'int32',
|
|
228
|
-
'rec_type':'object',
|
|
229
|
-
'epoch':'float32',
|
|
230
|
-
'noise_ratio':'float32',
|
|
231
|
-
'rec_id':'object'})
|
|
232
|
-
|
|
233
|
-
telem_dat_sub = telem_dat_sub[['power',
|
|
234
|
-
'time_stamp',
|
|
235
|
-
'epoch',
|
|
236
|
-
'freq_code',
|
|
237
|
-
'noise_ratio',
|
|
238
|
-
'scan_time',
|
|
239
|
-
'channels',
|
|
240
|
-
'rec_id',
|
|
241
|
-
'rec_type']]
|
|
242
|
-
|
|
243
|
-
with pd.HDFStore(db_dir, mode='a') as store:
|
|
244
|
-
store.append(key = 'raw_data',
|
|
245
|
-
value = telem_dat_sub,
|
|
246
|
-
format = 'table',
|
|
247
|
-
index = False,
|
|
248
|
-
min_itemsize = {'freq_code':20,
|
|
249
|
-
'rec_type':20,
|
|
250
|
-
'rec_id':20},
|
|
251
|
-
append = True,
|
|
252
|
-
chunksize = 1000000,
|
|
253
|
-
data_columns = True)
|
|
254
|
-
|
|
255
|
-
|
|
402
|
+
telem_dat_sub = telem_dat[telem_dat.Ant == 1]
|
|
403
|
+
_write_orion_subset(telem_dat_sub, site, 'float32')
|
|
404
|
+
else:
|
|
405
|
+
raise ValueError("Invalid import parameters, no data returned")
|
|
406
|
+
sys.exit()
|
|
407
|
+
|
|
256
408
|
|
|
257
409
|
def vr2_import(file_name,db_dir,study_tags, rec_id):
|
|
258
|
-
|
|
259
|
-
|
|
260
|
-
|
|
410
|
+
"""
|
|
411
|
+
Import Vemco VR2 acoustic receiver data into MAST HDF5 database.
|
|
412
|
+
|
|
413
|
+
Parses CSV format detection files from Vemco VR2 acoustic receivers.
|
|
414
|
+
VR2 data uses acoustic tags instead of radio frequencies, with different
|
|
415
|
+
field names and data structure.
|
|
416
|
+
|
|
417
|
+
Parameters
|
|
418
|
+
----------
|
|
419
|
+
file_name : str
|
|
420
|
+
Absolute path to VR2 CSV file
|
|
421
|
+
db_dir : str
|
|
422
|
+
Absolute path to project HDF5 database
|
|
423
|
+
study_tags : list of str
|
|
424
|
+
List of valid acoustic tag codes deployed in study
|
|
425
|
+
rec_id : str
|
|
426
|
+
Unique receiver identifier
|
|
427
|
+
|
|
428
|
+
Returns
|
|
429
|
+
-------
|
|
430
|
+
None
|
|
431
|
+
Data appended directly to HDF5 `/raw_data` table
|
|
432
|
+
|
|
433
|
+
Notes
|
|
434
|
+
-----
|
|
435
|
+
- Acoustic receivers use different schema than radio receivers
|
|
436
|
+
- VR2 files typically have standardized CSV format from Vemco software
|
|
437
|
+
- Converts acoustic tag IDs to freq_code format for consistency
|
|
438
|
+
|
|
439
|
+
Examples
|
|
440
|
+
--------
|
|
441
|
+
>>> parsers.vr2_import(
|
|
442
|
+
... file_name='C:/data/vr2_001.csv',
|
|
443
|
+
... db_dir='C:/project/acoustic_study.h5',
|
|
444
|
+
... study_tags=['A69-1601-12345', 'A69-1601-12346'],
|
|
445
|
+
... rec_id='VR2_001'
|
|
446
|
+
... )
|
|
447
|
+
|
|
448
|
+
See Also
|
|
449
|
+
--------
|
|
450
|
+
ares : Parser for radio telemetry receivers
|
|
451
|
+
"""
|
|
261
452
|
|
|
262
453
|
recType = 'vr2'
|
|
263
454
|
|
|
@@ -281,7 +472,7 @@ def vr2_import(file_name,db_dir,study_tags, rec_id):
|
|
|
281
472
|
telem_dat['transmitter'] = telem_dat['transmitter'].str.split("-", n = 2, expand = True)[2]
|
|
282
473
|
telem_dat['transmitter'] = telem_dat.transmitter.astype(str)
|
|
283
474
|
telem_dat.rename(columns = {'Receiver':'rec_id','transmitter':'freq_code'}, inplace = True)
|
|
284
|
-
telem_dat['epoch'] =
|
|
475
|
+
telem_dat['epoch'] = (telem_dat.time_stamp.astype('int64') // 10**9).astype('int64')
|
|
285
476
|
try:
|
|
286
477
|
telem_dat.drop (['Date and Time (UTC)', 'Transmitter Name','Transmitter Serial','Sensor Value','Sensor Unit','Station Name','Latitude','Longitude','Transmitter Type','Sensor Precision'],axis = 1, inplace = True)
|
|
287
478
|
except KeyError:
|
|
@@ -292,32 +483,80 @@ def vr2_import(file_name,db_dir,study_tags, rec_id):
|
|
|
292
483
|
# telem_dat.set_index(index,inplace = True,drop = False)
|
|
293
484
|
|
|
294
485
|
telem_dat = telem_dat.astype({'power':'float32',
|
|
295
|
-
|
|
296
|
-
|
|
297
|
-
|
|
298
|
-
|
|
299
|
-
|
|
300
|
-
|
|
301
|
-
|
|
302
|
-
|
|
486
|
+
'freq_code':'object',
|
|
487
|
+
'time_stamp':'datetime64[ns]',
|
|
488
|
+
'scan_time':'float32',
|
|
489
|
+
'channels':'int32',
|
|
490
|
+
'rec_type':'object',
|
|
491
|
+
'epoch':'int64',
|
|
492
|
+
'noise_ratio':'float32',
|
|
493
|
+
'rec_id':'object'})
|
|
303
494
|
|
|
304
|
-
|
|
305
|
-
|
|
306
|
-
value = telem_dat,
|
|
307
|
-
format = 'table',
|
|
308
|
-
index = False,
|
|
309
|
-
min_itemsize = {'freq_code':20,
|
|
310
|
-
'rec_type':20,
|
|
311
|
-
'rec_id':20},
|
|
312
|
-
append = True,
|
|
313
|
-
chunksize = 1000000)
|
|
495
|
+
_append_raw_data(db_dir, telem_dat)
|
|
496
|
+
|
|
314
497
|
def srx1200(file_name,
|
|
315
498
|
db_dir,
|
|
316
499
|
rec_id,
|
|
317
500
|
study_tags,
|
|
318
501
|
scan_time = 1,
|
|
319
502
|
channels = 1,
|
|
320
|
-
ant_to_rec_dict = None
|
|
503
|
+
ant_to_rec_dict = None,
|
|
504
|
+
ka_format = False):
|
|
505
|
+
"""
|
|
506
|
+
Import Lotek SRX-1200 receiver data into MAST HDF5 database.
|
|
507
|
+
|
|
508
|
+
Parses fixed-width format detection files from Lotek SRX-1200 receivers.
|
|
509
|
+
Supports both standard Lotek format and custom Kleinschmidt Associates (KA) format.
|
|
510
|
+
|
|
511
|
+
Parameters
|
|
512
|
+
----------
|
|
513
|
+
file_name : str
|
|
514
|
+
Absolute path to SRX-1200 fixed-width text file
|
|
515
|
+
db_dir : str
|
|
516
|
+
Absolute path to project HDF5 database
|
|
517
|
+
rec_id : str
|
|
518
|
+
Unique receiver identifier (e.g., 'SRX1200_001')
|
|
519
|
+
study_tags : list of str
|
|
520
|
+
List of valid freq_code tags deployed in study
|
|
521
|
+
scan_time : float, optional
|
|
522
|
+
Scan duration per channel in seconds (default: 1.0)
|
|
523
|
+
channels : int, optional
|
|
524
|
+
Number of receiver channels (default: 1)
|
|
525
|
+
ant_to_rec_dict : dict, optional
|
|
526
|
+
Mapping of antenna IDs to receiver IDs for multi-antenna setups
|
|
527
|
+
ka_format : bool, optional
|
|
528
|
+
If True, parse Kleinschmidt Associates custom format (default: False)
|
|
529
|
+
|
|
530
|
+
Returns
|
|
531
|
+
-------
|
|
532
|
+
None
|
|
533
|
+
Data appended directly to HDF5 `/raw_data` table
|
|
534
|
+
|
|
535
|
+
Notes
|
|
536
|
+
-----
|
|
537
|
+
- Fixed-width column parsing optimized for SRX-1200 output
|
|
538
|
+
- Handles multi-antenna configurations via ant_to_rec_dict
|
|
539
|
+
- KA format includes additional metadata fields
|
|
540
|
+
- Power values typically in dB
|
|
541
|
+
|
|
542
|
+
Examples
|
|
543
|
+
--------
|
|
544
|
+
>>> parsers.srx1200(
|
|
545
|
+
... file_name='C:/data/srx1200_site1.txt',
|
|
546
|
+
... db_dir='C:/project/study.h5',
|
|
547
|
+
... rec_id='SRX1200_SITE1',
|
|
548
|
+
... study_tags=['166.380 7', '166.380 12'],
|
|
549
|
+
... scan_time=2.5,
|
|
550
|
+
... channels=1,
|
|
551
|
+
... ka_format=False
|
|
552
|
+
... )
|
|
553
|
+
|
|
554
|
+
See Also
|
|
555
|
+
--------
|
|
556
|
+
srx800 : Parser for SRX-800 receivers
|
|
557
|
+
srx600 : Parser for SRX-600 receivers
|
|
558
|
+
ares : Parser for ARES receivers
|
|
559
|
+
"""
|
|
321
560
|
rec_type = 'srx1200'
|
|
322
561
|
|
|
323
562
|
# create empty dictionary to hold Lotek header data indexed by line number - to be imported to Pandas dataframe
|
|
@@ -467,12 +706,20 @@ def srx1200(file_name,
|
|
|
467
706
|
|
|
468
707
|
# read in telemetry data
|
|
469
708
|
if new_split == None:
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
709
|
+
if ka_format == False:
|
|
710
|
+
telem_dat = pd.read_fwf(file_name,
|
|
711
|
+
colspecs = [(0,7),(7,25),(25,35),(35,46),(46,57),(57,68),(68,80),(80,90),(90,102),(102,110),(110,130),(130,143),(143,153)],
|
|
712
|
+
names = ['Index','Rx Serial Number','Date','Time','[uSec]','Tag/BPM','Freq [MHz]','Codeset','Antenna','Gain','RSSI','Latitude','Longitude'],
|
|
713
|
+
skiprows = dataRow,
|
|
714
|
+
skipfooter = eof - dataEnd)
|
|
715
|
+
telem_dat.drop(columns = ['Index'], inplace = True)
|
|
716
|
+
else:
|
|
717
|
+
telem_dat = pd.read_fwf(file_name,
|
|
718
|
+
colspecs = [(0,5),(6,20),(20,32),(32,43),(43,53),(53,65),(65,72),(72,85),(85,93),(93,101)],
|
|
719
|
+
names = ['Index','Date','Time','[uSec]','Tag/BPM','Freq [MHz]','Codeset','Antenna','Gain','RSSI'],
|
|
720
|
+
skiprows = dataRow,
|
|
721
|
+
skipfooter = eof - dataEnd)
|
|
722
|
+
telem_dat.drop(columns = ['Index'], inplace = True)
|
|
476
723
|
|
|
477
724
|
else:
|
|
478
725
|
telem_dat = pd.read_csv(file_name,
|
|
@@ -489,8 +736,8 @@ def srx1200(file_name,
|
|
|
489
736
|
|
|
490
737
|
telem_dat['time_stamp'] = pd.to_datetime(telem_dat.time_stamp)
|
|
491
738
|
|
|
492
|
-
# calculate Epoch
|
|
493
|
-
telem_dat['epoch'] = (telem_dat.time_stamp
|
|
739
|
+
# calculate Epoch as int64 seconds
|
|
740
|
+
telem_dat['epoch'] = (telem_dat.time_stamp.astype('int64') // 10**9).astype('int64')
|
|
494
741
|
|
|
495
742
|
# format frequency code
|
|
496
743
|
telem_dat['FreqNo'] = telem_dat['Freq [MHz]'].apply(lambda x: f"{x:.3f}" )
|
|
@@ -519,30 +766,26 @@ def srx1200(file_name,
|
|
|
519
766
|
telem_dat.reset_index(inplace = True)
|
|
520
767
|
|
|
521
768
|
telem_dat = telem_dat.astype({'power':'float32',
|
|
522
|
-
|
|
523
|
-
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
769
|
+
'freq_code':'object',
|
|
770
|
+
'time_stamp':'datetime64[ns]',
|
|
771
|
+
'scan_time':'float32',
|
|
772
|
+
'channels':'int32',
|
|
773
|
+
'rec_type':'object',
|
|
774
|
+
'epoch':'int64',
|
|
775
|
+
'noise_ratio':'float32',
|
|
776
|
+
'rec_id':'object'})
|
|
530
777
|
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
536
|
-
|
|
537
|
-
|
|
538
|
-
|
|
539
|
-
|
|
540
|
-
|
|
541
|
-
|
|
542
|
-
'rec_type':20,
|
|
543
|
-
'rec_id':20},
|
|
544
|
-
chunksize = 1000000,
|
|
545
|
-
data_columns = True,)
|
|
778
|
+
telem_dat = telem_dat[['power',
|
|
779
|
+
'time_stamp',
|
|
780
|
+
'epoch',
|
|
781
|
+
'freq_code',
|
|
782
|
+
'noise_ratio',
|
|
783
|
+
'scan_time',
|
|
784
|
+
'channels',
|
|
785
|
+
'rec_id',
|
|
786
|
+
'rec_type']]
|
|
787
|
+
|
|
788
|
+
_append_raw_data(db_dir, telem_dat, data_columns=True)
|
|
546
789
|
|
|
547
790
|
# if the data doesn't have a header
|
|
548
791
|
else:
|
|
@@ -566,7 +809,7 @@ def srx1200(file_name,
|
|
|
566
809
|
telem_dat['time_stamp'] = pd.to_datetime(telem_dat.time_stamp)
|
|
567
810
|
|
|
568
811
|
# calculate Epoch
|
|
569
|
-
telem_dat['epoch'] =
|
|
812
|
+
telem_dat['epoch'] = (telem_dat.time_stamp.astype('int64') // 10**9).astype('int64')
|
|
570
813
|
|
|
571
814
|
# format frequency code
|
|
572
815
|
telem_dat['FreqNo'] = telem_dat['Freq [MHz]'].apply(lambda x: f"{x:.3f}" )
|
|
@@ -595,26 +838,26 @@ def srx1200(file_name,
|
|
|
595
838
|
telem_dat.reset_index(inplace = True, drop = True)
|
|
596
839
|
|
|
597
840
|
telem_dat = telem_dat.astype({'power':'float32',
|
|
598
|
-
|
|
599
|
-
|
|
600
|
-
|
|
601
|
-
|
|
602
|
-
|
|
603
|
-
|
|
604
|
-
|
|
605
|
-
|
|
606
|
-
|
|
607
|
-
|
|
608
|
-
|
|
609
|
-
|
|
610
|
-
|
|
611
|
-
|
|
612
|
-
|
|
613
|
-
|
|
614
|
-
|
|
615
|
-
|
|
616
|
-
|
|
617
|
-
|
|
841
|
+
'freq_code':'object',
|
|
842
|
+
'time_stamp':'datetime64[ns]',
|
|
843
|
+
'scan_time':'float32',
|
|
844
|
+
'channels':'int32',
|
|
845
|
+
'rec_type':'object',
|
|
846
|
+
'epoch':'int64',
|
|
847
|
+
'noise_ratio':'float32',
|
|
848
|
+
'rec_id':'object'})
|
|
849
|
+
|
|
850
|
+
telem_dat = telem_dat[['power',
|
|
851
|
+
'time_stamp',
|
|
852
|
+
'epoch',
|
|
853
|
+
'freq_code',
|
|
854
|
+
'noise_ratio',
|
|
855
|
+
'scan_time',
|
|
856
|
+
'channels',
|
|
857
|
+
'rec_id',
|
|
858
|
+
'rec_type']]
|
|
859
|
+
|
|
860
|
+
_append_raw_data(db_dir, telem_dat, data_columns=True)
|
|
618
861
|
|
|
619
862
|
def srx800(file_name,
|
|
620
863
|
db_dir,
|
|
@@ -623,6 +866,57 @@ def srx800(file_name,
|
|
|
623
866
|
scan_time = 1,
|
|
624
867
|
channels = 1,
|
|
625
868
|
ant_to_rec_dict = None):
|
|
869
|
+
"""
|
|
870
|
+
Import Lotek SRX-800 receiver data into MAST HDF5 database.
|
|
871
|
+
|
|
872
|
+
Parses fixed-width format detection files from Lotek SRX-800 receivers.
|
|
873
|
+
Similar to SRX-1200 but with different column widths and firmware-specific
|
|
874
|
+
header parsing.
|
|
875
|
+
|
|
876
|
+
Parameters
|
|
877
|
+
----------
|
|
878
|
+
file_name : str
|
|
879
|
+
Absolute path to SRX-800 fixed-width text file
|
|
880
|
+
db_dir : str
|
|
881
|
+
Absolute path to project HDF5 database
|
|
882
|
+
rec_id : str
|
|
883
|
+
Unique receiver identifier
|
|
884
|
+
study_tags : list of str
|
|
885
|
+
List of valid freq_code tags deployed in study
|
|
886
|
+
scan_time : float, optional
|
|
887
|
+
Scan duration per channel in seconds (default: 1.0)
|
|
888
|
+
channels : int, optional
|
|
889
|
+
Number of receiver channels (default: 1)
|
|
890
|
+
ant_to_rec_dict : dict, optional
|
|
891
|
+
Mapping of antenna IDs to receiver IDs
|
|
892
|
+
|
|
893
|
+
Returns
|
|
894
|
+
-------
|
|
895
|
+
None
|
|
896
|
+
Data appended directly to HDF5 `/raw_data` table
|
|
897
|
+
|
|
898
|
+
Notes
|
|
899
|
+
-----
|
|
900
|
+
- Parses SRX-800 specific header format for scan configuration
|
|
901
|
+
- Fixed-width column parsing adjusted for SRX-800 output
|
|
902
|
+
- Handles multi-antenna configurations
|
|
903
|
+
|
|
904
|
+
Examples
|
|
905
|
+
--------
|
|
906
|
+
>>> parsers.srx800(
|
|
907
|
+
... file_name='C:/data/srx800_detections.txt',
|
|
908
|
+
... db_dir='C:/project/study.h5',
|
|
909
|
+
... rec_id='SRX800_001',
|
|
910
|
+
... study_tags=['166.380 7'],
|
|
911
|
+
... scan_time=2.0,
|
|
912
|
+
... channels=1
|
|
913
|
+
... )
|
|
914
|
+
|
|
915
|
+
See Also
|
|
916
|
+
--------
|
|
917
|
+
srx1200 : Parser for SRX-1200 receivers
|
|
918
|
+
srx600 : Parser for SRX-600 receivers
|
|
919
|
+
"""
|
|
626
920
|
|
|
627
921
|
rec_type = 'srx800'
|
|
628
922
|
|
|
@@ -786,6 +1080,7 @@ def srx800(file_name,
|
|
|
786
1080
|
names = ['DayNumber','Time','ChannelID','TagID','Antenna','power'],
|
|
787
1081
|
skiprows = dataRow,
|
|
788
1082
|
dtype = {'ChannelID':str,'TagID':str,'Antenna':str})
|
|
1083
|
+
telem_dat = telem_dat.iloc[:-1]
|
|
789
1084
|
telem_dat['day0'] = np.repeat(pd.to_datetime("1900-01-01"),len(telem_dat))
|
|
790
1085
|
telem_dat['Date'] = telem_dat['day0'] + pd.to_timedelta(telem_dat['DayNumber'].astype(int), unit='d')
|
|
791
1086
|
telem_dat['Date'] = telem_dat.Date.astype('str')
|
|
@@ -852,10 +1147,15 @@ def srx800(file_name,
|
|
|
852
1147
|
|
|
853
1148
|
# get setup number for every row
|
|
854
1149
|
try:
|
|
855
|
-
telem_dat_sub['setup'] = get_setup(
|
|
856
|
-
|
|
857
|
-
|
|
858
|
-
|
|
1150
|
+
telem_dat_sub['setup'] = get_setup(
|
|
1151
|
+
telem_dat_sub.epoch.values,
|
|
1152
|
+
setup_df.epoch.values
|
|
1153
|
+
)
|
|
1154
|
+
except (ValueError, TypeError, IndexError) as e:
|
|
1155
|
+
raise ValueError(
|
|
1156
|
+
f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
|
|
1157
|
+
"Check setup table epoch alignment and input data integrity."
|
|
1158
|
+
) from e
|
|
859
1159
|
|
|
860
1160
|
# get frequency from channel
|
|
861
1161
|
telem_dat_sub['Frequency'] = get_frequency(telem_dat_sub.setup.values,
|
|
@@ -934,6 +1234,57 @@ def srx600(file_name,
|
|
|
934
1234
|
scan_time = 1,
|
|
935
1235
|
channels = 1,
|
|
936
1236
|
ant_to_rec_dict = None):
|
|
1237
|
+
"""
|
|
1238
|
+
Import Lotek SRX-600 receiver data into MAST HDF5 database.
|
|
1239
|
+
|
|
1240
|
+
Parses fixed-width format detection files from Lotek SRX-600 receivers.
|
|
1241
|
+
Similar to SRX-800/1200 but with SRX-600 specific column widths and
|
|
1242
|
+
header structure.
|
|
1243
|
+
|
|
1244
|
+
Parameters
|
|
1245
|
+
----------
|
|
1246
|
+
file_name : str
|
|
1247
|
+
Absolute path to SRX-600 fixed-width text file
|
|
1248
|
+
db_dir : str
|
|
1249
|
+
Absolute path to project HDF5 database
|
|
1250
|
+
rec_id : str
|
|
1251
|
+
Unique receiver identifier
|
|
1252
|
+
study_tags : list of str
|
|
1253
|
+
List of valid freq_code tags deployed in study
|
|
1254
|
+
scan_time : float, optional
|
|
1255
|
+
Scan duration per channel in seconds (default: 1.0)
|
|
1256
|
+
channels : int, optional
|
|
1257
|
+
Number of receiver channels (default: 1)
|
|
1258
|
+
ant_to_rec_dict : dict, optional
|
|
1259
|
+
Mapping of antenna IDs to receiver IDs
|
|
1260
|
+
|
|
1261
|
+
Returns
|
|
1262
|
+
-------
|
|
1263
|
+
None
|
|
1264
|
+
Data appended directly to HDF5 `/raw_data` table
|
|
1265
|
+
|
|
1266
|
+
Notes
|
|
1267
|
+
-----
|
|
1268
|
+
- Parses SRX-600 specific header format
|
|
1269
|
+
- Fixed-width column parsing adjusted for SRX-600 output
|
|
1270
|
+
- Older receiver model with slightly different data structure
|
|
1271
|
+
|
|
1272
|
+
Examples
|
|
1273
|
+
--------
|
|
1274
|
+
>>> parsers.srx600(
|
|
1275
|
+
... file_name='C:/data/srx600_detections.txt',
|
|
1276
|
+
... db_dir='C:/project/study.h5',
|
|
1277
|
+
... rec_id='SRX600_001',
|
|
1278
|
+
... study_tags=['166.380 7'],
|
|
1279
|
+
... scan_time=1.5,
|
|
1280
|
+
... channels=1
|
|
1281
|
+
... )
|
|
1282
|
+
|
|
1283
|
+
See Also
|
|
1284
|
+
--------
|
|
1285
|
+
srx1200 : Parser for SRX-1200 receivers
|
|
1286
|
+
srx800 : Parser for SRX-800 receivers
|
|
1287
|
+
"""
|
|
937
1288
|
|
|
938
1289
|
rec_type = 'srx600'
|
|
939
1290
|
|
|
@@ -1095,8 +1446,8 @@ def srx600(file_name,
|
|
|
1095
1446
|
telem_dat_sub['time_stamp'] = pd.to_datetime(telem_dat_sub['Date'] + ' ' + telem_dat_sub['Time'])
|
|
1096
1447
|
telem_dat_sub.drop(['day0','DayNumber'],axis = 1, inplace = True)
|
|
1097
1448
|
|
|
1098
|
-
# calculate unix epoch
|
|
1099
|
-
|
|
1449
|
+
# calculate unix epoch as int64 seconds
|
|
1450
|
+
telem_dat_sub['epoch'] = (telem_dat_sub.time_stamp.astype('int64') // 10**9).astype('int64')
|
|
1100
1451
|
|
|
1101
1452
|
# clean up some more
|
|
1102
1453
|
telem_dat_sub.drop (['Date','Time','Frequency','TagID','ChannelID','Antenna'],axis = 1, inplace = True)
|
|
@@ -1137,17 +1488,7 @@ def srx600(file_name,
|
|
|
1137
1488
|
'noise_ratio':'float32',
|
|
1138
1489
|
'rec_id':'object'})
|
|
1139
1490
|
|
|
1140
|
-
|
|
1141
|
-
store.append(key = 'raw_data',
|
|
1142
|
-
value = telem_dat_sub,
|
|
1143
|
-
format = 'table',
|
|
1144
|
-
index = False,
|
|
1145
|
-
min_itemsize = {'freq_code':20,
|
|
1146
|
-
'rec_type':20,
|
|
1147
|
-
'rec_id':20},
|
|
1148
|
-
append = True,
|
|
1149
|
-
chunksize = 1000000,
|
|
1150
|
-
data_columns = True)
|
|
1491
|
+
_append_raw_data(db_dir, telem_dat_sub, data_columns=True)
|
|
1151
1492
|
else:
|
|
1152
1493
|
telem_dat = pd.read_fwf(file_name,
|
|
1153
1494
|
colspecs = [(0,9),(9,19),(19,29),(29,36),(36,44),(44,52)],
|
|
@@ -1212,21 +1553,563 @@ def srx600(file_name,
|
|
|
1212
1553
|
'noise_ratio':'float32',
|
|
1213
1554
|
'rec_id':'object'})
|
|
1214
1555
|
|
|
1215
|
-
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
value = telem_dat_sub,
|
|
1219
|
-
format = 'table',
|
|
1220
|
-
index = False,
|
|
1221
|
-
min_itemsize = {'freq_code':20,
|
|
1222
|
-
'rec_type':20,
|
|
1223
|
-
'rec_id':20},
|
|
1224
|
-
append = True,
|
|
1225
|
-
chunksize = 1000000)
|
|
1556
|
+
_append_raw_data(db_dir, telem_dat_sub)
|
|
1557
|
+
|
|
1558
|
+
|
|
1226
1559
|
|
|
1227
1560
|
|
|
1228
1561
|
|
|
1562
|
+
def PIT(file_name,
        db_dir,
        rec_id=None,
        study_tags=None,
        skiprows=6,
        scan_time=0,
        channels=0,
        rec_type="PIT",
        ant_to_rec_dict=None):
    """
    Import PIT (Passive Integrated Transponder) reader data into MAST HDF5 database.

    The file is sniffed to decide between a comma-separated layout and a
    fixed-width layout, parsed accordingly, cleaned, and appended to the
    project database through ``_append_raw_data``.

    Parameters
    ----------
    file_name : str
        Path to the PIT reader CSV/text file
    db_dir : str
        Path to the project HDF5 database
    rec_id : str, optional
        Receiver identifier assigned to every row in single-antenna mode
        (ignored when `ant_to_rec_dict` is given)
    study_tags : list of str, optional
        Tag IDs deployed in the study; forwarded to ``predictors.noise_ratio``
    skiprows : int, optional
        Number of header rows to skip in fixed-width files (default: 6).
        CSV files are read with their first row as the header.
    scan_time : float, optional
        Stored as-is in the ``scan_time`` column (default: 0)
    channels : int, optional
        Stored as-is in the ``channels`` column (default: 0)
    rec_type : str, optional
        Reader type identifier stored in the ``rec_type`` column (default: 'PIT')
    ant_to_rec_dict : dict, optional
        Mapping of antenna IDs to receiver IDs. When given, the parser runs in
        multi-antenna mode: keys may be integers or strings, and rows whose
        antenna does not map to a receiver are dropped.

    Returns
    -------
    None
        Data appended directly to the HDF5 database; returns early without
        writing when no rows survive cleaning.

    Raises
    ------
    ValueError
        If the file cannot be read or parsed, no timestamp or tag ID column
        can be found in a CSV file, multi-antenna mode is requested but no
        antenna column exists, or the noise ratio cannot be computed.

    Notes
    -----
    - CSV files use the ``Tag1Hex`` column as freq_code when present; otherwise
      a hex tag column, then a decimal tag column, is located by name.
    - Fixed-width files use the ``HEX Tag ID`` column as freq_code.
    - Rows with unparseable timestamps or freq_codes of 3 characters or fewer
      are removed before appending.

    Examples
    --------
    >>> parsers.PIT(
    ...     file_name='C:/data/pit_reader_001.csv',
    ...     db_dir='C:/project/pit_study.h5',
    ...     rec_id='PIT_WEIR_01',
    ...     study_tags=['3D9.1BF3C5A8B2', '3D9.1BF3C5A8C1'],
    ...     skiprows=6,
    ...     rec_type='PIT_Array'
    ... )

    See Also
    --------
    PIT_Multiple : Parser for multi-antenna PIT arrays
    """

    import pandas as pd
    import re
    from itertools import islice

    # Encodings attempted in order when loading the data file.
    encodings_to_try = ['utf-8', 'latin-1', 'cp1252', 'iso-8859-1']

    # Determine mode based on parameters
    is_multi_antenna = ant_to_rec_dict is not None
    mode_str = "multi-antenna" if is_multi_antenna else "single antenna"
    print(f"Parsing PIT file ({mode_str}): {file_name}")

    def find_column_by_patterns(df, patterns):
        """Return the first column, in column order, whose name contains any pattern."""
        for col in df.columns:
            col_lower = str(col).lower().strip()
            if any(pattern in col_lower for pattern in patterns):
                return col
        return None

    def find_column_by_priority(df, patterns):
        """Return the column matching the earliest pattern; pattern order wins over column order."""
        lowered = [(col, str(col).lower().strip()) for col in df.columns]
        for pattern in patterns:
            for col, col_lower in lowered:
                if pattern in col_lower:
                    return col
        return None

    def read_leading_lines(path, count):
        """Return up to `count` leading lines of `path` with trailing newlines removed."""
        # errors='replace' keeps this sniffing pass from raising on bytes the
        # default codec cannot decode; the real load below has its own
        # encoding fallback.
        with open(path, 'r', errors='replace') as handle:
            return [line.rstrip('\n') for line in islice(handle, count)]

    def analyze_file_format(path):
        """Sniff the first 20 lines; return (is_csv, detected_skiprows)."""
        lines = read_leading_lines(path, 20)

        # More than 3 commas on more than 2 of the last 10 sampled lines => CSV
        csv_indicators = sum(1 for line in lines[-10:] if line.count(',') > 3)
        is_csv = csv_indicators > 2

        # CSV files keep their header row (read_csv consumes it as column
        # names), so nothing is skipped for them.
        actual_skiprows = 0
        if not is_csv:
            for i, line in enumerate(lines):
                line_lower = line.lower()
                if 'ver' in line_lower:  # also covers 'version'
                    print(f"Found version info: {line}")

                # A keyword line after the first line is treated as the header row
                if any(indicator in line_lower
                       for indicator in ['scan date', 'date', 'timestamp', 'tag id']):
                    if i > 0:
                        actual_skiprows = i + 1
                        break

                # A line containing a date is treated as the first data line
                if re.search(r'\d{1,2}/\d{1,2}/\d{4}|\d{4}-\d{2}-\d{2}', line):
                    actual_skiprows = i
                    break

        return is_csv, actual_skiprows

    def map_antennas_to_receivers(frame, antenna_col):
        """Fill frame['rec_id'] from ant_to_rec_dict and drop rows that do not map."""
        raw = frame[antenna_col].astype(str).str.strip()
        digit_runs = raw.str.extract(r'(\d+)')[0]
        # Normalise the first run of digits ('007' -> '7'); missing stays None
        digit_keys = [str(int(run)) if isinstance(run, str) else None
                      for run in digit_runs]

        # Index the user mapping by stripped string key, plus a normalised
        # integer form for numeric keys, so 1, '1' and 1.0 all resolve.
        lookup = {}
        for key, receiver in ant_to_rec_dict.items():
            key_str = str(key).strip()
            lookup[key_str] = receiver
            try:
                as_number = float(key_str)
            except ValueError:
                continue
            if as_number.is_integer():
                lookup.setdefault(str(int(as_number)), receiver)

        # Numeric antenna id first, then the raw antenna string
        frame['rec_id'] = [lookup.get(digit_key, lookup.get(raw_key))
                           for digit_key, raw_key in zip(digit_keys, raw)]

        # report mapping summary for debugging
        print('Detected antenna values (sample):', raw.unique()[:20])
        mapped_counts = frame['rec_id'].notna().sum()
        print(f'Mapped {mapped_counts} / {len(frame)} rows to receivers via ant_to_rec_dict')

        # drop detections that do not map to a known receiver
        return frame.dropna(subset=['rec_id'])

    # Analyze file format
    is_csv_format, detected_skiprows = analyze_file_format(file_name)

    # Use detected skiprows for CSV, keep provided for fixed-width
    if is_csv_format:
        skiprows = detected_skiprows
        print(f"Detected CSV format, using skiprows: {skiprows}")
    else:
        print(f"Detected fixed-width format, using skiprows: {skiprows}")

    if is_csv_format:
        # ---- CSV format: first row supplies the column names ----
        telem_dat = None
        for encoding in encodings_to_try:
            try:
                telem_dat = pd.read_csv(file_name, dtype=str, encoding=encoding)
                break
            except UnicodeDecodeError:
                # must precede ValueError: UnicodeDecodeError is a subclass of it
                continue
            except (pd.errors.ParserError, ValueError) as e:
                raise ValueError(
                    f"CSV auto-detection failed for PIT file '{file_name}': {e}"
                ) from e
        if telem_dat is None:
            raise ValueError(f"Could not read file with any supported encoding: {encodings_to_try}")
        print(f"Auto-detected columns: {list(telem_dat.columns)}")

        # Find timestamp column dynamically
        timestamp_col = find_column_by_patterns(
            telem_dat, ['timestamp', 'time stamp', 'date', 'scan date', 'detected'])
        if not timestamp_col:
            raise ValueError("Could not find timestamp column")
        print(f"Found timestamp column: {timestamp_col}")

        # Guarantee the column exists even if every attempt below raises
        telem_dat["time_stamp"] = pd.NaT
        # Try formats in order; keep the first that parses at least one row
        for fmt in ["%m/%d/%Y %H:%M", "%Y-%m-%d %H:%M:%S", "%m/%d/%Y", "%Y-%m-%d", None]:
            try:
                if fmt:
                    telem_dat["time_stamp"] = pd.to_datetime(
                        telem_dat[timestamp_col], format=fmt, errors="coerce")
                else:
                    telem_dat["time_stamp"] = pd.to_datetime(
                        telem_dat[timestamp_col], errors="coerce")
            except (ValueError, TypeError):
                continue
            if not telem_dat["time_stamp"].isna().all():
                print(f"Successfully parsed timestamps using format: {fmt or 'auto-detect'}")
                break

        # Tag ID column: 'Tag1Hex' when present, otherwise search by name with
        # hex columns preferred over decimal ones and the generic 'tag' last.
        if 'Tag1Hex' in telem_dat.columns:
            tag_col = 'Tag1Hex'
        else:
            tag_col = find_column_by_priority(
                telem_dat,
                ['tag1hex', 'hex', 'tag id', 'tagid', 'tag1dec', 'dec tag', 'decimal', 'tag'])
            if tag_col is None:
                raise ValueError("Could not find tag ID column")
            print(f"Found tag column: {tag_col}")
        telem_dat["freq_code"] = telem_dat[tag_col].astype(str).str.strip()

        # Handle antenna mapping for multi-antenna files
        if is_multi_antenna:
            antenna_col = find_column_by_patterns(telem_dat, ['antenna', 'antennae', 'ant'])
            if not antenna_col:
                raise ValueError("Multi-antenna mode requires antenna column, but none found")
            print(f"Found antenna column: {antenna_col}")
            telem_dat = map_antennas_to_receivers(telem_dat, antenna_col)
        else:
            # Single antenna mode - use provided rec_id
            telem_dat["rec_id"] = rec_id

    else:
        # ---- Fixed-width format ----

        # Header text decides which column layout applies
        header_lines = read_leading_lines(file_name, max(skiprows, 10))
        header_text = " ".join(header_lines).lower()

        if 'latitude' in header_text or 'longitude' in header_text:
            colspecs = [(0, 12), (12, 26), (26, 41), (41, 56), (56, 59), (66, 70),
                        (79, 95), (95, 112), (113, 120), (120, 131), (138, 145),
                        (145, 155), (155, 166), (166, 175)]
            col_names = ["Scan Date", "Scan Time", "Download Date", "Download Time",
                         "Reader ID", "Antenna ID", "HEX Tag ID", "DEC Tag ID",
                         "Temperature_C", "Signal_mV", "Is Duplicate", "Latitude",
                         "Longitude", "File Name"]
            print("Using format with latitude/longitude")
        else:
            # NOTE(review): the final colspec (136, 136) is zero-width, so
            # "Is Duplicate" is presumably always empty - confirm intended bounds.
            colspecs = [(0, 12), (12, 26), (26, 41), (41, 56), (56, 62), (62, 73),
                        (73, 89), (89, 107), (107, 122), (122, 132), (136, 136)]
            col_names = ["Scan Date", "Scan Time", "Download Date", "Download Time",
                         "S/N", "Reader ID", "HEX Tag ID", "DEC Tag ID",
                         "Temperature_C", "Signal_mV", "Is Duplicate"]
            print("Using format without latitude/longitude")

        telem_dat = None
        for encoding in encodings_to_try:
            try:
                print(f"Attempting to read file with encoding: {encoding}")
                # dtype=str keeps dates and tag IDs as text, so the string
                # operations below never meet an inferred numeric column
                telem_dat = pd.read_fwf(
                    file_name,
                    colspecs=colspecs,
                    names=col_names,
                    skiprows=skiprows,
                    encoding=encoding,
                    dtype=str
                )
                print(f"Successfully read with encoding: {encoding}")
                break
            except UnicodeDecodeError:
                print(f"Failed with {encoding}, trying next...")
                continue

        if telem_dat is None:
            raise ValueError(f"Could not read file with any supported encoding: {encodings_to_try}")

        print(f"Fixed-width parsing complete. Shape: {telem_dat.shape}")

        # Build timestamp from Scan Date + Scan Time
        telem_dat["time_stamp"] = pd.to_datetime(
            telem_dat["Scan Date"] + " " + telem_dat["Scan Time"],
            errors="coerce"
        )

        # Use HEX Tag ID as freq_code
        telem_dat["freq_code"] = telem_dat["HEX Tag ID"].astype(str).str.strip()

        if not is_multi_antenna:
            telem_dat["rec_id"] = rec_id
        else:
            # NOTE(review): the first matching column in file order wins, so
            # "Reader ID" is chosen ahead of "Antenna ID" in the
            # latitude/longitude layout - confirm this is intended.
            antenna_names = ('antenna id', 'antenna', 'ant', 'antennae',
                             'antennae id', 'reader id', 'readerid')
            antenna_col = next(
                (col for col in telem_dat.columns
                 if str(col).lower().strip() in antenna_names),
                None)
            if antenna_col is None:
                raise ValueError(
                    'Multi-antenna fixed-width PIT file requires an antenna/reader column '
                    '(e.g., "Antenna ID" or "Reader ID"), but none was found'
                )
            telem_dat = map_antennas_to_receivers(telem_dat, antenna_col)

    # Data cleaning - remove invalid entries
    print(f"\nCleaning data - original records: {len(telem_dat)}")

    # Remove header artifacts
    header_patterns = ['HEX Tag ID', 'DEC Tag ID', '----', '====', 'Tag ID', 'Scan Date']
    telem_dat = telem_dat[~telem_dat['freq_code'].isin(header_patterns)]

    # Remove separator lines
    telem_dat = telem_dat[~telem_dat['freq_code'].str.match(r'^-+$', na=False)]

    # Remove rows with invalid timestamps
    telem_dat = telem_dat[telem_dat['time_stamp'].notna()]

    # Remove rows with missing or too-short freq_codes
    telem_dat = telem_dat[telem_dat['freq_code'].notna()]
    telem_dat = telem_dat[telem_dat['freq_code'].str.len() > 3]

    if len(telem_dat) == 0:
        print('No valid PIT rows after cleaning; nothing to append')
        return

    # Work on an independent frame so the column assignments below do not
    # target a filtered view of the parsed data
    telem_dat = telem_dat.copy()

    if 'power' not in telem_dat.columns:
        telem_dat['power'] = np.nan

    # Epoch as int64 seconds. The explicit datetime64[ns] cast makes the
    # integer view nanoseconds regardless of the resolution pandas inferred.
    telem_dat['epoch'] = (
        pd.to_datetime(telem_dat['time_stamp'])
        .astype('datetime64[ns]')
        .astype('int64') // 10**9
    ).astype('int64')
    telem_dat['channels'] = np.repeat(channels, len(telem_dat))
    telem_dat['scan_time'] = np.repeat(scan_time, len(telem_dat))
    telem_dat['rec_type'] = np.repeat(rec_type, len(telem_dat))

    # Noise ratio is always computed; study_tags is passed through unchanged.
    # NOTE(review): behaviour with study_tags=None depends on
    # predictors.noise_ratio - confirm.
    try:
        telem_dat['noise_ratio'] = predictors.noise_ratio(
            5.0,
            telem_dat.freq_code.values,
            telem_dat.epoch.values,
            study_tags
        )
    except (ValueError, TypeError, KeyError, IndexError) as e:
        raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e

    # ensure dtypes
    telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
                                  'epoch': 'int64',
                                  'freq_code': 'object',
                                  'power': 'float32',
                                  'rec_id': 'object',
                                  'rec_type': 'object',
                                  'scan_time': 'float32',
                                  'channels': 'int32',
                                  'noise_ratio': 'float32'})

    # reorder columns to match expected schema
    cols = ['time_stamp', 'epoch', 'freq_code', 'power', 'noise_ratio',
            'scan_time', 'channels', 'rec_id', 'rec_type']
    cols_existing = [c for c in cols if c in telem_dat.columns]

    _append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)

    # Read-only is enough to list the keys
    with pd.HDFStore(db_dir, mode='r') as store:
        print('Store keys after append:', store.keys())
|
|
1956
|
+
|
|
1957
|
+
|
|
1958
|
+
def PIT_Multiple(
    file_name,
    db_dir,
    study_tags=None,
    skiprows=0,
    scan_time=0,
    channels=0,
    rec_type="PIT_Multiple",
    ant_to_rec_dict=None
):
    """
    Import multi-antenna PIT array data into MAST HDF5 database.

    Reads a CSV with a fixed 24-column layout, assigns each detection to a
    receiver through `ant_to_rec_dict` using its ``Antennae`` value, and
    appends the standardized rows to the ``raw_data`` table.

    Parameters
    ----------
    file_name : str
        Path to PIT array CSV file
    db_dir : str
        Path to project HDF5 database
    study_tags : list of str, optional
        Accepted for signature parity with the other parsers; not used here
        (``noise_ratio`` is stored as 0.0)
    skiprows : int, optional
        Rows to skip before the header row, passed to ``pandas.read_csv``
        (default: 0)
    scan_time : float, optional
        Stored as-is in the ``scan_time`` column (default: 0)
    channels : int, optional
        Stored as-is in the ``channels`` column (default: 0)
    rec_type : str, optional
        Reader type identifier (default: 'PIT_Multiple')
    ant_to_rec_dict : dict
        Mapping of antenna IDs to receiver IDs (REQUIRED). Looked up first by
        the number found in the ``Antennae`` value, then by the raw value.

    Returns
    -------
    None
        Data appended directly to HDF5 `/raw_data` table

    Raises
    ------
    ValueError
        If `ant_to_rec_dict` is None (raised before the file is read)

    Notes
    -----
    - ``Tag1Hex`` is used as freq_code; rows with a missing or blank
      ``Tag1Hex`` are dropped
    - Rows whose antenna does not map to a receiver are dropped
    - Rows whose ``TimeStamp`` does not match ``%m/%d/%Y %H:%M`` are dropped
    - The remaining input columns are read but not stored

    Examples
    --------
    >>> ant_map = {
    ...     'Antenna1': 'PIT_WEIR_DOWNSTREAM',
    ...     'Antenna2': 'PIT_WEIR_UPSTREAM',
    ...     'Antenna3': 'PIT_WEIR_LADDER'
    ... }
    >>> parsers.PIT_Multiple(
    ...     file_name='C:/data/pit_array_detections.csv',
    ...     db_dir='C:/project/pit_study.h5',
    ...     study_tags=['3D9.1BF3C5A8B2'],
    ...     rec_type='PIT_Multiple',
    ...     ant_to_rec_dict=ant_map
    ... )

    See Also
    --------
    PIT : Parser for single PIT readers
    """
    # Validate arguments before doing any file I/O
    if ant_to_rec_dict is None:
        raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
    mode_str = "multi-antenna"

    # Column names for the expected CSV structure (replace the file's header row)
    col_names = [
        "FishId", "Tag1Dec", "Tag1Hex", "Tag2Dec", "Tag2Hex", "FloyTag", "RadioTag",
        "Location", "Source", "FishSpecies", "TimeStamp", "Weight", "Length",
        "Antennae", "Latitude", "Longitude", "SampleDate", "CaptureMethod",
        "LocationDetail", "Type", "Recapture", "Sex", "GeneticSampleID", "Comments"
    ]

    # dtype=str keeps tag IDs as text (no scientific notation)
    telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)

    # Convert "TimeStamp" to datetime with explicit format
    telem_dat["time_stamp"] = pd.to_datetime(
        telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")

    # Remember which rows really have a tag before casting, so missing values
    # are not stored as the literal freq_code "nan"
    has_tag = telem_dat["Tag1Hex"].notna()
    telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()

    # Map antennas to receivers: numeric antenna id first, raw string second
    antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
    antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
    rec_id = antenna_num.map(ant_to_rec_dict)
    if rec_id.isna().any():
        rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
    telem_dat["rec_id"] = rec_id

    # Keep rows with a real tag, a mapped receiver and a parsed timestamp
    keep = (
        has_tag
        & (telem_dat["freq_code"] != "")
        & telem_dat["rec_id"].notna()
        & telem_dat["time_stamp"].notna()
    )
    telem_dat = telem_dat[keep].copy()

    # Standardize columns
    telem_dat["power"] = 0.0
    telem_dat["noise_ratio"] = 0.0
    telem_dat["scan_time"] = scan_time
    telem_dat["channels"] = channels
    telem_dat["rec_type"] = rec_type

    # Epoch as int64 seconds. The explicit datetime64[ns] cast makes the
    # integer view nanoseconds regardless of the resolution pandas inferred.
    telem_dat["epoch"] = (
        telem_dat["time_stamp"].astype("datetime64[ns]").astype("int64") // 10**9
    )

    # Convert to standard data types
    telem_dat = telem_dat.astype({
        "power": "float32",
        "freq_code": "object",
        "time_stamp": "datetime64[ns]",
        "scan_time": "float32",
        "channels": "int32",
        "rec_type": "object",
        "epoch": "int64",
        "noise_ratio": "float32",
        "rec_id": "object"
    })

    # Keep only standard columns
    telem_dat = telem_dat[
        ["power", "time_stamp", "epoch", "freq_code", "noise_ratio",
         "scan_time", "channels", "rec_id", "rec_type"]
    ]

    # Append to HDF5 store
    with pd.HDFStore(db_dir, mode='a') as store:
        store.append(
            key="raw_data",
            value=telem_dat,
            format="table",
            index=False,
            min_itemsize={"freq_code": 20, "rec_type": 20, "rec_id": 20},
            append=True,
            chunksize=1000000,
            data_columns=True
        )

    print(f"\nSuccessfully parsed {file_name} and appended to {db_dir}!")
    print(f"Imported {len(telem_dat)} records in {mode_str} mode")

    with pd.HDFStore(db_dir, 'r') as store:
        print("Store keys after append:", store.keys())
|
|
2109
|
+
|
|
2110
|
+
|
|
2111
|
+
|
|
2112
|
+
|
|
2113
|
+
|
|
2114
|
+
|
|
1231
2115
|
|
|
1232
|
-
|