pymast 0.0.6__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pymast/__init__.py +31 -2
- pymast/fish_history.py +59 -6
- pymast/formatter.py +886 -548
- pymast/logger.py +58 -0
- pymast/naive_bayes.py +116 -9
- pymast/overlap_removal.py +2327 -490
- pymast/parsers.py +1091 -208
- pymast/predictors.py +302 -116
- pymast/radio_project.py +1382 -512
- pymast/validation.py +224 -0
- pymast-1.0.1.dist-info/METADATA +636 -0
- pymast-1.0.1.dist-info/RECORD +15 -0
- {pymast-0.0.6.dist-info → pymast-1.0.1.dist-info}/WHEEL +1 -1
- pymast/table_merge.py +0 -154
- pymast-0.0.6.dist-info/METADATA +0 -19
- pymast-0.0.6.dist-info/RECORD +0 -14
- {pymast-0.0.6.dist-info → pymast-1.0.1.dist-info/licenses}/LICENSE.txt +0 -0
- {pymast-0.0.6.dist-info → pymast-1.0.1.dist-info}/top_level.txt +0 -0
pymast/logger.py
ADDED
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Logging configuration for MAST
|
|
3
|
+
"""
|
|
4
|
+
import logging
|
|
5
|
+
import sys
|
|
6
|
+
|
|
7
|
+
def setup_logging(level=logging.INFO, log_file=None):
    """
    Configure logging for MAST project.

    Resets the shared ``'pymast'`` logger and attaches a console handler
    writing to ``sys.stdout`` and, optionally, a file handler. Calling this
    function again replaces the previous configuration; handlers installed
    by an earlier call are detached *and closed* so that log files opened
    previously do not stay open.

    Parameters
    ----------
    level : int
        Logging level (default: logging.INFO). Applied to the logger and
        to every handler created here.
    log_file : str, optional
        Path to log file. If None (or otherwise falsy), logs only to console.

    Returns
    -------
    logger : logging.Logger
        Configured logger instance (the ``'pymast'`` logger)

    Examples
    --------
    >>> import logging
    >>> from pymast.logger import setup_logging
    >>> logger = setup_logging(level=logging.DEBUG)
    >>> logger.info("Starting analysis...")
    """
    # Create logger
    logger = logging.getLogger('pymast')
    logger.setLevel(level)

    # Remove existing handlers. Iterate over a copy because removeHandler()
    # mutates logger.handlers, and close each handler so a FileHandler left
    # over from a previous call releases its file descriptor.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    # Create formatter
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler if specified
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger
|
|
56
|
+
|
|
57
|
+
# Default logger
|
|
58
|
+
# Module-level handle to the same 'pymast' logger that setup_logging() configures;
# this assignment only looks the logger up and attaches no handlers itself.
logger = logging.getLogger('pymast')
|
pymast/naive_bayes.py
CHANGED
|
@@ -1,9 +1,65 @@
|
|
|
1
1
|
# -*- coding: utf-8 -*-
|
|
2
2
|
"""
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
3
|
+
Naive Bayes classifier for distinguishing true fish detections from noise.
|
|
4
|
+
|
|
5
|
+
This module implements a custom Naive Bayes classifier tailored for radio telemetry
|
|
6
|
+
data. It uses multiple predictor variables (signal power, lag differences, noise ratio,
|
|
7
|
+
etc.) to classify detections as true fish signals versus environmental noise or
|
|
8
|
+
spurious transmissions.
|
|
9
|
+
|
|
10
|
+
Classification Workflow
|
|
11
|
+
-----------------------
|
|
12
|
+
1. **Training**: Calculate priors and likelihoods from hand-labeled data
|
|
13
|
+
2. **Testing**: Apply trained classifier to unlabeled detections
|
|
14
|
+
3. **Binning**: Discretize continuous predictors for probability calculations
|
|
15
|
+
4. **Posterior**: Combine priors and likelihoods via Bayes' theorem
|
|
16
|
+
5. **Threshold**: Classify based on posterior ratio (adjustable threshold)
|
|
17
|
+
|
|
18
|
+
Predictor Variables
|
|
19
|
+
-------------------
|
|
20
|
+
- **hit_ratio**: Proportion of detections matching expected pulse intervals
|
|
21
|
+
- **power**: Signal strength (dB or raw power)
|
|
22
|
+
- **lag_diff**: Variability in time between consecutive detections
|
|
23
|
+
- **cons_length**: Maximum contiguous sequence of expected detections
|
|
24
|
+
- **noise_ratio**: Ratio of miscoded to total detections in time window
|
|
25
|
+
|
|
26
|
+
Typical Usage
|
|
27
|
+
-------------
|
|
28
|
+
>>> import pymast.naive_bayes as nb
|
|
29
|
+
>>>
|
|
30
|
+
>>> # Calculate priors from labeled training data
|
|
31
|
+
>>> priors = nb.calculate_priors(labeled_truth_array)
|
|
32
|
+
>>>
|
|
33
|
+
>>> # Calculate likelihoods for each predictor
|
|
34
|
+
>>> likelihood_true = nb.calculate_likelihood(
|
|
35
|
+
... training_obs=training_power,
|
|
36
|
+
... labeled_array=labeled_truth,
|
|
37
|
+
... assumption=True,
|
|
38
|
+
... classification_obs=test_power,
|
|
39
|
+
... laplace=1
|
|
40
|
+
... )
|
|
41
|
+
>>>
|
|
42
|
+
>>> # Calculate posterior and classify
|
|
43
|
+
>>> posterior_true = nb.calculate_posterior(
|
|
44
|
+
... priors, evidence, likelihoods_dict, assumption=True
|
|
45
|
+
... )
|
|
46
|
+
>>> classifications = nb.classify_with_threshold(
|
|
47
|
+
... posterior_true, posterior_false, threshold_ratio=1.0
|
|
48
|
+
... )
|
|
49
|
+
|
|
50
|
+
Notes
|
|
51
|
+
-----
|
|
52
|
+
- Uses Laplace smoothing (add-one) to handle unseen predictor values
|
|
53
|
+
- Predictors are binned into discrete categories before classification
|
|
54
|
+
- Threshold ratio allows precision/recall tradeoff (default: 1.0 = MAP)
|
|
55
|
+
- Assumes conditional independence between predictors (Naive Bayes assumption)
|
|
56
|
+
|
|
57
|
+
See Also
|
|
58
|
+
--------
|
|
59
|
+
predictors : Calculation of predictor variables
|
|
60
|
+
radio_project : Project management and data storage
|
|
6
61
|
"""
|
|
62
|
+
|
|
7
63
|
import numpy as np
|
|
8
64
|
|
|
9
65
|
def calculate_priors(labeled_array):
|
|
@@ -132,7 +188,56 @@ def classify_with_threshold(posterior_true, posterior_false, threshold_ratio=1.0
|
|
|
132
188
|
return classification
|
|
133
189
|
|
|
134
190
|
def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
|
|
135
|
-
|
|
191
|
+
"""
|
|
192
|
+
Bin continuous predictor variables into discrete categories for Naive Bayes.
|
|
193
|
+
|
|
194
|
+
Converts continuous predictor values into discrete bins for probability
|
|
195
|
+
calculations. Binning allows Naive Bayes to estimate likelihoods from
|
|
196
|
+
limited training data.
|
|
197
|
+
|
|
198
|
+
Parameters
|
|
199
|
+
----------
|
|
200
|
+
hit_ratio : array_like
|
|
201
|
+
Proportion of detections matching expected pulse intervals (0.0 to 1.0)
|
|
202
|
+
power : array_like
|
|
203
|
+
Signal power values (dB or raw)
|
|
204
|
+
lag_diff : array_like
|
|
205
|
+
Differences in lag times between consecutive detections (seconds)
|
|
206
|
+
cons_length : array_like
|
|
207
|
+
Maximum contiguous sequence of expected detections (1 to 11)
|
|
208
|
+
noise_ratio : array_like
|
|
209
|
+
Ratio of miscoded to total detections (0.0 to 1.0)
|
|
210
|
+
|
|
211
|
+
Returns
|
|
212
|
+
-------
|
|
213
|
+
tuple of numpy.ndarray
|
|
214
|
+
(hit_ratio_count, power_count, lag_count, con_len_count, noise_count)
|
|
215
|
+
Each array contains bin indices for corresponding input values
|
|
216
|
+
|
|
217
|
+
Notes
|
|
218
|
+
-----
|
|
219
|
+
Binning strategies:
|
|
220
|
+
- hit_ratio: 11 bins from 0.0 to 1.0 (0.1 increments)
|
|
221
|
+
- power: 10 dB bins from min to beyond max (both first floored to a multiple of 5 dB)
|
|
222
|
+
- lag_diff: 20-second bin edges from -100 to 100 seconds (the np.arange stop of 110 is exclusive)
|
|
223
|
+
- cons_length: 1-unit bins from 1 to 12
|
|
224
|
+
- noise_ratio: 0.1 increment bins from 0.0 to 1.0
|
|
225
|
+
|
|
226
|
+
Examples
|
|
227
|
+
--------
|
|
228
|
+
>>> hit_ratio = np.array([0.25, 0.75, 0.95])
|
|
229
|
+
>>> power = np.array([100, 120, 140])
|
|
230
|
+
>>> lag_diff = np.array([-10, 0, 10])
|
|
231
|
+
>>> cons_length = np.array([3, 5, 8])
|
|
232
|
+
>>> noise_ratio = np.array([0.05, 0.15, 0.25])
|
|
233
|
+
>>> nb.bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio)
|
|
234
|
+
(array([3, 8, 10]), array([1, 3, 5]), array([5, 6, 6]), array([3, 5, 8]), array([1, 2, 3]))
|
|
235
|
+
|
|
236
|
+
See Also
|
|
237
|
+
--------
|
|
238
|
+
calculate_likelihood : Uses binned data for probability calculations
|
|
239
|
+
"""
|
|
240
|
+
# bin numerical predictors for classification
|
|
136
241
|
# define bins for analysis
|
|
137
242
|
|
|
138
243
|
# hit ratio bins
|
|
@@ -141,10 +246,12 @@ def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
|
|
|
141
246
|
# signal power bins: floor min/max power to a multiple of 5, then build 10-unit-wide bin edges
|
|
142
247
|
min_power = power.min()//5 * 5
|
|
143
248
|
max_power = power.max()//5 * 5
|
|
144
|
-
try:
|
|
145
|
-
power_bins =np.arange(min_power,max_power+20,10)
|
|
146
|
-
except:
|
|
147
|
-
|
|
249
|
+
try:
|
|
250
|
+
power_bins = np.arange(min_power, max_power + 20, 10)
|
|
251
|
+
except (TypeError, ValueError) as e:
|
|
252
|
+
raise ValueError(
|
|
253
|
+
f"Error creating power bins: {e}. Check that power values are valid."
|
|
254
|
+
) from e
|
|
148
255
|
|
|
149
256
|
# Lag Back Differences - how steady are detection lags?
|
|
150
257
|
lag_bins =np.arange(-100,110,20)
|
|
@@ -162,4 +269,4 @@ def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
|
|
|
162
269
|
con_len_count = np.digitize(cons_length, con_length_bins)
|
|
163
270
|
noise_count = np.digitize(noise_ratio, noise_bins)
|
|
164
271
|
|
|
165
|
-
return hit_ratio_count, power_count, lag_count, con_len_count, noise_count
|
|
272
|
+
return hit_ratio_count, power_count, lag_count, con_len_count, noise_count
|