pymast 0.0.6__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
pymast/logger.py ADDED
@@ -0,0 +1,58 @@
1
+ """
2
+ Logging configuration for MAST
3
+ """
4
+ import logging
5
+ import sys
6
+
7
def setup_logging(level=logging.INFO, log_file=None):
    """
    Configure logging for MAST project.

    Resets the shared ``'pymast'`` logger so that it writes to the console
    (``sys.stdout``) and, optionally, to a log file. The function may be
    called repeatedly; handlers attached by an earlier call are removed and
    closed before the new ones are installed.

    Parameters
    ----------
    level : int
        Logging level (default: logging.INFO). Applied to the logger and to
        every handler created here.
    log_file : str, optional
        Path to log file. If None (or empty), logs only to console.

    Returns
    -------
    logger : logging.Logger
        Configured logger instance (the logger named ``'pymast'``)

    Examples
    --------
    >>> import logging
    >>> from pymast.logger import setup_logging
    >>> logger = setup_logging(level=logging.DEBUG)
    >>> logger.info("Starting analysis...")
    """
    # Create logger
    logger = logging.getLogger('pymast')
    logger.setLevel(level)

    # Remove existing handlers. Each one is closed as well as detached:
    # merely clearing the list would leave the file opened by a previous
    # FileHandler dangling for the life of the process. Iterate over a copy
    # because removeHandler() mutates logger.handlers.
    for old_handler in list(logger.handlers):
        logger.removeHandler(old_handler)
        old_handler.close()

    # Create formatter
    formatter = logging.Formatter(
        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
        datefmt='%Y-%m-%d %H:%M:%S'
    )

    # Console handler
    console_handler = logging.StreamHandler(sys.stdout)
    console_handler.setLevel(level)
    console_handler.setFormatter(formatter)
    logger.addHandler(console_handler)

    # File handler if specified
    if log_file:
        file_handler = logging.FileHandler(log_file)
        file_handler.setLevel(level)
        file_handler.setFormatter(formatter)
        logger.addHandler(file_handler)

    return logger
56
+
57
+ # Default logger
58
+ logger = logging.getLogger('pymast')
pymast/naive_bayes.py CHANGED
@@ -1,9 +1,65 @@
1
1
  # -*- coding: utf-8 -*-
2
2
  """
3
- Created on Wed Nov 15 21:50:12 2023
4
-
5
- @author: KNebiolo
3
+ Naive Bayes classifier for distinguishing true fish detections from noise.
4
+
5
+ This module implements a custom Naive Bayes classifier tailored for radio telemetry
6
+ data. It uses multiple predictor variables (signal power, lag differences, noise ratio,
7
+ etc.) to classify detections as true fish signals versus environmental noise or
8
+ spurious transmissions.
9
+
10
+ Classification Workflow
11
+ -----------------------
12
+ 1. **Training**: Calculate priors and likelihoods from hand-labeled data
13
+ 2. **Testing**: Apply trained classifier to unlabeled detections
14
+ 3. **Binning**: Discretize continuous predictors for probability calculations
15
+ 4. **Posterior**: Combine priors and likelihoods via Bayes' theorem
16
+ 5. **Threshold**: Classify based on posterior ratio (adjustable threshold)
17
+
18
+ Predictor Variables
19
+ -------------------
20
+ - **hit_ratio**: Proportion of detections matching expected pulse intervals
21
+ - **power**: Signal strength (dB or raw power)
22
+ - **lag_diff**: Variability in time between consecutive detections
23
+ - **cons_length**: Maximum contiguous sequence of expected detections
24
+ - **noise_ratio**: Ratio of miscoded to total detections in time window
25
+
26
+ Typical Usage
27
+ -------------
28
+ >>> import pymast.naive_bayes as nb
29
+ >>>
30
+ >>> # Calculate priors from labeled training data
31
+ >>> priors = nb.calculate_priors(labeled_truth_array)
32
+ >>>
33
+ >>> # Calculate likelihoods for each predictor
34
+ >>> likelihood_true = nb.calculate_likelihood(
35
+ ... training_obs=training_power,
36
+ ... labeled_array=labeled_truth,
37
+ ... assumption=True,
38
+ ... classification_obs=test_power,
39
+ ... laplace=1
40
+ ... )
41
+ >>>
42
+ >>> # Calculate posterior and classify
43
+ >>> posterior_true = nb.calculate_posterior(
44
+ ... priors, evidence, likelihoods_dict, assumption=True
45
+ ... )
46
+ >>> classifications = nb.classify_with_threshold(
47
+ ... posterior_true, posterior_false, threshold_ratio=1.0
48
+ ... )
49
+
50
+ Notes
51
+ -----
52
+ - Uses Laplace smoothing (add-one) to handle unseen predictor values
53
+ - Predictors are binned into discrete categories before classification
54
+ - Threshold ratio allows precision/recall tradeoff (default: 1.0 = MAP)
55
+ - Assumes conditional independence between predictors (Naive Bayes assumption)
56
+
57
+ See Also
58
+ --------
59
+ predictors : Calculation of predictor variables
60
+ radio_project : Project management and data storage
6
61
  """
62
+
7
63
  import numpy as np
8
64
 
9
65
  def calculate_priors(labeled_array):
@@ -132,7 +188,56 @@ def classify_with_threshold(posterior_true, posterior_false, threshold_ratio=1.0
132
188
  return classification
133
189
 
134
190
  def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
135
- 'bin numerical predictors for classification'
191
+ """
192
+ Bin continuous predictor variables into discrete categories for Naive Bayes.
193
+
194
+ Converts continuous predictor values into discrete bins for probability
195
+ calculations. Binning allows Naive Bayes to estimate likelihoods from
196
+ limited training data.
197
+
198
+ Parameters
199
+ ----------
200
+ hit_ratio : array_like
201
+ Proportion of detections matching expected pulse intervals (0.0 to 1.0)
202
+ power : array_like
203
+ Signal power values (dB or raw)
204
+ lag_diff : array_like
205
+ Differences in lag times between consecutive detections (seconds)
206
+ cons_length : array_like
207
+ Maximum contiguous sequence of expected detections (1 to 11)
208
+ noise_ratio : array_like
209
+ Ratio of miscoded to total detections (0.0 to 1.0)
210
+
211
+ Returns
212
+ -------
213
+ tuple of numpy.ndarray
214
+ (hit_ratio_count, power_count, lag_count, con_len_count, noise_count)
215
+ Each array contains bin indices for corresponding input values
216
+
217
+ Notes
218
+ -----
219
+ Binning strategies:
220
+ - hit_ratio: 11 bins from 0.0 to 1.0 (0.1 increments)
221
+ - power: 10 dB bins spanning min to max power (both limits floored to a multiple of 5 dB)
222
+ - lag_diff: 20-second bins with edges from -100 to 100 seconds
223
+ - cons_length: 1-unit bins from 1 to 12
224
+ - noise_ratio: 0.1 increment bins from 0.0 to 1.0
225
+
226
+ Examples
227
+ --------
228
+ >>> hit_ratio = np.array([0.25, 0.75, 0.95])
229
+ >>> power = np.array([100, 120, 140])
230
+ >>> lag_diff = np.array([-10, 0, 10])
231
+ >>> cons_length = np.array([3, 5, 8])
232
+ >>> noise_ratio = np.array([0.05, 0.15, 0.25])
233
+ >>> nb.bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio)
234
+ (array([3, 8, 10]), array([1, 3, 5]), array([5, 6, 6]), array([3, 5, 8]), array([1, 2, 3]))
235
+
236
+ See Also
237
+ --------
238
+ calculate_likelihood : Uses binned data for probability calculations
239
+ """
240
+ # bin numerical predictors for classification
136
241
  # define bins for analysis
137
242
 
138
243
  # hit ratio bins
@@ -141,10 +246,12 @@ def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
141
246
  # signal power bins - limits are floored to a multiple of 5
142
247
  min_power = power.min()//5 * 5
143
248
  max_power = power.max()//5 * 5
144
- try:
145
- power_bins =np.arange(min_power,max_power+20,10)
146
- except:
147
- print ('fuck')
249
+ try:
250
+ power_bins = np.arange(min_power, max_power + 20, 10)
251
+ except (TypeError, ValueError) as e:
252
+ raise ValueError(
253
+ f"Error creating power bins: {e}. Check that power values are valid."
254
+ ) from e
148
255
 
149
256
  # Lag Back Differences - how steady are detection lags?
150
257
  lag_bins =np.arange(-100,110,20)
@@ -162,4 +269,4 @@ def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
162
269
  con_len_count = np.digitize(cons_length, con_length_bins)
163
270
  noise_count = np.digitize(noise_ratio, noise_bins)
164
271
 
165
- return hit_ratio_count, power_count, lag_count, con_len_count, noise_count
272
+ return hit_ratio_count, power_count, lag_count, con_len_count, noise_count