PyPI - pymast - Versions diffs - 0.0.6__py3-none-any.whl → 1.0.1__py3-none-any.whl - Mend

pymast 0.0.6py3-none-any.whl → 1.0.1py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

pymast/__init__.py +31 -2
pymast/fish_history.py +59 -6
pymast/formatter.py +886 -548
pymast/logger.py +58 -0
pymast/naive_bayes.py +116 -9
pymast/overlap_removal.py +2327 -490
pymast/parsers.py +1091 -208
pymast/predictors.py +302 -116
pymast/radio_project.py +1382 -512
pymast/validation.py +224 -0
pymast-1.0.1.dist-info/METADATA +636 -0
pymast-1.0.1.dist-info/RECORD +15 -0
{pymast-0.0.6.dist-info → pymast-1.0.1.dist-info}/WHEEL +1 -1
pymast/table_merge.py +0 -154
pymast-0.0.6.dist-info/METADATA +0 -19
pymast-0.0.6.dist-info/RECORD +0 -14
{pymast-0.0.6.dist-info → pymast-1.0.1.dist-info/licenses}/LICENSE.txt +0 -0
{pymast-0.0.6.dist-info → pymast-1.0.1.dist-info}/top_level.txt +0 -0

pymast/logger.py ADDED Viewed

@@ -0,0 +1,58 @@
+"""
+Logging configuration for MAST
+"""
+import logging
+import sys
+def setup_logging(level=logging.INFO, log_file=None):
+    """
+    Configure the 'pymast' logger, closing and replacing handlers from earlier calls.
+    Parameters
+    ----------
+    level : int
+        Logging level for the logger and its handlers (default: logging.INFO)
+    log_file : str, optional
+        Path to log file. If None or empty, logs only to the console (stdout).
+    Returns
+    -------
+    logger : logging.Logger
+        Configured logger instance
+    Examples
+    --------
+    >>> import logging
+    >>> from pymast.logger import setup_logging
+    >>> logger = setup_logging(level=logging.DEBUG)
+    >>> logger.info("Starting analysis...")
+    """
+    logger = logging.getLogger('pymast')
+    logger.setLevel(level)
+    # Close (not just drop) old handlers so a previously opened log file is released
+    for old_handler in list(logger.handlers):
+        logger.removeHandler(old_handler)
+        old_handler.close()
+    formatter = logging.Formatter(
+        '%(asctime)s - %(name)s - %(levelname)s - %(message)s',
+        datefmt='%Y-%m-%d %H:%M:%S'
+    )
+    # Console handler is always attached; the file handler only when log_file is given
+    console_handler = logging.StreamHandler(sys.stdout)
+    console_handler.setLevel(level)
+    console_handler.setFormatter(formatter)
+    logger.addHandler(console_handler)
+    if log_file:
+        file_handler = logging.FileHandler(log_file)
+        file_handler.setLevel(level)
+        file_handler.setFormatter(formatter)
+        logger.addHandler(file_handler)
+    return logger
+# Default logger
+logger = logging.getLogger('pymast')

pymast/naive_bayes.py CHANGED Viewed

@@ -1,9 +1,65 @@
 # -*- coding: utf-8 -*-
 """
-Created on Wed Nov 15 21:50:12 2023
-@author: KNebiolo
+Naive Bayes classifier for distinguishing true fish detections from noise.
+This module implements a custom Naive Bayes classifier tailored for radio telemetry
+data. It uses multiple predictor variables (signal power, lag differences, noise ratio,
+etc.) to classify detections as true fish signals versus environmental noise or
+spurious transmissions.
+Classification Workflow
+-----------------------
+1. **Training**: Calculate priors and likelihoods from hand-labeled data
+2. **Testing**: Apply trained classifier to unlabeled detections
+3. **Binning**: Discretize continuous predictors for probability calculations
+4. **Posterior**: Combine priors and likelihoods via Bayes' theorem
+5. **Threshold**: Classify based on posterior ratio (adjustable threshold)
+Predictor Variables
+-------------------
+- **hit_ratio**: Proportion of detections matching expected pulse intervals
+- **power**: Signal strength (dB or raw power)
+- **lag_diff**: Variability in time between consecutive detections
+- **cons_length**: Maximum contiguous sequence of expected detections
+- **noise_ratio**: Ratio of miscoded to total detections in time window
+Typical Usage
+-------------
+>>> import pymast.naive_bayes as nb
+>>>
+>>> # Calculate priors from labeled training data
+>>> priors = nb.calculate_priors(labeled_truth_array)
+>>>
+>>> # Calculate likelihoods for each predictor
+>>> likelihood_true = nb.calculate_likelihood(
+...     training_obs=training_power,
+...     labeled_array=labeled_truth,
+...     assumption=True,
+...     classification_obs=test_power,
+...     laplace=1
+... )
+>>>
+>>> # Calculate posterior and classify
+>>> posterior_true = nb.calculate_posterior(
+...     priors, evidence, likelihoods_dict, assumption=True
+... )
+>>> classifications = nb.classify_with_threshold(
+...     posterior_true, posterior_false, threshold_ratio=1.0
+... )
+Notes
+-----
+- Uses Laplace smoothing (add-one) to handle unseen predictor values
+- Predictors are binned into discrete categories before classification
+- Threshold ratio allows precision/recall tradeoff (default: 1.0 = MAP)
+- Assumes conditional independence between predictors (Naive Bayes assumption)
+See Also
+--------
+predictors : Calculation of predictor variables
+radio_project : Project management and data storage
 """
 import numpy as np
 def calculate_priors(labeled_array):
@@ -132,7 +188,56 @@ def classify_with_threshold(posterior_true, posterior_false, threshold_ratio=1.0
     return classification
 def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
-    'bin numerical predictors for classification'
+    """
+    Bin continuous predictor variables into discrete categories for Naive Bayes.
+    Converts continuous predictor values into discrete bins for probability
+    calculations. Binning allows Naive Bayes to estimate likelihoods from
+    limited training data.
+    Parameters
+    ----------
+    hit_ratio : array_like
+        Proportion of detections matching expected pulse intervals (0.0 to 1.0)
+    power : array_like
+        Signal power values (dB or raw)
+    lag_diff : array_like
+        Differences in lag times between consecutive detections (seconds)
+    cons_length : array_like
+        Maximum contiguous sequence of expected detections (1 to 11)
+    noise_ratio : array_like
+        Ratio of miscoded to total detections (0.0 to 1.0)
+    Returns
+    -------
+    tuple of numpy.ndarray
+        (hit_ratio_count, power_count, lag_count, con_len_count, noise_count)
+        Each array contains bin indices for corresponding input values
+    Notes
+    -----
+    Binning strategies:
+    - hit_ratio: 11 bins from 0.0 to 1.0 (0.1 increments)
+    - power: 10 dB-wide bins from min to just past max (min and max each floored to a multiple of 5 dB)
+    - lag_diff: 20-second bins with edges from -100 to 100 seconds
+    - cons_length: 1-unit bins from 1 to 12
+    - noise_ratio: 0.1 increment bins from 0.0 to 1.0
+    Examples
+    --------
+    >>> hit_ratio = np.array([0.25, 0.75, 0.95])
+    >>> power = np.array([100, 120, 140])
+    >>> lag_diff = np.array([-10, 0, 10])
+    >>> cons_length = np.array([3, 5, 8])
+    >>> noise_ratio = np.array([0.05, 0.15, 0.25])
+    >>> nb.bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio)
+    (array([3, 8, 10]), array([1, 3, 5]), array([5, 6, 6]), array([3, 5, 8]), array([1, 2, 3]))
+    See Also
+    --------
+    calculate_likelihood : Uses binned data for probability calculations
+    """
+    # bin numerical predictors for classification
     # define bins for analysis
     # hit ratio bins
@@ -141,10 +246,12 @@ def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
     # plot signal power histograms by detection class
     min_power = power.min()//5 * 5
     max_power = power.max()//5 * 5
-    try:
-        power_bins =np.arange(min_power,max_power+20,10)
-    except:
-        print ('fuck')
+    try:
+        power_bins = np.arange(min_power, max_power + 20, 10)
+    except (TypeError, ValueError) as e:
+        raise ValueError(
+            f"Error creating power bins: {e}. Check that power values are valid."
+        ) from e
     # Lag Back Differences - how steady are detection lags?
     lag_bins =np.arange(-100,110,20)
@@ -162,4 +269,4 @@ def bin_predictors(hit_ratio, power, lag_diff, cons_length, noise_ratio):
     con_len_count = np.digitize(cons_length, con_length_bins)
     noise_count = np.digitize(noise_ratio, noise_bins)
-    return hit_ratio_count, power_count, lag_count, con_len_count, noise_count
+    return hit_ratio_count, power_count, lag_count, con_len_count, noise_count

pymast 0.0.6__py3-none-any.whl → 1.0.1__py3-none-any.whl

pymast 0.0.6py3-none-any.whl → 1.0.1py3-none-any.whl