pymast 1.0.0__tar.gz → 1.0.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (33)
  1. {pymast-1.0.0 → pymast-1.0.2}/PKG-INFO +1 -1
  2. {pymast-1.0.0 → pymast-1.0.2}/pymast/__init__.py +1 -1
  3. {pymast-1.0.0 → pymast-1.0.2}/pymast/parsers.py +197 -186
  4. {pymast-1.0.0 → pymast-1.0.2}/pymast/radio_project.py +164 -66
  5. {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/PKG-INFO +1 -1
  6. {pymast-1.0.0 → pymast-1.0.2}/pyproject.toml +19 -19
  7. {pymast-1.0.0 → pymast-1.0.2}/LICENSE.txt +0 -0
  8. {pymast-1.0.0 → pymast-1.0.2}/README.md +0 -0
  9. {pymast-1.0.0 → pymast-1.0.2}/pymast/fish_history.py +0 -0
  10. {pymast-1.0.0 → pymast-1.0.2}/pymast/formatter.py +0 -0
  11. {pymast-1.0.0 → pymast-1.0.2}/pymast/logger.py +0 -0
  12. {pymast-1.0.0 → pymast-1.0.2}/pymast/naive_bayes.py +0 -0
  13. {pymast-1.0.0 → pymast-1.0.2}/pymast/overlap_removal.py +0 -0
  14. {pymast-1.0.0 → pymast-1.0.2}/pymast/predictors.py +0 -0
  15. {pymast-1.0.0 → pymast-1.0.2}/pymast/validation.py +0 -0
  16. {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/SOURCES.txt +0 -0
  17. {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/dependency_links.txt +0 -0
  18. {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/requires.txt +0 -0
  19. {pymast-1.0.0 → pymast-1.0.2}/pymast.egg-info/top_level.txt +0 -0
  20. {pymast-1.0.0 → pymast-1.0.2}/setup.cfg +0 -0
  21. {pymast-1.0.0 → pymast-1.0.2}/setup.py +0 -0
  22. {pymast-1.0.0 → pymast-1.0.2}/tests/test_basic.py +0 -0
  23. {pymast-1.0.0 → pymast-1.0.2}/tests/test_csv_pit.py +0 -0
  24. {pymast-1.0.0 → pymast-1.0.2}/tests/test_formatter_tte.py +0 -0
  25. {pymast-1.0.0 → pymast-1.0.2}/tests/test_initial_state_release.py +0 -0
  26. {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_hdf5_integration.py +0 -0
  27. {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_loading.py +0 -0
  28. {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_small.py +0 -0
  29. {pymast-1.0.0 → pymast-1.0.2}/tests/test_overlap_unit.py +0 -0
  30. {pymast-1.0.0 → pymast-1.0.2}/tests/test_parsers_basic.py +0 -0
  31. {pymast-1.0.0 → pymast-1.0.2}/tests/test_pit_multiple_parser.py +0 -0
  32. {pymast-1.0.0 → pymast-1.0.2}/tests/test_pit_parser.py +0 -0
  33. {pymast-1.0.0 → pymast-1.0.2}/tests/test_unified_pit.py +0 -0
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pymast
- Version: 1.0.0
+ Version: 1.0.2
  Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
  Author: Theodore Castro-Santos
  Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>
@@ -28,7 +28,7 @@ from .parsers import *
  from .radio_project import *

  # Version
- __version__ = '1.0.0'
+ __version__ = '1.0.2'

  # Define what's available when using "from pymast import *"
  __all__ = [
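Note: the only change to pymast/__init__.py is the version bump. A minimal check of which release is active, assuming pymast is installed in the current environment:

    import pymast
    print(pymast.__version__)   # expected to print '1.0.2' after upgrading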
@@ -80,32 +80,32 @@ predictors.noise_ratio : Miscoded detection ratio calculation
  import pandas as pd
  import numpy as np
  import datetime
- import os
- import pymast.predictors as predictors
- import sys
-
- def _append_raw_data(db_dir, telem_dat, data_columns=None):
- with pd.HDFStore(db_dir, mode='a') as store:
- append_kwargs = {
- 'key': 'raw_data',
- 'value': telem_dat,
- 'format': 'table',
- 'index': False,
- 'min_itemsize': {
- 'freq_code': 20,
- 'rec_type': 20,
- 'rec_id': 20,
- },
- 'append': True,
- 'chunksize': 1000000,
- }
- if data_columns is not None:
- append_kwargs['data_columns'] = data_columns
- store.append(**append_kwargs)
-
- def ares(file_name,
- db_dir,
- rec_id,
+ import os
+ import pymast.predictors as predictors
+ import sys
+
+ def _append_raw_data(db_dir, telem_dat, data_columns=None):
+ with pd.HDFStore(db_dir, mode='a') as store:
+ append_kwargs = {
+ 'key': 'raw_data',
+ 'value': telem_dat,
+ 'format': 'table',
+ 'index': False,
+ 'min_itemsize': {
+ 'freq_code': 20,
+ 'rec_type': 20,
+ 'rec_id': 20,
+ },
+ 'append': True,
+ 'chunksize': 1000000,
+ }
+ if data_columns is not None:
+ append_kwargs['data_columns'] = data_columns
+ store.append(**append_kwargs)
+
+ def ares(file_name,
+ db_dir,
+ rec_id,
  study_tags,
  scan_time = 1,
  channels = 1,
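Note: the `_append_raw_data` helper above is only re-indented in 1.0.2; it centralizes every `store.append` call onto the shared /raw_data table. A minimal usage sketch, assuming the helper is in scope (as in pymast.parsers), a hypothetical HDF5 path 'project.h5', and an illustrative one-row frame; the real parsers pass the full raw-data schema shown later in this diff:

    demo = pd.DataFrame({'freq_code': ['164.480'],
                         'rec_type': ['orion'],
                         'rec_id': ['R01'],
                         'power': [np.float32(-78.0)]})
    # appends onto the shared /raw_data table, creating it on first use;
    # data_columns=True makes the string columns queryable with HDFStore 'where' clauses
    _append_raw_data('project.h5', demo, data_columns=True)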
@@ -229,26 +229,26 @@ def ares(file_name,
  telem_dat.epoch.values,
  study_tags)

- telem_dat = telem_dat.astype({'power':'float32',
- 'freq_code':'object',
- 'time_stamp':'datetime64[ns]',
- 'scan_time':'float32',
- 'channels':'int32',
- 'rec_type':'object',
- 'epoch':'int64',
- 'noise_ratio':'float32',
- 'rec_id':'object'})
-
- _append_raw_data(db_dir, telem_dat)
-
-
- def orion_import(file_name,
- db_dir,
- rec_id,
- study_tags,
- scan_time = 1.,
- channels = 1,
- ant_to_rec_dict = None):
+ telem_dat = telem_dat.astype({'power':'float32',
+ 'freq_code':'object',
+ 'time_stamp':'datetime64[ns]',
+ 'scan_time':'float32',
+ 'channels':'int32',
+ 'rec_type':'object',
+ 'epoch':'int64',
+ 'noise_ratio':'float32',
+ 'rec_id':'object'})
+
+ _append_raw_data(db_dir, telem_dat)
+
+
+ def orion_import(file_name,
+ db_dir,
+ rec_id,
+ study_tags,
+ scan_time = 1.,
+ channels = 1,
+ ant_to_rec_dict = None):
  """
  Import Sigma Eight Orion receiver data into MAST HDF5 database.

@@ -334,33 +334,33 @@ def orion_import(file_name,
  telem_dat['Freq'] = telem_dat['Freq'].apply(lambda x: f"{x:.3f}")


- def _write_orion_subset(df, receiver_id, epoch_dtype):
- df = df.copy()
- df['rec_id'] = np.repeat(receiver_id, len(df))
- df.drop(['Ant'], axis = 1, inplace = True)
- df = df.astype({'power':'float32',
- 'freq_code':'object',
- 'time_stamp':'datetime64[ns]',
- 'scan_time':'float32',
- 'channels':'int32',
- 'rec_type':'object',
- 'epoch': epoch_dtype,
- 'noise_ratio':'float32',
- 'rec_id':'object'})
-
- df = df[['power',
- 'time_stamp',
- 'epoch',
- 'freq_code',
- 'noise_ratio',
- 'scan_time',
- 'channels',
- 'rec_id',
- 'rec_type']]
-
- _append_raw_data(db_dir, df, data_columns=True)
-
- if len(telem_dat) > 0:
+ def _write_orion_subset(df, receiver_id, epoch_dtype):
+ df = df.copy()
+ df['rec_id'] = np.repeat(receiver_id, len(df))
+ df.drop(['Ant'], axis = 1, inplace = True)
+ df = df.astype({'power':'float32',
+ 'freq_code':'object',
+ 'time_stamp':'datetime64[ns]',
+ 'scan_time':'float32',
+ 'channels':'int32',
+ 'rec_type':'object',
+ 'epoch': epoch_dtype,
+ 'noise_ratio':'float32',
+ 'rec_id':'object'})
+
+ df = df[['power',
+ 'time_stamp',
+ 'epoch',
+ 'freq_code',
+ 'noise_ratio',
+ 'scan_time',
+ 'channels',
+ 'rec_id',
+ 'rec_type']]
+
+ _append_raw_data(db_dir, df, data_columns=True)
+
+ if len(telem_dat) > 0:
  # add file name to data
  #['fileName'] = np.repeat(file_name,len(telem_dat)) #Note I'm going back here to the actual file name without the path. Is that OK? I prefer it, but it's a potential source of confusion

@@ -389,18 +389,18 @@ def orion_import(file_name,
  telem_dat.epoch.values,
  study_tags)

- # if there is no antenna to receiver dictionary
- if ant_to_rec_dict == None:
- _write_orion_subset(telem_dat, rec_id, 'int64')
- # if there is an antenna to receiver dictionary
- else:
- for i in ant_to_rec_dict.keys():
- # get site from dictionary
- site = ant_to_rec_dict[i]
-
- # get telemetryt data associated with this site
- telem_dat_sub = telem_dat[telem_dat.Ant == 1]
- _write_orion_subset(telem_dat_sub, site, 'float32')
+ # if there is no antenna to receiver dictionary
+ if ant_to_rec_dict == None:
+ _write_orion_subset(telem_dat, rec_id, 'int64')
+ # if there is an antenna to receiver dictionary
+ else:
+ for i in ant_to_rec_dict.keys():
+ # get site from dictionary
+ site = ant_to_rec_dict[i]
+
+ # get telemetryt data associated with this site
+ telem_dat_sub = telem_dat[telem_dat.Ant == 1]
+ _write_orion_subset(telem_dat_sub, site, 'float32')
  else:
  raise ValueError("Invalid import parameters, no data returned")
  sys.exit()
@@ -492,7 +492,7 @@ def vr2_import(file_name,db_dir,study_tags, rec_id):
  'noise_ratio':'float32',
  'rec_id':'object'})

- _append_raw_data(db_dir, telem_dat)
+ _append_raw_data(db_dir, telem_dat)

  def srx1200(file_name,
  db_dir,
@@ -785,7 +785,7 @@ def srx1200(file_name,
  'rec_id',
  'rec_type']]

- _append_raw_data(db_dir, telem_dat, data_columns=True)
+ _append_raw_data(db_dir, telem_dat, data_columns=True)

  # if the data doesn't have a header
  else:
@@ -857,7 +857,7 @@ def srx1200(file_name,
  'rec_id',
  'rec_type']]

- _append_raw_data(db_dir, telem_dat, data_columns=True)
+ _append_raw_data(db_dir, telem_dat, data_columns=True)

  def srx800(file_name,
  db_dir,
@@ -1146,16 +1146,16 @@ def srx800(file_name,
  telem_dat_sub['epoch'] = np.round((telem_dat_sub.time_stamp - pd.Timestamp("1970-01-01")) / pd.Timedelta('1s'),6)

  # get setup number for every row
- try:
- telem_dat_sub['setup'] = get_setup(
- telem_dat_sub.epoch.values,
- setup_df.epoch.values
- )
- except (ValueError, TypeError, IndexError) as e:
- raise ValueError(
- f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
- "Check setup table epoch alignment and input data integrity."
- ) from e
+ try:
+ telem_dat_sub['setup'] = get_setup(
+ telem_dat_sub.epoch.values,
+ setup_df.epoch.values
+ )
+ except (ValueError, TypeError, IndexError) as e:
+ raise ValueError(
+ f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
+ "Check setup table epoch alignment and input data integrity."
+ ) from e

  # get frequency from channel
  telem_dat_sub['Frequency'] = get_frequency(telem_dat_sub.setup.values,
@@ -1488,7 +1488,7 @@ def srx600(file_name,
  'noise_ratio':'float32',
  'rec_id':'object'})

- _append_raw_data(db_dir, telem_dat_sub, data_columns=True)
+ _append_raw_data(db_dir, telem_dat_sub, data_columns=True)
  else:
  telem_dat = pd.read_fwf(file_name,
  colspecs = [(0,9),(9,19),(19,29),(29,36),(36,44),(44,52)],
@@ -1553,7 +1553,7 @@ def srx600(file_name,
  'noise_ratio':'float32',
  'rec_id':'object'})

- _append_raw_data(db_dir, telem_dat_sub)
+ _append_raw_data(db_dir, telem_dat_sub)



@@ -1644,13 +1644,13 @@ def PIT(file_name,
  # First, analyze the file to determine format
  def analyze_file_format(file_name):
  """Dynamically determine PIT file format and header structure"""
- with open(file_name, 'r') as file:
- lines = []
- for _ in range(20): # Read first 20 lines to analyze format
- line = file.readline()
- if not line:
- break
- lines.append(line.rstrip('\n'))
+ with open(file_name, 'r') as file:
+ lines = []
+ for _ in range(20): # Read first 20 lines to analyze format
+ line = file.readline()
+ if not line:
+ break
+ lines.append(line.rstrip('\n'))

  # Check if CSV format (look for commas in sample lines)
  csv_indicators = 0
@@ -1711,10 +1711,10 @@ def PIT(file_name,
  telem_dat = pd.read_csv(file_name, dtype=str)
  print(f"Auto-detected columns: {list(telem_dat.columns)}")

- except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
- raise ValueError(
- f"CSV auto-detection failed for PIT file '{file_name}': {e}"
- ) from e
+ except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
+ raise ValueError(
+ f"CSV auto-detection failed for PIT file '{file_name}': {e}"
+ ) from e

  # Find timestamp column dynamically
  timestamp_col = find_column_by_patterns(telem_dat, ['timestamp', 'time stamp', 'date', 'scan date', 'detected'])
@@ -1732,8 +1732,8 @@ def PIT(file_name,
  if not telem_dat["time_stamp"].isna().all():
  print(f"Successfully parsed timestamps using format: {fmt or 'auto-detect'}")
  break
- except (ValueError, TypeError) as e:
- continue
+ except (ValueError, TypeError) as e:
+ continue
  else:
  raise ValueError("Could not find timestamp column")

@@ -1773,14 +1773,14 @@ def PIT(file_name,
  # Fixed-Width Format Parsing (original logic)

  # Read header information for format detection
- with open(file_name, 'r') as file:
- header_lines = []
- for _ in range(max(skiprows, 10)):
- line = file.readline()
- if not line:
- break
- header_lines.append(line.rstrip('\n'))
- header_text = " ".join(header_lines).lower()
+ with open(file_name, 'r') as file:
+ header_lines = []
+ for _ in range(max(skiprows, 10)):
+ line = file.readline()
+ if not line:
+ break
+ header_lines.append(line.rstrip('\n'))
+ header_text = " ".join(header_lines).lower()

  # Define colspecs for different fixed-width formats
  if 'latitude' in header_text or 'longitude' in header_text:
@@ -1842,7 +1842,15 @@ def PIT(file_name,
  antenna_col = None
  for col in telem_dat.columns:
  col_lower = str(col).lower().strip()
- if col_lower in ('antenna id', 'antenna', 'ant', 'antennae', 'antennae id'):
+ if col_lower in (
+ 'antenna id',
+ 'antenna',
+ 'ant',
+ 'antennae',
+ 'antennae id',
+ 'reader id',
+ 'readerid',
+ ):
  antenna_col = col
  break

@@ -1854,12 +1862,12 @@ def PIT(file_name,
  telem_dat['antenna_num'] = pd.to_numeric(telem_dat['antenna_num'], errors='coerce')

  # Prepare mapping dict keys as strings and ints for robust lookup
- ant_map = {}
- for k, v in ant_to_rec_dict.items():
- key_str = str(k).strip()
- if key_str.isdigit():
- ant_map[int(key_str)] = v
- ant_map[key_str] = v
+ ant_map = {}
+ for k, v in ant_to_rec_dict.items():
+ key_str = str(k).strip()
+ if key_str.isdigit():
+ ant_map[int(key_str)] = v
+ ant_map[key_str] = v

  # Map by numeric antenna if possible, else by raw string
  telem_dat['rec_id'] = telem_dat['antenna_num'].map(ant_map)
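Note: the block above keeps the dual-keyed lookup used for fixed-width PIT files; every antenna key is registered both as a string and, when numeric, as an int, so the later `.map(ant_map)` works whether `antenna_num` parsed cleanly or not. A small self-contained worked example (the receiver IDs are hypothetical):

    ant_to_rec_dict = {'01': 'R01', 2: 'R02'}
    ant_map = {}
    for k, v in ant_to_rec_dict.items():
        key_str = str(k).strip()
        if key_str.isdigit():
            ant_map[int(key_str)] = v
        ant_map[key_str] = v
    # ant_map == {1: 'R01', '01': 'R01', 2: 'R02', '2': 'R02'}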
@@ -1877,7 +1885,10 @@ def PIT(file_name,
  # drop detections that do not map to a known receiver
  telem_dat = telem_dat.dropna(subset=['rec_id'])
  else:
- raise ValueError('Multi-antenna fixed-width PIT file requires an antenna column but none was found')
+ raise ValueError(
+ 'Multi-antenna fixed-width PIT file requires an antenna/reader column '
+ '(e.g., "Antenna ID" or "Reader ID"), but none was found'
+ )

  # Data cleaning - remove invalid entries
  print(f"\nCleaning data - original records: {len(telem_dat)}")
@@ -1899,49 +1910,49 @@ def PIT(file_name,
  telem_dat = telem_dat[telem_dat['freq_code'].str.len() > 3]
  telem_dat = telem_dat[~telem_dat['freq_code'].isna()]

- # Finalize fields and append to HDF5 /raw_data
- if len(telem_dat) == 0:
- print('No valid PIT rows after cleaning; nothing to append')
- return
-
- if 'power' not in telem_dat.columns:
- telem_dat['power'] = np.nan
-
- # compute epoch as int64 seconds and other derived fields
- telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
+ # Finalize fields and append to HDF5 /raw_data
+ if len(telem_dat) == 0:
+ print('No valid PIT rows after cleaning; nothing to append')
+ return
+
+ if 'power' not in telem_dat.columns:
+ telem_dat['power'] = np.nan
+
+ # compute epoch as int64 seconds and other derived fields
+ telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
  telem_dat['channels'] = np.repeat(channels, len(telem_dat))
  telem_dat['scan_time'] = np.repeat(scan_time, len(telem_dat))
  telem_dat['rec_type'] = np.repeat(rec_type, len(telem_dat))

  # compute noise ratio if study_tags provided
- try:
- telem_dat['noise_ratio'] = predictors.noise_ratio(
- 5.0,
- telem_dat.freq_code.values,
- telem_dat.epoch.values,
- study_tags
- )
- except (ValueError, TypeError, KeyError, IndexError) as e:
- raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
-
- # ensure dtypes
- telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
- 'epoch': 'int64',
- 'freq_code': 'object',
- 'power': 'float32',
- 'rec_id': 'object',
- 'rec_type': 'object',
- 'scan_time': 'float32',
- 'channels': 'int32',
- 'noise_ratio': 'float32'})
+ try:
+ telem_dat['noise_ratio'] = predictors.noise_ratio(
+ 5.0,
+ telem_dat.freq_code.values,
+ telem_dat.epoch.values,
+ study_tags
+ )
+ except (ValueError, TypeError, KeyError, IndexError) as e:
+ raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
+
+ # ensure dtypes
+ telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
+ 'epoch': 'int64',
+ 'freq_code': 'object',
+ 'power': 'float32',
+ 'rec_id': 'object',
+ 'rec_type': 'object',
+ 'scan_time': 'float32',
+ 'channels': 'int32',
+ 'noise_ratio': 'float32'})

  # reorder columns to match expected schema
  cols = ['time_stamp', 'epoch', 'freq_code', 'power', 'noise_ratio', 'scan_time', 'channels', 'rec_id', 'rec_type']
  cols_existing = [c for c in cols if c in telem_dat.columns]

- _append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
- with pd.HDFStore(db_dir, mode='a') as store:
- print('Store keys after append:', store.keys())
+ _append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
+ with pd.HDFStore(db_dir, mode='a') as store:
+ print('Store keys after append:', store.keys())


  def PIT_Multiple(
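Note: the epoch conversion in the block above turns parsed timestamps into integer seconds since the Unix epoch by flooring the nanosecond representation. A quick standalone check of what that produces, assuming pandas is imported as in the module header (naive timestamps are treated as UTC):

    ts = pd.Series(pd.to_datetime(['2024-01-01 00:00:30']))
    epoch = (ts.astype('int64') // 10**9).astype('int64')
    # epoch[0] == 1704067230  (seconds since 1970-01-01)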
@@ -2019,29 +2030,29 @@ def PIT_Multiple(
  "LocationDetail", "Type", "Recapture", "Sex", "GeneticSampleID", "Comments"
  ]

- # Read the CSV into a DataFrame, skipping rows if needed
- telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
-
- mode_str = "multi-antenna"
- if ant_to_rec_dict is None:
- raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
-
- # Convert "TimeStamp" to datetime with explicit format
- telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
-
- # Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
- telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
- telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
-
- telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
- antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
- antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
- rec_id = antenna_num.map(ant_to_rec_dict)
- if rec_id.isna().any():
- rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
- telem_dat["rec_id"] = rec_id
- telem_dat = telem_dat.dropna(subset=["rec_id"])
-
+ # Read the CSV into a DataFrame, skipping rows if needed
+ telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
+
+ mode_str = "multi-antenna"
+ if ant_to_rec_dict is None:
+ raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
+
+ # Convert "TimeStamp" to datetime with explicit format
+ telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
+
+ # Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
+ telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
+ telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
+
+ telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
+ antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
+ antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
+ rec_id = antenna_num.map(ant_to_rec_dict)
+ if rec_id.isna().any():
+ rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
+ telem_dat["rec_id"] = rec_id
+ telem_dat = telem_dat.dropna(subset=["rec_id"])
+
  # if after_cleanup == 0:
  # raise ValueError(f"No valid records found in {file_name}")

@@ -2101,4 +2112,4 @@ def PIT_Multiple(



-
+
@@ -95,21 +95,21 @@ import pymast.predictors as predictors
  import matplotlib.pyplot as plt
  from matplotlib import rcParams
  from scipy import interpolate
- try:
- from tqdm import tqdm
- except ImportError:
- def tqdm(iterable, **kwargs):
- return iterable
+ try:
+ from tqdm import tqdm
+ except ImportError:
+ def tqdm(iterable, **kwargs):
+ return iterable
  import shutil
  import warnings
  import dask.dataframe as dd
  import dask.array as da
- try:
- from dask_ml.cluster import KMeans
- _KMEANS_IMPL = 'dask'
- except ImportError:
- from sklearn.cluster import KMeans
- _KMEANS_IMPL = 'sklearn'
+ try:
+ from dask_ml.cluster import KMeans
+ _KMEANS_IMPL = 'dask'
+ except ImportError:
+ from sklearn.cluster import KMeans
+ _KMEANS_IMPL = 'sklearn'

  # Initialize logger
  logger = logging.getLogger('pymast.radio_project')
@@ -415,12 +415,12 @@ class radio_project():
  if self.non_interactive:
  logger.debug(f"Non-interactive mode: auto-answering '{prompt_text}' with '{default}'")
  return default
- try:
- return input(prompt_text)
- except (EOFError, OSError) as exc:
- raise RuntimeError(
- "Input prompt failed. Set project.non_interactive = True to use defaults."
- ) from exc
+ try:
+ return input(prompt_text)
+ except (EOFError, OSError) as exc:
+ raise RuntimeError(
+ "Input prompt failed. Set project.non_interactive = True to use defaults."
+ ) from exc

  def telem_data_import(self,
  rec_id,
@@ -496,9 +496,19 @@ class radio_project():

  logger.info(f" Found {len(tFiles)} file(s) to import")

+ # Track detections per file for statistics
+ detections_per_file = []
+
  # for every file call the correct text parser and import
  for i, f in enumerate(tqdm(tFiles, desc=f"Importing {rec_id}", unit="file"), 1):
  logger.debug(f" Processing file {i}/{len(tFiles)}: {f}")
+
+ # Count detections before import
+ try:
+ pre_count = len(pd.read_hdf(self.db, key='raw_data', where=f'rec_id = "{rec_id}"'))
+ except (KeyError, FileNotFoundError):
+ pre_count = 0
+
  # get the complete file directory
  f_dir = os.path.join(file_dir,f)

@@ -533,8 +543,91 @@ class radio_project():
  else:
  logger.error(f"No import routine for receiver type: {rec_type}")
  raise ValueError(f"No import routine available for receiver type: {rec_type}")
+
+ # Count detections after import
+ try:
+ post_count = len(pd.read_hdf(self.db, key='raw_data', where=f'rec_id = "{rec_id}"'))
+ detections_this_file = post_count - pre_count
+ detections_per_file.append(detections_this_file)
+ except (KeyError, FileNotFoundError):
+ detections_per_file.append(0)

  logger.info(f"✓ Import complete for receiver {rec_id}: {len(tFiles)} file(s) processed")
+
+ # Calculate and display import statistics
+ try:
+ raw_data = pd.read_hdf(self.db, key='raw_data', where=f'rec_id = "{rec_id}"')
+
+ # Total Detection Count
+ total_detections = len(raw_data)
+ logger.info(f"\n{'='*60}")
+ logger.info(f"IMPORT STATISTICS FOR {rec_id}")
+ logger.info(f"{'='*60}")
+ logger.info(f"Total Detection Count: {total_detections:,}")
+
+ if total_detections > 0:
+ # Detection count summary statistics
+ logger.info(f"\nDetection Summary Statistics:")
+ logger.info(f" Mean detections per file: {total_detections / len(tFiles):.1f}")
+ logger.info(f" Files processed: {len(tFiles)}")
+
+ # 5-number summary for detections per file
+ if len(detections_per_file) > 0:
+ det_array = np.array(detections_per_file)
+ logger.info(f"\nDetections Per File (5-number summary):")
+ logger.info(f" Min: {np.min(det_array):,.0f}")
+ logger.info(f" Q1: {np.percentile(det_array, 25):,.0f}")
+ logger.info(f" Median: {np.median(det_array):,.0f}")
+ logger.info(f" Q3: {np.percentile(det_array, 75):,.0f}")
+ logger.info(f" Max: {np.max(det_array):,.0f}")
+
+ # Unique Tag Count
+ unique_tags = raw_data['freq_code'].nunique()
+ logger.info(f"\nUnique Tag Count: {unique_tags}")
+
+ # Duplicate Tag Count and IDs
+ # Check for detections at the exact same timestamp (true duplicates)
+ if 'time_stamp' in raw_data.columns:
+ dup_mask = raw_data.duplicated(subset=['freq_code', 'time_stamp'], keep=False)
+ duplicate_count = dup_mask.sum()
+
+ if duplicate_count > 0:
+ duplicate_tags = raw_data.loc[dup_mask, 'freq_code'].unique()
+ logger.info(f"\nDuplicate Detection Count (same timestamp): {duplicate_count:,}")
+ logger.info(f"Duplicate Tag IDs ({len(duplicate_tags)} tags):")
+ for tag in sorted(duplicate_tags)[:10]: # Show first 10
+ tag_dups = dup_mask & (raw_data['freq_code'] == tag)
+ logger.info(f" {tag}: {tag_dups.sum()} duplicate(s)")
+ if len(duplicate_tags) > 10:
+ logger.info(f" ... and {len(duplicate_tags) - 10} more")
+ else:
+ logger.info(f"\nDuplicate Detection Count: 0 (no exact timestamp duplicates)")
+
+ # Time Coverage
+ if 'time_stamp' in raw_data.columns:
+ raw_data['time_stamp'] = pd.to_datetime(raw_data['time_stamp'])
+ start_time = raw_data['time_stamp'].min()
+ end_time = raw_data['time_stamp'].max()
+ duration = end_time - start_time
+
+ logger.info(f"\nTime Coverage:")
+ logger.info(f" Start: {start_time}")
+ logger.info(f" End: {end_time}")
+ logger.info(f" Duration: {duration.days} days, {duration.seconds // 3600} hours")
+
+ # Detection rate
+ if duration.total_seconds() > 0:
+ det_per_hour = total_detections / (duration.total_seconds() / 3600)
+ logger.info(f" Detection rate: {det_per_hour:.1f} detections/hour")
+
+ logger.info(f"{'='*60}\n")
+ else:
+ logger.warning(f"No detections found for receiver {rec_id}")
+
+ except KeyError:
+ logger.warning(f"Could not retrieve statistics - raw_data table not found in database")
+ except Exception as e:
+ logger.warning(f"Error calculating import statistics: {e}")

  def get_fish(self, rec_id, train = True, reclass_iter = None):
  logger.info(f"Getting fish for receiver {rec_id}")
@@ -1576,16 +1669,16 @@ class radio_project():
  node_path = node._v_pathname
  print(f" Copying {node_path}...")

- try:
- # Use recursive=True to copy entire subtree (Groups, Tables, Arrays, etc.)
- h5in.copy_node(
- where=node_path,
- newparent=h5out.root,
- recursive=True,
- filters=filters
- )
- except (tables.NodeError, tables.HDF5ExtError, OSError, ValueError) as e:
- raise RuntimeError(f"Failed to copy HDF5 node {node_path}: {e}") from e
+ try:
+ # Use recursive=True to copy entire subtree (Groups, Tables, Arrays, etc.)
+ h5in.copy_node(
+ where=node_path,
+ newparent=h5out.root,
+ recursive=True,
+ filters=filters
+ )
+ except (tables.NodeError, tables.HDF5ExtError, OSError, ValueError) as e:
+ raise RuntimeError(f"Failed to copy HDF5 node {node_path}: {e}") from e

  # Get new size
  new_size = os.path.getsize(output_path)
@@ -1603,26 +1696,29 @@ class radio_project():
  def make_recaptures_table(self, export=True, pit_study=False):
  '''Creates a recaptures key in the HDF5 file, iterating over receivers to manage memory.'''
  logger.info("Creating recaptures table")
+ logger.info(f" PIT study mode: {pit_study}")
  logger.info(f" Processing {len(self.receivers)} receiver(s)")
  # prepare a heartbeat log so long runs can be monitored (one-line per receiver)
  heartbeat_dir = os.path.join(self.project_dir, 'build')
- try:
- os.makedirs(heartbeat_dir, exist_ok=True)
- except OSError as e:
- raise RuntimeError(
- f"Failed to create heartbeat directory '{heartbeat_dir}': {e}"
- ) from e
+ try:
+ os.makedirs(heartbeat_dir, exist_ok=True)
+ except OSError as e:
+ raise RuntimeError(
+ f"Failed to create heartbeat directory '{heartbeat_dir}': {e}"
+ ) from e
  heartbeat_path = os.path.join(heartbeat_dir, 'recaptures_heartbeat.log')
  print(f"Starting recaptures: {len(self.receivers)} receivers. Heartbeat -> {heartbeat_path}")
- try:
- with open(heartbeat_path, 'a') as _hb:
- _hb.write(f"START {datetime.datetime.now().isoformat()} receivers={len(self.receivers)}\n")
- except OSError as e:
- raise RuntimeError(
- f"Failed to write heartbeat start to '{heartbeat_path}': {e}"
- ) from e
-
- if pit_study==False:
+ try:
+ with open(heartbeat_path, 'a') as _hb:
+ _hb.write(f"START {datetime.datetime.now().isoformat()} receivers={len(self.receivers)}\n")
+ except OSError as e:
+ raise RuntimeError(
+ f"Failed to write heartbeat start to '{heartbeat_path}': {e}"
+ ) from e
+
+ if not pit_study:
+ # RADIO STUDY PATH
+ logger.info(" Using RADIO study processing path")
  # Convert release dates to datetime if not already done
  self.tags['rel_date'] = pd.to_datetime(self.tags['rel_date'])
  tags_copy = self.tags.copy()
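Note: the heartbeat file written above is a plain append-only log, one line per receiver, that can be tailed to monitor long recaptures runs. Illustrative contents built from the write formats shown in this diff (timestamps, receiver IDs, and row counts are made up; the export path is whatever self.output_dir resolves to):

    START 2024-05-01T10:15:02.123456 receivers=3
    2024-05-01T10:18:40.551203 rec=R01 rows=48210
    2024-05-01T10:22:05.004911 rec=R02 rows=39977
    DONE 2024-05-01T10:30:12.770045 export=<output_dir>/recaptures.csv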
@@ -1787,15 +1883,17 @@ class radio_project():
  logger.info(f" ✓ Recaps for {rec} compiled and written to HDF5")
  print(f"[recaptures] ✓ {rec} written to database", flush=True)
  # append heartbeat line
- try:
- with open(heartbeat_path, 'a') as _hb:
- _hb.write(f"{datetime.datetime.now().isoformat()} rec={rec} rows={len(rec_dat)}\n")
- except OSError as e:
- raise RuntimeError(
- f"Failed to write heartbeat for receiver {rec} to '{heartbeat_path}': {e}"
- ) from e
+ try:
+ with open(heartbeat_path, 'a') as _hb:
+ _hb.write(f"{datetime.datetime.now().isoformat()} rec={rec} rows={len(rec_dat)}\n")
+ except OSError as e:
+ raise RuntimeError(
+ f"Failed to write heartbeat for receiver {rec} to '{heartbeat_path}': {e}"
+ ) from e

  else:
+ # PIT STUDY PATH
+ logger.info(" Using PIT study processing path")
  # Loop over each receiver in self.receivers
  for rec in tqdm(self.receivers.index, desc="Processing PIT receivers", unit="receiver"):
  logger.info(f" Processing {rec} (PIT study)...")
@@ -1917,13 +2015,13 @@ class radio_project():

  logger.info(f" ✓ PIT recaps for {rec} compiled and written to HDF5")
  print(f"[recaptures] ✓ {rec} PIT data written to database", flush=True)
- try:
- with open(heartbeat_path, 'a') as _hb:
- _hb.write(f"{datetime.datetime.now().isoformat()} pit_rec={rec} rows={len(pit_data)}\n")
- except OSError as e:
- raise RuntimeError(
- f"Failed to write PIT heartbeat for receiver {rec} to '{heartbeat_path}': {e}"
- ) from e
+ try:
+ with open(heartbeat_path, 'a') as _hb:
+ _hb.write(f"{datetime.datetime.now().isoformat()} pit_rec={rec} rows={len(pit_data)}\n")
+ except OSError as e:
+ raise RuntimeError(
+ f"Failed to write PIT heartbeat for receiver {rec} to '{heartbeat_path}': {e}"
+ ) from e


  if export:
@@ -1933,16 +2031,16 @@ class radio_project():
  rec_data.to_csv(os.path.join(self.output_dir,'recaptures.csv'), index=False)
  logger.info(f" ✓ Export complete: {os.path.join(self.output_dir,'recaptures.csv')}")
  print(f"[recaptures] ✓ Export complete: {os.path.join(self.output_dir,'recaptures.csv')}", flush=True)
- try:
- with open(heartbeat_path, 'a') as _hb:
- _hb.write(
- f"DONE {datetime.datetime.now().isoformat()} export="
- f"{os.path.join(self.output_dir, 'recaptures.csv')}\n"
- )
- except OSError as e:
- raise RuntimeError(
- f"Failed to write heartbeat completion to '{heartbeat_path}': {e}"
- ) from e
+ try:
+ with open(heartbeat_path, 'a') as _hb:
+ _hb.write(
+ f"DONE {datetime.datetime.now().isoformat()} export="
+ f"{os.path.join(self.output_dir, 'recaptures.csv')}\n"
+ )
+ except OSError as e:
+ raise RuntimeError(
+ f"Failed to write heartbeat completion to '{heartbeat_path}': {e}"
+ ) from e


  def undo_recaptures(self):
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: pymast
- Version: 1.0.0
+ Version: 1.0.2
  Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
  Author: Theodore Castro-Santos
  Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>
@@ -1,23 +1,23 @@
- [build-system]
- requires = ["setuptools>=61", "wheel"]
- build-backend = "setuptools.build_meta"
+ [build-system]
+ requires = ["setuptools>=61", "wheel"]
+ build-backend = "setuptools.build_meta"

  [project]
  name = "pymast"
- version = "1.0.0"
+ version = "1.0.2"
  description = "Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data"
  readme = "README.md"
  authors = [
  {name = "Kevin P. Nebiolo", email = "kevin.nebiolo@kleinschmidtgroup.com"},
  {name = "Theodore Castro-Santos"}
  ]
- license = "MIT"
- classifiers = [
- "Development Status :: 4 - Beta",
- "Intended Audience :: Science/Research",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.8",
- "Programming Language :: Python :: 3.9",
+ license = "MIT"
+ classifiers = [
+ "Development Status :: 4 - Beta",
+ "Intended Audience :: Science/Research",
+ "Programming Language :: Python :: 3",
+ "Programming Language :: Python :: 3.8",
+ "Programming Language :: Python :: 3.9",
  "Programming Language :: Python :: 3.10",
  "Programming Language :: Python :: 3.11",
  "Topic :: Scientific/Engineering :: Bio-Informatics",
@@ -30,14 +30,14 @@ dependencies = [
  "matplotlib>=3.4.0",
  "statsmodels>=0.12.0",
  "networkx>=2.5",
- "scipy>=1.7.1",
- "scikit-learn>=0.24.0",
- "h5py>=3.0.0",
- "dask>=2021.3.0",
- "dask-ml>=1.9.0",
- "distributed>=2021.3.0",
- "numba>=0.53.0",
- "tables>=3.8.0",
+ "scipy>=1.7.1",
+ "scikit-learn>=0.24.0",
+ "h5py>=3.0.0",
+ "dask>=2021.3.0",
+ "dask-ml>=1.9.0",
+ "distributed>=2021.3.0",
+ "numba>=0.53.0",
+ "tables>=3.8.0",
  "intervaltree>=3.1.0",
  ]
