PyPI - pymast - Versions diffs - 1.0.0__tar.gz → 1.0.1__tar.gz - Mend

pymast 1.0.0.tar.gz → 1.0.1.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (33)

{pymast-1.0.0 → pymast-1.0.1}/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pymast
-Version: 1.0.0
+Version: 1.0.1
 Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
 Author: Theodore Castro-Santos
 Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>

{pymast-1.0.0 → pymast-1.0.1}/pymast/parsers.py RENAMED Viewed

@@ -80,32 +80,32 @@ predictors.noise_ratio : Miscoded detection ratio calculation
 import pandas as pd
 import numpy as np
 import datetime
-import os
-import pymast.predictors as predictors
-import sys
-def _append_raw_data(db_dir, telem_dat, data_columns=None):
-    with pd.HDFStore(db_dir, mode='a') as store:
-        append_kwargs = {
-            'key': 'raw_data',
-            'value': telem_dat,
-            'format': 'table',
-            'index': False,
-            'min_itemsize': {
-                'freq_code': 20,
-                'rec_type': 20,
-                'rec_id': 20,
-            },
-            'append': True,
-            'chunksize': 1000000,
-        }
-        if data_columns is not None:
-            append_kwargs['data_columns'] = data_columns
-        store.append(**append_kwargs)
-def ares(file_name,
-                 db_dir,
-                 rec_id,
+import os
+import pymast.predictors as predictors
+import sys
+def _append_raw_data(db_dir, telem_dat, data_columns=None):
+    with pd.HDFStore(db_dir, mode='a') as store:
+        append_kwargs = {
+            'key': 'raw_data',
+            'value': telem_dat,
+            'format': 'table',
+            'index': False,
+            'min_itemsize': {
+                'freq_code': 20,
+                'rec_type': 20,
+                'rec_id': 20,
+            },
+            'append': True,
+            'chunksize': 1000000,
+        }
+        if data_columns is not None:
+            append_kwargs['data_columns'] = data_columns
+        store.append(**append_kwargs)
+def ares(file_name,
+                 db_dir,
+                 rec_id,
                  study_tags,
                  scan_time = 1,
                  channels = 1,
@@ -229,26 +229,26 @@ def ares(file_name,
                                            telem_dat.epoch.values,
                                            study_tags)
-    telem_dat = telem_dat.astype({'power':'float32',
-                                  'freq_code':'object',
-                                  'time_stamp':'datetime64[ns]',
-                                  'scan_time':'float32',
-                                  'channels':'int32',
-                                  'rec_type':'object',
-                                  'epoch':'int64',
-                                  'noise_ratio':'float32',
-                                  'rec_id':'object'})
-    _append_raw_data(db_dir, telem_dat)
-def orion_import(file_name,
-                 db_dir,
-                 rec_id,
-                 study_tags,
-                 scan_time = 1.,
-                 channels = 1,
-                 ant_to_rec_dict = None):
+    telem_dat = telem_dat.astype({'power':'float32',
+                                  'freq_code':'object',
+                                  'time_stamp':'datetime64[ns]',
+                                  'scan_time':'float32',
+                                  'channels':'int32',
+                                  'rec_type':'object',
+                                  'epoch':'int64',
+                                  'noise_ratio':'float32',
+                                  'rec_id':'object'})
+    _append_raw_data(db_dir, telem_dat)
+def orion_import(file_name,
+                 db_dir,
+                 rec_id,
+                 study_tags,
+                 scan_time = 1.,
+                 channels = 1,
+                 ant_to_rec_dict = None):
     """
     Import Sigma Eight Orion receiver data into MAST HDF5 database.
@@ -334,33 +334,33 @@ def orion_import(file_name,
         telem_dat['Freq'] = telem_dat['Freq'].apply(lambda x: f"{x:.3f}")
-    def _write_orion_subset(df, receiver_id, epoch_dtype):
-        df = df.copy()
-        df['rec_id'] = np.repeat(receiver_id, len(df))
-        df.drop(['Ant'], axis = 1, inplace = True)
-        df = df.astype({'power':'float32',
-                        'freq_code':'object',
-                        'time_stamp':'datetime64[ns]',
-                        'scan_time':'float32',
-                        'channels':'int32',
-                        'rec_type':'object',
-                        'epoch': epoch_dtype,
-                        'noise_ratio':'float32',
-                        'rec_id':'object'})
-        df = df[['power',
-                 'time_stamp',
-                 'epoch',
-                 'freq_code',
-                 'noise_ratio',
-                 'scan_time',
-                 'channels',
-                 'rec_id',
-                 'rec_type']]
-        _append_raw_data(db_dir, df, data_columns=True)
-    if len(telem_dat) > 0:
+    def _write_orion_subset(df, receiver_id, epoch_dtype):
+        df = df.copy()
+        df['rec_id'] = np.repeat(receiver_id, len(df))
+        df.drop(['Ant'], axis = 1, inplace = True)
+        df = df.astype({'power':'float32',
+                        'freq_code':'object',
+                        'time_stamp':'datetime64[ns]',
+                        'scan_time':'float32',
+                        'channels':'int32',
+                        'rec_type':'object',
+                        'epoch': epoch_dtype,
+                        'noise_ratio':'float32',
+                        'rec_id':'object'})
+        df = df[['power',
+                 'time_stamp',
+                 'epoch',
+                 'freq_code',
+                 'noise_ratio',
+                 'scan_time',
+                 'channels',
+                 'rec_id',
+                 'rec_type']]
+        _append_raw_data(db_dir, df, data_columns=True)
+    if len(telem_dat) > 0:
         # add file name to data
         #['fileName'] = np.repeat(file_name,len(telem_dat))    #Note I'm going back here to the actual file name without the path.  Is that OK?  I prefer it, but it's a potential source of confusion
@@ -389,18 +389,18 @@ def orion_import(file_name,
                                                    telem_dat.epoch.values,
                                                    study_tags)
-            # if there is no antenna to receiver dictionary
-            if ant_to_rec_dict == None:
-                _write_orion_subset(telem_dat, rec_id, 'int64')
-            # if there is an antenna to receiver dictionary
-            else:
-                for i in ant_to_rec_dict.keys():
-                    # get site from dictionary
-                    site = ant_to_rec_dict[i]
-                    # get telemetryt data associated with this site
-                    telem_dat_sub = telem_dat[telem_dat.Ant == 1]
-                    _write_orion_subset(telem_dat_sub, site, 'float32')
+            # if there is no antenna to receiver dictionary
+            if ant_to_rec_dict == None:
+                _write_orion_subset(telem_dat, rec_id, 'int64')
+            # if there is an antenna to receiver dictionary
+            else:
+                for i in ant_to_rec_dict.keys():
+                    # get site from dictionary
+                    site = ant_to_rec_dict[i]
+                    # get telemetryt data associated with this site
+                    telem_dat_sub = telem_dat[telem_dat.Ant == 1]
+                    _write_orion_subset(telem_dat_sub, site, 'float32')
     else:
         raise ValueError("Invalid import parameters, no data returned")
         sys.exit()
@@ -492,7 +492,7 @@ def vr2_import(file_name,db_dir,study_tags, rec_id):
                           'noise_ratio':'float32',
                           'rec_id':'object'})
-        _append_raw_data(db_dir, telem_dat)
+        _append_raw_data(db_dir, telem_dat)
 def srx1200(file_name,
              db_dir,
@@ -785,7 +785,7 @@ def srx1200(file_name,
                                 'rec_id',
                                 'rec_type']]
-        _append_raw_data(db_dir, telem_dat, data_columns=True)
+        _append_raw_data(db_dir, telem_dat, data_columns=True)
     # if the data doesn't have a header
     else:
@@ -857,7 +857,7 @@ def srx1200(file_name,
                                 'rec_id',
                                 'rec_type']]
-        _append_raw_data(db_dir, telem_dat, data_columns=True)
+        _append_raw_data(db_dir, telem_dat, data_columns=True)
 def srx800(file_name,
              db_dir,
@@ -1146,16 +1146,16 @@ def srx800(file_name,
             telem_dat_sub['epoch'] = np.round((telem_dat_sub.time_stamp - pd.Timestamp("1970-01-01")) / pd.Timedelta('1s'),6)
             # get setup number for every row
-            try:
-                telem_dat_sub['setup'] = get_setup(
-                    telem_dat_sub.epoch.values,
-                    setup_df.epoch.values
-                )
-            except (ValueError, TypeError, IndexError) as e:
-                raise ValueError(
-                    f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
-                    "Check setup table epoch alignment and input data integrity."
-                ) from e
+            try:
+                telem_dat_sub['setup'] = get_setup(
+                    telem_dat_sub.epoch.values,
+                    setup_df.epoch.values
+                )
+            except (ValueError, TypeError, IndexError) as e:
+                raise ValueError(
+                    f"Failed to compute setup mapping for antenna '{ant}' at site '{site}'. "
+                    "Check setup table epoch alignment and input data integrity."
+                ) from e
             # get frequency from channel
             telem_dat_sub['Frequency'] = get_frequency(telem_dat_sub.setup.values,
@@ -1488,7 +1488,7 @@ def srx600(file_name,
                                                       'noise_ratio':'float32',
                                                       'rec_id':'object'})
-                _append_raw_data(db_dir, telem_dat_sub, data_columns=True)
+                _append_raw_data(db_dir, telem_dat_sub, data_columns=True)
     else:
         telem_dat = pd.read_fwf(file_name,
                                colspecs = [(0,9),(9,19),(19,29),(29,36),(36,44),(44,52)],
@@ -1553,7 +1553,7 @@ def srx600(file_name,
                                                       'noise_ratio':'float32',
                                                       'rec_id':'object'})
-                _append_raw_data(db_dir, telem_dat_sub)
+                _append_raw_data(db_dir, telem_dat_sub)
@@ -1644,13 +1644,13 @@ def PIT(file_name,
     # First, analyze the file to determine format
     def analyze_file_format(file_name):
         """Dynamically determine PIT file format and header structure"""
-        with open(file_name, 'r') as file:
-            lines = []
-            for _ in range(20):  # Read first 20 lines to analyze format
-                line = file.readline()
-                if not line:
-                    break
-                lines.append(line.rstrip('\n'))
+        with open(file_name, 'r') as file:
+            lines = []
+            for _ in range(20):  # Read first 20 lines to analyze format
+                line = file.readline()
+                if not line:
+                    break
+                lines.append(line.rstrip('\n'))
         # Check if CSV format (look for commas in sample lines)
         csv_indicators = 0
@@ -1711,10 +1711,10 @@ def PIT(file_name,
             telem_dat = pd.read_csv(file_name, dtype=str)
             print(f"Auto-detected columns: {list(telem_dat.columns)}")
-        except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
-            raise ValueError(
-                f"CSV auto-detection failed for PIT file '{file_name}': {e}"
-            ) from e
+        except (pd.errors.ParserError, UnicodeDecodeError, ValueError) as e:
+            raise ValueError(
+                f"CSV auto-detection failed for PIT file '{file_name}': {e}"
+            ) from e
         # Find timestamp column dynamically
         timestamp_col = find_column_by_patterns(telem_dat, ['timestamp', 'time stamp', 'date', 'scan date', 'detected'])
@@ -1732,8 +1732,8 @@ def PIT(file_name,
                     if not telem_dat["time_stamp"].isna().all():
                         print(f"Successfully parsed timestamps using format: {fmt or 'auto-detect'}")
                         break
-                except (ValueError, TypeError) as e:
-                    continue
+                except (ValueError, TypeError) as e:
+                    continue
         else:
             raise ValueError("Could not find timestamp column")
@@ -1773,14 +1773,14 @@ def PIT(file_name,
         # Fixed-Width Format Parsing (original logic)
         # Read header information for format detection
-        with open(file_name, 'r') as file:
-            header_lines = []
-            for _ in range(max(skiprows, 10)):
-                line = file.readline()
-                if not line:
-                    break
-                header_lines.append(line.rstrip('\n'))
-            header_text = " ".join(header_lines).lower()
+        with open(file_name, 'r') as file:
+            header_lines = []
+            for _ in range(max(skiprows, 10)):
+                line = file.readline()
+                if not line:
+                    break
+                header_lines.append(line.rstrip('\n'))
+            header_text = " ".join(header_lines).lower()
         # Define colspecs for different fixed-width formats
         if 'latitude' in header_text or 'longitude' in header_text:
@@ -1840,11 +1840,19 @@ def PIT(file_name,
         else:
             # try to find an antenna column in the fixed-width frame
             antenna_col = None
-            for col in telem_dat.columns:
-                col_lower = str(col).lower().strip()
-                if col_lower in ('antenna id', 'antenna', 'ant', 'antennae', 'antennae id'):
-                    antenna_col = col
-                    break
+            for col in telem_dat.columns:
+                col_lower = str(col).lower().strip()
+                if col_lower in (
+                    'antenna id',
+                    'antenna',
+                    'ant',
+                    'antennae',
+                    'antennae id',
+                    'reader id',
+                    'readerid',
+                ):
+                    antenna_col = col
+                    break
             if antenna_col is not None:
                 # extract numeric antenna identifier and map using provided dictionary
@@ -1854,12 +1862,12 @@ def PIT(file_name,
                 telem_dat['antenna_num'] = pd.to_numeric(telem_dat['antenna_num'], errors='coerce')
                 # Prepare mapping dict keys as strings and ints for robust lookup
-                ant_map = {}
-                for k, v in ant_to_rec_dict.items():
-                    key_str = str(k).strip()
-                    if key_str.isdigit():
-                        ant_map[int(key_str)] = v
-                    ant_map[key_str] = v
+                ant_map = {}
+                for k, v in ant_to_rec_dict.items():
+                    key_str = str(k).strip()
+                    if key_str.isdigit():
+                        ant_map[int(key_str)] = v
+                    ant_map[key_str] = v
                 # Map by numeric antenna if possible, else by raw string
                 telem_dat['rec_id'] = telem_dat['antenna_num'].map(ant_map)
@@ -1876,8 +1884,11 @@ def PIT(file_name,
                 # drop detections that do not map to a known receiver
                 telem_dat = telem_dat.dropna(subset=['rec_id'])
-            else:
-                raise ValueError('Multi-antenna fixed-width PIT file requires an antenna column but none was found')
+            else:
+                raise ValueError(
+                    'Multi-antenna fixed-width PIT file requires an antenna/reader column '
+                    '(e.g., "Antenna ID" or "Reader ID"), but none was found'
+                )
     # Data cleaning - remove invalid entries
     print(f"\nCleaning data - original records: {len(telem_dat)}")
@@ -1899,49 +1910,49 @@ def PIT(file_name,
     telem_dat = telem_dat[telem_dat['freq_code'].str.len() > 3]
     telem_dat = telem_dat[~telem_dat['freq_code'].isna()]
-    # Finalize fields and append to HDF5 /raw_data
-    if len(telem_dat) == 0:
-        print('No valid PIT rows after cleaning; nothing to append')
-        return
-    if 'power' not in telem_dat.columns:
-        telem_dat['power'] = np.nan
-    # compute epoch as int64 seconds and other derived fields
-    telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
+    # Finalize fields and append to HDF5 /raw_data
+    if len(telem_dat) == 0:
+        print('No valid PIT rows after cleaning; nothing to append')
+        return
+    if 'power' not in telem_dat.columns:
+        telem_dat['power'] = np.nan
+    # compute epoch as int64 seconds and other derived fields
+    telem_dat['epoch'] = (pd.to_datetime(telem_dat['time_stamp']).astype('int64') // 10**9).astype('int64')
     telem_dat['channels'] = np.repeat(channels, len(telem_dat))
     telem_dat['scan_time'] = np.repeat(scan_time, len(telem_dat))
     telem_dat['rec_type'] = np.repeat(rec_type, len(telem_dat))
     # compute noise ratio if study_tags provided
-    try:
-        telem_dat['noise_ratio'] = predictors.noise_ratio(
-            5.0,
-            telem_dat.freq_code.values,
-            telem_dat.epoch.values,
-            study_tags
-        )
-    except (ValueError, TypeError, KeyError, IndexError) as e:
-        raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
-    # ensure dtypes
-    telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
-                                  'epoch': 'int64',
-                                  'freq_code': 'object',
-                                  'power': 'float32',
-                                  'rec_id': 'object',
-                                  'rec_type': 'object',
-                                  'scan_time': 'float32',
-                                  'channels': 'int32',
-                                  'noise_ratio': 'float32'})
+    try:
+        telem_dat['noise_ratio'] = predictors.noise_ratio(
+            5.0,
+            telem_dat.freq_code.values,
+            telem_dat.epoch.values,
+            study_tags
+        )
+    except (ValueError, TypeError, KeyError, IndexError) as e:
+        raise ValueError(f"Failed to compute noise_ratio for PIT data: {e}") from e
+    # ensure dtypes
+    telem_dat = telem_dat.astype({'time_stamp': 'datetime64[ns]',
+                                  'epoch': 'int64',
+                                  'freq_code': 'object',
+                                  'power': 'float32',
+                                  'rec_id': 'object',
+                                  'rec_type': 'object',
+                                  'scan_time': 'float32',
+                                  'channels': 'int32',
+                                  'noise_ratio': 'float32'})
     # reorder columns to match expected schema
     cols = ['time_stamp', 'epoch', 'freq_code', 'power', 'noise_ratio', 'scan_time', 'channels', 'rec_id', 'rec_type']
     cols_existing = [c for c in cols if c in telem_dat.columns]
-    _append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
-    with pd.HDFStore(db_dir, mode='a') as store:
-        print('Store keys after append:', store.keys())
+    _append_raw_data(db_dir, telem_dat[cols_existing], data_columns=True)
+    with pd.HDFStore(db_dir, mode='a') as store:
+        print('Store keys after append:', store.keys())
 def PIT_Multiple(
@@ -2019,29 +2030,29 @@ def PIT_Multiple(
         "LocationDetail", "Type", "Recapture", "Sex", "GeneticSampleID", "Comments"
     ]
-    # Read the CSV into a DataFrame, skipping rows if needed
-    telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
-    mode_str = "multi-antenna"
-    if ant_to_rec_dict is None:
-        raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
-    # Convert "TimeStamp" to datetime with explicit format
-    telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
-    # Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
-    telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
-    telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
-    telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
-    antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
-    antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
-    rec_id = antenna_num.map(ant_to_rec_dict)
-    if rec_id.isna().any():
-        rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
-    telem_dat["rec_id"] = rec_id
-    telem_dat = telem_dat.dropna(subset=["rec_id"])
+    # Read the CSV into a DataFrame, skipping rows if needed
+    telem_dat = pd.read_csv(file_name, names=col_names, header=0, skiprows=skiprows, dtype=str)
+    mode_str = "multi-antenna"
+    if ant_to_rec_dict is None:
+        raise ValueError("ant_to_rec_dict is required for PIT_Multiple")
+    # Convert "TimeStamp" to datetime with explicit format
+    telem_dat["time_stamp"] = pd.to_datetime(telem_dat["TimeStamp"], format="%m/%d/%Y %H:%M", errors="coerce")
+    # Ensure "Tag1Dec" and "Tag1Hex" are treated as strings (avoid scientific notation issues)
+    telem_dat["Tag1Dec"] = telem_dat["Tag1Dec"].astype(str)
+    telem_dat["Tag1Hex"] = telem_dat["Tag1Hex"].astype(str)
+    telem_dat["freq_code"] = telem_dat["Tag1Hex"].astype(str).str.strip()
+    antenna_raw = telem_dat["Antennae"].astype(str).str.strip()
+    antenna_num = pd.to_numeric(antenna_raw.str.extract(r"(\d+)")[0], errors="coerce")
+    rec_id = antenna_num.map(ant_to_rec_dict)
+    if rec_id.isna().any():
+        rec_id = rec_id.fillna(antenna_raw.map(ant_to_rec_dict))
+    telem_dat["rec_id"] = rec_id
+    telem_dat = telem_dat.dropna(subset=["rec_id"])
     # if after_cleanup == 0:
     #     raise ValueError(f"No valid records found in {file_name}")
@@ -2101,4 +2112,4 @@ def PIT_Multiple(

{pymast-1.0.0 → pymast-1.0.1}/pymast.egg-info/PKG-INFO RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: pymast
-Version: 1.0.0
+Version: 1.0.1
 Summary: Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data
 Author: Theodore Castro-Santos
 Author-email: "Kevin P. Nebiolo" <kevin.nebiolo@kleinschmidtgroup.com>

{pymast-1.0.0 → pymast-1.0.1}/pyproject.toml RENAMED Viewed

@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 [project]
 name = "pymast"
-version = "1.0.0"
+version = "1.0.1"
 description = "Movement Analysis Software for Telemetry (MAST) - False positive removal and movement analysis for radio telemetry data"
 readme = "README.md"
 authors = [