pingmapper 5.3.7__tar.gz → 5.4.1__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. {pingmapper-5.3.7 → pingmapper-5.4.1}/PKG-INFO +1 -1
  2. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/class_portstarObj.py +220 -8
  3. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/class_sonObj.py +247 -0
  4. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/class_sonObj_nadirgaptest.py +247 -0
  5. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/doWork.py +3 -0
  6. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/main_readFiles.py +20 -6
  7. pingmapper-5.4.1/pingmapper/nonGUI_batch_main.py +129 -0
  8. pingmapper-5.4.1/pingmapper/nonGui_main.py +126 -0
  9. pingmapper-5.4.1/pingmapper/test_dq_filter.py +203 -0
  10. pingmapper-5.4.1/pingmapper/version.py +1 -0
  11. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper.egg-info/PKG-INFO +1 -1
  12. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper.egg-info/SOURCES.txt +3 -0
  13. pingmapper-5.3.7/pingmapper/version.py +0 -1
  14. {pingmapper-5.3.7 → pingmapper-5.4.1}/LICENSE +0 -0
  15. {pingmapper-5.3.7 → pingmapper-5.4.1}/README.md +0 -0
  16. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/__init__.py +0 -0
  17. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/__main__.py +0 -0
  18. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/class_mapSubstrateObj.py +0 -0
  19. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/class_rectObj.py +0 -0
  20. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/default_params.json +0 -0
  21. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/funcs_common.py +0 -0
  22. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/funcs_model.py +0 -0
  23. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/funcs_rectify.py +0 -0
  24. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/gui_main.py +0 -0
  25. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/main_mapSubstrate.py +0 -0
  26. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/main_rectify.py +0 -0
  27. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/processing_scripts/main_batchDirectory_2024-01-18_0926.py +0 -0
  28. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/processing_scripts/main_batchDirectory_2024-01-18_0929.py +0 -0
  29. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/scratch/funcs_pyhum_correct.py +0 -0
  30. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/scratch/main.py +0 -0
  31. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/scratch/main_batchDirectory.py +0 -0
  32. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/test_PINGMapper.py +0 -0
  33. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/test_time.py +0 -0
  34. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/DRAFT_Workflows/avg_predictions_Mussel_WBL.py +0 -0
  35. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/DRAFT_Workflows/gen_centerline.py +0 -0
  36. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/DRAFT_Workflows/gen_centerline_from_bankline.py +0 -0
  37. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/DRAFT_Workflows/gen_centerline_trkpnts_fitspline_DRAFT.py +0 -0
  38. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/DRAFT_Workflows/testEXAMPLE_mosaic_logit.py +0 -0
  39. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/RawEGN_avg_predictions.py +0 -0
  40. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/00_substrate_logits_mosaic_transects.py +0 -0
  41. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/00_substrate_shps_mosaic_transects.py +0 -0
  42. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/01_gen_centerline_from_coverage.py +0 -0
  43. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/02_gen_summary_stamp_shps.py +0 -0
  44. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/03_gen_summary_shp.py +0 -0
  45. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/04_combine_summary_shp_csv.py +0 -0
  46. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/05_gen_summary_shp_plots.py +0 -0
  47. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/06_compare_raw-egn_volume.py +0 -0
  48. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/08_raw-egn_hardReacheFreq_hist.py +0 -0
  49. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/09_raw-egn_PatchSize_density.py +0 -0
  50. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/Substrate_Summaries/summarize_project_substrate.py +0 -0
  51. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/export_coverage.py +0 -0
  52. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper/utils/main_mosaic_transects.py +0 -0
  53. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper.egg-info/dependency_links.txt +0 -0
  54. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper.egg-info/requires.txt +0 -0
  55. {pingmapper-5.3.7 → pingmapper-5.4.1}/pingmapper.egg-info/top_level.txt +0 -0
  56. {pingmapper-5.3.7 → pingmapper-5.4.1}/pyproject.toml +0 -0
  57. {pingmapper-5.3.7 → pingmapper-5.4.1}/setup.cfg +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: pingmapper
3
- Version: 5.3.7
3
+ Version: 5.4.1
4
4
  Summary: Open-source interface for processing recreation-grade side scan sonar datasets and reproducibly mapping benthic habitat
5
5
  Author: Daniel Buscombe
6
6
  Author-email: Cameron Bodine <bodine.cs@gmail.email>
@@ -1199,13 +1199,45 @@ class portstarObj(object):
1199
1199
  # Find ping-wise water column width from min and max depth prediction
1200
1200
  Wp = maxDepths+minDepths
1201
1201
 
1202
- # Try cropping so water column ~1/3 of target size area
1203
- WCProp = 1/3
1202
+ # Try cropping so water column ~1/4 of target size area.
1203
+ # Keeping less water column generally improves bed segmentation stability.
1204
+ WCProp = 1/4
1204
1205
 
1205
1206
  # Buffers so we don't crop too much
1206
1207
  WwcBuf = 150
1207
1208
  WsBuf = 150
1208
1209
 
1210
+ # Use instrument depth to constrain max bed search depth when available.
1211
+ # This helps avoid far-range false positives when returns are only valid
1212
+ # near the center (e.g., shallow channels with large configured range).
1213
+ # inst_depth_mult=3.0 means max search depth = 3x instrument depth, which
1214
+ # keeps the water column at roughly 25% of each side's range.
1215
+ inst_depth_mult = 3.0
1216
+
1217
+ if not hasattr(self.port, 'sonMetaDF'):
1218
+ self.port._loadSonMeta()
1219
+ if not hasattr(self.star, 'sonMetaDF'):
1220
+ self.star._loadSonMeta()
1221
+
1222
+ portChunk = self.port.sonMetaDF[self.port.sonMetaDF['chunk_id'] == i]
1223
+ starChunk = self.star.sonMetaDF[self.star.sonMetaDF['chunk_id'] == i]
1224
+
1225
+ portInstM = pd.to_numeric(portChunk['inst_dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1226
+ starInstM = pd.to_numeric(starChunk['inst_dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1227
+ portPixM = pd.to_numeric(portChunk['pixM'], errors='coerce').to_numpy(dtype=float, copy=True)
1228
+ starPixM = pd.to_numeric(starChunk['pixM'], errors='coerce').to_numpy(dtype=float, copy=True)
1229
+
1230
+ portInstPix = np.where((portInstM > 0) & (portPixM > 0), portInstM / portPixM, np.nan)
1231
+ starInstPix = np.where((starInstM > 0) & (starPixM > 0), starInstM / starPixM, np.nan)
1232
+
1233
+ instPix = np.concatenate((portInstPix, starInstPix))
1234
+ validInstPix = instPix[np.isfinite(instPix) & (instPix > 0)]
1235
+ if validInstPix.size > 0:
1236
+ instMedPix = np.nanmedian(validInstPix)
1237
+ maxDepByInst = instMedPix * inst_depth_mult
1238
+ if np.isfinite(maxDepByInst) and maxDepByInst > 0:
1239
+ maxDep = min(maxDep, maxDepByInst)
1240
+
1209
1241
  # Sum Wp to determine area of water column
1210
1242
  WpArea = np.nansum(Wp)
1211
1243
 
@@ -1235,16 +1267,31 @@ class portstarObj(object):
1235
1267
  if Ws > (C-(Wwc/2)):
1236
1268
  Ws = int( C - (Wwc/2) - (W/2) - WsBuf)
1237
1269
 
1270
+ # If Ws is negative, inst_depth * mult exceeds the recorded range.
1271
+ # Pad the far-range side with zeros so the crop proportions are correct.
1272
+ # Zero columns = no acoustic return, which the model interprets as bed,
1273
+ # anchoring the pick at the data boundary when depth approaches range.
1274
+ pad_far = 0
1275
+ if Ws < 0:
1276
+ pad_far = int(-Ws)
1277
+ Ws = 0
1278
+
1238
1279
  # Crop the original sonogram
1239
1280
  ## Port Crop
1240
1281
  lC = Ws # left side crop
1241
1282
  rC = int(C - (Wwc/2)) # right side crop
1242
1283
  portCrop = son3bnd[:, lC:rC,:]
1284
+ if pad_far > 0:
1285
+ _pad = np.zeros((portCrop.shape[0], pad_far, portCrop.shape[2]), dtype=np.uint8)
1286
+ portCrop = np.concatenate((_pad, portCrop), axis=1) # extend far range (left)
1243
1287
 
1244
1288
  ## Star Crop
1245
1289
  lC = int(C + (Wwc/2)) # left side crop
1246
1290
  rC = int(N - Ws) # right side crop
1247
1291
  starCrop = son3bnd[:, lC:rC, :]
1292
+ if pad_far > 0:
1293
+ _pad = np.zeros((starCrop.shape[0], pad_far, starCrop.shape[2]), dtype=np.uint8)
1294
+ starCrop = np.concatenate((starCrop, _pad), axis=1) # extend far range (right)
1248
1295
 
1249
1296
 
1250
1297
  ## Concatenate port & star crop
@@ -1277,8 +1324,9 @@ class portstarObj(object):
1277
1324
  # Calculate depth from prediction
1278
1325
  portDepPixCrop, starDepPixCrop = self._findBed(crop_label) # get pixel location of bed
1279
1326
 
1280
- # add Wwc/2 to get final estimate at original sonogram dimensions
1281
- portDepPixFinal = np.flip( np.asarray(portDepPixCrop) + int(Wwc/2) )
1327
+ # add Wwc/2 to get final estimate at original sonogram dimensions.
1328
+ # Subtract pad_far from port since padding shifted its columns left.
1329
+ portDepPixFinal = np.flip( np.asarray(portDepPixCrop) + int(Wwc/2) - pad_far )
1282
1330
  starDepPixFinal = np.flip( np.asarray(starDepPixCrop) + int(Wwc/2) )
1283
1331
 
1284
1332
  #############
@@ -1400,6 +1448,34 @@ class portstarObj(object):
1400
1448
  isChunk = son.sonMetaDF['chunk_id'] == chunk
1401
1449
  sonMeta = son.sonMetaDF[isChunk].reset_index()
1402
1450
  acoustic_depth = pd.to_numeric(sonMeta['inst_dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1451
+ acoustic_med = pd.Series(acoustic_depth).rolling(window=31, center=True, min_periods=1).median().to_numpy()
1452
+ acoustic_resid = np.abs(acoustic_depth - acoustic_med)
1453
+ acoustic_valid = np.isfinite(acoustic_depth) & (acoustic_depth > 0)
1454
+ acoustic_bad = np.zeros(acoustic_depth.shape, dtype=bool)
1455
+ if acoustic_valid.any():
1456
+ resid_valid = acoustic_resid[acoustic_valid]
1457
+ resid_center = np.nanmedian(resid_valid)
1458
+ resid_mad = np.nanmedian(np.abs(resid_valid - resid_center))
1459
+ resid_thr = max(0.5, 6.0 * resid_mad if np.isfinite(resid_mad) else 0.5)
1460
+ acoustic_bad |= acoustic_valid & (acoustic_resid > resid_thr)
1461
+
1462
+ if acoustic_depth.size >= 3:
1463
+ prev_step = acoustic_depth[1:-1] - acoustic_depth[:-2]
1464
+ next_step = acoustic_depth[2:] - acoustic_depth[1:-1]
1465
+ step_mag = np.abs(np.concatenate((prev_step, next_step)))
1466
+ step_mag = step_mag[np.isfinite(step_mag)]
1467
+ if step_mag.size > 0:
1468
+ step_center = np.nanmedian(step_mag)
1469
+ step_mad = np.nanmedian(np.abs(step_mag - step_center))
1470
+ jump_thr = max(0.5, 6.0 * step_mad if np.isfinite(step_mad) else 0.5)
1471
+ acoustic_bad[1:-1] |= (
1472
+ acoustic_valid[1:-1] & acoustic_valid[:-2] & acoustic_valid[2:] &
1473
+ (np.abs(prev_step) > jump_thr) &
1474
+ (np.abs(next_step) > jump_thr) &
1475
+ ((prev_step * next_step) < 0)
1476
+ )
1477
+
1478
+ acoustic_depth = np.where(acoustic_bad, np.nan, acoustic_depth)
1403
1479
  acousticBed = np.round(acoustic_depth / sonMeta['pixM'].to_numpy(dtype=float, copy=True), 0)
1404
1480
  acousticBed = acousticBed[np.isfinite(acousticBed) & (acousticBed > 0)]
1405
1481
 
@@ -1407,8 +1483,8 @@ class portstarObj(object):
1407
1483
  # Step 1 : Acoustic Bedpick Filter
1408
1484
  # Use acoustic bed pick to crop image
1409
1485
  if acousticBed.size > 0:
1410
- bedMin = max(int(np.nanmin(acousticBed)) - 50, 0)
1411
- bedMax = int(np.nanmax(acousticBed)) + pix_buf
1486
+ bedMin = max(int(np.nanpercentile(acousticBed, 5)) - 50, 0)
1487
+ bedMax = int(np.nanpercentile(acousticBed, 95)) + pix_buf
1412
1488
  else:
1413
1489
  bedMin = 0
1414
1490
  bedMax = H
@@ -1570,6 +1646,82 @@ class portstarObj(object):
1570
1646
  return '0 pixels'
1571
1647
  return str(float(adjDep) / float(valid_pix.iloc[0])) + ' pixels'
1572
1648
 
1649
+ def _rolling_median(vals, window=31):
1650
+ return pd.Series(vals).rolling(window=window, center=True, min_periods=1).median().to_numpy()
1651
+
1652
+ def _flag_depth_outliers(depth, inst_depth=None, inst_depth_mult=None,
1653
+ resid_floor_m=0.5, jump_floor_m=0.5,
1654
+ iterative_jump=False):
1655
+ depth = np.asarray(depth, dtype=float)
1656
+ flags = np.zeros(depth.shape, dtype=bool)
1657
+
1658
+ valid = np.isfinite(depth) & (depth > 0)
1659
+ if not valid.any():
1660
+ return flags
1661
+
1662
+ med = _rolling_median(depth)
1663
+ resid = np.abs(depth - med)
1664
+ resid_valid = resid[valid]
1665
+ resid_center = np.nanmedian(resid_valid)
1666
+ resid_mad = np.nanmedian(np.abs(resid_valid - resid_center))
1667
+ resid_thr = max(resid_floor_m, 6.0 * resid_mad if np.isfinite(resid_mad) else resid_floor_m)
1668
+ flags |= valid & (resid > resid_thr)
1669
+
1670
+ if depth.size >= 3:
1671
+ prev_step = depth[1:-1] - depth[:-2]
1672
+ next_step = depth[2:] - depth[1:-1]
1673
+ step_mag = np.abs(np.concatenate((prev_step, next_step)))
1674
+ step_mag = step_mag[np.isfinite(step_mag)]
1675
+ if step_mag.size > 0:
1676
+ step_center = np.nanmedian(step_mag)
1677
+ step_mad = np.nanmedian(np.abs(step_mag - step_center))
1678
+ jump_thr = max(jump_floor_m, 6.0 * step_mad if np.isfinite(step_mad) else jump_floor_m)
1679
+ spike_mid = (
1680
+ valid[1:-1] & valid[:-2] & valid[2:] &
1681
+ (np.abs(prev_step) > jump_thr) &
1682
+ (np.abs(next_step) > jump_thr) &
1683
+ ((prev_step * next_step) < 0)
1684
+ )
1685
+ flags[1:-1] |= spike_mid
1686
+
1687
+ # Acoustic depth failures often persist as a step change rather than
1688
+ # a single-ping spike (e.g., 2.4 m -> 6.3 m for many consecutive
1689
+ # records). Iteratively flagging the later side of large jumps peels
1690
+ # back those runs until continuity is restored.
1691
+ if iterative_jump:
1692
+ work = depth.copy()
1693
+ max_iter = max(1, depth.size)
1694
+ for _ in range(max_iter):
1695
+ valid_work = np.isfinite(work) & (work > 0)
1696
+ valid_idx = np.flatnonzero(valid_work)
1697
+ if valid_idx.size < 2:
1698
+ break
1699
+
1700
+ step_vals = np.abs(np.diff(work[valid_idx]))
1701
+ step_vals = step_vals[np.isfinite(step_vals)]
1702
+ if step_vals.size == 0:
1703
+ break
1704
+
1705
+ step_center = np.nanmedian(step_vals)
1706
+ step_mad = np.nanmedian(np.abs(step_vals - step_center))
1707
+ jump_thr = max(jump_floor_m, 6.0 * step_mad if np.isfinite(step_mad) else jump_floor_m)
1708
+
1709
+ diffs = np.abs(np.diff(work[valid_idx]))
1710
+ bad_step_pos = np.flatnonzero(np.isfinite(diffs) & (diffs > jump_thr))
1711
+ if bad_step_pos.size == 0:
1712
+ break
1713
+
1714
+ new_bad_idx = valid_idx[bad_step_pos + 1]
1715
+ flags[new_bad_idx] = True
1716
+ work[new_bad_idx] = np.nan
1717
+
1718
+ if inst_depth is not None and inst_depth_mult is not None:
1719
+ inst_depth = np.asarray(inst_depth, dtype=float)
1720
+ inst_valid = np.isfinite(inst_depth) & (inst_depth > 0)
1721
+ flags |= valid & inst_valid & (depth > (inst_depth * inst_depth_mult))
1722
+
1723
+ return flags
1724
+
1573
1725
  def _sync_trackline_depth(beam_obj, beam_df):
1574
1726
  trk_file = os.path.join(beam_obj.metaDir, 'Trackline_Smth_' + beam_obj.beamName + '.csv')
1575
1727
  if not os.path.exists(trk_file):
@@ -1586,6 +1738,8 @@ class portstarObj(object):
1586
1738
  depth_cols.append('dep_m_smth')
1587
1739
  if 'dep_m_adjBy' in beam_df.columns:
1588
1740
  depth_cols.append('dep_m_adjBy')
1741
+ if 'dep_m_interp' in beam_df.columns:
1742
+ depth_cols.append('dep_m_interp')
1589
1743
 
1590
1744
  depth_df = beam_df[depth_cols].drop_duplicates(subset=['record_num'], keep='last').set_index('record_num')
1591
1745
  trk_df = trk_df.set_index('record_num')
@@ -1597,6 +1751,8 @@ class portstarObj(object):
1597
1751
  trk_df['dep_m_smth'] = depth_df['dep_m_smth']
1598
1752
  if 'dep_m_adjBy' in depth_df.columns:
1599
1753
  trk_df['dep_m_adjBy'] = depth_df['dep_m_adjBy']
1754
+ if 'dep_m_interp' in depth_df.columns:
1755
+ trk_df['dep_m_interp'] = depth_df['dep_m_interp']
1600
1756
 
1601
1757
  trk_df.reset_index().to_csv(trk_file, index=False, float_format='%.14f')
1602
1758
 
@@ -1749,10 +1905,59 @@ class portstarObj(object):
1749
1905
  portDF['dep_m_adjBy'] = _format_depth_adjustment(portDF['pixM'])
1750
1906
  starDF['dep_m_adjBy'] = _format_depth_adjustment(starDF['pixM'])
1751
1907
 
1908
+ # Outlier and jump filtering before interpolation.
1909
+ # detectDep=0: continuity-only acoustic QC.
1910
+ # detectDep=1/2: continuity QC plus instrument-depth proportional cap.
1911
+ portArr = pd.to_numeric(portDF['dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1912
+ starArr = pd.to_numeric(starDF['dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1913
+ portInst = pd.to_numeric(portDF['inst_dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1914
+ starInst = pd.to_numeric(starDF['inst_dep_m'], errors='coerce').to_numpy(dtype=float, copy=True)
1915
+
1916
+ portFlags = np.zeros(portArr.shape, dtype=bool)
1917
+ starFlags = np.zeros(starArr.shape, dtype=bool)
1918
+
1919
+ if detectDep == 0:
1920
+ portFlags |= _flag_depth_outliers(portArr, iterative_jump=True)
1921
+ starFlags |= _flag_depth_outliers(starArr, iterative_jump=True)
1922
+ elif detectDep in (1, 2):
1923
+ portFlags |= _flag_depth_outliers(portArr, portInst, inst_depth_mult=3.0)
1924
+ starFlags |= _flag_depth_outliers(starArr, starInst, inst_depth_mult=3.0)
1925
+
1926
+ # If sides diverge strongly, invalidate the side farther from
1927
+ # instrument depth so interpolation can recover continuity.
1928
+ pair_valid = np.isfinite(portArr) & np.isfinite(starArr) & (portArr > 0) & (starArr > 0)
1929
+ diverge = pair_valid & (np.abs(portArr - starArr) > 5.0)
1930
+ if diverge.any():
1931
+ portErr = np.abs(portArr - portInst)
1932
+ starErr = np.abs(starArr - starInst)
1933
+
1934
+ inst_pair_valid = np.isfinite(portInst) & (portInst > 0) & np.isfinite(starInst) & (starInst > 0)
1935
+ choose_by_inst = diverge & inst_pair_valid
1936
+ portFlags |= choose_by_inst & (portErr >= starErr)
1937
+ starFlags |= choose_by_inst & (starErr > portErr)
1938
+
1939
+ # Fallback for rows without valid instrument depth on one/both sides.
1940
+ fallback = diverge & (~inst_pair_valid)
1941
+ if fallback.any():
1942
+ pmed = _rolling_median(portArr)
1943
+ smed = _rolling_median(starArr)
1944
+ presid = np.abs(portArr - pmed)
1945
+ sresid = np.abs(starArr - smed)
1946
+ portFlags |= fallback & (presid >= sresid)
1947
+ starFlags |= fallback & (sresid > presid)
1948
+
1949
+ portArr[portFlags] = np.nan
1950
+ starArr[starFlags] = np.nan
1951
+ portDF['dep_m'] = portArr
1952
+ starDF['dep_m'] = starArr
1953
+
1752
1954
  # Interpolate over nan's (and set zeros to nan)
1753
1955
  portDep = portDF['dep_m'].to_numpy(copy=True)
1754
1956
  starDep = starDF['dep_m'].to_numpy(copy=True)
1755
1957
 
1958
+ portInterp = np.isnan(portDep) | (portDep == 0)
1959
+ starInterp = np.isnan(starDep) | (starDep == 0)
1960
+
1756
1961
  portDep[portDep == 0] = np.nan
1757
1962
  starDep[starDep == 0] = np.nan
1758
1963
 
@@ -1762,6 +1967,7 @@ class portstarObj(object):
1762
1967
  else:
1763
1968
  portDep[nans] = 0
1764
1969
  portDF['dep_m'] = portDep
1970
+ portDF['dep_m_interp'] = portInterp.astype(np.uint8)
1765
1971
 
1766
1972
  nans, x = np.isnan(starDep), lambda z: z.nonzero()[0]
1767
1973
  if (~nans).any():
@@ -1769,6 +1975,7 @@ class portstarObj(object):
1769
1975
  else:
1770
1976
  starDep[nans] = 0
1771
1977
  starDF['dep_m'] = starDep
1978
+ starDF['dep_m_interp'] = starInterp.astype(np.uint8)
1772
1979
 
1773
1980
  # Export to csv
1774
1981
  portDF.to_csv(self.port.sonMetaFile, index=False, float_format='%.14f')
@@ -1778,18 +1985,23 @@ class portstarObj(object):
1778
1985
 
1779
1986
  try:
1780
1987
  # Take average of both estimates to store with downlooking sonar csv
1781
- depDF = pd.DataFrame(columns=['dep_m', 'dep_m_Method', 'dep_m_smth', 'dep_m_adjBy'])
1988
+ depDF = pd.DataFrame(columns=['dep_m', 'dep_m_Method', 'dep_m_smth', 'dep_m_adjBy', 'dep_m_interp'])
1782
1989
  depDF['dep_m'] = np.nanmean([portDF['dep_m'].to_numpy(), starDF['dep_m'].to_numpy()], axis=0)
1783
1990
  depDF['dep_m_Method'] = portDF['dep_m_Method']
1784
1991
  depDF['dep_m_smth'] = portDF['dep_m_smth']
1785
1992
  depDF['dep_m_adjBy'] = portDF['dep_m_adjBy']
1993
+ depDF['dep_m_interp'] = np.maximum(
1994
+ pd.to_numeric(portDF['dep_m_interp'], errors='coerce').fillna(0).to_numpy(dtype=np.uint8, copy=True),
1995
+ pd.to_numeric(starDF['dep_m_interp'], errors='coerce').fillna(0).to_numpy(dtype=np.uint8, copy=True)
1996
+ )
1786
1997
  except:
1787
1998
  # In case port and star are not same length
1788
- depDF = pd.DataFrame(columns=['dep_m', 'dep_m_Method', 'dep_m_smth', 'dep_m_adjBy'])
1999
+ depDF = pd.DataFrame(columns=['dep_m', 'dep_m_Method', 'dep_m_smth', 'dep_m_adjBy', 'dep_m_interp'])
1789
2000
  depDF['dep_m'] = portDF['dep_m']
1790
2001
  depDF['dep_m_Method'] = portDF['dep_m_Method']
1791
2002
  depDF['dep_m_smth'] = portDF['dep_m_smth']
1792
2003
  depDF['dep_m_adjBy'] = portDF['dep_m_adjBy']
2004
+ depDF['dep_m_interp'] = portDF['dep_m_interp']
1793
2005
 
1794
2006
  del portDF, starDF
1795
2007
  gc.collect()
@@ -253,6 +253,13 @@ class sonObj(object):
253
253
  max_speed,
254
254
  aoi,
255
255
  time_table,
256
+ dq_table=False,
257
+ dq_time_field=False,
258
+ dq_flag_field=False,
259
+ dq_keep_values=False,
260
+ dq_src_utc_offset=0.0,
261
+ dq_target_utc_offset=0.0,
262
+ dq_time_offset=0.0,
256
263
  ):
257
264
  '''
258
265
  '''
@@ -284,6 +291,20 @@ class sonObj(object):
284
291
  if time_table:
285
292
  sonDF = self._filterTime(sonDF, time_table)
286
293
 
294
+ ####################
295
+ # Data Quality Filter
296
+ if dq_table:
297
+ sonDF = self._filterDQ(
298
+ sonDF,
299
+ dq_table,
300
+ dq_time_field,
301
+ dq_flag_field,
302
+ dq_keep_values,
303
+ dq_src_utc_offset,
304
+ dq_target_utc_offset,
305
+ dq_time_offset,
306
+ )
307
+
287
308
  return sonDF
288
309
 
289
310
  # ======================================================================
@@ -304,6 +325,232 @@ class sonObj(object):
304
325
  return df
305
326
 
306
327
 
328
+ # ======================================================================
329
def _filterDQ(self,
              sonDF,
              dq_table,
              dq_time_field,
              dq_flag_field,
              dq_keep_values,
              dq_src_utc_offset,
              dq_target_utc_offset,
              dq_time_offset,
              ):
    '''
    Filter sonar pings using a data-quality (DQ) log CSV.

    Each row in the DQ log is treated as a state-change event: the flag
    recorded at time T applies to every sonar ping from T until the next
    event row. Pings that occur before the first DQ event, and pings whose
    timestamp could not be parsed, are marked as filtered out.

    ----------
    Parameters
    ----------
    sonDF : DataFrame
        Sonar ping metadata. It is copied, not modified in place.
    dq_table : str
        Path (or buffer) handed to ``pd.read_csv`` to load the DQ log.
    dq_time_field : str
        Name of the DQ log column holding event timestamps.
    dq_flag_field : str
        Name of the DQ log column holding the quality flag.
    dq_keep_values : str | iterable
        Flag value(s) identifying "keep" states; normalized with
        ``self._normalizeDQKeepValues``.
    dq_src_utc_offset, dq_target_utc_offset
        Forwarded to ``self._shiftDQDatetimeToTargetOffset`` when the
        timestamps are datetimes; ignored for numeric timestamps.
    dq_time_offset : float
        Offset added to the sonar timestamps before matching (applied as
        seconds when the timestamps are datetimes).

    -------
    Returns
    -------
    A copy of ``sonDF`` with a boolean ``filter_dq`` column and with the
    ``filter`` column AND-ed with it (``filter`` is created as all-True
    when absent).

    ------
    Raises
    ------
    ValueError
        If a required field name or keep value is missing, a required
        column is absent from the DQ log, the DQ and sonar timestamp kinds
        differ, or no DQ timestamp could be parsed.
    '''

    filtDQCol = 'filter_dq'
    filtCol = 'filter'
    dqTimeCol = '_dq_ts'

    if not dq_time_field:
        raise ValueError('dq_time_field is required when dq_table is provided.')
    if not dq_flag_field:
        raise ValueError('dq_flag_field is required when dq_table is provided.')

    keep_vals = self._normalizeDQKeepValues(dq_keep_values)
    if len(keep_vals) == 0:
        raise ValueError('dq_keep_values must contain at least one value to keep.')

    # Work on a copy; every ping starts as "not kept" by the DQ filter.
    sonDF = sonDF.copy()
    sonDF[filtDQCol] = False

    if filtCol not in sonDF.columns:
        sonDF[filtCol] = True

    dqDF = pd.read_csv(dq_table)
    missing_cols = [c for c in [dq_time_field, dq_flag_field] if c not in dqDF.columns]
    if missing_cols:
        raise ValueError('dqLog missing required column(s): {}'.format(', '.join(missing_cols)))

    dqTimes, dqKind = self._coerceDQTimestampSeries(dqDF[dq_time_field], dq_time_field)
    sonTimes, sonKind = self._getSonarFilterTimestamp(sonDF)

    # Numeric and datetime timestamps cannot be compared with each other.
    if dqKind != sonKind:
        raise ValueError(
            'dqLog timestamp type ({}) does not match sonar timestamp type ({}).'.format(dqKind, sonKind)
        )

    if dqKind == 'datetime':
        dqTimes = self._shiftDQDatetimeToTargetOffset(
            dqTimes,
            dq_src_utc_offset,
            dq_target_utc_offset,
        )

    # Drop DQ rows whose timestamp failed to parse.
    dqDF = dqDF.copy()
    dqDF[dqTimeCol] = dqTimes
    dqDF = dqDF[dqDF[dqTimeCol].notna()].copy()
    if dqDF.empty:
        raise ValueError('dqLog contained no valid timestamps after parsing {}.'.format(dq_time_field))

    offset = float(dq_time_offset)
    if sonKind == 'datetime':
        sonTimes = sonTimes + pd.to_timedelta(offset, unit='s')
    else:
        sonTimes = sonTimes + offset

    # Pair each usable ping timestamp with its original index label.
    sonMerge = pd.DataFrame({
        '_son_idx': sonDF.index,
        '_son_ts': sonTimes,
    })
    sonMerge = sonMerge[sonMerge['_son_ts'].notna()].copy()

    dqDF['_dq_keep'] = dqDF[dq_flag_field].map(self._normalizeDQValue).isin(keep_vals)

    # One state per unique event time. The sort must be stable so that,
    # for rows sharing a timestamp, the row appearing last in the log is
    # the one ``last()`` selects (the default quicksort may reorder ties).
    event_state = dqDF[[dqTimeCol, '_dq_keep']].copy()
    event_state = event_state.sort_values(dqTimeCol, kind='mergesort')
    event_state = event_state.groupby(dqTimeCol, as_index=False)['_dq_keep'].last()

    keep_idx = self._applyDQEventState(sonMerge, event_state, dqTimeCol)

    sonDF.loc[keep_idx, filtDQCol] = True
    sonDF[filtCol] = sonDF[filtCol] & sonDF[filtDQCol]

    return sonDF
416
+
417
+ # ======================================================================
418
def _normalizeDQKeepValues(self, dq_keep_values):
    '''
    Build the set of normalized DQ flag values that mark pings to keep.

    ----------
    Parameters
    ----------
    dq_keep_values : False | None | str | iterable | scalar
        ``False``/``None`` give an empty set. A string is split on commas.
        Any other iterable is consumed element by element. A bare
        non-iterable scalar (e.g. ``1``) is treated as a single value
        instead of raising ``TypeError``.

    -------
    Returns
    -------
    set of str: each value passed through ``self._normalizeDQValue``, with
    empty results discarded.
    '''

    if dq_keep_values is False or dq_keep_values is None:
        return set()

    if isinstance(dq_keep_values, str):
        candidates = dq_keep_values.split(',')
    else:
        try:
            candidates = list(dq_keep_values)
        except TypeError:
            # Not iterable: a single flag value such as 1 or 2.5.
            candidates = [dq_keep_values]

    normalized = (self._normalizeDQValue(value) for value in candidates)
    return {norm for norm in normalized if norm}
433
+
434
+ # ======================================================================
435
def _normalizeDQValue(self, value):
    '''
    Normalize one DQ flag value for comparison.

    Missing values (as judged by ``pd.isna``) become an empty string;
    anything else is converted to ``str``, stripped of surrounding
    whitespace and lower-cased.
    '''

    if not pd.isna(value):
        text = str(value)
        trimmed = text.strip()
        return trimmed.lower()
    return ''
440
+
441
+ # ======================================================================
442
def _coerceDQTimestampSeries(self, series, field_name):
    '''
    Parse a DQ log timestamp column as numeric or datetime values.

    ----------
    Parameters
    ----------
    series : Series
        Raw timestamp column from the DQ log.
    field_name : str
        Column name, used only in the error message.

    -------
    Returns
    -------
    (parsed, kind) where ``kind`` is ``'numeric'`` or ``'datetime'``:
    - ``'numeric'`` when every non-missing entry converts to a number;
    - otherwise ``'datetime'`` when at least one entry parses as a
      datetime (timezone information, if any, is dropped);
    - otherwise ``'numeric'`` when at least one entry converts to a number.
    Entries that fail to parse are NaN/NaT in ``parsed``.

    ------
    Raises
    ------
    ValueError
        If no entry can be parsed either way.
    '''

    numeric = pd.to_numeric(series, errors='coerce')
    n_present = series.notna().sum()
    if n_present > 0 and numeric.notna().sum() == n_present:
        return numeric, 'numeric'

    # format='mixed' parses each entry on its own, matching how the sonar
    # timestamps are parsed in _getSonarFilterTimestamp. Without it, pandas
    # infers a single format from the first entry and coerces every row
    # written differently (e.g. with fractional seconds) to NaT.
    dt = pd.to_datetime(series, errors='coerce', format='mixed')
    if dt.notna().any():
        try:
            if dt.dt.tz is not None:
                dt = dt.dt.tz_localize(None)
        except AttributeError:
            # No .dt accessor (non-datetime dtype); return as parsed.
            pass
        return dt, 'datetime'

    if numeric.notna().any():
        return numeric, 'numeric'

    raise ValueError('Unable to parse dqLog timestamps from column: {}'.format(field_name))
462
+
463
+ # ======================================================================
464
def _shiftDQDatetimeToTargetOffset(self,
                                   dq_times,
                                   dq_src_utc_offset=False,
                                   dq_target_utc_offset=False):
    '''
    Shift DQ datetimes by the difference between two UTC offsets.

    Both offsets are validated with ``self._coerceDQUtcOffset``. When
    neither is set, ``dq_times`` is returned unchanged. When both are set,
    ``target - source`` hours are added to ``dq_times``.

    ------
    Raises
    ------
    ValueError
        If exactly one of the two offsets is set.
    '''

    offsets = (
        self._coerceDQUtcOffset(dq_src_utc_offset, 'dq_src_utc_offset'),
        self._coerceDQUtcOffset(dq_target_utc_offset, 'dq_target_utc_offset'),
    )
    n_unset = sum(1 for off in offsets if off is None)

    if n_unset == 2:
        return dq_times

    if n_unset == 1:
        raise ValueError(
            'dq_src_utc_offset and dq_target_utc_offset must both be provided when either is set.'
        )

    src_hours, target_hours = offsets
    shift = pd.to_timedelta(target_hours - src_hours, unit='h')
    return dq_times + shift
481
+
482
+ # ======================================================================
483
def _coerceDQUtcOffset(self, value, field_name):
    '''
    Convert a UTC-offset setting to a float, or None when it is unset.

    ----------
    Parameters
    ----------
    value : False | None | str | number
        ``False``, ``None`` and ``''`` mean "not set".
    field_name : str
        Parameter name, used only in the error message.

    -------
    Returns
    -------
    float offset, or None when the setting is unset.

    ------
    Raises
    ------
    ValueError
        If ``value`` is not convertible to a finite number. NaN and
        infinity are rejected here because a NaN offset would turn every
        shifted DQ timestamp into NaT further down the pipeline.
    '''

    if value is False or value is None or value == '':
        return None

    message = '{} must be a numeric UTC offset in hours.'.format(field_name)

    try:
        offset = float(value)
    except (TypeError, ValueError) as err:
        raise ValueError(message) from err

    if not np.isfinite(offset):
        raise ValueError(message)

    return offset
492
+
493
+ # ======================================================================
494
def _getSonarFilterTimestamp(self, sonDF):
    '''
    Derive one timestamp per sonar ping for DQ filtering.

    Sources are tried in order and the first that yields at least one
    parsed value wins:
    1. ``date`` + ``time`` columns joined with a space -> 'datetime'
    2. ``time`` column alone -> 'datetime'
    3. ``time_s`` column -> 'numeric'

    -------
    Returns
    -------
    (timestamps, kind) with ``kind`` either 'datetime' or 'numeric'.
    Datetimes have any timezone information dropped.

    ------
    Raises
    ------
    ValueError
        If none of the sources yields a usable timestamp.
    '''

    def _parse_naive(raw):
        # Parse entry by entry; None signals that nothing could be parsed.
        parsed = pd.to_datetime(raw, errors='coerce', format='mixed')
        if not parsed.notna().any():
            return None
        try:
            if parsed.dt.tz is not None:
                parsed = parsed.dt.tz_localize(None)
        except AttributeError:
            pass
        return parsed

    columns = sonDF.columns
    has_time = 'time' in columns

    if has_time and 'date' in columns:
        date_txt = sonDF['date'].astype(str).str.strip()
        time_txt = sonDF['time'].astype(str).str.strip()
        stamps = _parse_naive(date_txt + ' ' + time_txt)
        if stamps is not None:
            return stamps, 'datetime'

    if has_time:
        stamps = _parse_naive(sonDF['time'])
        if stamps is not None:
            return stamps, 'datetime'

    if 'time_s' in columns:
        seconds = pd.to_numeric(sonDF['time_s'], errors='coerce')
        if seconds.notna().any():
            return seconds, 'numeric'

    raise ValueError('Unable to determine sonar timestamps for dqLog filtering.')
526
+
527
+
528
+ # ======================================================================
529
def _applyDQEventState(self, son, event_state, dqTimeCol):
    '''
    Select the sonar indices whose ping time lies in a "keep" DQ state.

    Every ping takes the state of the latest event at or before its
    timestamp; pings earlier than the first event are never selected.
    ``event_state`` is expected to be sorted by ``dqTimeCol``.

    -------
    Returns
    -------
    The ``_son_idx`` values of ``son`` for the selected pings.
    '''

    times = event_state[dqTimeCol].to_numpy()
    states = event_state['_dq_keep'].to_numpy(dtype=bool)
    pings = son['_son_ts'].to_numpy()

    # Insertion point to the right of equal values, minus one, is the
    # position of the last event whose time is <= the ping time.
    pos = np.searchsorted(times, pings, side='right') - 1

    # pos == -1 means the ping precedes every event.
    has_event = pos >= 0

    if states.size:
        keep = has_event & states[np.maximum(pos, 0)]
    else:
        keep = np.zeros(len(son), dtype=bool)

    return son.loc[keep, '_son_idx']
552
+
553
+
307
554
  # ======================================================================
308
555
  def _filterHeading(self,
309
556
  df,