PyMVP 0.2.1__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyMVP
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Python package for Moving Vessel Profiler correction and analysis
5
5
  Author: MaximilienWemaere
6
6
  Requires-Python: >=3.10
@@ -13,3 +13,5 @@ Requires-Dist: cartopy>=0.25.0
13
13
  Requires-Dist: scipy>=1.16.2
14
14
  Requires-Dist: xarray>=2025.9.1
15
15
  Requires-Dist: netcdf4>=1.7.2
16
+ Requires-Dist: pandas>=2.1.0
17
+ Requires-Dist: geopy>=2.4.0
@@ -33,6 +33,10 @@ import cartopy.feature as cfeature
33
33
  import xarray as xr
34
34
  from . import mvp_routines as mvp
35
35
  from scipy.ndimage import median_filter
36
+ from scipy.interpolate import griddata
37
+ from scipy.ndimage import gaussian_filter
38
+ from geopy.distance import geodesic
39
+ import pandas as pd
36
40
 
37
41
 
38
42
  class Analyzer:
@@ -50,6 +54,10 @@ class Analyzer:
50
54
  self.ctd = False
51
55
  self.speed = False
52
56
  self.corrected = False
57
+ self.GPS = False
58
+
59
+ def ___version___(self):
60
+ return "0.2.3"
53
61
 
54
62
 
55
63
  def load_mvp_data(self,data_path, delp=[], subdirs=False,format='raw',only_new=False, output_path=None):
@@ -625,7 +633,29 @@ class Analyzer:
625
633
  print('CTD data loaded successfully.')
626
634
  self.ctd = True
627
635
 
628
-
636
+ def load_GPS(self, gps_path):
637
+ """
638
+ Load GPS data from a .csv file in the gps_path.
639
+ Fills the object attributes with GPS data and associated metadata.
640
+ Args:
641
+ gps_path (str): Path to the .csv file containing GPS data.
642
+ """
643
+ self.gps_path = gps_path
644
+ gps_data = pd.read_csv(gps_path)
645
+ self.GPS_TIME = gps_data['time'].values
646
+ self.GPS_LAT = gps_data['latitude'].values
647
+ self.GPS_LON = gps_data['longitude'].values
648
+ print('GPS data loaded successfully.')
649
+ self.gps = True
650
+
651
+ self.Lon_mvp = np.zeros(( self.PRES_mvp.shape[0], self.PRES_mvp.shape[1]))
652
+ self.Lat_mvp = np.zeros(( self.PRES_mvp.shape[0], self.PRES_mvp.shape[1]))
653
+
654
+ for i in range(self.PRES_mvp.shape[0]):
655
+ self.Lon_mvp[i,:] = np.interp(self.TIME_mvp[i,:], self.GPS_TIME, self.GPS_LON.astype(float))
656
+ self.Lat_mvp[i,:] = np.interp(self.TIME_mvp[i,:], self.GPS_TIME, self.GPS_LAT.astype(float))
657
+
658
+ self.GPS = True
629
659
 
630
660
  def compute_waterflow(self,horizontal_speed=2,corr=False):
631
661
  """
@@ -1411,7 +1441,7 @@ class Analyzer:
1411
1441
  self.FLUO_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.FLUO_ctd, pressure_grid)
1412
1442
  self.TURB_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.TURB_ctd, pressure_grid)
1413
1443
  self.TEMP_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, TEMP_mvp_corr_mat, pressure_grid)
1414
- self.PRES_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, PRES_mvp_corr_mat, pressure_grid)
1444
+ self.PRES_mvp_corr_interp = np.tile(pressure_grid, (PRES_mvp_corr_mat.shape[0], 1))
1415
1445
  self.COND_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, COND_mvp_corr_mat, pressure_grid)
1416
1446
  self.SALT_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SALT_mvp_corr_mat, pressure_grid)
1417
1447
  self.DO_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.DO_mvp, pressure_grid)
@@ -1419,6 +1449,9 @@ class Analyzer:
1419
1449
  self.TURB_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.TURB_mvp, pressure_grid)
1420
1450
  self.PH_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.PH_mvp, pressure_grid)
1421
1451
  self.SUNA_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.SUNA_mvp, pressure_grid)
1452
+ if self.GPS:
1453
+ self.Lat_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.Lat_mvp, pressure_grid)
1454
+ self.Lon_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.Lon_mvp, pressure_grid)
1422
1455
 
1423
1456
  if self.speed:
1424
1457
  self.SPEED_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SPEED_mvp_corr_mat, pressure_grid)
@@ -1468,285 +1501,417 @@ class Analyzer:
1468
1501
 
1469
1502
 
1470
1503
 
1471
- def corrige_MVP_offset_on_ctd_simple(self,id_mvp,id_ctd,min_depth):
1472
- """
1473
- This function corrects the offset between the MVP and CTD profiles by aligning the temperature, conductivity profiles. It calculates the mean difference in temperature between the two profiles and applies this correction to the CTD temperature data.
1474
- id_mvp and id_ctd must be the same length as each MVP profile will be be corrected with the corresponding CTD profile. The function returns the corrected MVP temperature and conductivity profiles.
1475
- This version of the correction is less restritive than the other one, does not need the CTD aand MVP profiles to be exactly similar
1476
- We advice to choose a min_depth that avoid to take into acount the surface layer which can introduce errors.
1477
- """
1478
-
1479
- mean_temp_diff = []
1480
- mean_cond_diff = []
1481
- print("Calculating mean differences between MVP and CTD profiles before correction:")
1482
- for i in range(len(id_mvp)):
1483
- id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1484
- # Calculate the mean difference in temperature between the MVP and CTD profiles
1485
- temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1486
- mean_temp_diff.append(temp_diff)
1487
-
1488
- cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1489
- mean_cond_diff.append(cond_diff)
1490
- print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1491
- print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1492
-
1493
- for i in range(len(id_mvp)):
1494
- id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1495
-
1496
- # Calculate the mean difference in temperature between the MVP and CTD profiles
1497
- temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1498
- self.TEMP_mvp_corr_interp[id_mvp[i]] -= temp_diff
1499
-
1500
- cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1501
- self.COND_mvp_corr_interp[id_mvp[i]] -= cond_diff
1502
-
1503
-
1504
- mean_temp_diff = []
1505
- mean_cond_diff = []
1506
- print("After correction:")
1507
- for i in range(len(id_mvp)):
1508
- id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1509
-
1510
- # Calculate the mean difference in temperature between the MVP and CTD profiles
1511
- temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1512
- mean_temp_diff.append(temp_diff)
1513
-
1514
- cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1515
- mean_cond_diff.append(cond_diff)
1516
- print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1517
- print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1518
-
1504
+ def corrige_MVP_offset_on_ctd_simple(self,id_mvp,id_ctd,min_depth):
1505
+ """
1506
+ This function corrects the offset between the MVP and CTD profiles by aligning the temperature, conductivity profiles. It calculates the mean difference in temperature between the two profiles and applies this correction to the CTD temperature data.
1507
+ id_mvp and id_ctd must be the same length as each MVP profile will be be corrected with the corresponding CTD profile. The function returns the corrected MVP temperature and conductivity profiles.
1508
+ This version of the correction is less restritive than the other one, does not need the CTD aand MVP profiles to be exactly similar
1509
+ We advice to choose a min_depth that avoid to take into acount the surface layer which can introduce errors.
1510
+ """
1519
1511
 
1512
+ mean_temp_diff = []
1513
+ mean_cond_diff = []
1514
+ print("Calculating mean differences between MVP and CTD profiles before correction:")
1515
+ for i in range(len(id_mvp)):
1516
+ id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1517
+ # Calculate the mean difference in temperature between the MVP and CTD profiles
1518
+ temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1519
+ mean_temp_diff.append(temp_diff)
1520
1520
 
1521
+ cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1522
+ mean_cond_diff.append(cond_diff)
1523
+ print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1524
+ print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1521
1525
 
1522
- def to_netcdf(self, filepath, corrected=False, compression=True, engine=None, per_profile_files=False):
1523
- """
1524
- Export MVP data to a NetCDF file using xarray.
1526
+ for i in range(len(id_mvp)):
1527
+ id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1525
1528
 
1526
- Args:
1527
- filepath (str): Output NetCDF file path.
1528
- corrected (bool): Also write corrected arrays if present (*_mvp_corr). Default False.
1529
- compression (bool): Enable compression (engine dependent). Default True.
1530
- engine (str|None): One of 'netcdf4', 'h5netcdf', 'scipy'. If None, choose netcdf4.
1531
- per_profile_files (bool): If True, write one .nc per MVP cycle (two rows: down and up).
1532
- """
1533
- if not getattr(self, 'mvp', False):
1534
- raise RuntimeError("No MVP data loaded. Call load_mvp_data() first.")
1529
+ # Calculate the mean difference in temperature between the MVP and CTD profiles
1530
+ temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1531
+ self.TEMP_mvp_corr_interp[id_mvp[i]] -= temp_diff
1535
1532
 
1536
- engine = 'netcdf4' if engine is None else engine
1537
- if engine == 'scipy' and compression:
1538
- print('Warning: scipy backend does not support compression; writing without compression.')
1539
- compression = False
1533
+ cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1534
+ self.COND_mvp_corr_interp[id_mvp[i]] -= cond_diff
1540
1535
 
1541
- # Dimensions
1542
- n_prof, n_samp = self.PRES_mvp.shape
1543
1536
 
1544
- # Coordinates
1545
- profile_idx = np.arange(n_prof, dtype=np.int32)
1546
- sample_idx = np.arange(n_samp, dtype=np.int32)
1537
+ mean_temp_diff = []
1538
+ mean_cond_diff = []
1539
+ print("After correction:")
1540
+ for i in range(len(id_mvp)):
1541
+ id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1547
1542
 
1548
- # Direction per profile (down/up)
1549
- direction = None
1550
- if hasattr(self, 'DIR') and len(self.DIR) == n_prof:
1551
- direction = np.array(self.DIR, dtype=object)
1552
- else:
1553
- # Fallback based on even/odd
1554
- direction = np.array(['down' if i % 2 == 0 else 'up' for i in range(n_prof)], dtype=object)
1555
-
1556
- # Per-sample time as seconds since reference origin
1557
- # TIME_mvp is in days relative to self.date_ref
1558
- time_seconds = None
1559
- if hasattr(self, 'TIME_mvp'):
1560
- time_seconds = self.TIME_mvp * 24.0 * 3600.0
1561
- else:
1562
- time_seconds = np.full((n_prof, n_samp), np.nan)
1563
-
1564
- # Per-profile datetime (one timestamp per cast pair); map using i//2
1565
- profile_time = None
1566
- if hasattr(self, 'DATETIME_mvp') and len(getattr(self, 'DATETIME_mvp', [])) > 0:
1567
- prof_times = []
1568
- for i in range(n_prof):
1569
- j = i // 2
1570
- if j < len(self.DATETIME_mvp) and self.DATETIME_mvp[j] is not None:
1571
- prof_times.append(np.datetime64(self.DATETIME_mvp[j]))
1572
- else:
1573
- prof_times.append(np.datetime64('NaT'))
1574
- profile_time = np.array(prof_times, dtype='datetime64[ns]')
1575
- else:
1576
- profile_time = np.array([np.datetime64('NaT')] * n_prof, dtype='datetime64[ns]')
1543
+ # Calculate the mean difference in temperature between the MVP and CTD profiles
1544
+ temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1545
+ mean_temp_diff.append(temp_diff)
1577
1546
 
1578
- # Build dataset variables safely
1579
- data_vars = {}
1547
+ cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1548
+ mean_cond_diff.append(cond_diff)
1549
+ print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1550
+ print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1551
+
1580
1552
 
1581
- def add_var(var_name, arr, units=None, long_name=None):
1582
- if arr is None:
1583
- return
1584
- data_vars[var_name] = (
1585
- ('profile', 'sample'), arr,
1586
- {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1553
+
1554
+
1555
+ def to_netcdf(self, filepath, corrected=False, compression=True, engine=None, per_profile_files=False):
1556
+ """
1557
+ Export MVP data to a NetCDF file using xarray.
1558
+
1559
+ Args:
1560
+ filepath (str): Output NetCDF file path.
1561
+ corrected (bool): Also write corrected arrays if present (*_mvp_corr). Default False.
1562
+ compression (bool): Enable compression (engine dependent). Default True.
1563
+ engine (str|None): One of 'netcdf4', 'h5netcdf', 'scipy'. If None, choose netcdf4.
1564
+ per_profile_files (bool): If True, write one .nc per MVP cycle (two rows: down and up).
1565
+ """
1566
+ if not getattr(self, 'mvp', False):
1567
+ raise RuntimeError("No MVP data loaded. Call load_mvp_data() first.")
1568
+
1569
+ engine = 'netcdf4' if engine is None else engine
1570
+ if engine == 'scipy' and compression:
1571
+ print('Warning: scipy backend does not support compression; writing without compression.')
1572
+ compression = False
1573
+
1574
+ # Dimensions
1575
+ n_prof, n_samp = self.PRES_mvp.shape
1576
+
1577
+ # Coordinates
1578
+ profile_idx = np.arange(n_prof, dtype=np.int32)
1579
+ sample_idx = np.arange(n_samp, dtype=np.int32)
1580
+
1581
+ # Direction per profile (down/up)
1582
+ direction = None
1583
+ if hasattr(self, 'DIR') and len(self.DIR) == n_prof:
1584
+ direction = np.array(self.DIR, dtype=object)
1585
+ else:
1586
+ # Fallback based on even/odd
1587
+ direction = np.array(['down' if i % 2 == 0 else 'up' for i in range(n_prof)], dtype=object)
1588
+
1589
+ # Per-sample time as seconds since reference origin
1590
+ # TIME_mvp is in days relative to self.date_ref
1591
+ time_seconds = None
1592
+ if hasattr(self, 'TIME_mvp'):
1593
+ time_seconds = self.TIME_mvp * 24.0 * 3600.0
1594
+ else:
1595
+ time_seconds = np.full((n_prof, n_samp), np.nan)
1596
+
1597
+ # Per-profile datetime (one timestamp per cast pair); map using i//2
1598
+ profile_time = None
1599
+ if hasattr(self, 'DATETIME_mvp') and len(getattr(self, 'DATETIME_mvp', [])) > 0:
1600
+ prof_times = []
1601
+ for i in range(n_prof):
1602
+ j = i // 2
1603
+ if j < len(self.DATETIME_mvp) and self.DATETIME_mvp[j] is not None:
1604
+ prof_times.append(np.datetime64(self.DATETIME_mvp[j]))
1605
+ else:
1606
+ prof_times.append(np.datetime64('NaT'))
1607
+ profile_time = np.array(prof_times, dtype='datetime64[ns]')
1608
+ else:
1609
+ profile_time = np.array([np.datetime64('NaT')] * n_prof, dtype='datetime64[ns]')
1610
+
1611
+ # Build dataset variables safely
1612
+ data_vars = {}
1613
+
1614
+ def add_var(var_name, arr, units=None, long_name=None):
1615
+ if arr is None:
1616
+ return
1617
+ data_vars[var_name] = (
1618
+ ('profile', 'sample'), arr,
1619
+ {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1620
+ )
1621
+
1622
+ add_var('PRES', getattr(self, 'PRES_mvp', None), units='dbar', long_name='Sea water pressure')
1623
+ add_var('TEMP', getattr(self, 'TEMP_mvp', None), units='degC', long_name='In-situ temperature')
1624
+ add_var('COND', getattr(self, 'COND_mvp', None), units='mS/cm', long_name='Conductivity')
1625
+ add_var('SAL', getattr(self, 'SALT_mvp', None), units='psu', long_name='Practical salinity')
1626
+ add_var('SOUNDVEL', getattr(self, 'SOUNDVEL_mvp', None), units='m s-1', long_name='Sound speed')
1627
+ add_var('DO', getattr(self, 'DO_mvp', None), units='ml/L', long_name='Dissolved oxygen')
1628
+ add_var('TEMP2', getattr(self, 'TEMP2_mvp', None), units='degC', long_name='Oxygen sensor temperature')
1629
+ add_var('SUNA', getattr(self, 'SUNA_mvp', None), long_name='SUNA raw/derived')
1630
+ add_var('FLUO', getattr(self, 'FLUO_mvp', None), units='ug/L', long_name='Chl fluorescence')
1631
+ add_var('TURB', getattr(self, 'TURB_mvp', None), units='NTU', long_name='Turbidity')
1632
+ add_var('PH', getattr(self, 'PH_mvp', None), units='1', long_name='pH')
1633
+
1634
+ # Position and time arrays (2D)
1635
+ if hasattr(self, 'LAT_mvp'):
1636
+ add_var('LATITUDE', self.LAT_mvp, units='degrees_north', long_name='Latitude at sample')
1637
+ if hasattr(self, 'LON_mvp'):
1638
+ add_var('LONGITUDE', self.LON_mvp, units='degrees_east', long_name='Longitude at sample')
1639
+ # Time seconds since reference
1640
+ data_vars['TIME'] = (
1641
+ ('profile', 'sample'), time_seconds,
1642
+ {
1643
+ 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1644
+ 'long_name': 'Time at sample'
1645
+ }
1587
1646
  )
1588
-
1589
- add_var('PRES', getattr(self, 'PRES_mvp', None), units='dbar', long_name='Sea water pressure')
1590
- add_var('TEMP', getattr(self, 'TEMP_mvp', None), units='degC', long_name='In-situ temperature')
1591
- add_var('COND', getattr(self, 'COND_mvp', None), units='mS/cm', long_name='Conductivity')
1592
- add_var('SAL', getattr(self, 'SALT_mvp', None), units='psu', long_name='Practical salinity')
1593
- add_var('SOUNDVEL', getattr(self, 'SOUNDVEL_mvp', None), units='m s-1', long_name='Sound speed')
1594
- add_var('DO', getattr(self, 'DO_mvp', None), units='ml/L', long_name='Dissolved oxygen')
1595
- add_var('TEMP2', getattr(self, 'TEMP2_mvp', None), units='degC', long_name='Oxygen sensor temperature')
1596
- add_var('SUNA', getattr(self, 'SUNA_mvp', None), long_name='SUNA raw/derived')
1597
- add_var('FLUO', getattr(self, 'FLUO_mvp', None), units='ug/L', long_name='Chl fluorescence')
1598
- add_var('TURB', getattr(self, 'TURB_mvp', None), units='NTU', long_name='Turbidity')
1599
- add_var('PH', getattr(self, 'PH_mvp', None), units='1', long_name='pH')
1600
-
1601
- # Position and time arrays (2D)
1602
- if hasattr(self, 'LAT_mvp'):
1603
- add_var('LATITUDE', self.LAT_mvp, units='degrees_north', long_name='Latitude at sample')
1604
- if hasattr(self, 'LON_mvp'):
1605
- add_var('LONGITUDE', self.LON_mvp, units='degrees_east', long_name='Longitude at sample')
1606
- # Time seconds since reference
1607
- data_vars['TIME'] = (
1608
- ('profile', 'sample'), time_seconds,
1609
- {
1610
- 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1611
- 'long_name': 'Time at sample'
1612
- }
1613
- )
1614
1647
 
1615
- # Include corrected arrays if requested and present
1616
- if corrected:
1617
- def add_corr(name, attr, units=None, long_name=None):
1618
- if hasattr(self, attr):
1619
- data_vars[name] = (
1620
- ('profile', 'sample'), getattr(self, attr),
1621
- {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1648
+ # Include corrected arrays if requested and present
1649
+ if corrected:
1650
+ def add_corr(name, attr, units=None, long_name=None):
1651
+ if hasattr(self, attr):
1652
+ data_vars[name] = (
1653
+ ('profile', 'sample'), getattr(self, attr),
1654
+ {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1655
+ )
1656
+ add_corr('pressure_corrected', 'PRES_mvp_corr', units='dbar', long_name='Corrected pressure')
1657
+ add_corr('temperature_corrected', 'TEMP_mvp_corr', units='degC', long_name='Corrected temperature')
1658
+ add_corr('conductivity_corrected', 'COND_mvp_corr', units='mS/cm', long_name='Corrected conductivity')
1659
+ add_corr('salinity_corrected', 'SALT_mvp_corr', units='psu', long_name='Corrected salinity')
1660
+ if hasattr(self, 'TIME_mvp_corr'):
1661
+ data_vars['time_corrected'] = (
1662
+ ('profile', 'sample'), self.TIME_mvp_corr * 24.0 * 3600.0,
1663
+ {
1664
+ 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1665
+ 'long_name': 'Corrected time at sample'
1666
+ }
1622
1667
  )
1623
- add_corr('pressure_corrected', 'PRES_mvp_corr', units='dbar', long_name='Corrected pressure')
1624
- add_corr('temperature_corrected', 'TEMP_mvp_corr', units='degC', long_name='Corrected temperature')
1625
- add_corr('conductivity_corrected', 'COND_mvp_corr', units='mS/cm', long_name='Corrected conductivity')
1626
- add_corr('salinity_corrected', 'SALT_mvp_corr', units='psu', long_name='Corrected salinity')
1627
- if hasattr(self, 'TIME_mvp_corr'):
1628
- data_vars['time_corrected'] = (
1629
- ('profile', 'sample'), self.TIME_mvp_corr * 24.0 * 3600.0,
1630
- {
1631
- 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1632
- 'long_name': 'Corrected time at sample'
1633
- }
1668
+ if hasattr(self, 'LAT_mvp_corr'):
1669
+ add_corr('latitude_corrected', 'LAT_mvp_corr', units='degrees_north', long_name='Corrected latitude at sample')
1670
+ if hasattr(self, 'LON_mvp_corr'):
1671
+ add_corr('longitude_corrected', 'LON_mvp_corr', units='degrees_east', long_name='Corrected longitude at sample')
1672
+
1673
+ # Coordinates and auxiliary per-profile variables
1674
+ coords = {
1675
+ 'profile': ('profile', profile_idx),
1676
+ 'sample': ('sample', sample_idx)
1677
+ }
1678
+
1679
+ # Encode direction/time according to engine capabilities
1680
+ if engine in ('netcdf4', 'h5netcdf'):
1681
+ coords['direction'] = ('profile', direction.astype('U'), {'long_name': 'Profile direction'})
1682
+ coords['profile_time'] = ('profile', profile_time, {'long_name': 'Profile nominal time'})
1683
+ else:
1684
+ # scipy backend: avoid object strings and datetime; use numeric fallbacks
1685
+ dir_flag = np.where(direction.astype('U') == 'down', 0, 1).astype('int8')
1686
+ coords['direction_flag'] = (
1687
+ 'profile', dir_flag, {'long_name': 'Profile direction (0=down,1=up)'}
1688
+ )
1689
+ ref = np.datetime64(self.date_ref)
1690
+ pt = profile_time.astype('datetime64[s]')
1691
+ mask = (pt == np.datetime64('NaT'))
1692
+ secs = (pt - ref).astype('timedelta64[s]').astype('float64')
1693
+ secs[mask] = np.nan
1694
+ coords['profile_time_sec'] = (
1695
+ 'profile', secs,
1696
+ {'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1697
+ 'long_name': 'Profile nominal time'}
1634
1698
  )
1635
- if hasattr(self, 'LAT_mvp_corr'):
1636
- add_corr('latitude_corrected', 'LAT_mvp_corr', units='degrees_north', long_name='Corrected latitude at sample')
1637
- if hasattr(self, 'LON_mvp_corr'):
1638
- add_corr('longitude_corrected', 'LON_mvp_corr', units='degrees_east', long_name='Corrected longitude at sample')
1639
-
1640
- # Coordinates and auxiliary per-profile variables
1641
- coords = {
1642
- 'profile': ('profile', profile_idx),
1643
- 'sample': ('sample', sample_idx)
1644
- }
1645
-
1646
- # Encode direction/time according to engine capabilities
1647
- if engine in ('netcdf4', 'h5netcdf'):
1648
- coords['direction'] = ('profile', direction.astype('U'), {'long_name': 'Profile direction'})
1649
- coords['profile_time'] = ('profile', profile_time, {'long_name': 'Profile nominal time'})
1650
- else:
1651
- # scipy backend: avoid object strings and datetime; use numeric fallbacks
1652
- dir_flag = np.where(direction.astype('U') == 'down', 0, 1).astype('int8')
1653
- coords['direction_flag'] = (
1654
- 'profile', dir_flag, {'long_name': 'Profile direction (0=down,1=up)'}
1655
- )
1656
- ref = np.datetime64(self.date_ref)
1657
- pt = profile_time.astype('datetime64[s]')
1658
- mask = (pt == np.datetime64('NaT'))
1659
- secs = (pt - ref).astype('timedelta64[s]').astype('float64')
1660
- secs[mask] = np.nan
1661
- coords['profile_time_sec'] = (
1662
- 'profile', secs,
1663
- {'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1664
- 'long_name': 'Profile nominal time'}
1665
- )
1666
1699
 
1667
- # Optional per-profile lat/lon (first valid sample)
1668
- def first_valid(vec):
1669
- # vec shape (n_prof, n_samp)
1670
- out = np.full((vec.shape[0],), np.nan)
1671
- for i in range(vec.shape[0]):
1672
- row = vec[i]
1673
- j = np.where(~np.isnan(row))[0]
1674
- if j.size:
1675
- out[i] = row[j[0]]
1676
- return out
1677
-
1678
- if hasattr(self, 'LAT_mvp'):
1679
- coords['profile_lat'] = (
1680
- 'profile', first_valid(self.LAT_mvp), {'units': 'degrees_north', 'long_name': 'Profile latitude'}
1681
- )
1682
- if hasattr(self, 'LON_mvp'):
1683
- coords['profile_lon'] = (
1684
- 'profile', first_valid(self.LON_mvp), {'units': 'degrees_east', 'long_name': 'Profile longitude'}
1685
- )
1700
+ # Optional per-profile lat/lon (first valid sample)
1701
+ def first_valid(vec):
1702
+ # vec shape (n_prof, n_samp)
1703
+ out = np.full((vec.shape[0],), np.nan)
1704
+ for i in range(vec.shape[0]):
1705
+ row = vec[i]
1706
+ j = np.where(~np.isnan(row))[0]
1707
+ if j.size:
1708
+ out[i] = row[j[0]]
1709
+ return out
1710
+
1711
+ if hasattr(self, 'LAT_mvp'):
1712
+ coords['profile_lat'] = (
1713
+ 'profile', first_valid(self.LAT_mvp), {'units': 'degrees_north', 'long_name': 'Profile latitude'}
1714
+ )
1715
+ if hasattr(self, 'LON_mvp'):
1716
+ coords['profile_lon'] = (
1717
+ 'profile', first_valid(self.LON_mvp), {'units': 'degrees_east', 'long_name': 'Profile longitude'}
1718
+ )
1686
1719
 
1687
- # Global attributes
1688
- attrs = {
1689
- 'title': 'MVP profile data',
1690
- 'Conventions': 'CF-1.8',
1691
- 'institution': 'LMD/CNRS',
1692
- 'source': 'PyMVP',
1693
- 'history': f"Created on {datetime.now().isoformat()}",
1694
- 'mvp_Yorig': int(self.Yorig)
1695
- }
1696
-
1697
- ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
1698
-
1699
- # Compression encoding per engine
1700
- encoding = None
1701
- if compression:
1702
- if engine == 'netcdf4':
1703
- encoding = {name: {'zlib': True, 'complevel': 4} for name in data_vars.keys()}
1704
- elif engine == 'h5netcdf':
1705
- encoding = {name: {'compression': 'gzip', 'compression_opts': 4} for name in data_vars.keys()}
1706
-
1707
-
1708
- if (not per_profile_files) and filepath.lower().endswith('.nc'):
1709
- out_path = filepath
1710
- ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1711
- print(f"NetCDF written: {out_path} using engine={engine}")
1712
- return
1713
- base_dir = filepath
1714
-
1715
- if not base_dir.endswith(os.sep):
1716
- base_dir = base_dir + os.sep
1717
-
1718
- base_name = "MVP_" + os.path.basename(self.data_path).rstrip(os.sep)
1719
- if per_profile_files:
1720
- # Write one file per pair (down/up)
1721
- total_pairs = (n_prof + 1) // 2
1722
- for i in range(total_pairs):
1723
- idxs = [k for k in (2*i, 2*i+1) if k < n_prof]
1724
- if not idxs:
1725
- continue
1726
- ds_i = ds.isel(profile=idxs)
1727
-
1728
- #add i to filename
1729
- fname = f"{base_name}_profile_{i:03d}.nc"
1730
- out_path = os.path.join(base_dir, fname)
1731
- ds_i.to_netcdf(out_path, encoding=encoding, engine=engine)
1732
- print(f"NetCDF written per profile into: {base_dir} using engine={engine}")
1733
- else:
1734
- file_name = f"{base_name}.nc"
1735
- out_path = os.path.join(base_dir, file_name)
1736
- ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1737
- print(f"NetCDF written: {out_path} using engine={engine}")
1720
+ # Global attributes
1721
+ attrs = {
1722
+ 'title': 'MVP profile data',
1723
+ 'Conventions': 'CF-1.8',
1724
+ 'institution': 'LMD/CNRS',
1725
+ 'source': 'PyMVP',
1726
+ 'history': f"Created on {datetime.now().isoformat()}",
1727
+ 'mvp_Yorig': int(self.Yorig)
1728
+ }
1729
+
1730
+ ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
1731
+
1732
+ # Compression encoding per engine
1733
+ encoding = None
1734
+ if compression:
1735
+ if engine == 'netcdf4':
1736
+ encoding = {name: {'zlib': True, 'complevel': 4} for name in data_vars.keys()}
1737
+ elif engine == 'h5netcdf':
1738
+ encoding = {name: {'compression': 'gzip', 'compression_opts': 4} for name in data_vars.keys()}
1739
+
1740
+
1741
+ if (not per_profile_files) and filepath.lower().endswith('.nc'):
1742
+ out_path = filepath
1743
+ ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1744
+ print(f"NetCDF written: {out_path} using engine={engine}")
1745
+ return
1746
+ base_dir = filepath
1747
+
1748
+ if not base_dir.endswith(os.sep):
1749
+ base_dir = base_dir + os.sep
1750
+
1751
+ base_name = "MVP_" + os.path.basename(self.data_path).rstrip(os.sep)
1752
+ if per_profile_files:
1753
+ # Write one file per pair (down/up)
1754
+ total_pairs = (n_prof + 1) // 2
1755
+ for i in range(total_pairs):
1756
+ idxs = [k for k in (2*i, 2*i+1) if k < n_prof]
1757
+ if not idxs:
1758
+ continue
1759
+ ds_i = ds.isel(profile=idxs)
1760
+
1761
+ #add i to filename
1762
+ fname = f"{base_name}_profile_{i:03d}.nc"
1763
+ out_path = os.path.join(base_dir, fname)
1764
+ ds_i.to_netcdf(out_path, encoding=encoding, engine=engine)
1765
+ print(f"NetCDF written per profile into: {base_dir} using engine={engine}")
1766
+ else:
1767
+ file_name = f"{base_name}.nc"
1768
+ out_path = os.path.join(base_dir, file_name)
1769
+ ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1770
+ print(f"NetCDF written: {out_path} using engine={engine}")
1738
1771
 
1739
1772
 
1740
- def help(self):
1773
+ def help(self):
1774
+ """
1775
+ Print all methods of the class with their docstring (header).
1776
+ """
1777
+ for attr in dir(self):
1778
+ if callable(getattr(self, attr)) and not attr.startswith("__"):
1779
+ method = getattr(self, attr)
1780
+ doc = method.__doc__
1781
+ print(f"{attr}:\n{doc}\n{'-'*40}")
1782
+
1783
+
1784
+ def plot_MVP_transect(self,var='TEMP',l_id=None,depth_max=None,depth_min=None,vmax=None,vmin=None,cmap=None):
1741
1785
  """
1742
- Print all methods of the class with their docstring (header).
1786
+ Plot a section of 2D inteprolated MVP data
1787
+ Args:
1788
+ var (str): Variable to plot. Choose from 'TEMP', 'COND', 'SAL', 'DO', 'FLUO', 'TURB', 'PH', 'SUNA', 'SPEED'.
1789
+ l_id (list of int): List of profile indices to include in the transect. If None, use all profiles.
1790
+ depth_max (float): Maximum depth to display in the plot. If None, use max depth in data.
1791
+ depth_min (float): Minimum depth to display in the plot. If None, use 0.
1792
+ vmax (float): Maximum value for color scale. If None, use max value in data.
1793
+ vmin (float): Minimum value for color scale. If None, use min value in data.
1794
+ cmap: Matplotlib colormap to use. If None, use default colormap.
1795
+
1796
+
1743
1797
  """
1744
- for attr in dir(self):
1745
- if callable(getattr(self, attr)) and not attr.startswith("__"):
1746
- method = getattr(self, attr)
1747
- doc = method.__doc__
1748
- print(f"{attr}:\n{doc}\n{'-'*40}")
1749
1798
 
1799
+ if self.hasattr('PRES_mvp_corr_interp') == False:
1800
+ raise ValueError("Corrected and interpolated MVP data not available. Apply corrections and interpolation first.")
1801
+
1802
+ if l_id is None:
1803
+ l_id = list(range(self.PRES_mvp_corr_interp.shape[0]))
1804
+
1805
+ match var:
1806
+ case 'TEMP':
1807
+ var = self.TEMP_mvp_corr_interp
1808
+ case 'COND':
1809
+ var = self.COND_mvp_corr_interp
1810
+ case 'SAL':
1811
+ var = self.SALT_mvp_corr_interp
1812
+ case 'DO':
1813
+ var = self.DO_mvp_corr_interp
1814
+ case 'FLUO':
1815
+ var = self.FLUO_mvp_corr_interp
1816
+ case 'TURB':
1817
+ var = self.TURB_mvp_corr_interp
1818
+ case 'PH':
1819
+ var = self.PH_mvp_corr_interp
1820
+ case 'SUNA':
1821
+ var = self.SUNA_mvp_corr_interp
1822
+ case 'SPEED':
1823
+ var = self.SPEED_mvp_corr_interp
1824
+ case _:
1825
+ raise ValueError(f"Variable {var} not recognized. Choose from 'TEMP', 'COND', 'SAL', 'DO', 'FLUO', 'TURB', 'PH', 'SUNA', 'SPEED'.")
1826
+
1827
+
1828
+
1829
+ P = self.PRES_mvp_corr_interp[l_id]
1830
+ lat = self.Lat_mvp_corr_interp[l_id]
1831
+ lon = self.Lon_mvp_corr_interp[l_id]
1832
+ T = var[l_id]
1833
+
1834
+ if depth_max is None:
1835
+ depth_max = np.nanmax(P)
1836
+ if depth_min is None:
1837
+ depth_min = 0
1838
+
1839
+
1840
+ n_profiles, n_points = T.shape
1841
+
1842
+ # compute distance
1843
+ dist_cum = 0
1844
+ dist_all = np.zeros_like(T)
1845
+
1846
+ for i in range(n_profiles):
1847
+ dist_i = np.zeros(n_points)
1848
+ last_valid = None
1849
+ for j in range(n_points):
1850
+ if np.isnan(lat[i,j]) or np.isnan(lon[i,j]):
1851
+ dist_i[j] = np.nan
1852
+ else:
1853
+ if last_valid is None:
1854
+ dist_i[j] = 0
1855
+ else:
1856
+ dist_i[j] = dist_i[last_valid] + geodesic(
1857
+ (lat[i,last_valid], lon[i,last_valid]),
1858
+ (lat[i,j], lon[i,j])
1859
+ ).km
1860
+ last_valid = j
1861
+
1862
+ dist_i += dist_cum
1863
+ if np.any(~np.isnan(dist_i)):
1864
+ dist_cum = np.nanmax(dist_i)
1865
+ dist_all[i,:] = dist_i
1866
+
1867
+ # interpolation
1868
+ dist_flat = dist_all.flatten()
1869
+ T_flat = T.flatten()
1870
+ P_flat = P.flatten()
1871
+
1872
+ # del nan
1873
+ mask = ~np.isnan(dist_flat) & ~np.isnan(T_flat)
1874
+ dist_flat = dist_flat[mask]
1875
+ P_flat = P_flat[mask]
1876
+ T_flat = T_flat[mask]
1877
+
1878
+ # create regular grid
1879
+ dist_grid = np.linspace(dist_flat.min(), dist_flat.max(), 500)
1880
+ P_grid = np.linspace(depth_min, depth_max, 1200)
1881
+ DIST, PRES = np.meshgrid(dist_grid, P_grid)
1882
+
1883
+ # 2D interp
1884
+ T_grid = griddata(
1885
+ (dist_flat, P_flat),
1886
+ T_flat,
1887
+ (DIST, PRES),
1888
+ method='linear'
1889
+ )
1890
+
1891
+ # -----------------------------
1892
+ # 5. Lissage pour adoucir les frontières
1893
+ # -----------------------------
1894
+ T_grid_smooth = gaussian_filter(T_grid, sigma=2)
1895
+
1896
+
1897
+ if vmax is None:
1898
+ vmax = np.nanmax(T_grid_smooth)
1899
+ if vmin is None:
1900
+ vmin = np.nanmin(T_grid_smooth)
1901
+ # -----------------------------
1902
+ # 6. Plot
1903
+ # -----------------------------
1904
+ if cmap is None:
1905
+ cmap = plt.get_cmap('viridis')
1906
+ fig, ax = plt.subplots(figsize=(12,6))
1907
+ pcm = ax.pcolormesh(DIST, PRES, T_grid_smooth, shading='auto', cmap=cmap, vmin=vmin, vmax=vmax)
1908
+ ax.invert_yaxis()
1909
+ ax.set_xlabel("Distance le long du transect [km]")
1910
+ ax.set_ylabel("Profondeur [m]")
1911
+ ax.set_title("Transect de température (interpolé)")
1912
+ cbar = plt.colorbar(pcm, ax=ax)
1913
+ cbar.set_label("Température [°C]")
1914
+ plt.show()
1750
1915
 
1751
1916
 
1752
1917
  def split_ctd(pres, array):
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyMVP
3
- Version: 0.2.1
3
+ Version: 0.2.3
4
4
  Summary: Python package for Moving Vessel Profiler correction and analysis
5
5
  Author: MaximilienWemaere
6
6
  Requires-Python: >=3.10
@@ -13,3 +13,5 @@ Requires-Dist: cartopy>=0.25.0
13
13
  Requires-Dist: scipy>=1.16.2
14
14
  Requires-Dist: xarray>=2025.9.1
15
15
  Requires-Dist: netcdf4>=1.7.2
16
+ Requires-Dist: pandas>=2.1.0
17
+ Requires-Dist: geopy>=2.4.0
@@ -6,3 +6,5 @@ cartopy>=0.25.0
6
6
  scipy>=1.16.2
7
7
  xarray>=2025.9.1
8
8
  netcdf4>=1.7.2
9
+ pandas>=2.1.0
10
+ geopy>=2.4.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "PyMVP"
3
- version = "0.2.1"
3
+ version = "0.2.3"
4
4
  description = "Python package for Moving Vessel Profiler correction and analysis"
5
5
  authors = [{name="MaximilienWemaere"}]
6
6
  readme = "README.md"
@@ -14,6 +14,9 @@ dependencies = [
14
14
  "scipy>=1.16.2",
15
15
  "xarray>=2025.9.1",
16
16
  "netcdf4>=1.7.2",
17
+ "pandas>=2.1.0",
18
+ "geopy>=2.4.0"
17
19
  ]
18
20
 
19
-
21
+ [tool.setuptools.packages.find]
22
+ include = ["PyMVP*"]
File without changes
File without changes
File without changes
File without changes