PyMVP 0.1.9__tar.gz → 0.2.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyMVP
3
- Version: 0.1.9
3
+ Version: 0.2.3
4
4
  Summary: Python package for Moving Vessel Profiler correction and analysis
5
5
  Author: MaximilienWemaere
6
6
  Requires-Python: >=3.10
@@ -13,3 +13,5 @@ Requires-Dist: cartopy>=0.25.0
13
13
  Requires-Dist: scipy>=1.16.2
14
14
  Requires-Dist: xarray>=2025.9.1
15
15
  Requires-Dist: netcdf4>=1.7.2
16
+ Requires-Dist: pandas>=2.1.0
17
+ Requires-Dist: geopy>=2.4.0
@@ -33,10 +33,14 @@ import cartopy.feature as cfeature
33
33
  import xarray as xr
34
34
  from . import mvp_routines as mvp
35
35
  from scipy.ndimage import median_filter
36
+ from scipy.interpolate import griddata
37
+ from scipy.ndimage import gaussian_filter
38
+ from geopy.distance import geodesic
39
+ import pandas as pd
36
40
 
37
41
 
38
42
  class Analyzer:
39
- def __init__(self, data_path, output_path=None, subdirs=False, Yorig=1950):
43
+ def __init__(self, Yorig=1950):
40
44
  """
41
45
  Initialize the analyzer with the data path and reference year.
42
46
  Args:
@@ -46,14 +50,17 @@ class Analyzer:
46
50
  """
47
51
  self.Yorig = Yorig
48
52
  self.date_ref = datetime(Yorig, 1, 1)
49
- self.data_path = data_path
50
- self.output_path = output_path if output_path is not None else data_path
51
- self.subdirs = subdirs
52
53
  self.mvp = False
53
54
  self.ctd = False
55
+ self.speed = False
56
+ self.corrected = False
57
+ self.GPS = False
58
+
59
+ def ___version___(self):
60
+ return "0.2.3"
54
61
 
55
62
 
56
- def load_mvp_data(self,delp=[],data_path=None,format='raw',only_new=False):
63
+ def load_mvp_data(self,data_path, delp=[], subdirs=False,format='raw',only_new=False, output_path=None):
57
64
  """
58
65
  Load MVP data from .raw and .log files in the data_path folder.
59
66
  Fills the object attributes with data matrices and associated metadata.
@@ -61,8 +68,9 @@ class Analyzer:
61
68
  delp (list): Indices of profiles to remove from the list (optional).
62
69
  data_path (str): Path to the folder containing MVP files (optional).
63
70
  """
64
- if data_path is not None:
65
- self.data_path = data_path
71
+ self.data_path = data_path
72
+ self.subdirs = subdirs
73
+ self.output_path = output_path
66
74
 
67
75
  if format=='raw':
68
76
  if self.subdirs:
@@ -300,7 +308,7 @@ class Analyzer:
300
308
 
301
309
 
302
310
 
303
- def load_mvp_data_again(self,data_path=None,format='raw',delp=[]):
311
+ def load_mvp_data_again(self,data_path,format='raw',delp=[]):
304
312
  """
305
313
  Load MVP data from .raw and .log files in the data_path folder.
306
314
  Fills the object attributes with data matrices and associated metadata.
@@ -308,14 +316,13 @@ class Analyzer:
308
316
  data_path (str): Path to the folder containing MVP files.
309
317
  delp (list): Indices of profiles to remove from the list (optional).
310
318
  """
311
- if data_path is not None:
312
- self.data_path = data_path
319
+
313
320
 
314
321
  if format=='raw':
315
- files = sorted(filter(os.path.isfile,glob.glob(self.data_path + '*.raw', recursive=True)))
322
+ files = sorted(filter(os.path.isfile,glob.glob(data_path + '*.raw', recursive=True)))
316
323
  elif format=='ncdf':
317
- files = sorted(filter(os.path.isfile,glob.glob(self.data_path + '**/MVP*.nc', recursive=True)))
318
- print('Found ' + str(len(files)) + ' MVP files in the directory: ' + self.data_path)
324
+ files = sorted(filter(os.path.isfile,glob.glob(data_path + '**/MVP*.nc', recursive=True)))
325
+ print('Found ' + str(len(files)) + ' MVP files in the directory: ' + data_path)
319
326
 
320
327
 
321
328
 
@@ -562,12 +569,9 @@ class Analyzer:
562
569
  """
563
570
 
564
571
 
565
- if format=='cnv':
566
- list_of_ctd_files = sorted(filter(os.path.isfile,\
567
- glob.glob(data_path_ctd + '*.cnv')))
568
- elif format=='ncdf':
569
- list_of_ctd_files = sorted(filter(os.path.isfile,\
570
- glob.glob(data_path_ctd + 'CTD'+'*.nc')))
572
+ list_of_ctd_files = sorted(filter(os.path.isfile,\
573
+ glob.glob(data_path_ctd + 'CTD'+'*.nc')))
574
+
571
575
  print('Found ' + str(len(list_of_ctd_files)) + ' CTD files in the directory: ' + data_path_ctd)
572
576
 
573
577
 
@@ -629,12 +633,29 @@ class Analyzer:
629
633
  print('CTD data loaded successfully.')
630
634
  self.ctd = True
631
635
 
632
-
633
-
634
-
636
def load_GPS(self, gps_path):
    """
    Load a GPS track from a .csv file and interpolate it onto the MVP samples.

    The file is read with pandas and must contain the columns 'time',
    'latitude' and 'longitude'. The raw columns are stored as GPS_TIME,
    GPS_LAT and GPS_LON. Lon_mvp and Lat_mvp (same shape as PRES_mvp) are
    then filled, profile by profile, with the track linearly interpolated
    at the sample times found in TIME_mvp.

    NOTE(review): the 'time' column is assumed to be numeric and expressed
    in the same time base as TIME_mvp - confirm against the GPS export.

    Args:
        gps_path (str): Path to the .csv file containing GPS data.

    Raises:
        RuntimeError: If no MVP data has been loaded yet; the interpolation
            needs PRES_mvp and TIME_mvp.
    """
    # Fail early with an explicit message instead of an AttributeError on
    # PRES_mvp after "loaded successfully" has already been printed.
    if not getattr(self, 'mvp', False):
        raise RuntimeError("No MVP data loaded. Call load_mvp_data() first.")

    self.gps_path = gps_path
    gps_data = pd.read_csv(gps_path)
    self.GPS_TIME = gps_data['time'].values
    self.GPS_LAT = gps_data['latitude'].values
    self.GPS_LON = gps_data['longitude'].values
    print('GPS data loaded successfully.')
    # Lower-case flag kept for backward compatibility; the flag initialised
    # in __init__ is the upper-case self.GPS set at the end of this method.
    self.gps = True

    # np.interp does not check that its x-coordinates are increasing, so the
    # track is ordered by time first (a no-op for an already sorted file).
    # The float conversions are done once here rather than once per profile.
    track_time = np.asarray(self.GPS_TIME, dtype=float)
    order = np.argsort(track_time, kind='stable')
    track_time = track_time[order]
    track_lon = self.GPS_LON.astype(float)[order]
    track_lat = self.GPS_LAT.astype(float)[order]

    n_prof, n_samp = self.PRES_mvp.shape[0], self.PRES_mvp.shape[1]
    self.Lon_mvp = np.zeros((n_prof, n_samp))
    self.Lat_mvp = np.zeros((n_prof, n_samp))

    for i in range(n_prof):
        self.Lon_mvp[i, :] = np.interp(self.TIME_mvp[i, :], track_time, track_lon)
        self.Lat_mvp[i, :] = np.interp(self.TIME_mvp[i, :], track_time, track_lat)

    self.GPS = True
638
659
 
639
660
  def compute_waterflow(self,horizontal_speed=2,corr=False):
640
661
  """
@@ -655,6 +676,7 @@ class Analyzer:
655
676
 
656
677
  self.SPEED_mvp = SPEED_MVP
657
678
  print('Water flow speed computed successfully.')
679
+ self.speed = True
658
680
 
659
681
  def print_profile_metadata(self):
660
682
  """
@@ -938,7 +960,7 @@ class Analyzer:
938
960
  plt.xlabel('Fluorescence, ug/L')
939
961
  plt.ylabel('Pressure, dbar')
940
962
 
941
- def plot_diagramTS_raw(self,id_mvp=None,id_ctd=None,correction=False):
963
+ def plot_diagramTS(self,id_mvp=None,id_ctd=None,correction=False):
942
964
  """
943
965
  Plot the TS diagram (Salinity vs Temperature) for one or more profiles, with isopycnals.
944
966
  Args:
@@ -1234,7 +1256,7 @@ class Analyzer:
1234
1256
  print(f" MVP down: {rmse_cond_down:.4f} S/m (deep: {rmse_cond_down_deep:.4f} S/m)")
1235
1257
  print(f" MVP up: {rmse_cond_up:.4f} S/m (deep: {rmse_cond_up_deep:.4f} S/m)")
1236
1258
 
1237
- def correct_oxygen(self,id_mvp=None,id_ctd=None,num_sample=500,plotting=False,correction=False):
1259
+ def correct_oxygen(self,id_mvp=None,id_ctd=None,num_sample=500,plotting=False,):
1238
1260
  """
1239
1261
  Apply oxygen correction to MVP dissolved oxygen profiles thanks to CTD data.
1240
1262
  Args:
@@ -1295,9 +1317,9 @@ class Analyzer:
1295
1317
  rmse_after_full = np.mean(np.sqrt(np.nanmean((DO_mvp_corr_full_interp - DO_ctd_interp)**2,axis=1)))
1296
1318
  print(f"RMSE after correction (full profile): {rmse_after_full:.4f}")
1297
1319
 
1320
+ self.DO_mvp_raw = self.DO_mvp.copy()
1321
+ self.DO_mvp = DO_mvp_corr_full
1298
1322
 
1299
- if correction:
1300
- self.DO_mvp = DO_mvp_corr_full
1301
1323
 
1302
1324
  if plotting:
1303
1325
 
@@ -1314,7 +1336,6 @@ class Analyzer:
1314
1336
  plt.show()
1315
1337
 
1316
1338
 
1317
-
1318
1339
  def mvp_correction(self,high_cutoff=1,dp=0.1):
1319
1340
 
1320
1341
  T_MVP_corr = []
@@ -1368,7 +1389,7 @@ class Analyzer:
1368
1389
  self.COND_mvp_corr = {i: sublist for i, sublist in enumerate(C_MVP_corr)}
1369
1390
  self.SALT_mvp_corr = {i: sublist for i, sublist in enumerate(S_MVP_corr)}
1370
1391
  self.TIME_mvp_corr = {i: sublist for i, sublist in enumerate(Time_MVP_corr)}
1371
-
1392
+ self.corrected = True
1372
1393
 
1373
1394
  print("MVP profiles corrected.")
1374
1395
 
@@ -1380,6 +1401,11 @@ class Analyzer:
1380
1401
  """
1381
1402
  if not self.ctd:
1382
1403
  raise ValueError("CTD data not loaded.")
1404
+
1405
+ if not self.corrected:
1406
+ raise ValueError("MVP data not corrected. Apply corrections first.")
1407
+
1408
+
1383
1409
 
1384
1410
  if not hasattr(self, 'PRES_mvp_corr'):
1385
1411
  raise ValueError("Corrected MVP data not available. Apply corrections first.")
@@ -1397,8 +1423,9 @@ class Analyzer:
1397
1423
  max_lensalt = max([len(p) for p in self.SALT_mvp_corr.values()])
1398
1424
  SALT_mvp_corr_mat = np.array([list(row) + [np.nan] * (max_lensalt - len(row)) for row in self.SALT_mvp_corr.values()])
1399
1425
 
1400
- max_lenvspd = max([len(p) for p in self.SPEED_mvp_corr.values()])
1401
- SPEED_mvp_corr_mat = np.array([list(row) + [np.nan] * (max_lenvspd - len(row)) for row in self.SPEED_mvp_corr.values()])
1426
+ if self.speed:
1427
+ max_lenvspd = max([len(p) for p in self.SPEED_mvp_corr.values()])
1428
+ SPEED_mvp_corr_mat = np.array([list(row) + [np.nan] * (max_lenvspd - len(row)) for row in self.SPEED_mvp_corr.values()])
1402
1429
 
1403
1430
  max_lentime = max([len(p) for p in self.TIME_mvp_corr.values()])
1404
1431
  TIME_mvp_corr_mat = np.array([list(row) + [np.nan] * (max_lentime - len(row)) for row in self.TIME_mvp_corr.values()])
@@ -1406,211 +1433,311 @@ class Analyzer:
1406
1433
 
1407
1434
  pressure_grid = np.linspace(np.nanmin(PRES_mvp_corr_mat), np.nanmax(PRES_mvp_corr_mat), length)
1408
1435
 
1409
- self.TEMP_ctd_on_mvp = mvp.vertical_interp(self.PRES_ctd, self.TEMP_ctd, pressure_grid)
1410
- self.PRES_ctd_on_mvp = mvp.vertical_interp(self.PRES_ctd, self.PRES_ctd, pressure_grid)
1411
- self.COND_ctd_on_mvp = mvp.vertical_interp(self.PRES_ctd, self.COND_ctd, pressure_grid)
1412
- self.SALT_ctd_on_mvp = mvp.vertical_interp(self.PRES_ctd, self.SALT_ctd, pressure_grid)
1413
- self.OXY_ctd_on_mvp = mvp.vertical_interp(self.PRES_ctd, self.OXY_ctd, pressure_grid)
1436
+ self.TEMP_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.TEMP_ctd, pressure_grid)
1437
+ self.PRES_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.PRES_ctd, pressure_grid)
1438
+ self.COND_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.COND_ctd, pressure_grid)
1439
+ self.SALT_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.SALT_ctd, pressure_grid)
1440
+ self.DO_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.OXY_ctd, pressure_grid)
1441
+ self.FLUO_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.FLUO_ctd, pressure_grid)
1442
+ self.TURB_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.TURB_ctd, pressure_grid)
1414
1443
  self.TEMP_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, TEMP_mvp_corr_mat, pressure_grid)
1415
- self.PRES_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, PRES_mvp_corr_mat, pressure_grid)
1444
+ self.PRES_mvp_corr_interp = np.tile(pressure_grid, (PRES_mvp_corr_mat.shape[0], 1))
1416
1445
  self.COND_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, COND_mvp_corr_mat, pressure_grid)
1417
1446
  self.SALT_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SALT_mvp_corr_mat, pressure_grid)
1418
- self.SPEED_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SPEED_mvp_corr_mat, pressure_grid)
1447
+ self.DO_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.DO_mvp, pressure_grid)
1448
+ self.FLUO_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.FLUO_mvp, pressure_grid)
1449
+ self.TURB_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.TURB_mvp, pressure_grid)
1450
+ self.PH_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.PH_mvp, pressure_grid)
1451
+ self.SUNA_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.SUNA_mvp, pressure_grid)
1452
+ if self.GPS:
1453
+ self.Lat_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.Lat_mvp, pressure_grid)
1454
+ self.Lon_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.Lon_mvp, pressure_grid)
1455
+
1456
+ if self.speed:
1457
+ self.SPEED_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SPEED_mvp_corr_mat, pressure_grid)
1419
1458
  self.TIME_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, TIME_mvp_corr_mat, pressure_grid)
1420
1459
 
1421
1460
  print('CTD data interpolated onto corrected MVP pressure levels.')
1422
1461
 
1423
1462
 
1424
- def to_netcdf(self, filepath=None, corrected=False, compression=True, engine=None, per_profile_files=False):
1463
def corrige_MVP_offset_on_ctd_exact(self,id_mvp,id_ctd,min_depth=-1):
    """
    Align interpolated MVP temperature and conductivity profiles on paired CTD profiles.

    Each MVP profile id_mvp[k] is paired with the CTD profile id_ctd[k]. For
    every pair, the rows of TEMP_mvp_corr_interp and COND_mvp_corr_interp are
    overwritten in place with the first element returned by
    mvp.align_profiles(pressure, ctd_profile, mvp_profile, min_depth).
    The mean MVP-minus-CTD difference over the selected pairs is printed
    before and after the correction.

    This variant supposes that the CTD and MVP casts sample exactly the same
    profile (same location, same time). If that is not the case, use
    corrige_MVP_offset_on_ctd_simple instead.

    Args:
        id_mvp (sequence of int): Row indices of the MVP profiles to correct.
        id_ctd (sequence of int): Row indices of the matching CTD profiles;
            must have the same length as id_mvp.
        min_depth: Forwarded unchanged as the last argument of
            mvp.align_profiles. Default -1.

    Returns:
        None. The corrected profiles are stored on the object.

    Raises:
        ValueError: If id_mvp and id_ctd do not have the same length.
    """
    # The pairing is positional, so a length mismatch is always a caller error.
    if len(id_mvp) != len(id_ctd):
        raise ValueError("id_mvp and id_ctd must have the same length.")

    pairs = list(zip(id_mvp, id_ctd))

    def report_mean_differences():
        # Print the mean MVP-minus-CTD difference over all selected pairs.
        temp_diffs = [
            np.nanmean(self.TEMP_mvp_corr_interp[m] - self.TEMP_ctd_interp[c])
            for m, c in pairs
        ]
        cond_diffs = [
            np.nanmean(self.COND_mvp_corr_interp[m] - self.COND_ctd_interp[c])
            for m, c in pairs
        ]
        print("Mean temperature difference between MVP and CTD profiles:", np.mean(temp_diffs))
        print("Mean conductivity difference between MVP and CTD profiles:", np.mean(cond_diffs))

    print("Calculating mean differences between MVP and CTD profiles before correction:")
    report_mean_differences()

    for m, c in pairs:
        pressure = self.PRES_mvp_corr_interp[m]
        # Only element [0] of align_profiles' result is kept; the remaining
        # elements are not used here.
        self.TEMP_mvp_corr_interp[m] = mvp.align_profiles(pressure, self.TEMP_ctd_interp[c], self.TEMP_mvp_corr_interp[m],min_depth)[0]
        self.COND_mvp_corr_interp[m] = mvp.align_profiles(pressure, self.COND_ctd_interp[c], self.COND_mvp_corr_interp[m],min_depth)[0]

    print("After correction:")
    report_mean_differences()
1501
+
1502
+
1503
+
1504
def corrige_MVP_offset_on_ctd_simple(self,id_mvp,id_ctd,min_depth):
    """
    Remove the mean temperature and conductivity offset between MVP and CTD profiles.

    Each MVP profile id_mvp[k] is paired with the CTD profile id_ctd[k]. For
    every pair, the mean MVP-minus-CTD difference is computed over the samples
    whose pressure (PRES_mvp_corr_interp) is >= min_depth. That constant is
    then subtracted in place from the whole MVP row of TEMP_mvp_corr_interp
    and COND_mvp_corr_interp. The mean difference over the selected pairs is
    printed before and after the correction.

    This variant is less restrictive than corrige_MVP_offset_on_ctd_exact: the
    CTD and MVP profiles do not need to be exactly similar. Choosing a
    min_depth that excludes the surface layer is advised, since that layer can
    introduce errors.

    If a pair has no usable sample at or below min_depth (empty selection or
    only NaN), its offset is undefined and the MVP profile is left unchanged
    instead of being overwritten with NaN.

    Args:
        id_mvp (sequence of int): Row indices of the MVP profiles to correct.
        id_ctd (sequence of int): Row indices of the matching CTD profiles;
            must have the same length as id_mvp.
        min_depth (float): Pressure threshold; only samples with pressure
            >= min_depth contribute to the offset.

    Returns:
        None. The corrected profiles are stored on the object.

    Raises:
        ValueError: If id_mvp and id_ctd do not have the same length.
    """
    # The pairing is positional, so a length mismatch is always a caller error.
    if len(id_mvp) != len(id_ctd):
        raise ValueError("id_mvp and id_ctd must have the same length.")

    pairs = list(zip(id_mvp, id_ctd))

    def mean_offset(mvp_field, ctd_field, m, c):
        # Mean MVP-minus-CTD difference over the samples at or below min_depth.
        deep = self.PRES_mvp_corr_interp[m] >= min_depth
        delta = mvp_field[m, deep] - ctd_field[c, deep]
        # np.nanmean would return NaN here too, but with a RuntimeWarning.
        if np.all(np.isnan(delta)):
            return np.nan
        return np.nanmean(delta)

    def report_mean_differences():
        # Print the mean MVP-minus-CTD difference over all selected pairs.
        temp_diffs = [
            mean_offset(self.TEMP_mvp_corr_interp, self.TEMP_ctd_interp, m, c)
            for m, c in pairs
        ]
        cond_diffs = [
            mean_offset(self.COND_mvp_corr_interp, self.COND_ctd_interp, m, c)
            for m, c in pairs
        ]
        print("Mean temperature difference between MVP and CTD profiles:", np.mean(temp_diffs))
        print("Mean conductivity difference between MVP and CTD profiles:", np.mean(cond_diffs))

    print("Calculating mean differences between MVP and CTD profiles before correction:")
    report_mean_differences()

    for m, c in pairs:
        # Offsets are recomputed per pair, not reused from the report, so an
        # MVP index listed twice sees the effect of its earlier correction.
        temp_diff = mean_offset(self.TEMP_mvp_corr_interp, self.TEMP_ctd_interp, m, c)
        if np.isfinite(temp_diff):
            self.TEMP_mvp_corr_interp[m] -= temp_diff
        else:
            # Subtracting a non-finite offset would destroy the whole profile.
            print(f"Profile {m}: no valid temperature offset, profile left unchanged.")

        cond_diff = mean_offset(self.COND_mvp_corr_interp, self.COND_ctd_interp, m, c)
        if np.isfinite(cond_diff):
            self.COND_mvp_corr_interp[m] -= cond_diff
        else:
            print(f"Profile {m}: no valid conductivity offset, profile left unchanged.")

    print("After correction:")
    report_mean_differences()
1551
+
1442
1552
 
1443
- # Dimensions
1444
- n_prof, n_samp = self.PRES_mvp.shape
1445
1553
 
1446
- # Coordinates
1447
- profile_idx = np.arange(n_prof, dtype=np.int32)
1448
- sample_idx = np.arange(n_samp, dtype=np.int32)
1449
1554
 
1450
- # Direction per profile (down/up)
1451
- direction = None
1452
- if hasattr(self, 'DIR') and len(self.DIR) == n_prof:
1453
- direction = np.array(self.DIR, dtype=object)
1454
- else:
1455
- # Fallback based on even/odd
1456
- direction = np.array(['down' if i % 2 == 0 else 'up' for i in range(n_prof)], dtype=object)
1457
-
1458
- # Per-sample time as seconds since reference origin
1459
- # TIME_mvp is in days relative to self.date_ref
1460
- time_seconds = None
1461
- if hasattr(self, 'TIME_mvp'):
1462
- time_seconds = self.TIME_mvp * 24.0 * 3600.0
1463
- else:
1464
- time_seconds = np.full((n_prof, n_samp), np.nan)
1465
-
1466
- # Per-profile datetime (one timestamp per cast pair); map using i//2
1467
- profile_time = None
1468
- if hasattr(self, 'DATETIME_mvp') and len(getattr(self, 'DATETIME_mvp', [])) > 0:
1469
- prof_times = []
1470
- for i in range(n_prof):
1471
- j = i // 2
1472
- if j < len(self.DATETIME_mvp) and self.DATETIME_mvp[j] is not None:
1473
- prof_times.append(np.datetime64(self.DATETIME_mvp[j]))
1474
- else:
1475
- prof_times.append(np.datetime64('NaT'))
1476
- profile_time = np.array(prof_times, dtype='datetime64[ns]')
1477
- else:
1478
- profile_time = np.array([np.datetime64('NaT')] * n_prof, dtype='datetime64[ns]')
1555
+ def to_netcdf(self, filepath, corrected=False, compression=True, engine=None, per_profile_files=False):
1556
+ """
1557
+ Export MVP data to a NetCDF file using xarray.
1479
1558
 
1480
- # Build dataset variables safely
1481
- data_vars = {}
1559
+ Args:
1560
+ filepath (str): Output NetCDF file path.
1561
+ corrected (bool): Also write corrected arrays if present (*_mvp_corr). Default False.
1562
+ compression (bool): Enable compression (engine dependent). Default True.
1563
+ engine (str|None): One of 'netcdf4', 'h5netcdf', 'scipy'. If None, choose netcdf4.
1564
+ per_profile_files (bool): If True, write one .nc per MVP cycle (two rows: down and up).
1565
+ """
1566
+ if not getattr(self, 'mvp', False):
1567
+ raise RuntimeError("No MVP data loaded. Call load_mvp_data() first.")
1482
1568
 
1483
- def add_var(var_name, arr, units=None, long_name=None):
1484
- if arr is None:
1485
- return
1486
- data_vars[var_name] = (
1487
- ('profile', 'sample'), arr,
1488
- {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1569
+ engine = 'netcdf4' if engine is None else engine
1570
+ if engine == 'scipy' and compression:
1571
+ print('Warning: scipy backend does not support compression; writing without compression.')
1572
+ compression = False
1573
+
1574
+ # Dimensions
1575
+ n_prof, n_samp = self.PRES_mvp.shape
1576
+
1577
+ # Coordinates
1578
+ profile_idx = np.arange(n_prof, dtype=np.int32)
1579
+ sample_idx = np.arange(n_samp, dtype=np.int32)
1580
+
1581
+ # Direction per profile (down/up)
1582
+ direction = None
1583
+ if hasattr(self, 'DIR') and len(self.DIR) == n_prof:
1584
+ direction = np.array(self.DIR, dtype=object)
1585
+ else:
1586
+ # Fallback based on even/odd
1587
+ direction = np.array(['down' if i % 2 == 0 else 'up' for i in range(n_prof)], dtype=object)
1588
+
1589
+ # Per-sample time as seconds since reference origin
1590
+ # TIME_mvp is in days relative to self.date_ref
1591
+ time_seconds = None
1592
+ if hasattr(self, 'TIME_mvp'):
1593
+ time_seconds = self.TIME_mvp * 24.0 * 3600.0
1594
+ else:
1595
+ time_seconds = np.full((n_prof, n_samp), np.nan)
1596
+
1597
+ # Per-profile datetime (one timestamp per cast pair); map using i//2
1598
+ profile_time = None
1599
+ if hasattr(self, 'DATETIME_mvp') and len(getattr(self, 'DATETIME_mvp', [])) > 0:
1600
+ prof_times = []
1601
+ for i in range(n_prof):
1602
+ j = i // 2
1603
+ if j < len(self.DATETIME_mvp) and self.DATETIME_mvp[j] is not None:
1604
+ prof_times.append(np.datetime64(self.DATETIME_mvp[j]))
1605
+ else:
1606
+ prof_times.append(np.datetime64('NaT'))
1607
+ profile_time = np.array(prof_times, dtype='datetime64[ns]')
1608
+ else:
1609
+ profile_time = np.array([np.datetime64('NaT')] * n_prof, dtype='datetime64[ns]')
1610
+
1611
+ # Build dataset variables safely
1612
+ data_vars = {}
1613
+
1614
+ def add_var(var_name, arr, units=None, long_name=None):
1615
+ if arr is None:
1616
+ return
1617
+ data_vars[var_name] = (
1618
+ ('profile', 'sample'), arr,
1619
+ {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1620
+ )
1621
+
1622
+ add_var('PRES', getattr(self, 'PRES_mvp', None), units='dbar', long_name='Sea water pressure')
1623
+ add_var('TEMP', getattr(self, 'TEMP_mvp', None), units='degC', long_name='In-situ temperature')
1624
+ add_var('COND', getattr(self, 'COND_mvp', None), units='mS/cm', long_name='Conductivity')
1625
+ add_var('SAL', getattr(self, 'SALT_mvp', None), units='psu', long_name='Practical salinity')
1626
+ add_var('SOUNDVEL', getattr(self, 'SOUNDVEL_mvp', None), units='m s-1', long_name='Sound speed')
1627
+ add_var('DO', getattr(self, 'DO_mvp', None), units='ml/L', long_name='Dissolved oxygen')
1628
+ add_var('TEMP2', getattr(self, 'TEMP2_mvp', None), units='degC', long_name='Oxygen sensor temperature')
1629
+ add_var('SUNA', getattr(self, 'SUNA_mvp', None), long_name='SUNA raw/derived')
1630
+ add_var('FLUO', getattr(self, 'FLUO_mvp', None), units='ug/L', long_name='Chl fluorescence')
1631
+ add_var('TURB', getattr(self, 'TURB_mvp', None), units='NTU', long_name='Turbidity')
1632
+ add_var('PH', getattr(self, 'PH_mvp', None), units='1', long_name='pH')
1633
+
1634
+ # Position and time arrays (2D)
1635
+ if hasattr(self, 'LAT_mvp'):
1636
+ add_var('LATITUDE', self.LAT_mvp, units='degrees_north', long_name='Latitude at sample')
1637
+ if hasattr(self, 'LON_mvp'):
1638
+ add_var('LONGITUDE', self.LON_mvp, units='degrees_east', long_name='Longitude at sample')
1639
+ # Time seconds since reference
1640
+ data_vars['TIME'] = (
1641
+ ('profile', 'sample'), time_seconds,
1642
+ {
1643
+ 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1644
+ 'long_name': 'Time at sample'
1645
+ }
1489
1646
  )
1490
-
1491
- add_var('PRES', getattr(self, 'PRES_mvp', None), units='dbar', long_name='Sea water pressure')
1492
- add_var('TEMP', getattr(self, 'TEMP_mvp', None), units='degC', long_name='In-situ temperature')
1493
- add_var('COND', getattr(self, 'COND_mvp', None), units='mS/cm', long_name='Conductivity')
1494
- add_var('SAL', getattr(self, 'SALT_mvp', None), units='psu', long_name='Practical salinity')
1495
- add_var('SOUNDVEL', getattr(self, 'SOUNDVEL_mvp', None), units='m s-1', long_name='Sound speed')
1496
- add_var('DO', getattr(self, 'DO_mvp', None), units='ml/L', long_name='Dissolved oxygen')
1497
- add_var('TEMP2', getattr(self, 'TEMP2_mvp', None), units='degC', long_name='Oxygen sensor temperature')
1498
- add_var('SUNA', getattr(self, 'SUNA_mvp', None), long_name='SUNA raw/derived')
1499
- add_var('FLUO', getattr(self, 'FLUO_mvp', None), units='ug/L', long_name='Chl fluorescence')
1500
- add_var('TURB', getattr(self, 'TURB_mvp', None), units='NTU', long_name='Turbidity')
1501
- add_var('PH', getattr(self, 'PH_mvp', None), units='1', long_name='pH')
1502
-
1503
- # Position and time arrays (2D)
1504
- if hasattr(self, 'LAT_mvp'):
1505
- add_var('LATITUDE', self.LAT_mvp, units='degrees_north', long_name='Latitude at sample')
1506
- if hasattr(self, 'LON_mvp'):
1507
- add_var('LONGITUDE', self.LON_mvp, units='degrees_east', long_name='Longitude at sample')
1508
- # Time seconds since reference
1509
- data_vars['TIME'] = (
1510
- ('profile', 'sample'), time_seconds,
1511
- {
1512
- 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1513
- 'long_name': 'Time at sample'
1514
- }
1515
- )
1516
1647
 
1517
- # Include corrected arrays if requested and present
1518
- if corrected:
1519
- def add_corr(name, attr, units=None, long_name=None):
1520
- if hasattr(self, attr):
1521
- data_vars[name] = (
1522
- ('profile', 'sample'), getattr(self, attr),
1523
- {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1648
+ # Include corrected arrays if requested and present
1649
+ if corrected:
1650
+ def add_corr(name, attr, units=None, long_name=None):
1651
+ if hasattr(self, attr):
1652
+ data_vars[name] = (
1653
+ ('profile', 'sample'), getattr(self, attr),
1654
+ {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1655
+ )
1656
+ add_corr('pressure_corrected', 'PRES_mvp_corr', units='dbar', long_name='Corrected pressure')
1657
+ add_corr('temperature_corrected', 'TEMP_mvp_corr', units='degC', long_name='Corrected temperature')
1658
+ add_corr('conductivity_corrected', 'COND_mvp_corr', units='mS/cm', long_name='Corrected conductivity')
1659
+ add_corr('salinity_corrected', 'SALT_mvp_corr', units='psu', long_name='Corrected salinity')
1660
+ if hasattr(self, 'TIME_mvp_corr'):
1661
+ data_vars['time_corrected'] = (
1662
+ ('profile', 'sample'), self.TIME_mvp_corr * 24.0 * 3600.0,
1663
+ {
1664
+ 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1665
+ 'long_name': 'Corrected time at sample'
1666
+ }
1524
1667
  )
1525
- add_corr('pressure_corrected', 'PRES_mvp_corr', units='dbar', long_name='Corrected pressure')
1526
- add_corr('temperature_corrected', 'TEMP_mvp_corr', units='degC', long_name='Corrected temperature')
1527
- add_corr('conductivity_corrected', 'COND_mvp_corr', units='mS/cm', long_name='Corrected conductivity')
1528
- add_corr('salinity_corrected', 'SALT_mvp_corr', units='psu', long_name='Corrected salinity')
1529
- if hasattr(self, 'TIME_mvp_corr'):
1530
- data_vars['time_corrected'] = (
1531
- ('profile', 'sample'), self.TIME_mvp_corr * 24.0 * 3600.0,
1532
- {
1533
- 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1534
- 'long_name': 'Corrected time at sample'
1535
- }
1668
+ if hasattr(self, 'LAT_mvp_corr'):
1669
+ add_corr('latitude_corrected', 'LAT_mvp_corr', units='degrees_north', long_name='Corrected latitude at sample')
1670
+ if hasattr(self, 'LON_mvp_corr'):
1671
+ add_corr('longitude_corrected', 'LON_mvp_corr', units='degrees_east', long_name='Corrected longitude at sample')
1672
+
1673
+ # Coordinates and auxiliary per-profile variables
1674
+ coords = {
1675
+ 'profile': ('profile', profile_idx),
1676
+ 'sample': ('sample', sample_idx)
1677
+ }
1678
+
1679
+ # Encode direction/time according to engine capabilities
1680
+ if engine in ('netcdf4', 'h5netcdf'):
1681
+ coords['direction'] = ('profile', direction.astype('U'), {'long_name': 'Profile direction'})
1682
+ coords['profile_time'] = ('profile', profile_time, {'long_name': 'Profile nominal time'})
1683
+ else:
1684
+ # scipy backend: avoid object strings and datetime; use numeric fallbacks
1685
+ dir_flag = np.where(direction.astype('U') == 'down', 0, 1).astype('int8')
1686
+ coords['direction_flag'] = (
1687
+ 'profile', dir_flag, {'long_name': 'Profile direction (0=down,1=up)'}
1688
+ )
1689
+ ref = np.datetime64(self.date_ref)
1690
+ pt = profile_time.astype('datetime64[s]')
1691
+ mask = (pt == np.datetime64('NaT'))
1692
+ secs = (pt - ref).astype('timedelta64[s]').astype('float64')
1693
+ secs[mask] = np.nan
1694
+ coords['profile_time_sec'] = (
1695
+ 'profile', secs,
1696
+ {'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1697
+ 'long_name': 'Profile nominal time'}
1536
1698
  )
1537
- if hasattr(self, 'LAT_mvp_corr'):
1538
- add_corr('latitude_corrected', 'LAT_mvp_corr', units='degrees_north', long_name='Corrected latitude at sample')
1539
- if hasattr(self, 'LON_mvp_corr'):
1540
- add_corr('longitude_corrected', 'LON_mvp_corr', units='degrees_east', long_name='Corrected longitude at sample')
1541
-
1542
- # Coordinates and auxiliary per-profile variables
1543
- coords = {
1544
- 'profile': ('profile', profile_idx),
1545
- 'sample': ('sample', sample_idx)
1546
- }
1547
-
1548
- # Encode direction/time according to engine capabilities
1549
- if engine in ('netcdf4', 'h5netcdf'):
1550
- coords['direction'] = ('profile', direction.astype('U'), {'long_name': 'Profile direction'})
1551
- coords['profile_time'] = ('profile', profile_time, {'long_name': 'Profile nominal time'})
1552
- else:
1553
- # scipy backend: avoid object strings and datetime; use numeric fallbacks
1554
- dir_flag = np.where(direction.astype('U') == 'down', 0, 1).astype('int8')
1555
- coords['direction_flag'] = (
1556
- 'profile', dir_flag, {'long_name': 'Profile direction (0=down,1=up)'}
1557
- )
1558
- ref = np.datetime64(self.date_ref)
1559
- pt = profile_time.astype('datetime64[s]')
1560
- mask = (pt == np.datetime64('NaT'))
1561
- secs = (pt - ref).astype('timedelta64[s]').astype('float64')
1562
- secs[mask] = np.nan
1563
- coords['profile_time_sec'] = (
1564
- 'profile', secs,
1565
- {'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1566
- 'long_name': 'Profile nominal time'}
1567
- )
1568
1699
 
1569
- # Optional per-profile lat/lon (first valid sample)
1570
- def first_valid(vec):
1571
- # vec shape (n_prof, n_samp)
1572
- out = np.full((vec.shape[0],), np.nan)
1573
- for i in range(vec.shape[0]):
1574
- row = vec[i]
1575
- j = np.where(~np.isnan(row))[0]
1576
- if j.size:
1577
- out[i] = row[j[0]]
1578
- return out
1579
-
1580
- if hasattr(self, 'LAT_mvp'):
1581
- coords['profile_lat'] = (
1582
- 'profile', first_valid(self.LAT_mvp), {'units': 'degrees_north', 'long_name': 'Profile latitude'}
1583
- )
1584
- if hasattr(self, 'LON_mvp'):
1585
- coords['profile_lon'] = (
1586
- 'profile', first_valid(self.LON_mvp), {'units': 'degrees_east', 'long_name': 'Profile longitude'}
1587
- )
1700
+ # Optional per-profile lat/lon (first valid sample)
1701
+ def first_valid(vec):
1702
+ # vec shape (n_prof, n_samp)
1703
+ out = np.full((vec.shape[0],), np.nan)
1704
+ for i in range(vec.shape[0]):
1705
+ row = vec[i]
1706
+ j = np.where(~np.isnan(row))[0]
1707
+ if j.size:
1708
+ out[i] = row[j[0]]
1709
+ return out
1710
+
1711
+ if hasattr(self, 'LAT_mvp'):
1712
+ coords['profile_lat'] = (
1713
+ 'profile', first_valid(self.LAT_mvp), {'units': 'degrees_north', 'long_name': 'Profile latitude'}
1714
+ )
1715
+ if hasattr(self, 'LON_mvp'):
1716
+ coords['profile_lon'] = (
1717
+ 'profile', first_valid(self.LON_mvp), {'units': 'degrees_east', 'long_name': 'Profile longitude'}
1718
+ )
1719
+
1720
+ # Global attributes
1721
+ attrs = {
1722
+ 'title': 'MVP profile data',
1723
+ 'Conventions': 'CF-1.8',
1724
+ 'institution': 'LMD/CNRS',
1725
+ 'source': 'PyMVP',
1726
+ 'history': f"Created on {datetime.now().isoformat()}",
1727
+ 'mvp_Yorig': int(self.Yorig)
1728
+ }
1729
+
1730
+ ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
1731
+
1732
+ # Compression encoding per engine
1733
+ encoding = None
1734
+ if compression:
1735
+ if engine == 'netcdf4':
1736
+ encoding = {name: {'zlib': True, 'complevel': 4} for name in data_vars.keys()}
1737
+ elif engine == 'h5netcdf':
1738
+ encoding = {name: {'compression': 'gzip', 'compression_opts': 4} for name in data_vars.keys()}
1739
+
1588
1740
 
1589
- # Global attributes
1590
- attrs = {
1591
- 'title': 'MVP profile data',
1592
- 'Conventions': 'CF-1.8',
1593
- 'institution': 'LMD/CNRS',
1594
- 'source': 'MVPAnalyzer',
1595
- 'history': f"Created on {datetime.now().isoformat()}",
1596
- 'mvp_Yorig': int(self.Yorig)
1597
- }
1598
-
1599
- ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
1600
-
1601
- # Compression encoding per engine
1602
- encoding = None
1603
- if compression:
1604
- if engine == 'netcdf4':
1605
- encoding = {name: {'zlib': True, 'complevel': 4} for name in data_vars.keys()}
1606
- elif engine == 'h5netcdf':
1607
- encoding = {name: {'compression': 'gzip', 'compression_opts': 4} for name in data_vars.keys()}
1608
-
1609
- # Determine output base directory
1610
- if filepath is None:
1611
- base_dir = self.output_path if hasattr(self, 'output_path') else os.getcwd() + os.sep
1612
- else:
1613
- # If a full file path was provided and not per_profile_files, honor it
1614
1741
  if (not per_profile_files) and filepath.lower().endswith('.nc'):
1615
1742
  out_path = filepath
1616
1743
  ds.to_netcdf(out_path, encoding=encoding, engine=engine)
@@ -1618,41 +1745,173 @@ class Analyzer:
1618
1745
  return
1619
1746
  base_dir = filepath
1620
1747
 
1621
- if not base_dir.endswith(os.sep):
1622
- base_dir = base_dir + os.sep
1623
-
1624
- base_name = "MVP_" + os.path.basename(self.data_path).rstrip(os.sep)
1625
- if per_profile_files:
1626
- # Write one file per pair (down/up)
1627
- total_pairs = (n_prof + 1) // 2
1628
- for i in range(total_pairs):
1629
- idxs = [k for k in (2*i, 2*i+1) if k < n_prof]
1630
- if not idxs:
1631
- continue
1632
- ds_i = ds.isel(profile=idxs)
1633
-
1634
- #add i to filename
1635
- fname = f"{base_name}_profile_{i:03d}.nc"
1636
- out_path = os.path.join(base_dir, fname)
1637
- ds_i.to_netcdf(out_path, encoding=encoding, engine=engine)
1638
- print(f"NetCDF written per profile into: {base_dir} using engine={engine}")
1639
- else:
1640
- file_name = f"{base_name}.nc"
1641
- out_path = os.path.join(base_dir, file_name)
1642
- ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1643
- print(f"NetCDF written: {out_path} using engine={engine}")
1748
+ if not base_dir.endswith(os.sep):
1749
+ base_dir = base_dir + os.sep
1750
+
1751
+ base_name = "MVP_" + os.path.basename(self.data_path).rstrip(os.sep)
1752
+ if per_profile_files:
1753
+ # Write one file per pair (down/up)
1754
+ total_pairs = (n_prof + 1) // 2
1755
+ for i in range(total_pairs):
1756
+ idxs = [k for k in (2*i, 2*i+1) if k < n_prof]
1757
+ if not idxs:
1758
+ continue
1759
+ ds_i = ds.isel(profile=idxs)
1760
+
1761
+ #add i to filename
1762
+ fname = f"{base_name}_profile_{i:03d}.nc"
1763
+ out_path = os.path.join(base_dir, fname)
1764
+ ds_i.to_netcdf(out_path, encoding=encoding, engine=engine)
1765
+ print(f"NetCDF written per profile into: {base_dir} using engine={engine}")
1766
+ else:
1767
+ file_name = f"{base_name}.nc"
1768
+ out_path = os.path.join(base_dir, file_name)
1769
+ ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1770
+ print(f"NetCDF written: {out_path} using engine={engine}")
1771
+
1772
+
1773
def help(self):
    """
    Print every callable attribute of the instance followed by its docstring.

    Attributes whose name starts with a double underscore are skipped.
    Docstrings are de-indented before printing, and callables without a
    docstring are listed with an explicit placeholder instead of the
    literal text ``None``.
    """
    import inspect  # local import: only this method needs it

    for attr in dir(self):
        if attr.startswith("__"):
            continue
        # Fetch the attribute once; a missing attribute is simply skipped.
        member = getattr(self, attr, None)
        if not callable(member):
            continue
        doc = inspect.getdoc(member) or "(no docstring)"
        print(f"{attr}:\n{doc}\n{'-'*40}")
1644
1782
 
1645
1783
 
1646
- def help(self):
1784
def plot_MVP_transect(self,var='TEMP',l_id=None,depth_max=None,depth_min=None,vmax=None,vmin=None,cmap=None):
    """
    Plot a section of 2D interpolated MVP data along the transect.

    Args:
        var (str): Variable to plot. Choose from 'TEMP', 'COND', 'SAL', 'DO', 'FLUO', 'TURB', 'PH', 'SUNA', 'SPEED'.
        l_id (list of int): List of profile indices to include in the transect. If None, use all profiles.
        depth_max (float): Maximum depth to display in the plot. If None, use max depth in data.
        depth_min (float): Minimum depth to display in the plot. If None, use 0.
        vmax (float): Maximum value for color scale. If None, use max value in data.
        vmin (float): Minimum value for color scale. If None, use min value in data.
        cmap: Matplotlib colormap to use. If None, use 'viridis'.

    Raises:
        ValueError: If the corrected/interpolated data are not available,
            if ``var`` is not one of the recognized names, or if the
            selected profiles contain no valid sample to plot.
    """
    # hasattr is a builtin, not a method of the instance.
    if not hasattr(self, 'PRES_mvp_corr_interp'):
        raise ValueError("Corrected and interpolated MVP data not available. Apply corrections and interpolation first.")

    if l_id is None:
        l_id = list(range(self.PRES_mvp_corr_interp.shape[0]))

    # Map the public variable name onto the attribute holding its data.
    attr_by_var = {
        'TEMP': 'TEMP_mvp_corr_interp',
        'COND': 'COND_mvp_corr_interp',
        'SAL': 'SALT_mvp_corr_interp',
        'DO': 'DO_mvp_corr_interp',
        'FLUO': 'FLUO_mvp_corr_interp',
        'TURB': 'TURB_mvp_corr_interp',
        'PH': 'PH_mvp_corr_interp',
        'SUNA': 'SUNA_mvp_corr_interp',
        'SPEED': 'SPEED_mvp_corr_interp',
    }
    attr_name = attr_by_var.get(var) if isinstance(var, str) else None
    if attr_name is None:
        raise ValueError(f"Variable {var} not recognized. Choose from 'TEMP', 'COND', 'SAL', 'DO', 'FLUO', 'TURB', 'PH', 'SUNA', 'SPEED'.")
    var_name = var
    values = getattr(self, attr_name)

    P = self.PRES_mvp_corr_interp[l_id]
    lat = self.Lat_mvp_corr_interp[l_id]
    lon = self.Lon_mvp_corr_interp[l_id]
    T = values[l_id]

    if depth_max is None:
        depth_max = np.nanmax(P)
    if depth_min is None:
        depth_min = 0

    n_profiles, n_points = T.shape

    # Along-track distance (km): cumulative geodesic distance between
    # successive valid positions inside a profile; each profile starts at
    # the largest distance reached by the previous ones.
    dist_cum = 0
    dist_all = np.full(T.shape, np.nan, dtype=float)

    for i in range(n_profiles):
        dist_i = np.full(n_points, np.nan, dtype=float)
        has_position = ~(np.isnan(lat[i]) | np.isnan(lon[i]))
        last_valid = None
        for j in np.flatnonzero(has_position):
            if last_valid is None:
                dist_i[j] = 0
            else:
                dist_i[j] = dist_i[last_valid] + geodesic(
                    (lat[i,last_valid], lon[i,last_valid]),
                    (lat[i,j], lon[i,j])
                ).km
            last_valid = j

        dist_i += dist_cum
        if last_valid is not None:
            dist_cum = np.nanmax(dist_i)
        dist_all[i,:] = dist_i

    # Flatten to scattered (distance, pressure, value) samples.
    dist_flat = dist_all.flatten()
    T_flat = T.flatten()
    P_flat = P.flatten()

    # Drop samples with a missing coordinate or value; a NaN pressure
    # would otherwise be handed to the triangulation in griddata.
    mask = ~np.isnan(dist_flat) & ~np.isnan(P_flat) & ~np.isnan(T_flat)
    dist_flat = dist_flat[mask]
    P_flat = P_flat[mask]
    T_flat = T_flat[mask]

    if dist_flat.size == 0:
        raise ValueError("No valid data points to plot for the selected profiles.")

    # Regular grid used for display.
    dist_grid = np.linspace(dist_flat.min(), dist_flat.max(), 500)
    P_grid = np.linspace(depth_min, depth_max, 1200)
    DIST, PRES = np.meshgrid(dist_grid, P_grid)

    # 2D linear interpolation of the scattered samples onto the grid.
    T_grid = griddata(
        (dist_flat, P_flat),
        T_flat,
        (DIST, PRES),
        method='linear'
    )

    # Smoothing to soften the boundaries between profiles.
    T_grid_smooth = gaussian_filter(T_grid, sigma=2)

    if vmax is None:
        vmax = np.nanmax(T_grid_smooth)
    if vmin is None:
        vmin = np.nanmin(T_grid_smooth)

    # Temperature keeps its historical labels; other variables are
    # labelled with their own name instead of being shown as temperature.
    if var_name == 'TEMP':
        title = "Transect de température (interpolé)"
        cbar_label = "Température [°C]"
    else:
        title = f"Transect {var_name} (interpolé)"
        cbar_label = var_name

    if cmap is None:
        cmap = plt.get_cmap('viridis')
    fig, ax = plt.subplots(figsize=(12,6))
    pcm = ax.pcolormesh(DIST, PRES, T_grid_smooth, shading='auto', cmap=cmap, vmin=vmin, vmax=vmax)
    ax.invert_yaxis()
    ax.set_xlabel("Distance le long du transect [km]")
    ax.set_ylabel("Profondeur [m]")
    ax.set_title(title)
    cbar = plt.colorbar(pcm, ax=ax)
    cbar.set_label(cbar_label)
    plt.show()
1656
1915
 
1657
1916
 
1658
1917
  def split_ctd(pres, array):
@@ -36,12 +36,11 @@ import scipy.stats as st
36
36
  from datetime import date
37
37
  from datetime import datetime
38
38
  from scipy import interpolate
39
- from scipy.signal import butter, freqz
40
39
  from scipy import signal
41
40
  import gsw
42
- from scipy.interpolate import pchip_interpolate
41
+ from scipy.interpolate import interp1d
43
42
  from netCDF4 import Dataset
44
- from scipy.signal import butter, filtfilt, correlate, correlation_lags
43
+ from scipy.signal import butter, filtfilt, correlate, correlation_lags,savgol_filter
45
44
 
46
45
  #
47
46
  ################################################################################
@@ -993,3 +992,85 @@ def bin_average_v2(P,T,C,S,time,dp=0.05):
993
992
  np.array(C_bin),
994
993
  np.array(S_bin),
995
994
  np.array(time_bin))
995
+
996
+
997
+
998
+
999
+
1000
+
1001
+
1002
def align_profiles(P, T_ref, T_to_align_raw, min_depth=0,max_shift=20):
    """
    Align a temperature profile onto a reference profile.

    Full pipeline:
      1. estimate the pressure offset between the two profiles from the
         cross-correlation of their smoothed vertical gradients,
      2. shift the profile by that offset,
      3. estimate the remaining temperature offset (median difference),
      4. remove it.

    Args:
        P: Pressure of each sample (1D, shared by both profiles).
        T_ref: Reference temperature profile.
        T_to_align_raw: Temperature profile to align on ``T_ref``.
        min_depth: Samples with ``P < min_depth`` are ignored (surface
            exclusion) and left untouched in the output.
        max_shift: Largest pressure offset (absolute value, same unit as
            ``P``) considered plausible.

    Returns:
        tuple: ``(T_out, deltaP, deltaT)`` where ``T_out`` is a copy of
        ``T_to_align_raw`` with corrected values written wherever a
        corrected value could be computed, ``deltaP`` is the estimated
        pressure offset (positive when the features of ``T_to_align_raw``
        sit at higher pressure than in ``T_ref``) and ``deltaT`` is the
        temperature offset removed after the shift.

    Raises:
        ValueError: If fewer samples than the smoothing window remain
            after removing non-finite values and the surface layer.
    """
    window = 11  # Savitzky-Golay window length, in samples

    P = np.asarray(P, dtype=float)
    T_ref = np.asarray(T_ref, dtype=float)
    T_raw = np.asarray(T_to_align_raw, dtype=float)

    ### 1. pressure offset

    # Keep only finite samples below the excluded surface layer, and
    # remember their position so the result can be written back in place.
    usable = (
        np.isfinite(P) &
        np.isfinite(T_ref) &
        np.isfinite(T_raw) &
        (P >= min_depth)
    )
    usable_idx = np.flatnonzero(usable)
    if usable_idx.size < window:
        raise ValueError(
            f"align_profiles needs at least {window} valid samples, got {usable_idx.size}."
        )

    P_u = P[usable_idx]
    T_ref_u = T_ref[usable_idx]
    T_mov_u = T_raw[usable_idx]

    # Light smoothing, then vertical gradients.
    grad_ref = np.gradient(savgol_filter(T_ref_u, window, 2), P_u)
    grad_mov = np.gradient(savgol_filter(T_mov_u, window, 2), P_u)

    std_ref = np.std(grad_ref)
    std_mov = np.std(grad_mov)
    if std_ref > 0 and std_mov > 0:
        # Normalisation (important for the correlation).
        grad_ref = (grad_ref - np.mean(grad_ref)) / std_ref
        grad_mov = (grad_mov - np.mean(grad_mov)) / std_mov

        corr = correlate(grad_mov, grad_ref, mode='full')
        lags = correlation_lags(grad_mov.size, grad_ref.size, mode='full')

        # Convert sample lags into pressure offsets and keep plausible ones.
        shifts = lags * np.mean(np.diff(P_u))
        valid = np.abs(shifts) <= max_shift
        deltaP = shifts[valid][np.argmax(corr[valid])]
    else:
        # A gradient without any variance carries no alignment
        # information: apply no pressure shift.
        deltaP = 0.0

    ### 2. pressure shift
    # A positive lag means T_to_align(P) ~ T_ref(P - deltaP), so the
    # aligned profile is T_to_align(P + deltaP): the samples are moved to
    # P - deltaP before being re-evaluated on the original pressures.
    f = interp1d(P_u - deltaP, T_mov_u, bounds_error=False, fill_value=np.nan)
    T_shifted = f(P_u)

    ### 3. temperature offset
    overlap = np.isfinite(T_shifted)
    deltaT = np.median(T_shifted[overlap] - T_ref_u[overlap])

    ### 4. temperature correction
    T_corrected = T_shifted - deltaT

    # Work on a copy so the input profile is not modified, and write the
    # corrected values only where they are valid.
    T_out = T_raw.copy()
    ok = np.isfinite(T_corrected)
    T_out[usable_idx[ok]] = T_corrected[ok]

    return T_out, deltaP, deltaT
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyMVP
3
- Version: 0.1.9
3
+ Version: 0.2.3
4
4
  Summary: Python package for Moving Vessel Profiler correction and analysis
5
5
  Author: MaximilienWemaere
6
6
  Requires-Python: >=3.10
@@ -13,3 +13,5 @@ Requires-Dist: cartopy>=0.25.0
13
13
  Requires-Dist: scipy>=1.16.2
14
14
  Requires-Dist: xarray>=2025.9.1
15
15
  Requires-Dist: netcdf4>=1.7.2
16
+ Requires-Dist: pandas>=2.1.0
17
+ Requires-Dist: geopy>=2.4.0
@@ -6,3 +6,5 @@ cartopy>=0.25.0
6
6
  scipy>=1.16.2
7
7
  xarray>=2025.9.1
8
8
  netcdf4>=1.7.2
9
+ pandas>=2.1.0
10
+ geopy>=2.4.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "PyMVP"
3
- version = "0.1.9"
3
+ version = "0.2.3"
4
4
  description = "Python package for Moving Vessel Profiler correction and analysis"
5
5
  authors = [{name="MaximilienWemaere"}]
6
6
  readme = "README.md"
@@ -14,6 +14,9 @@ dependencies = [
14
14
  "scipy>=1.16.2",
15
15
  "xarray>=2025.9.1",
16
16
  "netcdf4>=1.7.2",
17
+ "pandas>=2.1.0",
18
+ "geopy>=2.4.0"
17
19
  ]
18
20
 
19
-
21
+ [tool.setuptools.packages.find]
22
+ include = ["PyMVP*"]
File without changes
File without changes
File without changes