PyMVP 0.2.1__tar.gz → 0.2.4__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyMVP
3
- Version: 0.2.1
3
+ Version: 0.2.4
4
4
  Summary: Python package for Moving Vessel Profiler correction and analysis
5
5
  Author: MaximilienWemaere
6
6
  Requires-Python: >=3.10
@@ -13,3 +13,5 @@ Requires-Dist: cartopy>=0.25.0
13
13
  Requires-Dist: scipy>=1.16.2
14
14
  Requires-Dist: xarray>=2025.9.1
15
15
  Requires-Dist: netcdf4>=1.7.2
16
+ Requires-Dist: pandas>=2.1.0
17
+ Requires-Dist: geopy>=2.4.0
@@ -33,6 +33,10 @@ import cartopy.feature as cfeature
33
33
  import xarray as xr
34
34
  from . import mvp_routines as mvp
35
35
  from scipy.ndimage import median_filter
36
+ from scipy.interpolate import griddata
37
+ from scipy.ndimage import gaussian_filter
38
+ from geopy.distance import geodesic
39
+ import pandas as pd
36
40
 
37
41
 
38
42
  class Analyzer:
@@ -50,6 +54,10 @@ class Analyzer:
50
54
  self.ctd = False
51
55
  self.speed = False
52
56
  self.corrected = False
57
+ self.GPS = False
58
+
59
+ def ___version___(self):
60
+ return "0.2.3"
53
61
 
54
62
 
55
63
  def load_mvp_data(self,data_path, delp=[], subdirs=False,format='raw',only_new=False, output_path=None):
@@ -106,8 +114,8 @@ class Analyzer:
106
114
  self.PH_mvp = nc['PH'].values
107
115
  self.SALT_mvp = nc['SAL'].values
108
116
  self.TIME_mvp = nc['TIME_s'].values
109
- self.LAT_mvp = nc['LATITUDE'].values
110
- self.LON_mvp = nc['LONGITUDE'].values
117
+ self.Lat_mvp = nc['LATITUDE'].values
118
+ self.Lon_mvp = nc['LONGITUDE'].values
111
119
  self.DATETIME_mvp = nc['profile_time'].values
112
120
  self.DIR = nc['direction'].values
113
121
  self.label_mvp = nc['profile'].values
@@ -285,8 +293,8 @@ class Analyzer:
285
293
  self.PH_mvp = PH_mvp
286
294
  self.SALT_mvp = SALT_mvp
287
295
  self.TIME_mvp = TIME_mvp
288
- self.LAT_mvp = LAT_mvp
289
- self.LON_mvp = LON_mvp
296
+ self.Lat_mvp = LAT_mvp
297
+ self.Lon_mvp = LON_mvp
290
298
  self.DATETIME_mvp = DATETIME_mvp
291
299
  self.DIR = DIR
292
300
  self.label_mvp = Label_mvp
@@ -333,8 +341,8 @@ class Analyzer:
333
341
  self.PH_mvp = nc['PH'].values
334
342
  self.SALT_mvp = nc['SAL'].values
335
343
  self.TIME_mvp = nc['TIME'].values
336
- self.LAT_mvp = nc['LATITUDE'].values
337
- self.LON_mvp = nc['LONGITUDE'].values
344
+ self.Lat_mvp = nc['LATITUDE'].values
345
+ self.Lon_mvp = nc['LONGITUDE'].values
338
346
  self.DATETIME_mvp = nc['profile_time'].values
339
347
  self.DIR = nc['direction'].values
340
348
  self.Label_mvp = nc['profile'].values
@@ -473,8 +481,8 @@ class Analyzer:
473
481
  self.PH_mvp = np.hstack((self.PH_mvp, nan_cols))
474
482
  self.SALT_mvp = np.hstack((self.SALT_mvp, nan_cols))
475
483
  self.TIME_mvp = np.hstack((self.TIME_mvp, nan_cols))
476
- self.LAT_mvp = np.hstack((self.LAT_mvp, nan_cols))
477
- self.LON_mvp = np.hstack((self.LON_mvp, nan_cols))
484
+ self.Lat_mvp = np.hstack((self.Lat_mvp, nan_cols))
485
+ self.Lon_mvp = np.hstack((self.Lon_mvp, nan_cols))
478
486
 
479
487
 
480
488
 
@@ -539,8 +547,8 @@ class Analyzer:
539
547
  self.PH_mvp = np.concatenate((self.PH_mvp, PH_mvp), axis=0)
540
548
  self.SALT_mvp = np.concatenate((self.SALT_mvp, SALT_mvp), axis=0)
541
549
  self.TIME_mvp = np.concatenate((self.TIME_mvp, TIME_mvp), axis=0)
542
- self.LAT_mvp = np.concatenate((self.LAT_mvp, LAT_mvp), axis=0)
543
- self.LON_mvp = np.concatenate((self.LON_mvp, LON_mvp), axis=0)
550
+ self.Lat_mvp = np.concatenate((self.Lat_mvp, LAT_mvp), axis=0)
551
+ self.Lon_mvp = np.concatenate((self.Lon_mvp, LON_mvp), axis=0)
544
552
 
545
553
  self.DATETIME_mvp.extend(DATETIME_mvp)
546
554
  self.DIR.extend(DIR)
@@ -577,7 +585,7 @@ class Analyzer:
577
585
  TEMP_ctd_temp = []
578
586
  COND_ctd_temp = []
579
587
  TURB_ctd_temp = []
580
- OXY_ctd_temp = []
588
+ DO_ctd_temp = []
581
589
  FLUO_ctd_temp = []
582
590
  CDOM_ctd_temp = []
583
591
  DATETIME_ctd = []
@@ -595,8 +603,8 @@ class Analyzer:
595
603
  SALT_ctd_temp.append(nc['SAL'].values[1])
596
604
  TURB_ctd_temp.append(nc['TURB'].values[0])
597
605
  TURB_ctd_temp.append(nc['TURB'].values[1])
598
- OXY_ctd_temp.append(nc['OXY'].values[0])
599
- OXY_ctd_temp.append(nc['OXY'].values[1])
606
+ DO_ctd_temp.append(nc['OXY'].values[0])
607
+ DO_ctd_temp.append(nc['OXY'].values[1])
600
608
  FLUO_ctd_temp.append(nc['FLUO'].values[0])
601
609
  FLUO_ctd_temp.append(nc['FLUO'].values[1])
602
610
  CDOM_ctd_temp.append(nc['CDOM'].values[0])
@@ -614,18 +622,41 @@ class Analyzer:
614
622
  self.COND_ctd = np.array(COND_ctd_temp)
615
623
  self.SALT_ctd = np.array(SALT_ctd_temp)
616
624
  self.TURB_ctd = np.array(TURB_ctd_temp)
617
- self.OXY_ctd = np.array(OXY_ctd_temp)
625
+ self.DO_ctd = np.array(DO_ctd_temp)
618
626
  self.FLUO_ctd = np.array(FLUO_ctd_temp)
619
627
  self.CDOM_ctd = np.array(CDOM_ctd_temp)
620
628
  self.LAT_ctd = np.array(LAT_ctd_temp)
621
629
  self.LON_ctd = np.array(LON_ctd_temp)
622
630
  self.DATETIME_ctd = np.array(DATETIME_ctd)
631
+ self.TIME_ctd = np.array([(np.datetime64(dt) - np.datetime64(self.date_ref)) / np.timedelta64(1, 'D') for dt in self.DATETIME_ctd])
623
632
 
624
633
 
625
634
  print('CTD data loaded successfully.')
626
635
  self.ctd = True
627
636
 
628
-
637
+ def load_GPS(self, gps_path):
638
+ """
639
+ Load GPS data from a .csv file in the gps_path.
640
+ Fills the object attributes with GPS data and associated metadata.
641
+ Args:
642
+ gps_path (str): Path to the .csv file containing GPS data.
643
+ """
644
+ self.gps_path = gps_path
645
+ gps_data = pd.read_csv(gps_path)
646
+ self.GPS_TIME = gps_data['time'].values
647
+ self.GPS_LAT = gps_data['latitude'].values
648
+ self.GPS_LON = gps_data['longitude'].values
649
+ print('GPS data loaded successfully.')
650
+ self.gps = True
651
+
652
+ self.Lon_mvp = np.zeros(( self.PRES_mvp.shape[0], self.PRES_mvp.shape[1]))
653
+ self.Lat_mvp = np.zeros(( self.PRES_mvp.shape[0], self.PRES_mvp.shape[1]))
654
+
655
+ for i in range(self.PRES_mvp.shape[0]):
656
+ self.Lon_mvp[i,:] = np.interp(self.TIME_mvp[i,:], self.GPS_TIME, self.GPS_LON.astype(float))
657
+ self.Lat_mvp[i,:] = np.interp(self.TIME_mvp[i,:], self.GPS_TIME, self.GPS_LAT.astype(float))
658
+
659
+ self.GPS = True
629
660
 
630
661
  def compute_waterflow(self,horizontal_speed=2,corr=False):
631
662
  """
@@ -657,7 +688,7 @@ class Analyzer:
657
688
  print('MVP data:')
658
689
  print('Number of profiles: ' + str(len(self.DATETIME_mvp)))
659
690
  for i in range(0,len(self.DATETIME_mvp)):
660
- print(f" Profil down {2*i} - Profil up {2*i+1} - Latitude: {self.LAT_mvp[2*i,0]:.5f}, Longitude: {self.LON_mvp[2*i,0]:.5f}, Date/Heure: {self.DATETIME_mvp[i]}")
691
+ print(f" Profil down {2*i} - Profil up {2*i+1} - Latitude: {self.Lat_mvp[2*i,0]:.5f}, Longitude: {self.Lon_mvp[2*i,0]:.5f}, Date/Heure: {self.DATETIME_mvp[i]}")
661
692
 
662
693
  if self.ctd:
663
694
  print('CTD data:')
@@ -701,8 +732,8 @@ class Analyzer:
701
732
  self.PH_mvp = self.PH_mvp[l_id,:]
702
733
  self.SALT_mvp = self.SALT_mvp[l_id,:]
703
734
  self.TIME_mvp = self.TIME_mvp[l_id,:]
704
- self.LAT_mvp = self.LAT_mvp[l_id,:]
705
- self.LON_mvp = self.LON_mvp[l_id,:]
735
+ self.Lat_mvp = self.Lat_mvp[l_id,:]
736
+ self.Lon_mvp = self.Lon_mvp[l_id,:]
706
737
  self.DATETIME_mvp = np.array(self.DATETIME_mvp)[l_id2]
707
738
  self.DIR = np.array(self.DIR)[l_id]
708
739
  self.label_mvp = np.array(self.label_mvp)[l_id]
@@ -721,7 +752,7 @@ class Analyzer:
721
752
  self.SALT_ctd = self.SALT_ctd[l_id,:]
722
753
  self.COND_ctd = self.COND_ctd[l_id,:]
723
754
  self.TURB_ctd = self.TURB_ctd[l_id,:]
724
- self.OXY_ctd = self.OXY_ctd[l_id,:]
755
+ self.DO_ctd = self.DO_ctd[l_id,:]
725
756
  self.FLUO_ctd = self.FLUO_ctd[l_id,:]
726
757
  self.CDOM_ctd = self.CDOM_ctd[l_id,:]
727
758
  self.LAT_ctd = self.LAT_ctd[l_id,:]
@@ -804,7 +835,7 @@ class Analyzer:
804
835
 
805
836
  put_label = True
806
837
  c = 0
807
- for i in range(0,self.LAT_mvp.shape[0],2):
838
+ for i in range(0,self.Lat_mvp.shape[0],2):
808
839
  if i>0:
809
840
  if self.label_mvp[i] == self.label_mvp[i-1]:
810
841
  put_label = False
@@ -812,8 +843,8 @@ class Analyzer:
812
843
  put_label = True
813
844
  c+=1
814
845
 
815
- lat = self.LAT_mvp[i,0] if self.LAT_mvp.ndim == 2 else self.LAT_mvp[i]
816
- lon = self.LON_mvp[i,0] if self.LON_mvp.ndim == 2 else self.LON_mvp[i]
846
+ lat = self.Lat_mvp[i,0] if self.Lat_mvp.ndim == 2 else self.Lat_mvp[i]
847
+ lon = self.Lon_mvp[i,0] if self.Lon_mvp.ndim == 2 else self.Lon_mvp[i]
817
848
  ax.scatter(lon, lat, color=colors[c], marker='o', label='MVP '+self.label_mvp[i] if put_label else "", transform=ccrs.PlateCarree())
818
849
 
819
850
  # CTD
@@ -895,8 +926,8 @@ class Analyzer:
895
926
  plt.plot(self.DO_mvp[id_mvp],self.PRES_mvp[id_mvp],label='MVP down')
896
927
  plt.plot(self.DO_mvp[id_mvp+1],self.PRES_mvp[id_mvp+1],label='MVP up')
897
928
  if self.ctd:
898
- plt.plot(self.OXY_ctd[id_ctd],self.PRES_ctd[id_ctd],label='CTD down')
899
- plt.plot(self.OXY_ctd[id_ctd+1],self.PRES_ctd[id_ctd+1],label='CTD up')
929
+ plt.plot(self.DO_ctd[id_ctd],self.PRES_ctd[id_ctd],label='CTD down')
930
+ plt.plot(self.DO_ctd[id_ctd+1],self.PRES_ctd[id_ctd+1],label='CTD up')
900
931
  plt.legend()
901
932
  plt.gca().invert_yaxis()
902
933
  plt.grid()
@@ -1043,7 +1074,7 @@ class Analyzer:
1043
1074
 
1044
1075
  TEMP_ctd_interp = mvp.vertical_interp(self.PRES_ctd[id_ctd1,:],self.TEMP_ctd[id_ctd1,:], pressure_grid)
1045
1076
  SALT_ctd_interp = mvp.vertical_interp(self.PRES_ctd[id_ctd1,:],self.SALT_ctd[id_ctd1,:], pressure_grid)
1046
- DO_ctd_interp = mvp.vertical_interp(self.PRES_ctd[id_ctd1,:],self.OXY_ctd[id_ctd1,:], pressure_grid)
1077
+ DO_ctd_interp = mvp.vertical_interp(self.PRES_ctd[id_ctd1,:],self.DO_ctd[id_ctd1,:], pressure_grid)
1047
1078
  COND_ctd_interp = mvp.vertical_interp(self.PRES_ctd[id_ctd1,:],self.COND_ctd[id_ctd1,:], pressure_grid)
1048
1079
 
1049
1080
  # differences study between MVP down and CTD profiles
@@ -1226,7 +1257,7 @@ class Analyzer:
1226
1257
  print(f" MVP down: {rmse_cond_down:.4f} S/m (deep: {rmse_cond_down_deep:.4f} S/m)")
1227
1258
  print(f" MVP up: {rmse_cond_up:.4f} S/m (deep: {rmse_cond_up_deep:.4f} S/m)")
1228
1259
 
1229
- def correct_oxygen(self,id_mvp=None,id_ctd=None,num_sample=500,plotting=False,):
1260
+ def correct_oxygen(self,id_mvp=None,id_ctd=None,plotting=False,):
1230
1261
  """
1231
1262
  Apply oxygen correction to MVP dissolved oxygen profiles thanks to CTD data.
1232
1263
  Args:
@@ -1248,63 +1279,65 @@ class Analyzer:
1248
1279
  id_ctd = id_mvp
1249
1280
 
1250
1281
 
1251
- # Interpolate MVP and CTD data to match pressure levels
1252
- pmin = np.nanmin(self.PRES_mvp)
1253
- pmax = np.nanmax(self.PRES_mvp)
1254
- pressure_grid = np.linspace(pmin, pmax, num_sample)
1255
-
1282
+ if hasattr(self,'DO_mvp_corr_interp') == False:
1283
+ raise ValueError("Please run the interpolation method first to create the DO_mvp_corr_interp attribute.")
1284
+
1256
1285
 
1257
- DO_mvp_interp = mvp.vertical_interp(self.PRES_mvp[id_mvp,:], self.DO_mvp[id_mvp,:], pressure_grid)
1258
- DO_ctd_interp = mvp.vertical_interp(self.PRES_ctd[id_ctd,:],self.OXY_ctd[id_ctd,:], pressure_grid)
1286
+ oxy_mvp = self.DO_mvp_corr_interp[id_mvp]
1287
+ oxy_ctd = self.DO_ctd_interp[id_ctd]
1288
+ pres = self.PRES_mvp_corr_interp[id_mvp]
1259
1289
 
1260
- mask = ~np.isnan(DO_mvp_interp) & ~np.isnan(DO_ctd_interp)
1261
- pressure_grid = pressure_grid[mask[0]]
1262
- DO_mvp_interp = DO_mvp_interp[mask]
1263
- DO_ctd_interp = DO_ctd_interp[mask]
1290
+ mask = ~np.isnan(oxy_mvp) & ~np.isnan(oxy_ctd)
1291
+ oxy_mvp = oxy_mvp[mask]
1292
+ oxy_ctd = oxy_ctd[mask]
1293
+ pres = pres[mask]
1264
1294
 
1265
- diff = DO_mvp_interp-DO_ctd_interp
1295
+ diff = oxy_mvp - oxy_ctd
1266
1296
 
1267
- A = np.vstack([pressure_grid, np.ones_like(pressure_grid)]).T
1268
- print(A.shape, diff.shape)
1297
+ A = np.vstack([oxy_ctd, np.ones(len(oxy_ctd))]).T
1269
1298
  diff = diff.flatten()
1270
- a_estime, b_estime = np.linalg.lstsq(A, diff, rcond=None)[0]
1271
-
1272
- print(f"Pente estimée (a) : {a_estime:.6f} ")
1273
- print(f"Biais estimé (b) : {b_estime:.6f} ")
1274
-
1275
- DO_mvp_corr = DO_mvp_interp - (a_estime*pressure_grid + b_estime)
1276
1299
 
1300
+ a_estim, b_estim = np.linalg.lstsq(A, diff, rcond=None)[0]
1301
+ print(f"Estimated linear relationship: diff = {a_estim:.4f} * oxy_ctd + {b_estim:.4f}")
1277
1302
 
1303
+ Do_mvp_corr = self.DO_mvp_corr_interp[id_mvp] - (a_estim * self.DO_ctd_interp[id_ctd] + b_estim)
1278
1304
 
1279
- rmse_before = np.sqrt(np.nanmean((DO_mvp_interp - DO_ctd_interp)**2))
1280
- rmse_after = np.sqrt(np.nanmean((DO_mvp_corr - DO_ctd_interp)**2))
1281
- print(f"RMSE before correction: {rmse_before:.4f}")
1282
- print(f"RMSE after correction: {rmse_after:.4f}")
1283
1305
 
1284
- DO_mvp_corr_full = self.DO_mvp - (a_estime*self.PRES_mvp + b_estime)
1285
-
1286
- DO_mvp_corr_full_interp = mvp.vertical_interp(self.PRES_mvp, DO_mvp_corr_full, pressure_grid)
1287
- rmse_after_full = np.mean(np.sqrt(np.nanmean((DO_mvp_corr_full_interp - DO_ctd_interp)**2,axis=1)))
1288
- print(f"RMSE after correction (full profile): {rmse_after_full:.4f}")
1289
-
1290
- self.DO_mvp_raw = self.DO_mvp.copy()
1291
- self.DO_mvp = DO_mvp_corr_full
1306
+ rmse_before = np.sqrt(np.nanmean((self.DO_mvp_corr_interp[id_mvp] - self.DO_ctd_interp[id_ctd])**2))
1307
+ rmse_after = np.sqrt(np.nanmean((Do_mvp_corr - self.DO_ctd_interp[id_ctd])**2))
1308
+ if plotting:
1309
+ print(f"RMSE before correction: {rmse_before:.4f}")
1310
+ print(f"RMSE after correction: {rmse_after:.4f}")
1292
1311
 
1293
1312
 
1294
1313
  if plotting:
1295
1314
 
1296
- plt.figure()
1297
- plt.plot(DO_mvp_interp,pressure_grid,label='MVP')
1298
- plt.plot(DO_ctd_interp,pressure_grid,label='CTD')
1299
- plt.plot(DO_mvp_corr,pressure_grid,label='MVP corrected')
1315
+ plt.figure(figsize=(6,8))
1316
+ plt.plot(oxy_mvp, pres, label='MVP DO')
1317
+ plt.plot(Do_mvp_corr[mask],pres,label='MVP DO corrigé')
1318
+ plt.plot(oxy_ctd, pres, label='CTD DO')
1300
1319
  plt.gca().invert_yaxis()
1301
- plt.xlabel('Dissolved Oxygen, %')
1302
- plt.ylabel('Pressure, dbar')
1303
- plt.title('Oxygen correction')
1320
+ plt.xlabel('Oxygène dissous [µmol/kg]')
1321
+ plt.ylabel('Profondeur [m]')
1322
+ plt.title(f'Profil de DO - Profil {id_mvp} MVP vs Profil {id_ctd} CTD')
1304
1323
  plt.legend()
1305
- plt.grid()
1306
1324
  plt.show()
1307
1325
 
1326
+ self.DO_mvp_corr_interp[id_mvp] = Do_mvp_corr
1327
+
1328
+
1329
+ def correct_oxygen_all(self,mode):
1330
+
1331
+
1332
+
1333
+ for id_mvp in range(0,self.PRES_mvp.shape[0]):
1334
+
1335
+ id_nearest_ctd = mvp.find_nearest_profile(self.TIME_mvp_corr_interp[id_mvp],self.Lat_mvp_corr_interp[id_mvp], self.Lon_mvp_corr_interp[id_mvp],self.TIME_ctd ,self.LAT_ctd, self.LON_ctd,mode)[0]
1336
+ print(id_mvp,id_nearest_ctd)
1337
+ self.correct_oxygen(id_mvp=id_mvp, id_ctd=id_nearest_ctd, plotting=False)
1338
+
1339
+ print("Oxygen correction applied to all MVP profiles using nearest CTD profiles.")
1340
+
1308
1341
 
1309
1342
  def mvp_correction(self,high_cutoff=1,dp=0.1):
1310
1343
 
@@ -1407,11 +1440,11 @@ class Analyzer:
1407
1440
  self.PRES_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.PRES_ctd, pressure_grid)
1408
1441
  self.COND_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.COND_ctd, pressure_grid)
1409
1442
  self.SALT_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.SALT_ctd, pressure_grid)
1410
- self.DO_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.OXY_ctd, pressure_grid)
1443
+ self.DO_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.DO_ctd, pressure_grid)
1411
1444
  self.FLUO_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.FLUO_ctd, pressure_grid)
1412
1445
  self.TURB_ctd_interp = mvp.vertical_interp(self.PRES_ctd, self.TURB_ctd, pressure_grid)
1413
1446
  self.TEMP_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, TEMP_mvp_corr_mat, pressure_grid)
1414
- self.PRES_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, PRES_mvp_corr_mat, pressure_grid)
1447
+ self.PRES_mvp_corr_interp = np.tile(pressure_grid, (PRES_mvp_corr_mat.shape[0], 1))
1415
1448
  self.COND_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, COND_mvp_corr_mat, pressure_grid)
1416
1449
  self.SALT_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SALT_mvp_corr_mat, pressure_grid)
1417
1450
  self.DO_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.DO_mvp, pressure_grid)
@@ -1419,6 +1452,9 @@ class Analyzer:
1419
1452
  self.TURB_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.TURB_mvp, pressure_grid)
1420
1453
  self.PH_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.PH_mvp, pressure_grid)
1421
1454
  self.SUNA_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.SUNA_mvp, pressure_grid)
1455
+ if self.GPS:
1456
+ self.Lat_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.Lat_mvp, pressure_grid)
1457
+ self.Lon_mvp_corr_interp = mvp.vertical_interp(self.PRES_mvp, self.Lon_mvp, pressure_grid)
1422
1458
 
1423
1459
  if self.speed:
1424
1460
  self.SPEED_mvp_corr_interp = mvp.vertical_interp(PRES_mvp_corr_mat, SPEED_mvp_corr_mat, pressure_grid)
@@ -1468,285 +1504,417 @@ class Analyzer:
1468
1504
 
1469
1505
 
1470
1506
 
1471
- def corrige_MVP_offset_on_ctd_simple(self,id_mvp,id_ctd,min_depth):
1472
- """
1473
- This function corrects the offset between the MVP and CTD profiles by aligning the temperature, conductivity profiles. It calculates the mean difference in temperature between the two profiles and applies this correction to the CTD temperature data.
1474
- id_mvp and id_ctd must be the same length as each MVP profile will be be corrected with the corresponding CTD profile. The function returns the corrected MVP temperature and conductivity profiles.
1475
- This version of the correction is less restritive than the other one, does not need the CTD aand MVP profiles to be exactly similar
1476
- We advice to choose a min_depth that avoid to take into acount the surface layer which can introduce errors.
1477
- """
1478
-
1479
- mean_temp_diff = []
1480
- mean_cond_diff = []
1481
- print("Calculating mean differences between MVP and CTD profiles before correction:")
1482
- for i in range(len(id_mvp)):
1483
- id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1484
- # Calculate the mean difference in temperature between the MVP and CTD profiles
1485
- temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1486
- mean_temp_diff.append(temp_diff)
1487
-
1488
- cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1489
- mean_cond_diff.append(cond_diff)
1490
- print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1491
- print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1492
-
1493
- for i in range(len(id_mvp)):
1494
- id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1495
-
1496
- # Calculate the mean difference in temperature between the MVP and CTD profiles
1497
- temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1498
- self.TEMP_mvp_corr_interp[id_mvp[i]] -= temp_diff
1499
-
1500
- cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1501
- self.COND_mvp_corr_interp[id_mvp[i]] -= cond_diff
1502
-
1503
-
1504
- mean_temp_diff = []
1505
- mean_cond_diff = []
1506
- print("After correction:")
1507
- for i in range(len(id_mvp)):
1508
- id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1509
-
1510
- # Calculate the mean difference in temperature between the MVP and CTD profiles
1511
- temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1512
- mean_temp_diff.append(temp_diff)
1513
-
1514
- cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1515
- mean_cond_diff.append(cond_diff)
1516
- print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1517
- print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1518
-
1507
+ def corrige_MVP_offset_on_ctd_simple(self,id_mvp,id_ctd,min_depth):
1508
+ """
1509
+ This function corrects the offset between the MVP and CTD profiles by aligning the temperature, conductivity profiles. It calculates the mean difference in temperature between the two profiles and applies this correction to the CTD temperature data.
1510
+ id_mvp and id_ctd must be the same length as each MVP profile will be be corrected with the corresponding CTD profile. The function returns the corrected MVP temperature and conductivity profiles.
1511
+ This version of the correction is less restritive than the other one, does not need the CTD aand MVP profiles to be exactly similar
1512
+ We advice to choose a min_depth that avoid to take into acount the surface layer which can introduce errors.
1513
+ """
1519
1514
 
1515
+ mean_temp_diff = []
1516
+ mean_cond_diff = []
1517
+ print("Calculating mean differences between MVP and CTD profiles before correction:")
1518
+ for i in range(len(id_mvp)):
1519
+ id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1520
+ # Calculate the mean difference in temperature between the MVP and CTD profiles
1521
+ temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1522
+ mean_temp_diff.append(temp_diff)
1520
1523
 
1524
+ cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1525
+ mean_cond_diff.append(cond_diff)
1526
+ print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1527
+ print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1521
1528
 
1522
- def to_netcdf(self, filepath, corrected=False, compression=True, engine=None, per_profile_files=False):
1523
- """
1524
- Export MVP data to a NetCDF file using xarray.
1529
+ for i in range(len(id_mvp)):
1530
+ id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1525
1531
 
1526
- Args:
1527
- filepath (str): Output NetCDF file path.
1528
- corrected (bool): Also write corrected arrays if present (*_mvp_corr). Default False.
1529
- compression (bool): Enable compression (engine dependent). Default True.
1530
- engine (str|None): One of 'netcdf4', 'h5netcdf', 'scipy'. If None, choose netcdf4.
1531
- per_profile_files (bool): If True, write one .nc per MVP cycle (two rows: down and up).
1532
- """
1533
- if not getattr(self, 'mvp', False):
1534
- raise RuntimeError("No MVP data loaded. Call load_mvp_data() first.")
1532
+ # Calculate the mean difference in temperature between the MVP and CTD profiles
1533
+ temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1534
+ self.TEMP_mvp_corr_interp[id_mvp[i]] -= temp_diff
1535
1535
 
1536
- engine = 'netcdf4' if engine is None else engine
1537
- if engine == 'scipy' and compression:
1538
- print('Warning: scipy backend does not support compression; writing without compression.')
1539
- compression = False
1536
+ cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1537
+ self.COND_mvp_corr_interp[id_mvp[i]] -= cond_diff
1540
1538
 
1541
- # Dimensions
1542
- n_prof, n_samp = self.PRES_mvp.shape
1543
1539
 
1544
- # Coordinates
1545
- profile_idx = np.arange(n_prof, dtype=np.int32)
1546
- sample_idx = np.arange(n_samp, dtype=np.int32)
1540
+ mean_temp_diff = []
1541
+ mean_cond_diff = []
1542
+ print("After correction:")
1543
+ for i in range(len(id_mvp)):
1544
+ id_valid = self.PRES_mvp_corr_interp[id_mvp[i]] >= min_depth
1547
1545
 
1548
- # Direction per profile (down/up)
1549
- direction = None
1550
- if hasattr(self, 'DIR') and len(self.DIR) == n_prof:
1551
- direction = np.array(self.DIR, dtype=object)
1552
- else:
1553
- # Fallback based on even/odd
1554
- direction = np.array(['down' if i % 2 == 0 else 'up' for i in range(n_prof)], dtype=object)
1555
-
1556
- # Per-sample time as seconds since reference origin
1557
- # TIME_mvp is in days relative to self.date_ref
1558
- time_seconds = None
1559
- if hasattr(self, 'TIME_mvp'):
1560
- time_seconds = self.TIME_mvp * 24.0 * 3600.0
1561
- else:
1562
- time_seconds = np.full((n_prof, n_samp), np.nan)
1563
-
1564
- # Per-profile datetime (one timestamp per cast pair); map using i//2
1565
- profile_time = None
1566
- if hasattr(self, 'DATETIME_mvp') and len(getattr(self, 'DATETIME_mvp', [])) > 0:
1567
- prof_times = []
1568
- for i in range(n_prof):
1569
- j = i // 2
1570
- if j < len(self.DATETIME_mvp) and self.DATETIME_mvp[j] is not None:
1571
- prof_times.append(np.datetime64(self.DATETIME_mvp[j]))
1572
- else:
1573
- prof_times.append(np.datetime64('NaT'))
1574
- profile_time = np.array(prof_times, dtype='datetime64[ns]')
1575
- else:
1576
- profile_time = np.array([np.datetime64('NaT')] * n_prof, dtype='datetime64[ns]')
1546
+ # Calculate the mean difference in temperature between the MVP and CTD profiles
1547
+ temp_diff = np.nanmean(self.TEMP_mvp_corr_interp[id_mvp[i], id_valid] - self.TEMP_ctd_interp[id_ctd[i], id_valid])
1548
+ mean_temp_diff.append(temp_diff)
1577
1549
 
1578
- # Build dataset variables safely
1579
- data_vars = {}
1550
+ cond_diff = np.nanmean(self.COND_mvp_corr_interp[id_mvp[i], id_valid] - self.COND_ctd_interp[id_ctd[i], id_valid])
1551
+ mean_cond_diff.append(cond_diff)
1552
+ print("Mean temperature difference between MVP and CTD profiles:", np.mean(mean_temp_diff))
1553
+ print("Mean conductivity difference between MVP and CTD profiles:", np.mean(mean_cond_diff))
1554
+
1580
1555
 
1581
- def add_var(var_name, arr, units=None, long_name=None):
1582
- if arr is None:
1583
- return
1584
- data_vars[var_name] = (
1585
- ('profile', 'sample'), arr,
1586
- {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1556
+
1557
+
1558
+ def to_netcdf(self, filepath, corrected=False, compression=True, engine=None, per_profile_files=False):
1559
+ """
1560
+ Export MVP data to a NetCDF file using xarray.
1561
+
1562
+ Args:
1563
+ filepath (str): Output NetCDF file path.
1564
+ corrected (bool): Also write corrected arrays if present (*_mvp_corr). Default False.
1565
+ compression (bool): Enable compression (engine dependent). Default True.
1566
+ engine (str|None): One of 'netcdf4', 'h5netcdf', 'scipy'. If None, choose netcdf4.
1567
+ per_profile_files (bool): If True, write one .nc per MVP cycle (two rows: down and up).
1568
+ """
1569
+ if not getattr(self, 'mvp', False):
1570
+ raise RuntimeError("No MVP data loaded. Call load_mvp_data() first.")
1571
+
1572
+ engine = 'netcdf4' if engine is None else engine
1573
+ if engine == 'scipy' and compression:
1574
+ print('Warning: scipy backend does not support compression; writing without compression.')
1575
+ compression = False
1576
+
1577
+ # Dimensions
1578
+ n_prof, n_samp = self.PRES_mvp.shape
1579
+
1580
+ # Coordinates
1581
+ profile_idx = np.arange(n_prof, dtype=np.int32)
1582
+ sample_idx = np.arange(n_samp, dtype=np.int32)
1583
+
1584
+ # Direction per profile (down/up)
1585
+ direction = None
1586
+ if hasattr(self, 'DIR') and len(self.DIR) == n_prof:
1587
+ direction = np.array(self.DIR, dtype=object)
1588
+ else:
1589
+ # Fallback based on even/odd
1590
+ direction = np.array(['down' if i % 2 == 0 else 'up' for i in range(n_prof)], dtype=object)
1591
+
1592
+ # Per-sample time as seconds since reference origin
1593
+ # TIME_mvp is in days relative to self.date_ref
1594
+ time_seconds = None
1595
+ if hasattr(self, 'TIME_mvp'):
1596
+ time_seconds = self.TIME_mvp * 24.0 * 3600.0
1597
+ else:
1598
+ time_seconds = np.full((n_prof, n_samp), np.nan)
1599
+
1600
+ # Per-profile datetime (one timestamp per cast pair); map using i//2
1601
+ profile_time = None
1602
+ if hasattr(self, 'DATETIME_mvp') and len(getattr(self, 'DATETIME_mvp', [])) > 0:
1603
+ prof_times = []
1604
+ for i in range(n_prof):
1605
+ j = i // 2
1606
+ if j < len(self.DATETIME_mvp) and self.DATETIME_mvp[j] is not None:
1607
+ prof_times.append(np.datetime64(self.DATETIME_mvp[j]))
1608
+ else:
1609
+ prof_times.append(np.datetime64('NaT'))
1610
+ profile_time = np.array(prof_times, dtype='datetime64[ns]')
1611
+ else:
1612
+ profile_time = np.array([np.datetime64('NaT')] * n_prof, dtype='datetime64[ns]')
1613
+
1614
+ # Build dataset variables safely
1615
+ data_vars = {}
1616
+
1617
+ def add_var(var_name, arr, units=None, long_name=None):
1618
+ if arr is None:
1619
+ return
1620
+ data_vars[var_name] = (
1621
+ ('profile', 'sample'), arr,
1622
+ {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1623
+ )
1624
+
1625
+ add_var('PRES', getattr(self, 'PRES_mvp', None), units='dbar', long_name='Sea water pressure')
1626
+ add_var('TEMP', getattr(self, 'TEMP_mvp', None), units='degC', long_name='In-situ temperature')
1627
+ add_var('COND', getattr(self, 'COND_mvp', None), units='mS/cm', long_name='Conductivity')
1628
+ add_var('SAL', getattr(self, 'SALT_mvp', None), units='psu', long_name='Practical salinity')
1629
+ add_var('SOUNDVEL', getattr(self, 'SOUNDVEL_mvp', None), units='m s-1', long_name='Sound speed')
1630
+ add_var('DO', getattr(self, 'DO_mvp', None), units='ml/L', long_name='Dissolved oxygen')
1631
+ add_var('TEMP2', getattr(self, 'TEMP2_mvp', None), units='degC', long_name='Oxygen sensor temperature')
1632
+ add_var('SUNA', getattr(self, 'SUNA_mvp', None), long_name='SUNA raw/derived')
1633
+ add_var('FLUO', getattr(self, 'FLUO_mvp', None), units='ug/L', long_name='Chl fluorescence')
1634
+ add_var('TURB', getattr(self, 'TURB_mvp', None), units='NTU', long_name='Turbidity')
1635
+ add_var('PH', getattr(self, 'PH_mvp', None), units='1', long_name='pH')
1636
+
1637
+ # Position and time arrays (2D)
1638
+ if hasattr(self, 'LAT_mvp'):
1639
+ add_var('LATITUDE', self.Lat_mvp, units='degrees_north', long_name='Latitude at sample')
1640
+ if hasattr(self, 'LON_mvp'):
1641
+ add_var('LONGITUDE', self.Lon_mvp, units='degrees_east', long_name='Longitude at sample')
1642
+ # Time seconds since reference
1643
+ data_vars['TIME'] = (
1644
+ ('profile', 'sample'), time_seconds,
1645
+ {
1646
+ 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1647
+ 'long_name': 'Time at sample'
1648
+ }
1587
1649
  )
1588
-
1589
- add_var('PRES', getattr(self, 'PRES_mvp', None), units='dbar', long_name='Sea water pressure')
1590
- add_var('TEMP', getattr(self, 'TEMP_mvp', None), units='degC', long_name='In-situ temperature')
1591
- add_var('COND', getattr(self, 'COND_mvp', None), units='mS/cm', long_name='Conductivity')
1592
- add_var('SAL', getattr(self, 'SALT_mvp', None), units='psu', long_name='Practical salinity')
1593
- add_var('SOUNDVEL', getattr(self, 'SOUNDVEL_mvp', None), units='m s-1', long_name='Sound speed')
1594
- add_var('DO', getattr(self, 'DO_mvp', None), units='ml/L', long_name='Dissolved oxygen')
1595
- add_var('TEMP2', getattr(self, 'TEMP2_mvp', None), units='degC', long_name='Oxygen sensor temperature')
1596
- add_var('SUNA', getattr(self, 'SUNA_mvp', None), long_name='SUNA raw/derived')
1597
- add_var('FLUO', getattr(self, 'FLUO_mvp', None), units='ug/L', long_name='Chl fluorescence')
1598
- add_var('TURB', getattr(self, 'TURB_mvp', None), units='NTU', long_name='Turbidity')
1599
- add_var('PH', getattr(self, 'PH_mvp', None), units='1', long_name='pH')
1600
-
1601
- # Position and time arrays (2D)
1602
- if hasattr(self, 'LAT_mvp'):
1603
- add_var('LATITUDE', self.LAT_mvp, units='degrees_north', long_name='Latitude at sample')
1604
- if hasattr(self, 'LON_mvp'):
1605
- add_var('LONGITUDE', self.LON_mvp, units='degrees_east', long_name='Longitude at sample')
1606
- # Time seconds since reference
1607
- data_vars['TIME'] = (
1608
- ('profile', 'sample'), time_seconds,
1609
- {
1610
- 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1611
- 'long_name': 'Time at sample'
1612
- }
1613
- )
1614
1650
 
1615
- # Include corrected arrays if requested and present
1616
- if corrected:
1617
- def add_corr(name, attr, units=None, long_name=None):
1618
- if hasattr(self, attr):
1619
- data_vars[name] = (
1620
- ('profile', 'sample'), getattr(self, attr),
1621
- {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1651
+ # Include corrected arrays if requested and present
1652
+ if corrected:
1653
+ def add_corr(name, attr, units=None, long_name=None):
1654
+ if hasattr(self, attr):
1655
+ data_vars[name] = (
1656
+ ('profile', 'sample'), getattr(self, attr),
1657
+ {k: v for k, v in [('units', units), ('long_name', long_name)] if v is not None}
1658
+ )
1659
+ add_corr('pressure_corrected', 'PRES_mvp_corr', units='dbar', long_name='Corrected pressure')
1660
+ add_corr('temperature_corrected', 'TEMP_mvp_corr', units='degC', long_name='Corrected temperature')
1661
+ add_corr('conductivity_corrected', 'COND_mvp_corr', units='mS/cm', long_name='Corrected conductivity')
1662
+ add_corr('salinity_corrected', 'SALT_mvp_corr', units='psu', long_name='Corrected salinity')
1663
+ if hasattr(self, 'TIME_mvp_corr'):
1664
+ data_vars['time_corrected'] = (
1665
+ ('profile', 'sample'), self.TIME_mvp_corr * 24.0 * 3600.0,
1666
+ {
1667
+ 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1668
+ 'long_name': 'Corrected time at sample'
1669
+ }
1622
1670
  )
1623
- add_corr('pressure_corrected', 'PRES_mvp_corr', units='dbar', long_name='Corrected pressure')
1624
- add_corr('temperature_corrected', 'TEMP_mvp_corr', units='degC', long_name='Corrected temperature')
1625
- add_corr('conductivity_corrected', 'COND_mvp_corr', units='mS/cm', long_name='Corrected conductivity')
1626
- add_corr('salinity_corrected', 'SALT_mvp_corr', units='psu', long_name='Corrected salinity')
1627
- if hasattr(self, 'TIME_mvp_corr'):
1628
- data_vars['time_corrected'] = (
1629
- ('profile', 'sample'), self.TIME_mvp_corr * 24.0 * 3600.0,
1630
- {
1631
- 'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1632
- 'long_name': 'Corrected time at sample'
1633
- }
1671
+ if hasattr(self, 'LAT_mvp_corr'):
1672
+ add_corr('latitude_corrected', 'LAT_mvp_corr', units='degrees_north', long_name='Corrected latitude at sample')
1673
+ if hasattr(self, 'LON_mvp_corr'):
1674
+ add_corr('longitude_corrected', 'LON_mvp_corr', units='degrees_east', long_name='Corrected longitude at sample')
1675
+
1676
+ # Coordinates and auxiliary per-profile variables
1677
+ coords = {
1678
+ 'profile': ('profile', profile_idx),
1679
+ 'sample': ('sample', sample_idx)
1680
+ }
1681
+
1682
+ # Encode direction/time according to engine capabilities
1683
+ if engine in ('netcdf4', 'h5netcdf'):
1684
+ coords['direction'] = ('profile', direction.astype('U'), {'long_name': 'Profile direction'})
1685
+ coords['profile_time'] = ('profile', profile_time, {'long_name': 'Profile nominal time'})
1686
+ else:
1687
+ # scipy backend: avoid object strings and datetime; use numeric fallbacks
1688
+ dir_flag = np.where(direction.astype('U') == 'down', 0, 1).astype('int8')
1689
+ coords['direction_flag'] = (
1690
+ 'profile', dir_flag, {'long_name': 'Profile direction (0=down,1=up)'}
1691
+ )
1692
+ ref = np.datetime64(self.date_ref)
1693
+ pt = profile_time.astype('datetime64[s]')
1694
+ mask = (pt == np.datetime64('NaT'))
1695
+ secs = (pt - ref).astype('timedelta64[s]').astype('float64')
1696
+ secs[mask] = np.nan
1697
+ coords['profile_time_sec'] = (
1698
+ 'profile', secs,
1699
+ {'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1700
+ 'long_name': 'Profile nominal time'}
1634
1701
  )
1635
- if hasattr(self, 'LAT_mvp_corr'):
1636
- add_corr('latitude_corrected', 'LAT_mvp_corr', units='degrees_north', long_name='Corrected latitude at sample')
1637
- if hasattr(self, 'LON_mvp_corr'):
1638
- add_corr('longitude_corrected', 'LON_mvp_corr', units='degrees_east', long_name='Corrected longitude at sample')
1639
-
1640
- # Coordinates and auxiliary per-profile variables
1641
- coords = {
1642
- 'profile': ('profile', profile_idx),
1643
- 'sample': ('sample', sample_idx)
1644
- }
1645
-
1646
- # Encode direction/time according to engine capabilities
1647
- if engine in ('netcdf4', 'h5netcdf'):
1648
- coords['direction'] = ('profile', direction.astype('U'), {'long_name': 'Profile direction'})
1649
- coords['profile_time'] = ('profile', profile_time, {'long_name': 'Profile nominal time'})
1650
- else:
1651
- # scipy backend: avoid object strings and datetime; use numeric fallbacks
1652
- dir_flag = np.where(direction.astype('U') == 'down', 0, 1).astype('int8')
1653
- coords['direction_flag'] = (
1654
- 'profile', dir_flag, {'long_name': 'Profile direction (0=down,1=up)'}
1655
- )
1656
- ref = np.datetime64(self.date_ref)
1657
- pt = profile_time.astype('datetime64[s]')
1658
- mask = (pt == np.datetime64('NaT'))
1659
- secs = (pt - ref).astype('timedelta64[s]').astype('float64')
1660
- secs[mask] = np.nan
1661
- coords['profile_time_sec'] = (
1662
- 'profile', secs,
1663
- {'units': f'seconds since {self.date_ref.strftime("%Y-%m-%d %H:%M:%S")}',
1664
- 'long_name': 'Profile nominal time'}
1665
- )
1666
1702
 
1667
- # Optional per-profile lat/lon (first valid sample)
1668
- def first_valid(vec):
1669
- # vec shape (n_prof, n_samp)
1670
- out = np.full((vec.shape[0],), np.nan)
1671
- for i in range(vec.shape[0]):
1672
- row = vec[i]
1673
- j = np.where(~np.isnan(row))[0]
1674
- if j.size:
1675
- out[i] = row[j[0]]
1676
- return out
1677
-
1678
- if hasattr(self, 'LAT_mvp'):
1679
- coords['profile_lat'] = (
1680
- 'profile', first_valid(self.LAT_mvp), {'units': 'degrees_north', 'long_name': 'Profile latitude'}
1681
- )
1682
- if hasattr(self, 'LON_mvp'):
1683
- coords['profile_lon'] = (
1684
- 'profile', first_valid(self.LON_mvp), {'units': 'degrees_east', 'long_name': 'Profile longitude'}
1685
- )
1703
+ # Optional per-profile lat/lon (first valid sample)
1704
+ def first_valid(vec):
1705
+ # vec shape (n_prof, n_samp)
1706
+ out = np.full((vec.shape[0],), np.nan)
1707
+ for i in range(vec.shape[0]):
1708
+ row = vec[i]
1709
+ j = np.where(~np.isnan(row))[0]
1710
+ if j.size:
1711
+ out[i] = row[j[0]]
1712
+ return out
1713
+
1714
+ if hasattr(self, 'LAT_mvp'):
1715
+ coords['profile_lat'] = (
1716
+ 'profile', first_valid(self.Lat_mvp), {'units': 'degrees_north', 'long_name': 'Profile latitude'}
1717
+ )
1718
+ if hasattr(self, 'LON_mvp'):
1719
+ coords['profile_lon'] = (
1720
+ 'profile', first_valid(self.Lon_mvp), {'units': 'degrees_east', 'long_name': 'Profile longitude'}
1721
+ )
1686
1722
 
1687
- # Global attributes
1688
- attrs = {
1689
- 'title': 'MVP profile data',
1690
- 'Conventions': 'CF-1.8',
1691
- 'institution': 'LMD/CNRS',
1692
- 'source': 'PyMVP',
1693
- 'history': f"Created on {datetime.now().isoformat()}",
1694
- 'mvp_Yorig': int(self.Yorig)
1695
- }
1696
-
1697
- ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
1698
-
1699
- # Compression encoding per engine
1700
- encoding = None
1701
- if compression:
1702
- if engine == 'netcdf4':
1703
- encoding = {name: {'zlib': True, 'complevel': 4} for name in data_vars.keys()}
1704
- elif engine == 'h5netcdf':
1705
- encoding = {name: {'compression': 'gzip', 'compression_opts': 4} for name in data_vars.keys()}
1706
-
1707
-
1708
- if (not per_profile_files) and filepath.lower().endswith('.nc'):
1709
- out_path = filepath
1710
- ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1711
- print(f"NetCDF written: {out_path} using engine={engine}")
1712
- return
1713
- base_dir = filepath
1714
-
1715
- if not base_dir.endswith(os.sep):
1716
- base_dir = base_dir + os.sep
1717
-
1718
- base_name = "MVP_" + os.path.basename(self.data_path).rstrip(os.sep)
1719
- if per_profile_files:
1720
- # Write one file per pair (down/up)
1721
- total_pairs = (n_prof + 1) // 2
1722
- for i in range(total_pairs):
1723
- idxs = [k for k in (2*i, 2*i+1) if k < n_prof]
1724
- if not idxs:
1725
- continue
1726
- ds_i = ds.isel(profile=idxs)
1727
-
1728
- #add i to filename
1729
- fname = f"{base_name}_profile_{i:03d}.nc"
1730
- out_path = os.path.join(base_dir, fname)
1731
- ds_i.to_netcdf(out_path, encoding=encoding, engine=engine)
1732
- print(f"NetCDF written per profile into: {base_dir} using engine={engine}")
1733
- else:
1734
- file_name = f"{base_name}.nc"
1735
- out_path = os.path.join(base_dir, file_name)
1736
- ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1737
- print(f"NetCDF written: {out_path} using engine={engine}")
1723
+ # Global attributes
1724
+ attrs = {
1725
+ 'title': 'MVP profile data',
1726
+ 'Conventions': 'CF-1.8',
1727
+ 'institution': 'LMD/CNRS',
1728
+ 'source': 'PyMVP',
1729
+ 'history': f"Created on {datetime.now().isoformat()}",
1730
+ 'mvp_Yorig': int(self.Yorig)
1731
+ }
1732
+
1733
+ ds = xr.Dataset(data_vars=data_vars, coords=coords, attrs=attrs)
1738
1734
 
1735
+ # Compression encoding per engine
1736
+ encoding = None
1737
+ if compression:
1738
+ if engine == 'netcdf4':
1739
+ encoding = {name: {'zlib': True, 'complevel': 4} for name in data_vars.keys()}
1740
+ elif engine == 'h5netcdf':
1741
+ encoding = {name: {'compression': 'gzip', 'compression_opts': 4} for name in data_vars.keys()}
1739
1742
 
1740
- def help(self):
1743
+
1744
+ if (not per_profile_files) and filepath.lower().endswith('.nc'):
1745
+ out_path = filepath
1746
+ ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1747
+ print(f"NetCDF written: {out_path} using engine={engine}")
1748
+ return
1749
+ base_dir = filepath
1750
+
1751
+ if not base_dir.endswith(os.sep):
1752
+ base_dir = base_dir + os.sep
1753
+
1754
+ base_name = "MVP_" + os.path.basename(self.data_path).rstrip(os.sep)
1755
+ if per_profile_files:
1756
+ # Write one file per pair (down/up)
1757
+ total_pairs = (n_prof + 1) // 2
1758
+ for i in range(total_pairs):
1759
+ idxs = [k for k in (2*i, 2*i+1) if k < n_prof]
1760
+ if not idxs:
1761
+ continue
1762
+ ds_i = ds.isel(profile=idxs)
1763
+
1764
+ #add i to filename
1765
+ fname = f"{base_name}_profile_{i:03d}.nc"
1766
+ out_path = os.path.join(base_dir, fname)
1767
+ ds_i.to_netcdf(out_path, encoding=encoding, engine=engine)
1768
+ print(f"NetCDF written per profile into: {base_dir} using engine={engine}")
1769
+ else:
1770
+ file_name = f"{base_name}.nc"
1771
+ out_path = os.path.join(base_dir, file_name)
1772
+ ds.to_netcdf(out_path, encoding=encoding, engine=engine)
1773
+ print(f"NetCDF written: {out_path} using engine={engine}")
1774
+
1775
+
1776
def help(self):
    """
    List the public callables of this object with their docstrings.

    Every attribute reported by ``dir(self)`` whose name does not start
    with a double underscore and whose value is callable is printed as
    its name, its ``__doc__`` (``None`` when there is no docstring) and
    a separator line made of 40 dashes.
    """
    separator = '-'*40
    for name in dir(self):
        member = getattr(self, name)
        # Skip dunder names and plain data attributes
        if name.startswith("__") or not callable(member):
            continue
        print(f"{name}:\n{member.__doc__}\n{separator}")
1785
+
1786
+
1787
def plot_MVP_transect(self,var='TEMP',l_id=None,depth_max=None,depth_min=None,vmax=None,vmin=None,cmap=None):
    """
    Plot a section of 2D interpolated MVP data.

    The selected profiles are placed on a cumulative along-track distance
    axis (geodesic distance between successive valid positions inside each
    profile), interpolated linearly onto a regular 500 x 1200
    (distance, pressure) grid, smoothed with a Gaussian filter
    (sigma = 2 grid cells) and drawn with ``pcolormesh``.

    Args:
        var (str): Variable to plot. Choose from 'TEMP', 'COND', 'SAL', 'DO', 'FLUO', 'TURB', 'PH', 'SUNA', 'SPEED'.
        l_id (list of int): List of profile indices to include in the transect. If None, use all profiles.
        depth_max (float): Maximum depth to display in the plot. If None, use max pressure in the selected data.
        depth_min (float): Minimum depth to display in the plot. If None, use 0.
        vmax (float): Maximum value for color scale. If None, use max value of the smoothed grid.
        vmin (float): Minimum value for color scale. If None, use min value of the smoothed grid.
        cmap: Matplotlib colormap to use. If None, use 'viridis'.

    Raises:
        ValueError: If the corrected/interpolated data are not available,
            if ``var`` is not a recognized name, or if the selection holds
            no sample with a valid position, pressure and value.
    """

    if not hasattr(self, 'PRES_mvp_corr_interp'):
        raise ValueError("Corrected and interpolated MVP data not available. Apply corrections and interpolation first.")

    if l_id is None:
        l_id = list(range(self.PRES_mvp_corr_interp.shape[0]))

    # Public variable name -> attribute holding the interpolated array
    field_attrs = {
        'TEMP': 'TEMP_mvp_corr_interp',
        'COND': 'COND_mvp_corr_interp',
        'SAL': 'SALT_mvp_corr_interp',
        'DO': 'DO_mvp_corr_interp',
        'FLUO': 'FLUO_mvp_corr_interp',
        'TURB': 'TURB_mvp_corr_interp',
        'PH': 'PH_mvp_corr_interp',
        'SUNA': 'SUNA_mvp_corr_interp',
        'SPEED': 'SPEED_mvp_corr_interp',
    }
    if var not in field_attrs:
        raise ValueError(f"Variable {var} not recognized. Choose from 'TEMP', 'COND', 'SAL', 'DO', 'FLUO', 'TURB', 'PH', 'SUNA', 'SPEED'.")

    # Keep the name apart from the data: the name is needed later for the
    # title and colorbar label, which must not show an array repr.
    var_name = var
    field = getattr(self, field_attrs[var_name])

    P = self.PRES_mvp_corr_interp[l_id]
    lat = self.Lat_mvp_corr_interp[l_id]
    lon = self.Lon_mvp_corr_interp[l_id]
    T = field[l_id]

    if depth_max is None:
        depth_max = np.nanmax(P)
    if depth_min is None:
        depth_min = 0

    n_profiles, n_points = T.shape

    # Cumulative along-track distance [km]; samples without a valid
    # position stay NaN and are dropped before gridding.
    # NOTE(review): the jump between the last position of one profile and
    # the first position of the next one is not added to the distance.
    dist_all = np.full((n_profiles, n_points), np.nan, dtype=float)
    dist_cum = 0.0
    for i in range(n_profiles):
        valid_idx = np.flatnonzero(~(np.isnan(lat[i]) | np.isnan(lon[i])))
        if valid_idx.size == 0:
            continue
        running = dist_cum
        dist_all[i, valid_idx[0]] = running
        for prev, cur in zip(valid_idx[:-1], valid_idx[1:]):
            running += geodesic(
                (lat[i, prev], lon[i, prev]),
                (lat[i, cur], lon[i, cur])
            ).km
            dist_all[i, cur] = running
        dist_cum = running

    # Flatten and keep only samples usable as scattered interpolation
    # points: both coordinates (distance, pressure) and the value must be
    # defined, since NaN coordinates cannot be triangulated.
    dist_flat = dist_all.flatten()
    T_flat = T.flatten()
    P_flat = P.flatten()

    mask = ~np.isnan(dist_flat) & ~np.isnan(P_flat) & ~np.isnan(T_flat)
    dist_flat = dist_flat[mask]
    P_flat = P_flat[mask]
    T_flat = T_flat[mask]

    if dist_flat.size == 0:
        raise ValueError("No valid samples (position, pressure and value) in the selected profiles.")

    # Regular target grid
    dist_grid = np.linspace(dist_flat.min(), dist_flat.max(), 500)
    P_grid = np.linspace(depth_min, depth_max, 1200)
    DIST, PRES = np.meshgrid(dist_grid, P_grid)

    # 2D linear interpolation; cells outside the data hull come back NaN
    T_grid = griddata(
        (dist_flat, P_flat),
        T_flat,
        (DIST, PRES),
        method='linear'
    )

    # Smoothing to soften the boundaries. A plain Gaussian filter would
    # spread the NaN cells into valid data, so smooth the zero-filled
    # field and renormalize by the smoothed validity mask.
    valid = np.isfinite(T_grid)
    weights = gaussian_filter(valid.astype(float), sigma=2)
    filled = gaussian_filter(np.where(valid, T_grid, 0.0), sigma=2)
    with np.errstate(invalid='ignore', divide='ignore'):
        T_grid_smooth = np.where(valid, filled / weights, np.nan)

    if vmax is None:
        vmax = np.nanmax(T_grid_smooth)
    if vmin is None:
        vmin = np.nanmin(T_grid_smooth)

    # Plot
    if cmap is None:
        cmap = plt.get_cmap('viridis')
    fig, ax = plt.subplots(figsize=(12,6))
    pcm = ax.pcolormesh(DIST, PRES, T_grid_smooth, shading='auto', cmap=cmap, vmin=vmin, vmax=vmax)
    ax.invert_yaxis()
    ax.set_xlabel("Distance le long du transect [km]")
    ax.set_ylabel("Profondeur [m]")
    ax.set_title(f"{var_name} transect (interpolated)")
    cbar = plt.colorbar(pcm, ax=ax)
    cbar.set_label(f"{var_name} (units)")
    plt.show()
1750
1918
 
1751
1919
 
1752
1920
  def split_ctd(pres, array):
@@ -1074,3 +1074,55 @@ def align_profiles(P, T_ref, T_to_align_raw, min_depth=0,max_shift=20):
1074
1074
  T_out[T_out_indices[mask_corrected]] = T_corrected[mask_corrected]
1075
1075
 
1076
1076
  return T_out, deltaP, deltaT
1077
+
1078
+
1079
+
1080
+
1081
def find_nearest_profile(time_mvp,Lat_mvp,Lon_mvp,time_ctd,Lat_ctd,Lon_ctd,mode):
    """
    Find the CTD profile closest to one MVP cycle, in space or in time.

    The MVP cycle is represented by its middle sample (index ``len // 2``).

    Args:
        time_mvp: 1D sequence of MVP sample times (used when mode='Time').
        Lat_mvp, Lon_mvp: 1D sequences of MVP positions in degrees
            (used when mode='Dist').
        time_ctd: 2D array indexed as (profile, sample); the last sample of
            each profile is compared with the MVP time (mode='Time').
        Lat_ctd, Lon_ctd: per-profile position arrays in degrees; the first
            sample where both are finite represents the profile (mode='Dist').
        mode (str): 'Dist' for the haversine distance, 'Time' for the
            absolute time difference.

    Returns:
        tuple: ``(nearest_index, min_value)``. ``min_value`` is the distance
        in meters on a sphere of radius 6371 km ('Dist'), or the absolute
        time difference in the units of the inputs ('Time'). When no CTD
        profile yields a comparable value (e.g. the MVP reference is NaN or
        every candidate is NaN) the result is ``(-1, inf)``.

    Raises:
        ValueError: If ``mode`` is neither 'Dist' nor 'Time'.
    """

    if mode=='Dist':
        idx = len(Lat_mvp)//2
        lat_ref = np.radians(Lat_mvp[idx])
        lon_ref = np.radians(Lon_mvp[idx])

        R = 6371.0  # sphere radius in km

        min_dist = np.inf
        nearest_index = -1

        for i in range(len(Lat_ctd)):
            lat = np.atleast_1d(np.radians(np.asarray(Lat_ctd[i], dtype=float)))
            lon = np.atleast_1d(np.radians(np.asarray(Lon_ctd[i], dtype=float)))
            valid = np.flatnonzero(np.isfinite(lat) & np.isfinite(lon))
            if valid.size == 0:
                # Profile without any usable position: it cannot be the
                # nearest one, so skip it instead of failing on lat[0].
                continue
            lat, lon = lat[valid[0]], lon[valid[0]]

            # Haversine formula
            dlon = lon - lon_ref
            dlat = lat - lat_ref
            a = np.sin(dlat / 2)**2 + np.cos(lat_ref) * np.cos(lat) * np.sin(dlon / 2)**2
            c = 2 * np.arctan2(np.sqrt(a), np.sqrt(1 - a))
            dist = R * c * 1e3  # Convert to meters
            # A NaN distance never compares smaller, so it is ignored
            if dist < min_dist:
                min_dist = dist
                nearest_index = i

        return nearest_index, min_dist

    elif mode=='Time':
        time_ref = time_mvp[len(time_mvp)//2]  # Take the middle time of the MVP cycle as reference

        min_time_diff = np.inf
        nearest_index = -1
        for i in range(len(time_ctd)):
            time_diff = np.abs(time_ctd[i,-1] - time_ref)
            if time_diff < min_time_diff:
                min_time_diff = time_diff
                nearest_index = i
        return nearest_index, min_time_diff

    else:
        raise ValueError("Mode should be 'Dist' or 'Time'")
1126
+
1127
+
1128
+
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: PyMVP
3
- Version: 0.2.1
3
+ Version: 0.2.4
4
4
  Summary: Python package for Moving Vessel Profiler correction and analysis
5
5
  Author: MaximilienWemaere
6
6
  Requires-Python: >=3.10
@@ -13,3 +13,5 @@ Requires-Dist: cartopy>=0.25.0
13
13
  Requires-Dist: scipy>=1.16.2
14
14
  Requires-Dist: xarray>=2025.9.1
15
15
  Requires-Dist: netcdf4>=1.7.2
16
+ Requires-Dist: pandas>=2.1.0
17
+ Requires-Dist: geopy>=2.4.0
@@ -6,3 +6,5 @@ cartopy>=0.25.0
6
6
  scipy>=1.16.2
7
7
  xarray>=2025.9.1
8
8
  netcdf4>=1.7.2
9
+ pandas>=2.1.0
10
+ geopy>=2.4.0
@@ -1,6 +1,6 @@
1
1
  [project]
2
2
  name = "PyMVP"
3
- version = "0.2.1"
3
+ version = "0.2.4"
4
4
  description = "Python package for Moving Vessel Profiler correction and analysis"
5
5
  authors = [{name="MaximilienWemaere"}]
6
6
  readme = "README.md"
@@ -14,6 +14,9 @@ dependencies = [
14
14
  "scipy>=1.16.2",
15
15
  "xarray>=2025.9.1",
16
16
  "netcdf4>=1.7.2",
17
+ "pandas>=2.1.0",
18
+ "geopy>=2.4.0"
17
19
  ]
18
20
 
19
-
21
+ [tool.setuptools.packages.find]
22
+ include = ["PyMVP*"]
File without changes
File without changes
File without changes