masster 0.3.18-py3-none-any.whl → 0.3.20-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster has been flagged as possibly problematic.

Files changed (31)
  1. masster/__init__.py +2 -0
  2. masster/_version.py +1 -1
  3. masster/data/libs/README.md +17 -0
  4. masster/data/libs/ccm.py +533 -0
  5. masster/data/libs/central_carbon_README.md +17 -0
  6. masster/data/libs/central_carbon_metabolites.csv +120 -0
  7. masster/data/libs/urine.py +333 -0
  8. masster/data/libs/urine_metabolites.csv +51 -0
  9. masster/sample/h5.py +1 -1
  10. masster/sample/helpers.py +3 -7
  11. masster/sample/lib.py +32 -25
  12. masster/sample/load.py +9 -3
  13. masster/sample/plot.py +113 -27
  14. masster/study/export.py +27 -10
  15. masster/study/h5.py +58 -40
  16. masster/study/helpers.py +450 -196
  17. masster/study/helpers_optimized.py +5 -5
  18. masster/study/load.py +144 -118
  19. masster/study/plot.py +691 -277
  20. masster/study/processing.py +9 -5
  21. masster/study/study.py +6 -6
  22. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/METADATA +1 -1
  23. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/RECORD +31 -25
  24. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
  25. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  26. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  27. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  28. /masster/data/{examples → wiff}/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  29. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/WHEEL +0 -0
  30. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/entry_points.txt +0 -0
  31. {masster-0.3.18.dist-info → masster-0.3.20.dist-info}/licenses/LICENSE +0 -0
masster/study/helpers.py CHANGED
@@ -6,7 +6,7 @@ like data retrieval, filtering, compression, and utility functions.
 
 The functions are organized into the following sections:
 1. Chromatogram extraction functions (BPC, TIC, EIC, chrom matrix)
-2. Data retrieval helper functions (get_sample, get_consensus, etc.)
+2. Data retrieval helper functions (get_sample, get_consensus, etc.)
 3. UID helper functions (_get_*_uids)
 4. Data filtering and selection functions
 5. Data compression and restoration functions
@@ -150,9 +150,19 @@ def get_bpc(owner, sample=None, rt_unit="s", label=None, original=False):
     # build Chromatogram
     ycol = "inty"
     try:
-        chrom = Chromatogram(rt=bpc_pd["rt"].to_numpy(), inty=bpc_pd[ycol].to_numpy(), label=label or "Base Peak Chromatogram", rt_unit=rt_unit)
+        chrom = Chromatogram(
+            rt=bpc_pd["rt"].to_numpy(),
+            inty=bpc_pd[ycol].to_numpy(),
+            label=label or "Base Peak Chromatogram",
+            rt_unit=rt_unit,
+        )
     except Exception:
-        chrom = Chromatogram(rt=bpc_pd["rt"].values, inty=bpc_pd[ycol].values, label=label or "Base Peak Chromatogram", rt_unit=rt_unit)
+        chrom = Chromatogram(
+            rt=bpc_pd["rt"].values,
+            inty=bpc_pd[ycol].values,
+            label=label or "Base Peak Chromatogram",
+            rt_unit=rt_unit,
+        )
 
     return chrom
 
@@ -204,13 +214,21 @@ def get_tic(owner, sample=None, label=None):
     tic_pd = tic_pd.rename(columns={tic_pd.columns[1]: "inty_tot"})
 
     try:
-        chrom = Chromatogram(rt=tic_pd["rt"].to_numpy(), inty=tic_pd["inty_tot"].to_numpy(), label=label or "Total Ion Chromatogram")
+        chrom = Chromatogram(
+            rt=tic_pd["rt"].to_numpy(),
+            inty=tic_pd["inty_tot"].to_numpy(),
+            label=label or "Total Ion Chromatogram",
+        )
     except Exception:
-        chrom = Chromatogram(rt=tic_pd["rt"].values, inty=tic_pd["inty_tot"].values, label=label or "Total Ion Chromatogram")
+        chrom = Chromatogram(
+            rt=tic_pd["rt"].values,
+            inty=tic_pd["inty_tot"].values,
+            label=label or "Total Ion Chromatogram",
+        )
 
     return chrom
 
-
+
 def get_eic(owner, sample=None, mz=None, mz_tol=None, rt_unit="s", label=None):
     """
     Return a Chromatogram object containing the Extracted Ion Chromatogram (EIC) for a target m/z.
@@ -223,7 +241,7 @@ def get_eic(owner, sample=None, mz=None, mz_tol=None, rt_unit="s", label=None):
 
     Parameters:
         owner: Study or Sample instance
-        sample: Sample identifier (required if owner is Study)
+        sample: Sample identifier (required if owner is Study)
         mz (float): Target m/z value
         mz_tol (float): m/z tolerance. If None, uses owner.parameters.eic_mz_tol (for Study) or defaults to 0.01
         rt_unit (str): Retention time unit for the chromatogram
@@ -234,7 +252,7 @@ def get_eic(owner, sample=None, mz=None, mz_tol=None, rt_unit="s", label=None):
     """
     # Use default mz_tol from study parameters if not provided
     if mz_tol is None:
-        if hasattr(owner, 'parameters') and hasattr(owner.parameters, 'eic_mz_tol'):
+        if hasattr(owner, "parameters") and hasattr(owner.parameters, "eic_mz_tol"):
            mz_tol = owner.parameters.eic_mz_tol
        else:
            mz_tol = 0.01  # fallback default
@@ -267,17 +285,18 @@ def get_eic(owner, sample=None, mz=None, mz_tol=None, rt_unit="s", label=None):
     mz_min = mz - mz_tol
     mz_max = mz + mz_tol
     eic_data = s.ms1_df.filter(
-        (pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max)
+        (pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max),
     )
 
     if eic_data.is_empty():
         # Return empty chromatogram if no data found
         import numpy as _np
+
         return Chromatogram(
-            rt=_np.array([0.0]),
-            inty=_np.array([0.0]),
+            rt=_np.array([0.0]),
+            inty=_np.array([0.0]),
             label=label or f"EIC m/z={mz:.4f} ± {mz_tol} (empty)",
-            rt_unit=rt_unit
+            rt_unit=rt_unit,
         )
 
     # Aggregate intensities per retention time (sum in case of multiple points per rt)
@@ -290,34 +309,35 @@ def get_eic(owner, sample=None, mz=None, mz_tol=None, rt_unit="s", label=None):
     if eic_pd.empty:
         # Return empty chromatogram if no data found
         import numpy as _np
+
         return Chromatogram(
-            rt=_np.array([0.0]),
-            inty=_np.array([0.0]),
+            rt=_np.array([0.0]),
+            inty=_np.array([0.0]),
             label=label or f"EIC m/z={mz:.4f} ± {mz_tol} (empty)",
-            rt_unit=rt_unit
+            rt_unit=rt_unit,
         )
 
     # build Chromatogram
     try:
         chrom = Chromatogram(
-            rt=eic_pd["rt"].to_numpy(),
-            inty=eic_pd["inty"].to_numpy(),
+            rt=eic_pd["rt"].to_numpy(),
+            inty=eic_pd["inty"].to_numpy(),
             label=label or f"EIC m/z={mz:.4f} ± {mz_tol}",
-            rt_unit=rt_unit
+            rt_unit=rt_unit,
         )
     except Exception:
         chrom = Chromatogram(
-            rt=eic_pd["rt"].values,
-            inty=eic_pd["inty"].values,
+            rt=eic_pd["rt"].values,
+            inty=eic_pd["inty"].values,
             label=label or f"EIC m/z={mz:.4f} ± {mz_tol}",
-            rt_unit=rt_unit
+            rt_unit=rt_unit,
        )
 
     return chrom
 
 
 # =====================================================================================
-# DATA RETRIEVAL AND MATRIX FUNCTIONS
+# DATA RETRIEVAL AND MATRIX FUNCTIONS
 # =====================================================================================
 
 
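Note on the chromatogram hunks above: the edits are mechanical (multi-line call formatting, trailing commas, double quotes), so get_bpc, get_tic, and get_eic keep their signatures and behavior, including the single-point empty-Chromatogram fallback. A rough usage sketch against those signatures — assuming the helpers are importable from masster.study.helpers as the file path suggests; the study object and sample name are hypothetical:

    from masster.study.helpers import get_eic, get_tic

    tic = get_tic(study, sample="sample_01", label="TIC")
    eic = get_eic(study, sample="sample_01", mz=301.1440, mz_tol=0.01, rt_unit="s")
    print(eic.label)  # "EIC m/z=301.1440 ± 0.01"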
@@ -451,9 +471,9 @@ def align_reset(self):
     self.alignment_ref_index = None
     # in self.features_df, set rt equal to rt_original
     self.features_df = self.features_df.with_columns(
-        pl.col("rt_original").alias("rt")
+        pl.col("rt_original").alias("rt"),
     )
-
+
     # Ensure column order is maintained after with_columns operation
     self._ensure_features_df_schema_order()
 
@@ -614,7 +634,7 @@ def get_consensus_matches(self, uids=None):
     return matches
 
 
-# =====================================================================================
+# =====================================================================================
 # UID HELPER FUNCTIONS
 # =====================================================================================
 
@@ -796,7 +816,7 @@ def get_sample(self, sample):
         return cache[sample_uid]
 
     sample_path = row.get("sample_path", None)
-    s = Sample(log_level='ERROR')
+    s = Sample(log_level="ERROR")
     try:
         if sample_path:
             try:
@@ -816,13 +836,13 @@ def get_orphans(self):
     Get all features that are not in the consensus mapping.
     """
     not_in_consensus = self.features_df.filter(
-        ~self.features_df["feature_uid"].is_in(self.consensus_mapping_df["feature_uid"].to_list())
+        ~self.features_df["feature_uid"].is_in(self.consensus_mapping_df["feature_uid"].to_list()),
    )
    return not_in_consensus
 
 
 # =====================================================================================
-# DATA COMPRESSION AND RESTORATION FUNCTIONS
+# DATA COMPRESSION AND RESTORATION FUNCTIONS
 # =====================================================================================
 
 
@@ -878,7 +898,7 @@ def compress_features(self):
 
     removed_count = initial_count - len(self.features_df)
     self.logger.info(
-        f"Compressed features: removed {removed_count} features not in consensus, cleared ms2_specs column"
+        f"Compressed features: removed {removed_count} features not in consensus, cleared ms2_specs column",
    )
 
 
@@ -949,13 +969,20 @@ def restore_features(self, samples=None, maps=False):
             # Load sample to get its features_df
             # Use a direct load call with map=False to prevent feature synchronization
             # which would remove filled features that don't exist in the original FeatureMap
-            sample = Sample(log_level="DEBUG")
+            # Use ERROR log level to suppress info messages
+            sample = Sample(log_level="ERROR")
             sample._load_sample5(sample_path, map=False)
 
             if sample.features_df is None or sample.features_df.is_empty():
                 self.logger.warning(f"No features found in sample {sample_name}")
                 continue
 
+            # Check which columns are actually available in the sample
+            available_columns = [col for col in columns_to_update if col in sample.features_df.columns]
+            if not available_columns:
+                self.logger.debug(f"No target columns found in sample {sample_name}")
+                continue
+
             # Create update data for this sample
             updates_made = 0
             for row in sample.features_df.iter_rows(named=True):
@@ -967,8 +994,8 @@ def restore_features(self, samples=None, maps=False):
                 if key in study_feature_mapping:
                     feature_uid = study_feature_mapping[key]
 
-                    # Update the specific columns in study.features_df
-                    for col in columns_to_update:
+                    # Update only the available columns in study.features_df
+                    for col in available_columns:
                         if col in row and col in self.features_df.columns:
                             # Get the original column dtype to preserve it
                             original_dtype = self.features_df[col].dtype
@@ -993,7 +1020,8 @@ def restore_features(self, samples=None, maps=False):
                             )
                             updates_made += 1
 
-            self.logger.debug(f"Updated {updates_made} features from sample {sample_name}")
+            if updates_made > 0:
+                self.logger.debug(f"Updated {updates_made} features from sample {sample_name}")
 
     # If maps is True, load featureXML data
     if maps:
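The restore_features hunks are the first behavioral change in this file: the sample loader now runs at ERROR log level, samples that carry none of the target columns are skipped early, per-feature updates touch only the columns actually present, and the debug summary is logged only when something changed. A minimal call sketch, assuming a study previously saved with compression (sample names hypothetical):

    # pull feature-level columns back from the original .sample5 files
    study.restore_features(samples=["QC_01", "QC_02"], maps=False)

    # or restore all samples and reload featureXML maps as well
    study.restore_features(maps=True)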
@@ -1076,13 +1104,18 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
 
         try:
             # Load sample (with map=False to prevent feature synchronization)
-            sample = Sample(log_level="WARNING")
+            # Use ERROR log level to suppress info messages
+            sample = Sample(log_level="ERROR")
             sample._load_sample5(sample_path, map=False)
 
             if sample.features_df is None or sample.features_df.is_empty():
                 self.logger.warning(f"No features found in sample {sample_name}")
                 continue
 
+            # Check if chrom column exists in sample
+            if "chrom" not in sample.features_df.columns:
+                continue
+
             # Update chromatograms from this sample
             for row in sample.features_df.iter_rows(named=True):
                 feature_id = row.get("feature_id")
@@ -1119,7 +1152,7 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
     total_chroms = len(self.features_df)
 
     self.logger.debug(
-        f"Chromatograms still missing: {empty_chroms}/{total_chroms} ({empty_chroms / total_chroms * 100:.1f}%)"
+        f"Chromatograms still missing: {empty_chroms}/{total_chroms} ({empty_chroms / total_chroms * 100:.1f}%)",
    )
 
    if empty_chroms == 0:
@@ -1163,7 +1196,8 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
 
         try:
             # Load sample for MS1 data extraction
-            sample = Sample(log_level="WARNING")
+            # Use ERROR log level to suppress info messages
+            sample = Sample(log_level="ERROR")
             sample._load_sample5(sample_path, map=False)
 
             if not hasattr(sample, "ms1_df") or sample.ms1_df is None or sample.ms1_df.is_empty():
@@ -1249,7 +1283,7 @@ def restore_chrom(self, samples=None, mz_tol=0.010, rt_tol=10.0):
     final_total = len(self.features_df)
 
     self.logger.info(
-        f"Chromatogram restoration complete: {final_non_null}/{final_total} ({final_non_null / final_total * 100:.1f}%)"
+        f"Chromatogram restoration complete: {final_non_null}/{final_total} ({final_non_null / final_total * 100:.1f}%)",
    )
    self.logger.info(f"Restored from .sample5 files: {restored_count}, Gap-filled from raw data: {filled_count}")
 
@@ -1290,7 +1324,7 @@ def compress_ms2(self, max_replicates=5):
 
     removed_count = initial_count - len(self.consensus_ms2)
     self.logger.info(
-        f"Compressed MS2 data: removed {removed_count} entries, kept max {max_replicates} per consensus/energy pair"
+        f"Compressed MS2 data: removed {removed_count} entries, kept max {max_replicates} per consensus/energy pair",
    )
 
 
@@ -1328,14 +1362,14 @@ def compress_chrom(self):
 def sample_name_replace(self, replace_dict):
     """
     Replace sample names in samples_df based on a dictionary mapping.
-
-    Takes all names in self.samples_df['sample_name'], creates a copy, and replaces
-    all keys with their corresponding values from replace_dict. Checks that all
+
+    Takes all names in self.samples_df['sample_name'], creates a copy, and replaces
+    all keys with their corresponding values from replace_dict. Checks that all
     resulting sample names are unique. If unique, replaces the values in self.samples_df.
 
     Parameters:
         replace_dict (dict): Dictionary mapping old names (keys) to new names (values).
-                             All keys found in sample names will be replaced with their
+                             All keys found in sample names will be replaced with their
                              corresponding values.
                              e.g., {"old_name1": "new_name1", "old_name2": "new_name2"}
 
@@ -1348,22 +1382,22 @@ def sample_name_replace(self, replace_dict):
     """
     if not isinstance(replace_dict, dict):
         raise ValueError("replace_dict must be a dictionary")
-
+
     if self.samples_df is None or len(self.samples_df) == 0:
         self.logger.warning("No samples found in study.")
         return
-
+
     if not replace_dict:
         self.logger.warning("Empty replace_dict provided, no changes made.")
         return
 
     # Get current sample names
     current_names = self.samples_df.get_column("sample_name").to_list()
-
+
     # Create a copy and apply replacements
     new_names = []
     replaced_count = 0
-
+
     for name in current_names:
         if name in replace_dict:
             new_names.append(replace_dict[name])
@@ -1371,7 +1405,7 @@ def sample_name_replace(self, replace_dict):
             self.logger.debug(f"Replacing sample name: '{name}' -> '{replace_dict[name]}'")
         else:
             new_names.append(name)
-
+
     # Check that all new names are unique
     if len(set(new_names)) != len(new_names):
         duplicates = []
@@ -1382,19 +1416,19 @@ def sample_name_replace(self, replace_dict):
         else:
             seen.add(name)
         raise ValueError(f"Resulting sample names are not unique. Duplicates found: {duplicates}")
-
+
     # If we get here, all names are unique - apply the changes
     self.samples_df = self.samples_df.with_columns(
         pl.Series("sample_name", new_names).alias("sample_name"),
     )
-
+
     self.logger.info(f"Successfully replaced {replaced_count} sample names")
 
 
 def sample_name_reset(self):
     """
     Reset sample names to the basename of sample_path without extensions.
-
+
     Takes all paths in self.samples_df['sample_path'], extracts the basename,
     removes file extensions, and checks that all resulting names are unique.
     If unique, replaces the values in self.samples_df['sample_name'].
@@ -1407,31 +1441,31 @@ def sample_name_reset(self):
         RuntimeError: If any sample_path is None or empty
     """
     import os
-
+
     if self.samples_df is None or len(self.samples_df) == 0:
         self.logger.warning("No samples found in study.")
         return
 
     # Get current sample paths
     sample_paths = self.samples_df.get_column("sample_path").to_list()
-
+
     # Extract basenames without extensions
     new_names = []
-
+
     for i, path in enumerate(sample_paths):
         if path is None or path == "":
             raise RuntimeError(f"Sample at index {i} has no sample_path set")
-
+
         # Get basename and remove extension(s)
         basename = os.path.basename(path)
         # Remove all extensions (handles cases like .tar.gz, .sample5.gz, etc.)
         name_without_ext = basename
-        while '.' in name_without_ext:
+        while "." in name_without_ext:
             name_without_ext = os.path.splitext(name_without_ext)[0]
-
+
         new_names.append(name_without_ext)
         self.logger.debug(f"Resetting sample name from path: '{path}' -> '{name_without_ext}'")
-
+
     # Check that all new names are unique
     if len(set(new_names)) != len(new_names):
         duplicates = []
@@ -1442,12 +1476,12 @@ def sample_name_reset(self):
         else:
             seen.add(name)
         raise ValueError(f"Resulting sample names are not unique. Duplicates found: {duplicates}")
-
+
     # If we get here, all names are unique - apply the changes
     self.samples_df = self.samples_df.with_columns(
         pl.Series("sample_name", new_names).alias("sample_name"),
     )
-
+
     self.logger.info(f"Successfully reset {len(new_names)} sample names from sample paths")
 
 
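Both renaming helpers validate uniqueness before writing anything back, so a failed rename raises ValueError and leaves samples_df unchanged. A minimal sketch of the two entry points (names and paths hypothetical):

    # explicit renames via a mapping
    study.sample_name_replace({"run_007_rep1": "QC_01", "run_008_rep1": "QC_02"})

    # derive names from paths: "/data/run_007_rep1.sample5.gz" -> "run_007_rep1"
    study.sample_name_reset()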
@@ -1704,7 +1738,7 @@ def features_select(
         if isinstance(chrom_coherence, tuple) and len(chrom_coherence) == 2:
             min_coherence, max_coherence = chrom_coherence
             filter_conditions.append(
-                (pl.col("chrom_coherence") >= min_coherence) & (pl.col("chrom_coherence") <= max_coherence)
+                (pl.col("chrom_coherence") >= min_coherence) & (pl.col("chrom_coherence") <= max_coherence),
             )
         else:
             filter_conditions.append(pl.col("chrom_coherence") >= chrom_coherence)
@@ -1717,7 +1751,7 @@ def features_select(
         if isinstance(chrom_prominence, tuple) and len(chrom_prominence) == 2:
             min_prominence, max_prominence = chrom_prominence
             filter_conditions.append(
-                (pl.col("chrom_prominence") >= min_prominence) & (pl.col("chrom_prominence") <= max_prominence)
+                (pl.col("chrom_prominence") >= min_prominence) & (pl.col("chrom_prominence") <= max_prominence),
             )
         else:
             filter_conditions.append(pl.col("chrom_prominence") >= chrom_prominence)
@@ -1731,7 +1765,7 @@ def features_select(
             min_prominence_scaled, max_prominence_scaled = chrom_prominence_scaled
             filter_conditions.append(
                 (pl.col("chrom_prominence_scaled") >= min_prominence_scaled)
-                & (pl.col("chrom_prominence_scaled") <= max_prominence_scaled)
+                & (pl.col("chrom_prominence_scaled") <= max_prominence_scaled),
             )
         else:
             filter_conditions.append(pl.col("chrom_prominence_scaled") >= chrom_prominence_scaled)
@@ -1745,7 +1779,7 @@ def features_select(
             min_height_scaled, max_height_scaled = chrom_height_scaled
             filter_conditions.append(
                 (pl.col("chrom_height_scaled") >= min_height_scaled)
-                & (pl.col("chrom_height_scaled") <= max_height_scaled)
+                & (pl.col("chrom_height_scaled") <= max_height_scaled),
             )
         else:
             filter_conditions.append(pl.col("chrom_height_scaled") >= chrom_height_scaled)
@@ -1852,7 +1886,7 @@ def features_filter(self, features):
     # Single comprehensive log message
     if mapping_removed_count > 0:
         self.logger.info(
-            f"Kept {final_count} features and removed {mapping_removed_count} consensus mappings. Filtered out {removed_count} features."
+            f"Kept {final_count} features and removed {mapping_removed_count} consensus mappings. Filtered out {removed_count} features.",
         )
     else:
         self.logger.info(f"Kept {final_count} features. Filtered out {removed_count} features.")
@@ -1929,7 +1963,7 @@ def features_delete(self, features):
     # Single comprehensive log message
     if mapping_removed_count > 0:
         self.logger.info(
-            f"Deleted {removed_count} features and {mapping_removed_count} consensus mappings. Remaining features: {final_count}"
+            f"Deleted {removed_count} features and {mapping_removed_count} consensus mappings. Remaining features: {final_count}",
         )
     else:
         self.logger.info(f"Deleted {removed_count} features. Remaining features: {final_count}")
@@ -1994,7 +2028,7 @@ def consensus_select(
     # Filter by m/z
     if mz is not None:
         consensus_len_before_filter = len(consensus)
-
+
         if isinstance(mz, tuple) and len(mz) == 2:
             # Check if second value is smaller than first (indicating mz, mz_tol format)
             if mz[1] < mz[0]:
@@ -2008,18 +2042,19 @@ def consensus_select(
             consensus = consensus.filter((pl.col("mz") >= min_mz) & (pl.col("mz") <= max_mz))
         else:
             # Single float value - use default mz tolerance from study parameters
-            default_mz_tol = getattr(self, 'parameters', None)
-            if default_mz_tol and hasattr(default_mz_tol, 'eic_mz_tol'):
+            default_mz_tol = getattr(self, "parameters", None)
+            if default_mz_tol and hasattr(default_mz_tol, "eic_mz_tol"):
                 default_mz_tol = default_mz_tol.eic_mz_tol
             else:
                 # Fallback to align_defaults if study parameters not available
                 from masster.study.defaults.align_def import align_defaults
+
                 default_mz_tol = align_defaults().mz_max_diff
-
+
             min_mz = mz - default_mz_tol
             max_mz = mz + default_mz_tol
             consensus = consensus.filter((pl.col("mz") >= min_mz) & (pl.col("mz") <= max_mz))
-
+
         self.logger.debug(
             f"Selected consensus by mz. Consensus removed: {consensus_len_before_filter - len(consensus)}",
         )
@@ -2027,7 +2062,7 @@ def consensus_select(
     # Filter by retention time
     if rt is not None:
         consensus_len_before_filter = len(consensus)
-
+
         if isinstance(rt, tuple) and len(rt) == 2:
             # Check if second value is smaller than first (indicating rt, rt_tol format)
             if rt[1] < rt[0]:
@@ -2041,18 +2076,19 @@ def consensus_select(
             consensus = consensus.filter((pl.col("rt") >= min_rt) & (pl.col("rt") <= max_rt))
         else:
             # Single float value - use default rt tolerance from study parameters
-            default_rt_tol = getattr(self, 'parameters', None)
-            if default_rt_tol and hasattr(default_rt_tol, 'eic_rt_tol'):
+            default_rt_tol = getattr(self, "parameters", None)
+            if default_rt_tol and hasattr(default_rt_tol, "eic_rt_tol"):
                 default_rt_tol = default_rt_tol.eic_rt_tol
             else:
                 # Fallback to align_defaults if study parameters not available
                 from masster.study.defaults.align_def import align_defaults
+
                 default_rt_tol = align_defaults().rt_max_diff
-
+
             min_rt = rt - default_rt_tol
             max_rt = rt + default_rt_tol
             consensus = consensus.filter((pl.col("rt") >= min_rt) & (pl.col("rt") <= max_rt))
-
+
         self.logger.debug(
             f"Selected consensus by rt. Consensus removed: {consensus_len_before_filter - len(consensus)}",
        )
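The mz and rt filters above accept three shapes: an explicit (min, max) range, a (center, tolerance) pair — detected when the second tuple element is smaller than the first — or a bare float, which falls back to the study's eic_mz_tol / eic_rt_tol parameters or, failing that, to align_defaults(). A sketch of the call shapes (values hypothetical):

    study.consensus_select(mz=(300.0, 310.0))  # range: 300.0 <= mz <= 310.0
    study.consensus_select(mz=(301.14, 0.01))  # center ± tolerance, since 0.01 < 301.14
    study.consensus_select(rt=120.0)           # bare float: tolerance from study parameters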
@@ -2077,7 +2113,7 @@ def consensus_select(
             # Treat as range
             min_uid, max_uid = consensus_uid
             consensus = consensus.filter(
-                (pl.col("consensus_uid") >= min_uid) & (pl.col("consensus_uid") <= max_uid)
+                (pl.col("consensus_uid") >= min_uid) & (pl.col("consensus_uid") <= max_uid),
             )
         else:
             # Treat as list
@@ -2105,7 +2141,7 @@ def consensus_select(
         if isinstance(number_samples, tuple) and len(number_samples) == 2:
             min_samples, max_samples = number_samples
             consensus = consensus.filter(
-                (pl.col("number_samples") >= min_samples) & (pl.col("number_samples") <= max_samples)
+                (pl.col("number_samples") >= min_samples) & (pl.col("number_samples") <= max_samples),
             )
         else:
             consensus = consensus.filter(pl.col("number_samples") >= number_samples)
@@ -2163,7 +2199,7 @@ def consensus_select(
             min_coherence, max_coherence = chrom_coherence_mean
             consensus = consensus.filter(
                 (pl.col("chrom_coherence_mean") >= min_coherence)
-                & (pl.col("chrom_coherence_mean") <= max_coherence)
+                & (pl.col("chrom_coherence_mean") <= max_coherence),
             )
         else:
             consensus = consensus.filter(pl.col("chrom_coherence_mean") >= chrom_coherence_mean)
@@ -2181,7 +2217,7 @@ def consensus_select(
             min_prominence, max_prominence = chrom_prominence_mean
             consensus = consensus.filter(
                 (pl.col("chrom_prominence_mean") >= min_prominence)
-                & (pl.col("chrom_prominence_mean") <= max_prominence)
+                & (pl.col("chrom_prominence_mean") <= max_prominence),
             )
         else:
             consensus = consensus.filter(pl.col("chrom_prominence_mean") >= chrom_prominence_mean)
@@ -2199,7 +2235,7 @@ def consensus_select(
             min_prominence_scaled, max_prominence_scaled = chrom_prominence_scaled_mean
             consensus = consensus.filter(
                 (pl.col("chrom_prominence_scaled_mean") >= min_prominence_scaled)
-                & (pl.col("chrom_prominence_scaled_mean") <= max_prominence_scaled)
+                & (pl.col("chrom_prominence_scaled_mean") <= max_prominence_scaled),
             )
         else:
             consensus = consensus.filter(pl.col("chrom_prominence_scaled_mean") >= chrom_prominence_scaled_mean)
@@ -2217,7 +2253,7 @@ def consensus_select(
             min_height_scaled, max_height_scaled = chrom_height_scaled_mean
             consensus = consensus.filter(
                 (pl.col("chrom_height_scaled_mean") >= min_height_scaled)
-                & (pl.col("chrom_height_scaled_mean") <= max_height_scaled)
+                & (pl.col("chrom_height_scaled_mean") <= max_height_scaled),
             )
         else:
             consensus = consensus.filter(pl.col("chrom_height_scaled_mean") >= chrom_height_scaled_mean)
@@ -2234,7 +2270,7 @@ def consensus_select(
         if isinstance(rt_delta_mean, tuple) and len(rt_delta_mean) == 2:
             min_rt_delta, max_rt_delta = rt_delta_mean
             consensus = consensus.filter(
-                (pl.col("rt_delta_mean") >= min_rt_delta) & (pl.col("rt_delta_mean") <= max_rt_delta)
+                (pl.col("rt_delta_mean") >= min_rt_delta) & (pl.col("rt_delta_mean") <= max_rt_delta),
             )
         else:
             consensus = consensus.filter(pl.col("rt_delta_mean") >= rt_delta_mean)
@@ -2261,10 +2297,10 @@ def consensus_select(
             # Multiple columns
             valid_columns = [col for col in sortby if col in consensus.columns]
             invalid_columns = [col for col in sortby if col not in consensus.columns]
-
+
             if invalid_columns:
                 self.logger.warning(f"Sort columns not found in consensus DataFrame: {invalid_columns}")
-
+
             if valid_columns:
                 consensus = consensus.sort(valid_columns, descending=descending)
             else:
@@ -2355,7 +2391,7 @@ def consensus_filter(self, consensus):
 
     removed_consensus_count = initial_consensus_count - len(self.consensus_df)
     self.logger.info(
-        f"Filtered {removed_consensus_count} consensus features. Remaining consensus: {len(self.consensus_df)}"
+        f"Filtered {removed_consensus_count} consensus features. Remaining consensus: {len(self.consensus_df)}",
     )
 
 
@@ -2485,7 +2521,9 @@ def samples_select(
         if len(sample_batch) == 2 and not isinstance(sample_batch, list):
             # Treat as range
             min_batch, max_batch = sample_batch
-            filter_conditions.append((pl.col("sample_batch") >= min_batch) & (pl.col("sample_batch") <= max_batch))
+            filter_conditions.append(
+                (pl.col("sample_batch") >= min_batch) & (pl.col("sample_batch") <= max_batch),
+            )
         else:
             # Treat as list
             filter_conditions.append(pl.col("sample_batch").is_in(sample_batch))
@@ -2501,7 +2539,9 @@ def samples_select(
         if len(sample_sequence) == 2 and not isinstance(sample_sequence, list):
             # Treat as range
             min_seq, max_seq = sample_sequence
-            filter_conditions.append((pl.col("sample_sequence") >= min_seq) & (pl.col("sample_sequence") <= max_seq))
+            filter_conditions.append(
+                (pl.col("sample_sequence") >= min_seq) & (pl.col("sample_sequence") <= max_seq),
+            )
         else:
             # Treat as list
             filter_conditions.append(pl.col("sample_sequence").is_in(sample_sequence))
@@ -2515,7 +2555,9 @@ def samples_select(
     if "num_features" in available_columns:
         if isinstance(num_features, tuple) and len(num_features) == 2:
             min_features, max_features = num_features
-            filter_conditions.append((pl.col("num_features") >= min_features) & (pl.col("num_features") <= max_features))
+            filter_conditions.append(
+                (pl.col("num_features") >= min_features) & (pl.col("num_features") <= max_features),
+            )
         else:
             filter_conditions.append(pl.col("num_features") >= num_features)
     else:
@@ -2572,15 +2614,15 @@ def samples_select(
 def samples_delete(self, samples):
     """
     Delete samples and all related data from the study based on sample identifiers.
-
-    This function eliminates all data related to the specified samples (and their sample_uids)
+
+    This function eliminates all data related to the specified samples (and their sample_uids)
     from all dataframes including:
     - samples_df: Removes the sample rows
     - features_df: Removes all features belonging to these samples
     - consensus_mapping_df: Removes mappings for features from these samples
     - consensus_ms2: Removes MS2 spectra for features from these samples
     - feature_maps: Removes the corresponding feature maps
-
+
     Also updates map_id values to maintain sequential indices after deletion.
 
     Parameters:
@@ -2642,10 +2684,10 @@ def samples_delete(self, samples):
 
     # Get map_ids to remove from feature_maps (needed before samples_df deletion)
     map_ids_to_remove = []
-    if hasattr(self, 'feature_maps') and self.feature_maps is not None:
+    if hasattr(self, "feature_maps") and self.feature_maps is not None:
         # Get map_ids for samples to be deleted
         map_ids_df = self.samples_df.filter(
-            pl.col("sample_uid").is_in(sample_uids_to_remove)
+            pl.col("sample_uid").is_in(sample_uids_to_remove),
        ).select("map_id")
        if not map_ids_df.is_empty():
            map_ids_to_remove = map_ids_df["map_id"].to_list()
@@ -2683,7 +2725,7 @@ def samples_delete(self, samples):
 
     # 5. Remove from feature_maps and update map_id
     removed_maps_count = 0
-    if hasattr(self, 'feature_maps') and self.feature_maps is not None and map_ids_to_remove:
+    if hasattr(self, "feature_maps") and self.feature_maps is not None and map_ids_to_remove:
         # Remove feature maps in reverse order to maintain indices
         for map_id in sorted(map_ids_to_remove, reverse=True):
             if 0 <= map_id < len(self.feature_maps):
@@ -2694,7 +2736,7 @@ def samples_delete(self, samples):
     if len(self.samples_df) > 0:
         new_map_ids = list(range(len(self.samples_df)))
         self.samples_df = self.samples_df.with_columns(
-            pl.lit(new_map_ids).alias("map_id")
+            pl.lit(new_map_ids).alias("map_id"),
         )
 
     # Calculate and log results
@@ -2705,16 +2747,16 @@ def samples_delete(self, samples):
     summary_parts = [
         f"Deleted {removed_sample_count} samples",
     ]
-
+
     if removed_features_count > 0:
         summary_parts.append(f"{removed_features_count} features")
-
+
     if removed_mapping_count > 0:
         summary_parts.append(f"{removed_mapping_count} consensus mappings")
-
+
     if removed_ms2_count > 0:
         summary_parts.append(f"{removed_ms2_count} MS2 spectra")
-
+
     if removed_maps_count > 0:
         summary_parts.append(f"{removed_maps_count} feature maps")
 
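samples_delete cascades through every structure keyed by sample_uid and then re-indexes map_id so the remaining feature maps stay aligned with samples_df. A minimal sketch (sample names hypothetical):

    # removes the samples plus their features, consensus mappings,
    # MS2 spectra and feature maps; map_id is renumbered 0..n-1
    study.samples_delete(["blank_01", "blank_02"])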
@@ -2735,14 +2777,14 @@ def samples_delete(self, samples):
 def sample_color(self, by=None, palette="Turbo256"):
     """
     Set sample colors in the sample_color column of samples_df.
-
+
     When a new sample is added, this function resets all colors picking from the specified palette.
     The default palette is Turbo256.
 
     Parameters:
         by (str or list, optional): Property to base colors on. Options:
             - 'sample_uid': Use sample_uid values to assign colors
-            - 'sample_index': Use sample index (position) to assign colors
+            - 'sample_index': Use sample index (position) to assign colors
             - 'sample_type': Use sample_type values to assign colors
             - 'sample_name': Use sample_name values to assign colors
             - list of colors: Use provided list of hex color codes
@@ -2755,7 +2797,7 @@ def sample_color(self, by=None, palette="Turbo256"):
             - 'Magma256': Magma colormap (256 colors, perceptually uniform)
             - 'Cividis256': Cividis colormap (256 colors, colorblind-friendly)
             - 'Set1': Qualitative palette (9 distinct colors)
-            - 'Set2': Qualitative palette (8 distinct colors)
+            - 'Set2': Qualitative palette (8 distinct colors)
             - 'Set3': Qualitative palette (12 distinct colors)
             - 'Tab10': Tableau 10 palette (10 distinct colors)
             - 'Tab20': Tableau 20 palette (20 distinct colors)
@@ -2766,7 +2808,7 @@ def sample_color(self, by=None, palette="Turbo256"):
             - 'Coolwarm': Cool-warm diverging colormap
             - 'Seismic': Seismic diverging colormap
             - Any other colormap name supported by the cmap library
-
+
         For a complete catalog of available colormaps, see:
         https://cmap-docs.readthedocs.io/en/latest/catalog/
 
@@ -2776,10 +2818,10 @@ def sample_color(self, by=None, palette="Turbo256"):
     Example:
         # Set colors based on sample type
         study.sample_color(by='sample_type', palette='Set1')
-
+
         # Set colors using a custom color list
         study.sample_color(by=['#FF0000', '#00FF00', '#0000FF'])
-
+
         # Reset to default Turbo256 sequential colors
         study.sample_color()
     """
@@ -2788,11 +2830,13 @@ def sample_color(self, by=None, palette="Turbo256"):
         return
 
     sample_count = len(self.samples_df)
-
+
     # Handle custom color list
     if isinstance(by, list):
         if len(by) < sample_count:
-            self.logger.warning(f"Provided color list has {len(by)} colors but {sample_count} samples. Repeating colors.")
+            self.logger.warning(
+                f"Provided color list has {len(by)} colors but {sample_count} samples. Repeating colors.",
+            )
             # Cycle through the provided colors if there aren't enough
             colors = []
             for i in range(sample_count):
@@ -2808,10 +2852,10 @@ def sample_color(self, by=None, palette="Turbo256"):
         except ValueError as e:
             self.logger.error(f"Error sampling colors from colormap: {e}")
             return
-
-    elif by == 'sample_uid':
+
+    elif by == "sample_uid":
         # Use sample_uid to determine position in evenly sampled colormap
-        sample_uids = self.samples_df['sample_uid'].to_list()
+        sample_uids = self.samples_df["sample_uid"].to_list()
         try:
             # Sample colors evenly for the number of samples
             palette_colors = _sample_colors_from_colormap(palette, sample_count)
@@ -2823,29 +2867,29 @@ def sample_color(self, by=None, palette="Turbo256"):
         except ValueError as e:
             self.logger.error(f"Error sampling colors from colormap: {e}")
             return
-
-    elif by == 'sample_index':
+
+    elif by == "sample_index":
         # Use sample index (position in DataFrame) with evenly sampled colors
         try:
             colors = _sample_colors_from_colormap(palette, sample_count)
         except ValueError as e:
             self.logger.error(f"Error sampling colors from colormap: {e}")
             return
-
-    elif by == 'sample_type':
+
+    elif by == "sample_type":
         # Use sample_type to assign colors - same type gets same color
         # Sample colors evenly across colormap for unique types
-        sample_types = self.samples_df['sample_type'].to_list()
-        unique_types = list(set([t for t in sample_types if t is not None]))
-
+        sample_types = self.samples_df["sample_type"].to_list()
+        unique_types = list({t for t in sample_types if t is not None})
+
         try:
             # Sample colors evenly for unique types
             type_colors = _sample_colors_from_colormap(palette, len(unique_types))
             type_to_color = {}
-
+
             for i, sample_type in enumerate(unique_types):
                 type_to_color[sample_type] = type_colors[i]
-
+
             colors = []
             for sample_type in sample_types:
                 if sample_type is None:
@@ -2856,21 +2900,21 @@ def sample_color(self, by=None, palette="Turbo256"):
         except ValueError as e:
             self.logger.error(f"Error sampling colors from colormap: {e}")
             return
-
-    elif by == 'sample_name':
+
+    elif by == "sample_name":
         # Use sample_name to assign colors - same name gets same color (unlikely but possible)
         # Sample colors evenly across colormap for unique names
-        sample_names = self.samples_df['sample_name'].to_list()
-        unique_names = list(set([n for n in sample_names if n is not None]))
-
+        sample_names = self.samples_df["sample_name"].to_list()
+        unique_names = list({n for n in sample_names if n is not None})
+
         try:
             # Sample colors evenly for unique names
             name_colors = _sample_colors_from_colormap(palette, len(unique_names))
             name_to_color = {}
-
+
             for i, sample_name in enumerate(unique_names):
                 name_to_color[sample_name] = name_colors[i]
-
+
             colors = []
             for sample_name in sample_names:
                 if sample_name is None:
@@ -2882,14 +2926,16 @@ def sample_color(self, by=None, palette="Turbo256"):
             self.logger.error(f"Error sampling colors from colormap: {e}")
             return
     else:
-        self.logger.error(f"Invalid by value: {by}. Must be 'sample_uid', 'sample_index', 'sample_type', 'sample_name', a list of colors, or None.")
+        self.logger.error(
+            f"Invalid by value: {by}. Must be 'sample_uid', 'sample_index', 'sample_type', 'sample_name', a list of colors, or None.",
+        )
         return
 
     # Update the sample_color column
     self.samples_df = self.samples_df.with_columns(
-        pl.Series("sample_color", colors).alias("sample_color")
+        pl.Series("sample_color", colors).alias("sample_color"),
     )
-
+
     if isinstance(by, list):
         self.logger.debug(f"Set sample colors using provided color list ({len(by)} colors)")
     elif by is None:
@@ -2901,28 +2947,28 @@ def sample_color(self, by=None, palette="Turbo256"):
 def sample_color_reset(self):
     """
     Reset sample colors to default coloring using the 'turbo' colormap.
-
+
     This function assigns colors by distributing samples evenly across the full
     turbo colormap range, ensuring maximum color diversity and visual distinction
     between samples.
-
+
     Returns:
         None (modifies self.samples_df in place)
     """
     if self.samples_df is None or len(self.samples_df) == 0:
         self.logger.warning("No samples found in study.")
         return
-
+
     try:
         from cmap import Colormap
-
+
         # Use turbo colormap
-        cm = Colormap('turbo')
-
+        cm = Colormap("turbo")
+
         # Get sample count and assign colors evenly distributed across colormap
         n_samples = len(self.samples_df)
         colors = []
-
+
         # Distribute samples evenly across the full colormap range
         for i in range(n_samples):
             # Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
@@ -2930,9 +2976,9 @@ def sample_color_reset(self):
             # Optionally, map to a subset of colormap to avoid extreme colors
             # Use 10% to 90% of colormap range for better color diversity
             normalized_value = 0.1 + (normalized_value * 0.8)
-
+
             color_rgba = cm(normalized_value)
-
+
             # Convert RGBA to hex
             if len(color_rgba) >= 3:
                 r, g, b = color_rgba[:3]
@@ -2941,14 +2987,14 @@ def sample_color_reset(self):
                 r, g, b = int(r * 255), int(g * 255), int(b * 255)
             hex_color = f"#{r:02x}{g:02x}{b:02x}"
             colors.append(hex_color)
-
+
         # Update the sample_color column
         self.samples_df = self.samples_df.with_columns(
-            pl.Series("sample_color", colors).alias("sample_color")
+            pl.Series("sample_color", colors).alias("sample_color"),
         )
-
+
         self.logger.debug(f"Reset sample colors using turbo colormap with even distribution ({n_samples} samples)")
-
+
     except ImportError:
         self.logger.error("cmap library is required for sample color reset. Install with: pip install cmap")
     except Exception as e:
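The even-distribution loop in sample_color_reset centers each sample in its own bin, (i + 0.5) / n_samples, then compresses the result into the 10-90% band of the colormap so no sample lands on a near-white or near-black endpoint. A worked sketch of the positions it produces:

    # for n_samples = 4 the colormap positions are 0.20, 0.40, 0.60, 0.80:
    positions = [0.1 + 0.8 * ((i + 0.5) / 4) for i in range(4)]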
@@ -2958,13 +3004,13 @@ def sample_color_reset(self):
2958
3004
  def _get_color_palette(palette_name):
2959
3005
  """
2960
3006
  Get color palette as a list of hex color codes using the cmap library.
2961
-
3007
+
2962
3008
  Parameters:
2963
3009
  palette_name (str): Name of the palette
2964
-
3010
+
2965
3011
  Returns:
2966
3012
  list: List of hex color codes
2967
-
3013
+
2968
3014
  Raises:
2969
3015
  ValueError: If palette_name is not supported
2970
3016
  """
@@ -2972,40 +3018,38 @@ def _get_color_palette(palette_name):
2972
3018
  from cmap import Colormap
2973
3019
  except ImportError:
2974
3020
  raise ValueError("cmap library is required for color palettes. Install with: pip install cmap")
2975
-
3021
+
2976
3022
  # Map common palette names to cmap names
2977
3023
  palette_mapping = {
2978
3024
  # Scientific colormaps
2979
3025
  "Turbo256": "turbo",
2980
- "Viridis256": "viridis",
3026
+ "Viridis256": "viridis",
2981
3027
  "Plasma256": "plasma",
2982
3028
  "Inferno256": "inferno",
2983
3029
  "Magma256": "magma",
2984
3030
  "Cividis256": "cividis",
2985
-
2986
3031
  # Qualitative palettes
2987
3032
  "Set1": "Set1",
2988
- "Set2": "Set2",
3033
+ "Set2": "Set2",
2989
3034
  "Set3": "Set3",
2990
3035
  "Tab10": "tab10",
2991
3036
  "Tab20": "tab20",
2992
3037
  "Dark2": "Dark2",
2993
3038
  "Paired": "Paired",
2994
-
2995
3039
  # Additional useful palettes
2996
3040
  "Spectral": "Spectral",
2997
3041
  "Rainbow": "rainbow",
2998
3042
  "Coolwarm": "coolwarm",
2999
3043
  "Seismic": "seismic",
3000
3044
  }
3001
-
3045
+
3002
3046
  # Get the cmap name
3003
3047
  cmap_name = palette_mapping.get(palette_name, palette_name.lower())
3004
-
3048
+
3005
3049
  try:
3006
3050
  # Create colormap
3007
3051
  cm = Colormap(cmap_name)
3008
-
3052
+
3009
3053
  # Determine number of colors to generate
3010
3054
  if "256" in palette_name:
3011
3055
  n_colors = 256
@@ -3021,7 +3065,7 @@ def _get_color_palette(palette_name):
3021
3065
  n_colors = 20
3022
3066
  else:
3023
3067
  n_colors = 256 # Default for continuous colormaps
3024
-
3068
+
3025
3069
  # Generate colors
3026
3070
  if n_colors <= 20:
3027
3071
  # For discrete palettes, use evenly spaced indices
@@ -3029,11 +3073,11 @@ def _get_color_palette(palette_name):
3029
3073
  else:
3030
3074
  # For continuous palettes, use full range
3031
3075
  indices = [i / (n_colors - 1) for i in range(n_colors)]
3032
-
3076
+
3033
3077
  # Get colors as RGBA and convert to hex
3034
3078
  colors = cm(indices)
3035
3079
  hex_colors = []
3036
-
3080
+
3037
3081
  for color in colors:
3038
3082
  if len(color) >= 3: # RGBA or RGB
3039
3083
  r, g, b = color[:3]
@@ -3042,25 +3086,26 @@ def _get_color_palette(palette_name):
3042
3086
  r, g, b = int(r * 255), int(g * 255), int(b * 255)
3043
3087
  hex_color = f"#{r:02x}{g:02x}{b:02x}"
3044
3088
  hex_colors.append(hex_color)
3045
-
3089
+
3046
3090
  return hex_colors
3047
-
3091
+
3048
3092
  except Exception as e:
3049
- raise ValueError(f"Failed to create colormap '{cmap_name}': {e}. "
3050
- f"Available palettes: {list(palette_mapping.keys())}")
3093
+ raise ValueError(
3094
+ f"Failed to create colormap '{cmap_name}': {e}. Available palettes: {list(palette_mapping.keys())}",
3095
+ )
3051
3096
 
3052
3097
 
3053
3098
  def _sample_colors_from_colormap(palette_name, n_colors):
3054
3099
  """
3055
3100
  Sample colors evenly from the whole colormap range, similar to sample_color_reset.
3056
-
3101
+
3057
3102
  Parameters:
3058
3103
  palette_name (str): Name of the palette/colormap
3059
3104
  n_colors (int): Number of colors to sample
3060
-
3105
+
3061
3106
  Returns:
3062
3107
  list: List of hex color codes sampled evenly from the colormap
3063
-
3108
+
3064
3109
  Raises:
3065
3110
  ValueError: If palette_name is not supported
3066
3111
  """
@@ -3068,51 +3113,49 @@ def _sample_colors_from_colormap(palette_name, n_colors):
3068
3113
  from cmap import Colormap
3069
3114
  except ImportError:
3070
3115
  raise ValueError("cmap library is required for color palettes. Install with: pip install cmap")
3071
-
3116
+
3072
3117
  # Map common palette names to cmap names (same as _get_color_palette)
3073
3118
  palette_mapping = {
3074
3119
  # Scientific colormaps
3075
3120
  "Turbo256": "turbo",
3076
- "Viridis256": "viridis",
3121
+ "Viridis256": "viridis",
3077
3122
  "Plasma256": "plasma",
3078
3123
  "Inferno256": "inferno",
3079
3124
  "Magma256": "magma",
3080
3125
  "Cividis256": "cividis",
3081
-
3082
3126
  # Qualitative palettes
3083
3127
  "Set1": "Set1",
3084
- "Set2": "Set2",
3128
+ "Set2": "Set2",
3085
3129
  "Set3": "Set3",
3086
3130
  "Tab10": "tab10",
3087
3131
  "Tab20": "tab20",
3088
3132
  "Dark2": "Dark2",
3089
3133
  "Paired": "Paired",
3090
-
3091
3134
  # Additional useful palettes
3092
3135
  "Spectral": "Spectral",
3093
3136
  "Rainbow": "rainbow",
3094
3137
  "Coolwarm": "coolwarm",
3095
3138
  "Seismic": "seismic",
3096
3139
  }
3097
-
3140
+
3098
3141
  # Get the cmap name
3099
3142
  cmap_name = palette_mapping.get(palette_name, palette_name.lower())
3100
-
3143
+
3101
3144
  try:
3102
3145
  # Create colormap
3103
3146
  cm = Colormap(cmap_name)
3104
-
3147
+
3105
3148
  colors = []
3106
-
3149
+
3107
3150
  # Distribute samples evenly across the full colormap range (same approach as sample_color_reset)
3108
3151
  for i in range(n_colors):
3109
3152
  # Evenly distribute samples across colormap (avoiding endpoints to prevent white/black)
3110
3153
  normalized_value = (i + 0.5) / n_colors # +0.5 to center samples in their bins
3111
3154
  # Map to a subset of colormap to avoid extreme colors (use 10% to 90% range)
3112
3155
  normalized_value = 0.1 + (normalized_value * 0.8)
3113
-
3156
+
3114
3157
  color_rgba = cm(normalized_value)
3115
-
3158
+
3116
3159
  # Convert RGBA to hex
3117
3160
  if len(color_rgba) >= 3:
3118
3161
  r, g, b = color_rgba[:3]
@@ -3121,12 +3164,13 @@ def _sample_colors_from_colormap(palette_name, n_colors):
3121
3164
  r, g, b = int(r * 255), int(g * 255), int(b * 255)
3122
3165
  hex_color = f"#{r:02x}{g:02x}{b:02x}"
3123
3166
  colors.append(hex_color)
3124
-
3167
+
3125
3168
  return colors
3126
-
3169
+
3127
3170
  except Exception as e:
3128
- raise ValueError(f"Failed to create colormap '{cmap_name}': {e}. "
3129
- f"Available palettes: {list(palette_mapping.keys())}")
3171
+ raise ValueError(
3172
+ f"Failed to create colormap '{cmap_name}': {e}. Available palettes: {list(palette_mapping.keys())}",
3173
+ )
3130
3174
 
3131
3175
 
3132
3176
  def _matplotlib_to_hex(color_dict):
@@ -3135,32 +3179,32 @@ def _matplotlib_to_hex(color_dict):
3135
3179
 
3136
3180
 
3137
3181
  # =====================================================================================
3138
- # SCHEMA AND DATA STRUCTURE FUNCTIONS
3182
+ # SCHEMA AND DATA STRUCTURE FUNCTIONS
3139
3183
  # =====================================================================================
3140
3184
 
3141
3185
 
3142
3186
  def _ensure_features_df_schema_order(self):
3143
3187
  """
3144
3188
  Ensure features_df columns are ordered according to study5_schema.json.
3145
-
3189
+
3146
3190
  This method should be called after operations that might scramble the column order.
3147
3191
  """
3148
3192
  if self.features_df is None or self.features_df.is_empty():
3149
3193
  return
3150
-
3194
+
3151
3195
  try:
3152
3196
  import os
3153
3197
  import json
3154
3198
  from masster.study.h5 import _reorder_columns_by_schema
3155
-
3199
+
3156
3200
  # Load schema
3157
3201
  schema_path = os.path.join(os.path.dirname(__file__), "study5_schema.json")
3158
- with open(schema_path, 'r') as f:
3202
+ with open(schema_path) as f:
3159
3203
  schema = json.load(f)
3160
-
3204
+
3161
3205
  # Reorder columns to match schema
3162
- self.features_df = _reorder_columns_by_schema(self.features_df, schema, 'features_df')
3163
-
3206
+ self.features_df = _reorder_columns_by_schema(self.features_df, schema, "features_df")
3207
+
3164
3208
  except Exception as e:
3165
3209
  self.logger.warning(f"Failed to reorder features_df columns: {e}")
3166
3210
 
@@ -3168,38 +3212,248 @@ def _ensure_features_df_schema_order(self):
3168
3212
  def migrate_map_id_to_index(self):
3169
3213
  """
3170
3214
  Migrate map_id from string-based OpenMS unique IDs to integer indices.
3171
-
3215
+
3172
3216
  This function converts the map_id column from string type (with OpenMS unique IDs)
3173
3217
  to integer type where each map_id corresponds to the index of the feature map
3174
3218
  in self.features_maps.
3175
-
3219
+
3176
3220
  This migration is needed for studies that were created before the map_id format
3177
3221
  change from OpenMS unique IDs to feature map indices.
3178
3222
  """
3179
3223
  if self.samples_df is None or self.samples_df.is_empty():
3180
3224
  self.logger.warning("No samples to migrate")
3181
3225
  return
3182
-
3226
+
3183
3227
  # Check if migration is needed
3184
- current_dtype = self.samples_df['map_id'].dtype
3228
+ current_dtype = self.samples_df["map_id"].dtype
3185
3229
  if current_dtype == pl.Int64:
3186
3230
  self.logger.info("map_id column is already Int64 type - no migration needed")
3187
3231
  return
3188
-
3232
+
3189
3233
  self.logger.info("Migrating map_id from string-based OpenMS IDs to integer indices")
3190
-
3234
+
3191
3235
  # Create new map_id values based on sample order
3192
3236
  # Each sample gets a map_id that corresponds to its position in features_maps
3193
3237
  sample_count = len(self.samples_df)
3194
3238
  new_map_ids = list(range(sample_count))
3195
-
3239
+
3196
3240
  # Update the map_id column
3197
3241
  self.samples_df = self.samples_df.with_columns(
3198
- pl.lit(new_map_ids).alias("map_id")
3242
+ pl.lit(new_map_ids).alias("map_id"),
3199
3243
  )
3200
-
3244
+
3201
3245
  # Ensure the column is Int64 type
3202
3246
  self.samples_df = self.samples_df.cast({"map_id": pl.Int64})
3203
-
3247
+
3204
3248
  self.logger.info(f"Successfully migrated {sample_count} samples to indexed map_id format")
3205
3249
  self.logger.info(f"map_id now ranges from 0 to {sample_count - 1}")
3250
+
3251
+
+ def restore_ms2(self, samples=None, **kwargs):
+ """
+ Restore MS2 data by re-running find_ms2 on specified samples.
+
+ This function rebuilds the consensus_ms2 DataFrame by re-extracting MS2 spectra
+ from the original sample files. Use this to reverse the effects of compress_ms2().
+
+ Parameters:
+ samples (list, optional): List of sample_uids or sample_names to process.
+ If None, processes all samples.
+ **kwargs: Additional keyword arguments passed to find_ms2()
+ (e.g., mz_tol, centroid, deisotope, etc.)
+ """
+ if self.features_df is None or self.features_df.is_empty():
+ self.logger.error("No features_df found in study.")
+ return
+
+ if self.samples_df is None or self.samples_df.is_empty():
+ self.logger.error("No samples_df found in study.")
+ return
+
+ # Get sample_uids to process
+ sample_uids = self._get_sample_uids(samples)
+ if not sample_uids:
+ self.logger.warning("No valid samples specified.")
+ return
+
+ self.logger.info(f"Restoring MS2 data from {len(sample_uids)} samples...")
+
+ # Clear existing consensus_ms2 to rebuild from scratch
+ initial_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+ self.consensus_ms2 = pl.DataFrame()
+
+ # Re-run find_ms2 which will rebuild consensus_ms2
+ try:
+ self.find_ms2(**kwargs)
+
+ final_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+
+ self.logger.info(f"MS2 restoration completed: {initial_ms2_count} -> {final_ms2_count} MS2 spectra")
+
+ except Exception as e:
+ self.logger.error(f"Failed to restore MS2 data: {e}")
+ raise
+
+
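A short usage sketch for the new method (sample names and the tolerance value are illustrative, not from this diff):

# Rebuild MS2 spectra for the whole study.
study.restore_ms2()

# Or only for selected samples, forwarding find_ms2 options.
study.restore_ms2(samples=["sample1", "sample2"], mz_tol=0.01)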
+ def decompress(self, features=True, ms2=True, chrom=True, samples=None, **kwargs):
+ """
+ Reverse any compression effects by restoring compressed data adaptively.
+
+ This function restores data that was compressed using compress(), compress_features(),
+ compress_ms2(), compress_chrom(), or study.save(compress=True). It optimizes the
+ decompression process for speed by only processing what actually needs restoration.
+
+ Parameters:
+ features (bool): Restore features data (ms2_specs, ms2_scans, chrom_area)
+ ms2 (bool): Restore MS2 spectra by re-running find_ms2()
+ chrom (bool): Restore chromatogram objects
+ samples (list, optional): List of sample_uids or sample_names to process.
+ If None, processes all samples.
+ **kwargs: Additional keyword arguments for restoration functions:
+ - For restore_chrom: mz_tol (default: 0.010), rt_tol (default: 10.0)
+ - For restore_ms2/find_ms2: mz_tol, centroid, deisotope, etc.
+
+ Performance Optimizations:
+ - Adaptive processing: Only restores what actually needs restoration
+ - Processes features and chromatograms together when possible (shared file I/O)
+ - Uses cached sample instances to avoid repeated file loading
+ - Processes MS2 restoration last as it's the most computationally expensive
+ - Provides detailed progress information for long-running operations
+
+ Example:
+ # Restore everything (but only what needs restoration)
+ study.decompress()
+
+ # Restore only chromatograms with custom tolerances
+ study.decompress(features=False, ms2=False, chrom=True, mz_tol=0.005, rt_tol=5.0)
+
+ # Restore specific samples only
+ study.decompress(samples=["sample1", "sample2"])
+ """
+ if not any([features, ms2, chrom]):
+ self.logger.warning("No decompression operations specified.")
+ return
+
+ # Get sample_uids to process
+ sample_uids = self._get_sample_uids(samples)
+ if not sample_uids:
+ self.logger.warning("No valid samples specified.")
+ return
+
+ # Adaptively check what actually needs to be done
+ import polars as pl
+
+ # Check if features need restoration (more sophisticated logic)
+ features_need_restoration = False
+ if features and not self.features_df.is_empty():
+ # Check for completely missing columns that should exist after feature processing
+ missing_cols = []
+ for col in ["ms2_scans", "ms2_specs"]:
+ if col not in self.features_df.columns:
+ missing_cols.append(col)
+
+ # If columns are missing entirely, we likely need restoration
+ if missing_cols:
+ features_need_restoration = True
+ else:
+ # If columns exist, check if they're mostly null (indicating compression)
+ # But be smart about it - only check if we have consensus features with MS2
+ if not self.consensus_ms2.is_empty():
+ # We have MS2 data, so ms2_specs should have some content
+ null_ms2_specs = self.features_df.filter(pl.col("ms2_specs").is_null()).height
+ total_features = len(self.features_df)
+ # If more than 90% are null but we have MS2 data, likely compressed
+ if null_ms2_specs > (total_features * 0.9):
+ features_need_restoration = True
+
+ # Check if chromatograms need restoration
+ chrom_need_restoration = False
+ if chrom and not self.features_df.is_empty():
+ if "chrom" not in self.features_df.columns:
+ # Column completely missing
+ chrom_need_restoration = True
+ else:
+ null_chroms = self.features_df.filter(pl.col("chrom").is_null()).height
+ total_features = len(self.features_df)
+ # If more than 50% are null, likely need restoration
+ chrom_need_restoration = null_chroms > (total_features * 0.5)
+
+ # Check if MS2 data might need restoration (compare expected vs actual)
+ ms2_need_restoration = False
+ if ms2:
+ current_ms2_count = len(self.consensus_ms2) if not self.consensus_ms2.is_empty() else 0
+ consensus_count = len(self.consensus_df) if not self.consensus_df.is_empty() else 0
+
+ if consensus_count > 0:
+ # Calculate expected MS2 count based on consensus features with MS2 potential
+ # This is a heuristic - if we have very few MS2 compared to consensus, likely compressed
+ expected_ratio = 3.0 # Expect at least 3 MS2 per consensus on average
+ expected_ms2 = consensus_count * expected_ratio
+
+ if current_ms2_count < min(expected_ms2 * 0.3, consensus_count * 0.8):
+ ms2_need_restoration = True
+
+ # Build list of operations that actually need to be done
+ operations_needed = []
+ if features and features_need_restoration:
+ operations_needed.append("features")
+ if chrom and chrom_need_restoration:
+ operations_needed.append("chromatograms")
+ if ms2 and ms2_need_restoration:
+ operations_needed.append("MS2 spectra")
+
+ # Early exit if nothing needs to be done
+ if not operations_needed:
+ self.logger.info("All data appears to be already decompressed. No operations needed.")
+ return
+
+ self.logger.info(f"Starting adaptive decompression: {', '.join(operations_needed)} from {len(sample_uids)} samples")
+
+ try:
+ # Phase 1: Restore features and chromatograms together (shared file I/O)
+ if ("features" in operations_needed and "chromatograms" in operations_needed):
+ self.logger.info("Phase 1: Restoring features and chromatograms together...")
+
+ # Extract relevant kwargs for restore_features and restore_chrom
+ restore_kwargs = {}
+ if 'mz_tol' in kwargs:
+ restore_kwargs['mz_tol'] = kwargs['mz_tol']
+ if 'rt_tol' in kwargs:
+ restore_kwargs['rt_tol'] = kwargs['rt_tol']
+
+ # Restore features first (includes chrom column)
+ self.restore_features(samples=samples)
+
+ # Then do additional chrom gap-filling if needed
+ self.restore_chrom(samples=samples, **restore_kwargs)
+
+ elif ("features" in operations_needed and "chromatograms" not in operations_needed):
+ self.logger.info("Phase 1: Restoring features data...")
+ self.restore_features(samples=samples)
+
+ elif ("chromatograms" in operations_needed and "features" not in operations_needed):
+ self.logger.info("Phase 1: Restoring chromatograms...")
+ restore_kwargs = {}
+ if 'mz_tol' in kwargs:
+ restore_kwargs['mz_tol'] = kwargs['mz_tol']
+ if 'rt_tol' in kwargs:
+ restore_kwargs['rt_tol'] = kwargs['rt_tol']
+ self.restore_chrom(samples=samples, **restore_kwargs)
+
+ # Phase 2: Restore MS2 data (most computationally expensive, done last)
+ if "MS2 spectra" in operations_needed:
+ self.logger.info("Phase 2: Restoring MS2 spectra...")
+
+ # Extract MS2-specific kwargs
+ ms2_kwargs = {}
+ for key, value in kwargs.items():
+ if key in ['mz_tol', 'centroid', 'deisotope', 'dia_stats', 'feature_uid']:
+ ms2_kwargs[key] = value
+
+ self.restore_ms2(samples=samples, **ms2_kwargs)
+
+ self.logger.info("Adaptive decompression completed successfully")
+
+ except Exception as e:
+ self.logger.error(f"Decompression failed: {e}")
+ raise
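The null-fraction checks above can be read as one reusable heuristic. A standalone sketch (the function name and threshold default are ours; the thresholds mirror the 50%/90% cutoffs in decompress()):

import polars as pl

def looks_compressed(df: pl.DataFrame, col: str, null_fraction: float = 0.5) -> bool:
    # A column that is absent, or mostly null, suggests compressed/stripped data.
    if col not in df.columns:
        return True
    null_rows = df.filter(pl.col(col).is_null()).height
    return null_rows > len(df) * null_fraction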