masster-0.5.10-py3-none-any.whl → masster-0.5.12-py3-none-any.whl

This diff compares the contents of two publicly released versions of the package, exactly as they appear in their public registry. It is provided for informational purposes only.


masster/_version.py CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 
-__version__ = "0.5.10"
+__version__ = "0.5.12"
 
 
 def get_version():

masster/sample/plot.py CHANGED
@@ -1041,7 +1041,9 @@ def plot_2d(
     height=600,
     width=750,
     mz_range=None,
-    rt_range=None
+    rt_range=None,
+    legend=None,
+    colorby=None
 ):
     """
     Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1087,6 +1089,13 @@ def plot_2d(
         Maximum pixel size for dynamic rasterization when using dynspread.
     raster_threshold (float, default 0.8):
         Threshold used for the dynspread process in dynamic rasterization.
+    legend (str, optional):
+        Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
+        Only applies when colorby is not None and contains categorical data.
+    colorby (str, optional):
+        Feature property to use for coloring. If None (default), uses current green/red scheme
+        for features with/without MS2 data. If specified and contains categorical data, applies
+        categorical coloring with legend support (similar to plot_2d_oracle).
     Behavior:
     - Checks for a loaded mzML file by verifying that self.file_obj is not None.
     - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
@@ -1203,6 +1212,12 @@ def plot_2d(
 
     color_1 = "forestgreen"
     color_2 = "darkorange"
+
+    # Handle colorby parameter for feature coloring
+    use_categorical_coloring = False
+    feature_colors = {}
+    categorical_groups = []
+
     if filename is not None:
         dyn = False
         if not filename.endswith(".html"):
@@ -1270,74 +1285,203 @@ def plot_2d(
         feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
         # keep only iso==0, i.e. the main
         feats = feats[feats["iso"] == 0]
-        # find features with ms2_scans not None and iso==0
-        features_df = feats[feats["ms2_scans"].notnull()]
-        # Create feature points with proper sizing method
-        feature_hover_1 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_1 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "ms2_scans",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features with MS2 data",
-        ).options(
-            color=color_1,
-            marker=marker_type,
-            size=size_1,
-            tools=[feature_hover_1],
-            hooks=hooks,
-        )
-        # find features without MS2 data
-        features_df = feats[feats["ms2_scans"].isnull()]
-        feature_hover_2 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_2 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features without MS2 data",
-        ).options(
-            color="red",
-            marker=marker_type,
-            size=size_2,
-            tools=[feature_hover_2],
-            hooks=hooks,
-        )
+
+        # Handle colorby parameter
+        if colorby is not None and colorby in feats.columns:
+            # Check if colorby data is categorical (string-like)
+            colorby_values = feats[colorby].dropna()
+            is_categorical = (
+                feats[colorby].dtype in ["object", "string", "category"] or
+                (len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
+            )
+
+            if is_categorical:
+                use_categorical_coloring = True
+                # Get unique categories, sorted
+                categorical_groups = sorted(feats[colorby].dropna().unique())
+
+                # Set up colors for categorical data using matplotlib colormap
+                from matplotlib.colors import to_hex
+                try:
+                    from matplotlib.cm import get_cmap
+                    colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
+                    feature_colors = {}
+                    for i, group in enumerate(categorical_groups):
+                        if len(categorical_groups) <= 20:
+                            # Use qualitative colors for small number of categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        else:
+                            # Use continuous colormap for many categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        feature_colors[group] = to_hex(color_val)
+                except Exception as e:
+                    self.logger.warning(f"Could not set up categorical coloring: {e}, using default colors")
+                    use_categorical_coloring = False
+
+        if use_categorical_coloring and colorby is not None:
+            # Create separate feature points for each category
+            for i, group in enumerate(categorical_groups):
+                group_features = feats[feats[colorby] == group]
+                if len(group_features) == 0:
+                    continue
+
+                # Split by MS2 status
+                group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
+                group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
+
+                group_color = feature_colors.get(group, color_1)
+
+                if len(group_with_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_ms2 = hv.Points(
+                        group_with_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "ms2_scans",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_1,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_1 is None:
+                        feature_points_1 = group_points_ms2
+                    else:
+                        feature_points_1 = feature_points_1 * group_points_ms2
+
+                if len(group_without_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_no_ms2 = hv.Points(
+                        group_without_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (no MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_2,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_2 is None:
+                        feature_points_2 = group_points_no_ms2
+                    else:
+                        feature_points_2 = feature_points_2 * group_points_no_ms2
+        else:
+            # Use original green/red coloring scheme for MS2 presence
+            # find features with ms2_scans not None and iso==0
+            features_df = feats[feats["ms2_scans"].notnull()]
+            # Create feature points with proper sizing method
+            feature_hover_1 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_1 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "ms2_scans",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features with MS2 data",
+                ).options(
+                    color=color_1,
+                    marker=marker_type,
+                    size=size_1,
+                    tools=[feature_hover_1],
+                    hooks=hooks,
+                )
+
+            # find features without MS2 data
+            features_df = feats[feats["ms2_scans"].isnull()]
+            feature_hover_2 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_2 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features without MS2 data",
+                ).options(
+                    color="red",
+                    marker=marker_type,
+                    size=size_2,
+                    tools=[feature_hover_2],
+                    hooks=hooks,
+                )
 
         if show_isotopes:
             # Use proper Polars filter syntax to avoid boolean indexing issues
@@ -1451,6 +1595,31 @@ def plot_2d(
         if title is not None:
             overlay = overlay.opts(title=title)
 
+        # Handle legend positioning for categorical coloring
+        if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
+            # Map legend position parameter to HoloViews legend position
+            legend_position_map = {
+                "top_right": "top_right",
+                "top_left": "top_left",
+                "bottom_right": "bottom_right",
+                "bottom_left": "bottom_left",
+                "right": "right",
+                "left": "left",
+                "top": "top",
+                "bottom": "bottom"
+            }
+
+            hv_legend_pos = legend_position_map.get(legend, "bottom_right")
+
+            # Apply legend configuration to the overlay
+            overlay = overlay.opts(
+                legend_position=hv_legend_pos,
+                legend_opts={'title': '', 'padding': 2, 'spacing': 2}
+            )
+        elif legend is None and use_categorical_coloring:
+            # Explicitly hide legend when legend=None but categorical coloring is used
+            overlay = overlay.opts(show_legend=False)
+
         # Handle slider functionality
         if use_slider_sizing:
             # For slider functionality, we need to work with the feature points directly
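
The plot.py changes add two optional keywords to plot_2d. A minimal usage sketch follows; only the colorby and legend parameters are taken from the diff above, while the Sample entry point and the loading/feature-detection calls are assumptions for illustration and may differ from masster's actual API:

# Hypothetical usage of the plot_2d parameters added in 0.5.12.
from masster import Sample          # assumed import path

sample = Sample("example.mzML")     # assumed constructor/loader
sample.find_features()              # assumed feature-detection step

# Unchanged default: green/red coloring by MS2 presence.
sample.plot_2d(filename="map.html")

# New: categorical coloring by a string-typed feature column, with a legend.
sample.plot_2d(
    colorby="adduct",               # categorical column in the features table
    legend="bottom_right",          # any key of legend_position_map, or None to hide
    filename="map_by_adduct.html",
)
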
masster/study/export.py CHANGED
@@ -496,7 +496,7 @@ def export_mgf(self, **kwargs):
                 # Write END IONS
                 f.write("END IONS\n\n")
 
-        self.logger.info(f"Exported {len(mgf_data)} spectra to {filename}")
+        self.logger.success(f"Exported {len(mgf_data)} spectra to {filename}")
 
 
 def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs) -> None:
@@ -1183,7 +1183,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
             for line in mgf_lines:
                 f.write(line + "\n")
 
-    self.logger.info(f"Exported mzTab-M to {filename}")
+    self.logger.success(f"Exported mzTab-M to {filename}")
 
 
 def export_xlsx(self, filename: str | None = None) -> None:
@@ -1311,7 +1311,7 @@ def export_xlsx(self, filename: str | None = None) -> None:
                     f"Written worksheet '{sheet_name}' with shape {data.shape}",
                 )
 
-        self.logger.info(f"Study exported to {filename}")
+        self.logger.success(f"Study exported to {filename}")
 
     except Exception as e:
         self.logger.error(f"Error writing Excel file: {e}")
@@ -1424,8 +1424,6 @@ def export_parquet(self, filename: str | None = None) -> None:
 
     # Report results
     if exported_files:
-        self.logger.info(f"Study exported to {len(exported_files)} Parquet files:")
-        for file_path in exported_files:
-            self.logger.info(f"  - {file_path}")
+        self.logger.success(f"Study exported to {len(exported_files)} Parquet files.")
     else:
        self.logger.error("No Parquet files were created - no data available to export")
masster/study/h5.py CHANGED
@@ -834,6 +834,19 @@ def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataF
     object_data = {k: v for k, v in data.items() if k in object_columns}
     regular_data = {k: v for k, v in data.items() if k not in object_columns}
 
+    # Final check: ensure no numpy object arrays in regular_data
+    problematic_cols = []
+    for k, v in regular_data.items():
+        if hasattr(v, 'dtype') and str(v.dtype) == 'object':
+            problematic_cols.append(k)
+
+    if problematic_cols:
+        # Move these to object_data
+        for col in problematic_cols:
+            object_data[col] = _reconstruct_object_column(regular_data[col], col)
+            del regular_data[col]
+            object_columns.append(col)
+
     # Determine expected length from regular data or first object column
     expected_length = None
     if regular_data:
@@ -861,8 +874,47 @@ def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataF
 
     # Create DataFrame with regular columns first
     if regular_data:
-        df = pl.DataFrame(regular_data)
-        # print(f"DEBUG: Created DataFrame with regular columns, shape: {df.shape}")
+        # Final safety check: convert any remaining numpy object arrays to Python lists
+        # and handle numpy scalars within lists
+        safe_regular_data = {}
+        import numpy as np
+
+        def convert_numpy_scalars(value):
+            """Convert numpy scalars to Python native types recursively."""
+            if isinstance(value, np.generic):
+                return value.item()  # Convert numpy scalar to Python scalar
+            elif isinstance(value, list):
+                return [convert_numpy_scalars(item) for item in value]
+            else:
+                return value
+
+        for k, v in regular_data.items():
+            if hasattr(v, 'dtype') and str(v.dtype) == 'object':
+                # Convert numpy object array to Python list
+                safe_regular_data[k] = [convert_numpy_scalars(item) for item in (v.tolist() if hasattr(v, 'tolist') else list(v))]
+            elif isinstance(v, list):
+                # Handle lists that might contain numpy scalars
+                safe_regular_data[k] = [convert_numpy_scalars(item) for item in v]
+            else:
+                safe_regular_data[k] = convert_numpy_scalars(v)
+
+        # Create DataFrame with proper error handling
+        try:
+            df = pl.DataFrame(safe_regular_data)
+        except Exception as e:
+            # If direct creation fails, try creating column by column to identify and handle problematic columns
+            df = pl.DataFrame()
+            for k, v in safe_regular_data.items():
+                try:
+                    df = df.with_columns([pl.Series(k, v)])
+                except Exception:
+                    # Skip problematic columns or convert them to string as a fallback
+                    try:
+                        df = df.with_columns([pl.Series(k, [str(item) for item in v])])
+                    except Exception:
+                        # Last resort: skip the column entirely
+                        continue
+
     # Add Object columns one by one
     for col, values in object_data.items():
         # print(f"DEBUG: Adding object column '{col}', type: {type(values)}, length: {len(values) if values is not None else 'None'}")
@@ -1185,9 +1237,29 @@ def _load_dataframe_from_group(
             logger.debug(
                 f"Object column '{col}': length={len(data[col]) if data[col] is not None else 'None'}",
             )
+
+        # Debug: check for problematic data types in all columns before DataFrame creation
+        for col, values in data.items():
+            if hasattr(values, 'dtype') and str(values.dtype) == 'object':
+                logger.warning(f"Column '{col}' has numpy object dtype but is not in object_columns: {object_columns}")
+                if col not in object_columns:
+                    object_columns.append(col)
+
        df = _create_dataframe_with_objects(data, object_columns)
    else:
-        df = pl.DataFrame(data)
+        # Debug: check for problematic data types when no object columns are expected
+        for col, values in data.items():
+            if hasattr(values, 'dtype') and str(values.dtype) == 'object':
+                logger.warning(f"Column '{col}' has numpy object dtype but no object_columns specified!")
+                # Treat as object column
+                if object_columns is None:
+                    object_columns = []
+                object_columns.append(col)
+
+        if object_columns:
+            df = _create_dataframe_with_objects(data, object_columns)
+        else:
+            df = pl.DataFrame(data)
 
     # Clean null values and apply schema
     df = _clean_string_nulls(df)
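
The h5.py changes all guard against one failure mode: columns read back from HDF5 as numpy object arrays (or lists of numpy scalars), which Polars cannot ingest directly. A self-contained sketch of the problem and the conversion strategy used above; the column data here is invented for illustration:

# Why the guards exist: Polars typically rejects mixed-type numpy object
# arrays, and the fix is to unwrap numpy scalars into Python natives,
# falling back to strings as the diff does.
import numpy as np
import polars as pl

def convert_numpy_scalars(value):
    """Convert numpy scalars to Python native types recursively."""
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, list):
        return [convert_numpy_scalars(item) for item in value]
    return value

col = np.array([np.int64(1), "a", None], dtype=object)  # HDF5 round-trip artifact

try:
    df = pl.DataFrame({"col": col})  # usually fails: mixed-type object dtype
except Exception:
    safe = [convert_numpy_scalars(item) for item in col.tolist()]
    df = pl.DataFrame({"col": [str(item) for item in safe]})  # string fallback

print(df)
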
masster/study/helpers.py CHANGED
@@ -2663,7 +2663,7 @@ def features_filter(
     removed_count = initial_count - final_count
 
     self.logger.info(
-        f"Filtered features: kept {final_count:,}, removed {removed_count:,}"
+        f"Filtered features. Kept: {final_count:,}. Removed: {removed_count:,}."
     )
 
 
masster/study/merge.py CHANGED
@@ -427,9 +427,13 @@ def merge(study, **kwargs) -> None:
     # Feature maps will be generated on-demand within each merge method
 
     study.logger.info(
-        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
-    )
-
+        f"Merging samples using {params.method}, min_samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
+    )
+    if "chunked" in params.method:
+        study.logger.info(
+            f"threads={params.threads}, chunk_size={params.chunk_size}, dechunking='{params.dechunking}'"
+        )
+
     # Initialize
     study.consensus_df = pl.DataFrame()
     study.consensus_ms2 = pl.DataFrame()
@@ -751,7 +755,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
 
     else:
         # Parallel processing
-        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
+        #study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
 
         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -812,7 +816,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
                 if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
@@ -846,7 +850,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                 raise exc
@@ -926,7 +930,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
 
     else:
         # Parallel processing
-        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
+        #study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
 
         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -987,7 +991,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
                 if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
@@ -1021,7 +1025,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
                 serialized_chunk_results.append((chunk_start_idx, consensus_features))
                 completed_chunks += 1
                 n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-                study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+                study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
             except Exception as exc:
                 study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                 raise exc
@@ -2251,15 +2255,13 @@ def _perform_adduct_grouping(study, rt_tol, mz_tol):
             {
                 "consensus_uid": row["consensus_uid"],
                 "rt": row["rt"],
-                "mz": row["mz"],  # Add missing mz field
+                "mz": row["mz"],
                 "adduct_mass_neutral_top": row.get("adduct_mass_neutral_top"),
                 "adduct_top": row.get("adduct_top"),
                 "inty_mean": row.get("inty_mean", 0),
             },
         )
 
-    # Use optimized adduct grouping
-    study.logger.info(f"About to call adduct grouping for {len(consensus_data)} consensus features")
     adduct_group_list, adduct_of_list = __merge_adduct_grouping(
         study, consensus_data, rt_tol/3, mz_tol
     )
@@ -2714,8 +2716,6 @@ def __identify_adduct_by_mass_shift(study, rt_tol, cached_adducts_df=None):
         study.logger.debug("No consensus features for adduct identification by mass shift")
         return
 
-    study.logger.info(f"Identifying coeluting adducts by mass shifts in {len(study.consensus_df)} consensus features...")
-
     # Get adducts DataFrame if not provided
     if cached_adducts_df is None or cached_adducts_df.is_empty():
         try:
@@ -3021,8 +3021,7 @@ def __identify_adduct_by_mass_shift(study, rt_tol, cached_adducts_df=None):
             pl.Series("adduct_mass_neutral_top", new_adduct_mass_neutral_top),
             pl.Series("adduct_mass_shift_top", new_adduct_mass_shift_top)
         ])
-
-        study.logger.info(f"Updated adduct assignments for {updated_count} consensus features based on mass shifts")
+        study.logger.success(f"Adduct information updated for {updated_count} consensus features.")
     else:
         study.logger.debug("No consensus features updated based on mass shift analysis")
 
@@ -3391,7 +3390,7 @@ def __merge_adduct_grouping(study, consensus_data, rt_tol, mz_tol):
             adduct_of_list = [0] * len(consensus_data)
             return adduct_group_list, adduct_of_list
 
-        study.logger.info(f"Built local intensity matrix: {len(intensity_matrix_pd)} features x {len(intensity_matrix_pd.columns)} samples")
+        study.logger.debug(f"Built local intensity matrix: {len(intensity_matrix_pd)} features x {len(intensity_matrix_pd.columns)} samples")
 
     except Exception as e:
         study.logger.warning(f"Could not build local intensity matrix: {e}. Creating single-feature groups.")
@@ -3401,7 +3400,7 @@ def __merge_adduct_grouping(study, consensus_data, rt_tol, mz_tol):
 
     # Step 2: Get adduct pairs with likelihood information and build hash map for fast lookup
     adduct_pairs_with_likelihood = _get_adduct_deltas_with_likelihood(study)
-    study.logger.info(f"Using {len(adduct_pairs_with_likelihood)} adduct pairs with likelihood scoring")
+    study.logger.debug(f"Using {len(adduct_pairs_with_likelihood)} adduct pairs with likelihood scoring")
 
     # Build hash map for O(1) mass shift lookup
     mass_shift_map = {}  # rounded_delta -> [(likelihood, adduct1, adduct2), ...]
@@ -86,8 +86,6 @@ def align(self, **kwargs):
         self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
         return
 
-    self.logger.success("Alignment completed.")
-
     # Reset consensus data structures after alignment since RT changes invalidate consensus
     consensus_reset_count = 0
     if not self.consensus_df.is_empty():
@@ -681,16 +679,15 @@ def _align_pose_clustering(study_obj, params):
     params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
 
     aligner = oms.MapAlignmentAlgorithmPoseClustering()
-    study_obj.logger.info("Starting alignment with PoseClustering")
+    study_obj.logger.info(
+        f"Align RTs with Pose clustering: rt_tol={params.get('rt_tol')}",
+    )
 
     # Set ref_index to feature map index with largest number of features
     ref_index = [
         i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])
     ][-1]
-    study_obj.logger.debug(
-        f"Reference map is {study_obj.samples_df.row(ref_index, named=True)['sample_name']}",
-    )
-
+
     aligner.setParameters(params_oms)
     aligner.setReference(fmaps[ref_index])
     study_obj.logger.debug(f"Parameters for alignment: {params}")
@@ -836,6 +833,12 @@ def _align_pose_clustering(study_obj, params):
     # Clean up temporary feature maps to release memory
     del fmaps
     study_obj.logger.debug("Temporary feature maps deleted to release memory")
+
+    # Resolve reference sample UID from the reference index
+    ref_sample_uid = sample_uid_lookup.get(ref_index)
+    study_obj.logger.success(
+        f"Alignment completed. Reference sample UID {ref_sample_uid}.",
+    )
 
 
 def _align_kd_algorithm(study_obj, params):
@@ -879,7 +882,7 @@ def _align_kd_algorithm(study_obj, params):
        _raw_mp = None
    max_points = int(_raw_mp) if _raw_mp is not None else 1000
    study_obj.logger.info(
-        f"KD align: rt_tol={params.get('rt_tol')}, max_points={max_points}",
+        f"Align RTs with KD-Tree: rt_tol={params.get('rt_tol')}, max_points={max_points}",
    )
 
    # Work directly with features_df instead of feature maps
@@ -1092,7 +1095,7 @@ def _align_kd_algorithm(study_obj, params):
     )
 
     study_obj.logger.success(
-        f"Alignment completed. Reference sample UID {ref_sample_uid} (index {ref_index}).",
+        f"Alignment completed. Reference sample UID {ref_sample_uid}.",
    )
 
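The chunked-merge logging changes sit inside a parallel loop that already handles BrokenProcessPool, which a ProcessPoolExecutor raises when a worker process dies (a known pain point on Windows). A hedged sketch of that general pattern, with illustrative names rather than masster's actual internals:

# Sketch: run chunks in a process pool, fall back to serial work if the
# pool breaks. process_chunk stands in for the per-chunk consensus step.
from concurrent.futures import ProcessPoolExecutor, as_completed
from concurrent.futures.process import BrokenProcessPool

def process_chunk(chunk):
    # placeholder for the real per-chunk computation
    return sum(chunk)

def merge_chunks(chunks, threads=4):
    results = []
    try:
        with ProcessPoolExecutor(max_workers=threads) as pool:
            futures = {pool.submit(process_chunk, c): i for i, c in enumerate(chunks)}
            for fut in as_completed(futures):
                results.append((futures[fut], fut.result()))
    except BrokenProcessPool:
        # pool died mid-run: redo everything serially, discarding partial results
        results = [(i, process_chunk(c)) for i, c in enumerate(chunks)]
    return [r for _, r in sorted(results)]

if __name__ == "__main__":
    print(merge_chunks([[1, 2], [3, 4], [5, 6]], threads=2))
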
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.10
+Version: 0.5.12
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,5 +1,5 @@
 masster/__init__.py,sha256=ueZ224WPNRRjQEYTaQUol818nwQgJwB93HbEfmtPRmg,1041
-masster/_version.py,sha256=ykzsX9zBanG6fFefFoIDPED_UySJmLofW0r3TROBhFY,257
+masster/_version.py,sha256=NpsdpZszb6deAwFjiRivIjfQHvakNcnvE_OoEMce2HQ,257
 masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
 masster/logger.py,sha256=XT2gUcUIct8LWzTp9n484g5MaB89toT76CGA41oBvfA,18375
 masster/spectrum.py,sha256=TWIgDcl0lveG40cLVZTWGp8-FxMolu-P8EjZyRBtXL4,49850
@@ -25,7 +25,7 @@ masster/sample/helpers.py,sha256=Mt9LX-Dy1Xro1a_Sy6nxQzCkP_-q7nK4xVnNm44v7UA,438
 masster/sample/lib.py,sha256=E-j9c3Wd8f9a-H8xj7CAOwlA8KcyXPoFyYm3c8r7LtI,33755
 masster/sample/load.py,sha256=swjRBCoFGni9iPztHIKPVB5ru_xDMVryB_inPXdujTw,51819
 masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,4456
-masster/sample/plot.py,sha256=Cf_kuUiZnVHSlZfJQbV8Wtmdw1PPG5D3g1UbLobaXMs,96483
+masster/sample/plot.py,sha256=5qn2Cpl363f3hW1ZeI4BZV9_36VLx39PKQMrebJhfp4,104864
 masster/sample/processing.py,sha256=qk-6_v424nwfaoVmdbHj-_lJiW7OkWS7SuQzQWNAFGI,55919
 masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
 masster/sample/sample.py,sha256=pw4fIE5gecdupZOOWFUiRCs0x-3qa3Nv7V_UdJ-CAsc,22202
@@ -40,15 +40,15 @@ masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2At
 masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb5iMQL0iY,14579
 masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
 masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,84264
-masster/study/export.py,sha256=joFK9jip2UM4lVAvhkdKVeUdNdM4D8uP2WE49IaVJgw,60172
-masster/study/h5.py,sha256=KpvV6-0RGIAjYBNa7AodbLmlGtoDUvbeC_jB2IZdYvA,96118
-masster/study/helpers.py,sha256=QwPyGTuRKZoimK_y1kX4Ag_0rJNB1MYoP0Q2mXEVshs,191930
+masster/study/export.py,sha256=c1HJdLAM6Ply0n8f0DjMk4mXd9lOYePr60UJTBksUho,60092
+masster/study/h5.py,sha256=bznE9kKEfLNo0QtbyC6a6snfnR3Zjkx5BcjBNbRVlJ8,99579
+masster/study/helpers.py,sha256=fBZ6hDa_C8muqS4XWkE6KXtNQ-yEX4bkxnu34y1SZ5c,191933
 masster/study/id.py,sha256=heKU309cUsNeFxbWYvqxVIAJLrR1H0YqMgLanLx9Do4,80091
 masster/study/load.py,sha256=BMjoUDkXNI6iU2tRE2eBRzxMrvW0gRyLepqYOWaMPXU,101192
-masster/study/merge.py,sha256=aEZjNhrsQZxkRhyyuOUjlIN_tdA6y2VX2BAkvfPd_Sc,169300
+masster/study/merge.py,sha256=eV7iaeChBFglVBXqxgAl4P207gSYeuG2WU2rPVw1_34,169178
 masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
 masster/study/plot.py,sha256=ftQAVgEYkZuKAVIlbTR5bUypF8DpMOxSXwOyYz_BsOQ,110610
-masster/study/processing.py,sha256=n-JbH1ZHtSE1xlyi69ZrcHMsxw7dAyodC5hnaNld2to,58537
+masster/study/processing.py,sha256=5b8K4tP-Xu1-mhdf0om-m-g65Z9Uz3Dp4UBhuMLh0yU,58627
 masster/study/save.py,sha256=47AP518epJJ9TjaGGyrLKsMsyjIk8_J4ka7bmsnRtFQ,9268
 masster/study/study.py,sha256=gudugPJk3LOtZh-YsszSRCBDrBG78cexoG0CSM86EPs,38701
 masster/study/study5_schema.json,sha256=lTFePwY8bQngyBnNCP60-UP9tnZLGhFo3YtJgwHTWdo,7797
@@ -67,8 +67,8 @@ masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,1414
 masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
 masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
 masster/wizard/wizard.py,sha256=UobIGFZtp1s_9WJlpl6DQ2-pp7flPQ6dlYZJqYE92OM,38131
-masster-0.5.10.dist-info/METADATA,sha256=wPI5dLDPHYjlcafoYNdUWlnUDc-bS-HjBruaVnVDxpA,45191
-masster-0.5.10.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-masster-0.5.10.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
-masster-0.5.10.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
-masster-0.5.10.dist-info/RECORD,,
+masster-0.5.12.dist-info/METADATA,sha256=Zsqci475Yv3lnTJOCRHtaCeII1RDGeO9qSyYMX2OgFA,45191
+masster-0.5.12.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+masster-0.5.12.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+masster-0.5.12.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+masster-0.5.12.dist-info/RECORD,,