masster 0.5.10.tar.gz → 0.5.11.tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster might be problematic.

Files changed (95)
  1. {masster-0.5.10 → masster-0.5.11}/PKG-INFO +1 -1
  2. {masster-0.5.10 → masster-0.5.11}/pyproject.toml +1 -1
  3. {masster-0.5.10 → masster-0.5.11}/src/masster/_version.py +1 -1
  4. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/plot.py +238 -69
  5. {masster-0.5.10 → masster-0.5.11}/src/masster/study/export.py +4 -6
  6. {masster-0.5.10 → masster-0.5.11}/src/masster/study/h5.py +34 -1
  7. {masster-0.5.10 → masster-0.5.11}/src/masster/study/helpers.py +1 -1
  8. {masster-0.5.10 → masster-0.5.11}/src/masster/study/merge.py +14 -10
  9. {masster-0.5.10 → masster-0.5.11}/src/masster/study/processing.py +12 -9
  10. {masster-0.5.10 → masster-0.5.11}/uv.lock +1 -1
  11. {masster-0.5.10 → masster-0.5.11}/.github/workflows/publish.yml +0 -0
  12. {masster-0.5.10 → masster-0.5.11}/.github/workflows/security.yml +0 -0
  13. {masster-0.5.10 → masster-0.5.11}/.github/workflows/test.yml +0 -0
  14. {masster-0.5.10 → masster-0.5.11}/.gitignore +0 -0
  15. {masster-0.5.10 → masster-0.5.11}/.pre-commit-config.yaml +0 -0
  16. {masster-0.5.10 → masster-0.5.11}/LICENSE +0 -0
  17. {masster-0.5.10 → masster-0.5.11}/Makefile +0 -0
  18. {masster-0.5.10 → masster-0.5.11}/README.md +0 -0
  19. {masster-0.5.10 → masster-0.5.11}/TESTING.md +0 -0
  20. {masster-0.5.10 → masster-0.5.11}/demo/example_batch_process.py +0 -0
  21. {masster-0.5.10 → masster-0.5.11}/demo/example_sample_process.py +0 -0
  22. {masster-0.5.10 → masster-0.5.11}/src/masster/__init__.py +0 -0
  23. {masster-0.5.10 → masster-0.5.11}/src/masster/chromatogram.py +0 -0
  24. {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  25. {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  26. {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  27. {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  28. {masster-0.5.10 → masster-0.5.11}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  29. {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/aa.csv +0 -0
  30. {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/ccm.csv +0 -0
  31. {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/hilic.csv +0 -0
  32. {masster-0.5.10 → masster-0.5.11}/src/masster/data/libs/urine.csv +0 -0
  33. {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  34. {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  35. {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  36. {masster-0.5.10 → masster-0.5.11}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  37. {masster-0.5.10 → masster-0.5.11}/src/masster/lib/__init__.py +0 -0
  38. {masster-0.5.10 → masster-0.5.11}/src/masster/lib/lib.py +0 -0
  39. {masster-0.5.10 → masster-0.5.11}/src/masster/logger.py +0 -0
  40. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/__init__.py +0 -0
  41. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/adducts.py +0 -0
  42. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/__init__.py +0 -0
  43. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/find_adducts_def.py +0 -0
  44. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/find_features_def.py +0 -0
  45. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/find_ms2_def.py +0 -0
  46. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
  47. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/defaults/sample_def.py +0 -0
  48. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/h5.py +0 -0
  49. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/helpers.py +0 -0
  50. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/lib.py +0 -0
  51. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/load.py +0 -0
  52. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/parameters.py +0 -0
  53. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/processing.py +0 -0
  54. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/quant.py +0 -0
  55. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/sample.py +0 -0
  56. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/sample5_schema.json +0 -0
  57. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/save.py +0 -0
  58. {masster-0.5.10 → masster-0.5.11}/src/masster/sample/sciex.py +0 -0
  59. {masster-0.5.10 → masster-0.5.11}/src/masster/spectrum.py +0 -0
  60. {masster-0.5.10 → masster-0.5.11}/src/masster/study/__init__.py +0 -0
  61. {masster-0.5.10 → masster-0.5.11}/src/masster/study/analysis.py +0 -0
  62. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/__init__.py +0 -0
  63. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/align_def.py +0 -0
  64. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/export_def.py +0 -0
  65. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/fill_def.py +0 -0
  66. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/find_consensus_def.py +0 -0
  67. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/find_ms2_def.py +0 -0
  68. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/identify_def.py +0 -0
  69. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
  70. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/integrate_def.py +0 -0
  71. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/merge_def.py +0 -0
  72. {masster-0.5.10 → masster-0.5.11}/src/masster/study/defaults/study_def.py +0 -0
  73. {masster-0.5.10 → masster-0.5.11}/src/masster/study/id.py +0 -0
  74. {masster-0.5.10 → masster-0.5.11}/src/masster/study/load.py +0 -0
  75. {masster-0.5.10 → masster-0.5.11}/src/masster/study/parameters.py +0 -0
  76. {masster-0.5.10 → masster-0.5.11}/src/masster/study/plot.py +0 -0
  77. {masster-0.5.10 → masster-0.5.11}/src/masster/study/save.py +0 -0
  78. {masster-0.5.10 → masster-0.5.11}/src/masster/study/study.py +0 -0
  79. {masster-0.5.10 → masster-0.5.11}/src/masster/study/study5_schema.json +0 -0
  80. {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/README.md +0 -0
  81. {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/__init__.py +0 -0
  82. {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/example.py +0 -0
  83. {masster-0.5.10 → masster-0.5.11}/src/masster/wizard/wizard.py +0 -0
  84. {masster-0.5.10 → masster-0.5.11}/tests/conftest.py +0 -0
  85. {masster-0.5.10 → masster-0.5.11}/tests/test_chromatogram.py +0 -0
  86. {masster-0.5.10 → masster-0.5.11}/tests/test_defaults.py +0 -0
  87. {masster-0.5.10 → masster-0.5.11}/tests/test_imports.py +0 -0
  88. {masster-0.5.10 → masster-0.5.11}/tests/test_integration.py +0 -0
  89. {masster-0.5.10 → masster-0.5.11}/tests/test_logger.py +0 -0
  90. {masster-0.5.10 → masster-0.5.11}/tests/test_parameters.py +0 -0
  91. {masster-0.5.10 → masster-0.5.11}/tests/test_sample.py +0 -0
  92. {masster-0.5.10 → masster-0.5.11}/tests/test_spectrum.py +0 -0
  93. {masster-0.5.10 → masster-0.5.11}/tests/test_study.py +0 -0
  94. {masster-0.5.10 → masster-0.5.11}/tests/test_version.py +0 -0
  95. {masster-0.5.10 → masster-0.5.11}/tox.ini +0 -0
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.5.10
+Version: 0.5.11
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster
@@ -1,7 +1,7 @@
 
 [project]
 name = "masster"
-version = "0.5.10"
+version = "0.5.11"
 description = "Mass spectrometry data analysis package"
 authors = [
     { name = "Zamboni Lab" }
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 
-__version__ = "0.5.10"
+__version__ = "0.5.11"
 
 
 def get_version():
@@ -1041,7 +1041,9 @@ def plot_2d(
     height=600,
     width=750,
     mz_range=None,
-    rt_range=None
+    rt_range=None,
+    legend=None,
+    colorby=None
 ):
     """
     Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1087,6 +1089,13 @@ def plot_2d(
         Maximum pixel size for dynamic rasterization when using dynspread.
     raster_threshold (float, default 0.8):
         Threshold used for the dynspread process in dynamic rasterization.
+    legend (str, optional):
+        Legend position for categorical feature coloring ("top_right", "bottom_left", etc.) or None.
+        Only applies when colorby is not None and contains categorical data.
+    colorby (str, optional):
+        Feature property to use for coloring. If None (default), uses current green/red scheme
+        for features with/without MS2 data. If specified and contains categorical data, applies
+        categorical coloring with legend support (similar to plot_2d_oracle).
 
     Behavior:
     - Checks for a loaded mzML file by verifying that self.file_obj is not None.
     - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
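For orientation, a minimal usage sketch of the new parameters. The Sample class name and the .sample5 path are assumptions based on the package layout; the keyword names follow the docstring above:

from masster import Sample

s = Sample("data/example.sample5")   # hypothetical input file
s.plot_2d(
    colorby="adduct",                # any categorical feature column
    legend="bottom_right",           # or None to hide the legend
    filename="features_2d.html",
)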
@@ -1203,6 +1212,12 @@ def plot_2d(
 
     color_1 = "forestgreen"
     color_2 = "darkorange"
+
+    # Handle colorby parameter for feature coloring
+    use_categorical_coloring = False
+    feature_colors = {}
+    categorical_groups = []
+
     if filename is not None:
         dyn = False
         if not filename.endswith(".html"):
@@ -1270,74 +1285,203 @@
             feats = feats[(feats["rt"] >= rt_range[0]) & (feats["rt"] <= rt_range[1])]
         # keep only iso==0, i.e. the main
         feats = feats[feats["iso"] == 0]
-        # find features with ms2_scans not None and iso==0
-        features_df = feats[feats["ms2_scans"].notnull()]
-        # Create feature points with proper sizing method
-        feature_hover_1 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_1 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "ms2_scans",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features with MS2 data",
-        ).options(
-            color=color_1,
-            marker=marker_type,
-            size=size_1,
-            tools=[feature_hover_1],
-            hooks=hooks,
-        )
-        # find features without MS2 data
-        features_df = feats[feats["ms2_scans"].isnull()]
-        feature_hover_2 = HoverTool(
-            tooltips=[
-                ("rt", "@rt"),
-                ("m/z", "@mz{0.0000}"),
-                ("feature_uid", "@feature_uid"),
-                ("inty", "@inty"),
-                ("iso", "@iso"),
-                ("adduct", "@adduct"),
-                ("chrom_coherence", "@chrom_coherence"),
-                ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
-            ],
-        )
-        feature_points_2 = hv.Points(
-            features_df,
-            kdims=["rt", "mz"],
-            vdims=[
-                "feature_uid",
-                "inty",
-                "iso",
-                "adduct",
-                "chrom_coherence",
-                "chrom_prominence_scaled",
-            ],
-            label="Features without MS2 data",
-        ).options(
-            color="red",
-            marker=marker_type,
-            size=size_2,
-            tools=[feature_hover_2],
-            hooks=hooks,
-        )
+
+        # Handle colorby parameter
+        if colorby is not None and colorby in feats.columns:
+            # Check if colorby data is categorical (string-like)
+            colorby_values = feats[colorby].dropna()
+            is_categorical = (
+                feats[colorby].dtype in ["object", "string", "category"] or
+                (len(colorby_values) > 0 and isinstance(colorby_values.iloc[0], str))
+            )
+
+            if is_categorical:
+                use_categorical_coloring = True
+                # Get unique categories, sorted
+                categorical_groups = sorted(feats[colorby].dropna().unique())
+
+                # Set up colors for categorical data using matplotlib colormap
+                from matplotlib.colors import to_hex
+                try:
+                    from matplotlib.cm import get_cmap
+                    colormap_func = get_cmap(cmap if cmap != 'iridescent' else 'tab20')
+                    feature_colors = {}
+                    for i, group in enumerate(categorical_groups):
+                        if len(categorical_groups) <= 20:
+                            # Use qualitative colors for small number of categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        else:
+                            # Use continuous colormap for many categories
+                            color_val = colormap_func(i / max(1, len(categorical_groups) - 1))
+                        feature_colors[group] = to_hex(color_val)
+                except Exception as e:
+                    self.logger.warning(f"Could not set up categorical coloring: {e}, using default colors")
+                    use_categorical_coloring = False
+
+        if use_categorical_coloring and colorby is not None:
+            # Create separate feature points for each category
+            for i, group in enumerate(categorical_groups):
+                group_features = feats[feats[colorby] == group]
+                if len(group_features) == 0:
+                    continue
+
+                # Split by MS2 status
+                group_with_ms2 = group_features[group_features["ms2_scans"].notnull()]
+                group_without_ms2 = group_features[group_features["ms2_scans"].isnull()]
+
+                group_color = feature_colors.get(group, color_1)
+
+                if len(group_with_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_ms2 = hv.Points(
+                        group_with_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "ms2_scans",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_1,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_1 is None:
+                        feature_points_1 = group_points_ms2
+                    else:
+                        feature_points_1 = feature_points_1 * group_points_ms2
+
+                if len(group_without_ms2) > 0:
+                    feature_hover = HoverTool(
+                        tooltips=[
+                            ("rt", "@rt"),
+                            ("m/z", "@mz{0.0000}"),
+                            ("feature_uid", "@feature_uid"),
+                            ("inty", "@inty"),
+                            ("iso", "@iso"),
+                            ("adduct", "@adduct"),
+                            ("chrom_coherence", "@chrom_coherence"),
+                            ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                            (colorby, f"@{colorby}"),
+                        ],
+                    )
+                    group_points_no_ms2 = hv.Points(
+                        group_without_ms2,
+                        kdims=["rt", "mz"],
+                        vdims=[
+                            "feature_uid",
+                            "inty",
+                            "iso",
+                            "adduct",
+                            "chrom_coherence",
+                            "chrom_prominence_scaled",
+                            colorby,
+                        ],
+                        label=f"{group} (no MS2)",
+                    ).options(
+                        color=group_color,
+                        marker=marker_type,
+                        size=size_2,
+                        tools=[feature_hover],
+                        hooks=hooks,
+                    )
+                    if feature_points_2 is None:
+                        feature_points_2 = group_points_no_ms2
+                    else:
+                        feature_points_2 = feature_points_2 * group_points_no_ms2
+        else:
+            # Use original green/red coloring scheme for MS2 presence
+            # find features with ms2_scans not None and iso==0
+            features_df = feats[feats["ms2_scans"].notnull()]
+            # Create feature points with proper sizing method
+            feature_hover_1 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_1 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "ms2_scans",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features with MS2 data",
+                ).options(
+                    color=color_1,
+                    marker=marker_type,
+                    size=size_1,
+                    tools=[feature_hover_1],
+                    hooks=hooks,
+                )
+
+            # find features without MS2 data
+            features_df = feats[feats["ms2_scans"].isnull()]
+            feature_hover_2 = HoverTool(
+                tooltips=[
+                    ("rt", "@rt"),
+                    ("m/z", "@mz{0.0000}"),
+                    ("feature_uid", "@feature_uid"),
+                    ("inty", "@inty"),
+                    ("iso", "@iso"),
+                    ("adduct", "@adduct"),
+                    ("chrom_coherence", "@chrom_coherence"),
+                    ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
+                ],
+            )
+            if len(features_df) > 0:
+                feature_points_2 = hv.Points(
+                    features_df,
+                    kdims=["rt", "mz"],
+                    vdims=[
+                        "feature_uid",
+                        "inty",
+                        "iso",
+                        "adduct",
+                        "chrom_coherence",
+                        "chrom_prominence_scaled",
+                    ],
+                    label="Features without MS2 data",
+                ).options(
+                    color="red",
+                    marker=marker_type,
+                    size=size_2,
+                    tools=[feature_hover_2],
+                    hooks=hooks,
+                )
 
     if show_isotopes:
         # Use proper Polars filter syntax to avoid boolean indexing issues
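As a standalone illustration of the color-assignment step above (hypothetical group labels; this sketch uses plt.get_cmap, since matplotlib.cm.get_cmap was deprecated in Matplotlib 3.7 and removed in 3.9):

import matplotlib.pyplot as plt
from matplotlib.colors import to_hex

categorical_groups = sorted(["QC", "blank", "sample"])   # hypothetical labels
cmap = plt.get_cmap("tab20")

# Spread the groups evenly over the colormap and convert to hex,
# mirroring the loop in the diff above.
feature_colors = {
    group: to_hex(cmap(i / max(1, len(categorical_groups) - 1)))
    for i, group in enumerate(categorical_groups)
}
print(feature_colors)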
@@ -1451,6 +1595,31 @@ def plot_2d(
     if title is not None:
         overlay = overlay.opts(title=title)
 
+    # Handle legend positioning for categorical coloring
+    if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
+        # Map legend position parameter to HoloViews legend position
+        legend_position_map = {
+            "top_right": "top_right",
+            "top_left": "top_left",
+            "bottom_right": "bottom_right",
+            "bottom_left": "bottom_left",
+            "right": "right",
+            "left": "left",
+            "top": "top",
+            "bottom": "bottom"
+        }
+
+        hv_legend_pos = legend_position_map.get(legend, "bottom_right")
+
+        # Apply legend configuration to the overlay
+        overlay = overlay.opts(
+            legend_position=hv_legend_pos,
+            legend_opts={'title': '', 'padding': 2, 'spacing': 2}
+        )
+    elif legend is None and use_categorical_coloring:
+        # Explicitly hide legend when legend=None but categorical coloring is used
+        overlay = overlay.opts(show_legend=False)
+
     # Handle slider functionality
     if use_slider_sizing:
         # For slider functionality, we need to work with the feature points directly
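For reference, a self-contained sketch of the overlay legend mechanics this relies on (toy data, HoloViews with the Bokeh backend; not masster code):

import holoviews as hv

hv.extension("bokeh")

a = hv.Points([(0.0, 0.0), (1.0, 1.0)], label="group A")
b = hv.Points([(0.0, 1.0), (1.0, 0.0)], label="group B")

# Labeled elements in an overlay become legend entries; legend_position
# accepts the same position strings mapped above, and show_legend=False
# hides the legend entirely.
overlay = (a * b).opts(legend_position="bottom_right")
hv.save(overlay, "legend_demo.html")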
@@ -496,7 +496,7 @@ def export_mgf(self, **kwargs):
             # Write END IONS
             f.write("END IONS\n\n")
 
-    self.logger.info(f"Exported {len(mgf_data)} spectra to {filename}")
+    self.logger.success(f"Exported {len(mgf_data)} spectra to {filename}")
 
 
 def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs) -> None:
@@ -1183,7 +1183,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
         for line in mgf_lines:
             f.write(line + "\n")
 
-    self.logger.info(f"Exported mzTab-M to {filename}")
+    self.logger.success(f"Exported mzTab-M to {filename}")
 
 
 def export_xlsx(self, filename: str | None = None) -> None:
@@ -1311,7 +1311,7 @@ def export_xlsx(self, filename: str | None = None) -> None:
                 f"Written worksheet '{sheet_name}' with shape {data.shape}",
             )
 
-        self.logger.info(f"Study exported to {filename}")
+        self.logger.success(f"Study exported to {filename}")
 
     except Exception as e:
         self.logger.error(f"Error writing Excel file: {e}")
@@ -1424,8 +1424,6 @@ def export_parquet(self, filename: str | None = None) -> None:
 
     # Report results
     if exported_files:
-        self.logger.info(f"Study exported to {len(exported_files)} Parquet files:")
-        for file_path in exported_files:
-            self.logger.info(f" - {file_path}")
+        self.logger.success(f"Study exported to {len(exported_files)} Parquet files.")
     else:
         self.logger.error("No Parquet files were created - no data available to export")
@@ -834,6 +834,19 @@ def _create_dataframe_with_objects(data: dict, object_columns: list) -> pl.DataFrame:
     object_data = {k: v for k, v in data.items() if k in object_columns}
     regular_data = {k: v for k, v in data.items() if k not in object_columns}
 
+    # Final check: ensure no numpy object arrays in regular_data
+    problematic_cols = []
+    for k, v in regular_data.items():
+        if hasattr(v, 'dtype') and str(v.dtype) == 'object':
+            problematic_cols.append(k)
+
+    if problematic_cols:
+        # Move these to object_data
+        for col in problematic_cols:
+            object_data[col] = _reconstruct_object_column(regular_data[col], col)
+            del regular_data[col]
+            object_columns.append(col)
+
     # Determine expected length from regular data or first object column
     expected_length = None
     if regular_data:
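For context, the dtype check above targets ragged or mixed-type numpy arrays, which carry dtype('O') and cannot be handed to Polars directly (_reconstruct_object_column is the package's own helper). A minimal repro of the failure mode and the list-based workaround, not masster code:

import numpy as np
import polars as pl

ragged = np.array([[1, 2], [3]], dtype=object)   # ragged rows force object dtype
print(hasattr(ragged, "dtype") and str(ragged.dtype) == "object")  # True -> flagged

# Routing through Python lists gives Polars a concrete nested type instead:
df = pl.DataFrame({"vals": ragged.tolist()})
print(df.schema)  # e.g. {'vals': List(Int64)}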
@@ -1185,9 +1198,29 @@ def _load_dataframe_from_group(
             logger.debug(
                 f"Object column '{col}': length={len(data[col]) if data[col] is not None else 'None'}",
             )
+
+        # Debug: check for problematic data types in all columns before DataFrame creation
+        for col, values in data.items():
+            if hasattr(values, 'dtype') and str(values.dtype) == 'object':
+                logger.warning(f"Column '{col}' has numpy object dtype but is not in object_columns: {object_columns}")
+                if col not in object_columns:
+                    object_columns.append(col)
+
         df = _create_dataframe_with_objects(data, object_columns)
     else:
-        df = pl.DataFrame(data)
+        # Debug: check for problematic data types when no object columns are expected
+        for col, values in data.items():
+            if hasattr(values, 'dtype') and str(values.dtype) == 'object':
+                logger.warning(f"Column '{col}' has numpy object dtype but no object_columns specified!")
+                # Treat as object column
+                if object_columns is None:
+                    object_columns = []
+                object_columns.append(col)
+
+        if object_columns:
+            df = _create_dataframe_with_objects(data, object_columns)
+        else:
+            df = pl.DataFrame(data)
 
     # Clean null values and apply schema
     df = _clean_string_nulls(df)
@@ -2663,7 +2663,7 @@ def features_filter(
     removed_count = initial_count - final_count
 
     self.logger.info(
-        f"Filtered features: kept {final_count:,}, removed {removed_count:,}"
+        f"Filtered features. Kept: {final_count:,}. Removed: {removed_count:,}."
     )
 
 
@@ -427,9 +427,13 @@ def merge(study, **kwargs) -> None:
     # Feature maps will be generated on-demand within each merge method
 
     study.logger.info(
-        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
-    )
-
+        f"Merging samples using {params.method}, min_samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
+    )
+    if "chunked" in params.method:
+        study.logger.info(
+            f"threads={params.threads}, chunk_size={params.chunk_size}, dechunking='{params.dechunking}'"
+        )
+
     # Initialize
     study.consensus_df = pl.DataFrame()
     study.consensus_ms2 = pl.DataFrame()
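A hypothetical call showing where these parameters enter (keyword names mirror the log strings above; the actual Study.merge signature and the dechunking value are assumptions):

study.merge(
    method="kd_chunked",    # chunked methods trigger the extra log line above
    min_samples=3,
    rt_tol=5.0,             # seconds
    mz_tol=0.01,            # Da
    threads=8,
    chunk_size=64,
    dechunking="pairwise",  # hypothetical value
)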
@@ -751,7 +755,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
 
     else:
         # Parallel processing
-        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
+        #study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
 
         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -812,7 +816,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
         serialized_chunk_results.append((chunk_start_idx, consensus_features))
         completed_chunks += 1
         n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-        study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+        study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
     except Exception as exc:
         # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
         if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
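The surrounding loop follows the standard executor pattern sketched below; process_chunk stands in for masster's worker, and the BrokenProcessPool guard mirrors the Windows fallback in the diff:

from concurrent.futures import ProcessPoolExecutor, as_completed
from concurrent.futures.process import BrokenProcessPool

def run_chunks(chunk_data_list, process_chunk, threads, logger):
    results = []
    with ProcessPoolExecutor(max_workers=threads) as pool:
        futures = {pool.submit(process_chunk, chunk): i
                   for i, chunk in enumerate(chunk_data_list)}
        for done, fut in enumerate(as_completed(futures), start=1):
            idx = futures[fut]
            try:
                results.append((idx, fut.result()))
                logger.info(f"Completed chunk {done}/{len(futures)}")
            except BrokenProcessPool:
                # Pool died (common on Windows); redo everything serially.
                logger.error("Process pool broke; falling back to serial processing")
                return [(i, process_chunk(c)) for i, c in enumerate(chunk_data_list)]
    return results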
@@ -846,7 +850,7 @@ def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
         serialized_chunk_results.append((chunk_start_idx, consensus_features))
         completed_chunks += 1
         n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-        study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+        study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
     except Exception as exc:
         study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
         raise exc
@@ -926,7 +930,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
 
     else:
         # Parallel processing
-        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
+        #study.logger.info(f"Processing chunks in parallel using {params.threads} processes")
 
         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -987,7 +991,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
         serialized_chunk_results.append((chunk_start_idx, consensus_features))
         completed_chunks += 1
         n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-        study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+        study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
     except Exception as exc:
         # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
         if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
@@ -1021,7 +1025,7 @@ def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cac
         serialized_chunk_results.append((chunk_start_idx, consensus_features))
         completed_chunks += 1
         n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-        study.logger.success(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
+        study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
     except Exception as exc:
         study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
         raise exc
@@ -2259,7 +2263,7 @@ def _perform_adduct_grouping(study, rt_tol, mz_tol):
     )
 
     # Use optimized adduct grouping
-    study.logger.info(f"About to call adduct grouping for {len(consensus_data)} consensus features")
+    #study.logger.info(f"About to call adduct grouping for {len(consensus_data)} consensus features")
     adduct_group_list, adduct_of_list = __merge_adduct_grouping(
         study, consensus_data, rt_tol/3, mz_tol
     )
@@ -86,8 +86,6 @@ def align(self, **kwargs):
         self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
         return
 
-    self.logger.success("Alignment completed.")
-
     # Reset consensus data structures after alignment since RT changes invalidate consensus
     consensus_reset_count = 0
     if not self.consensus_df.is_empty():
@@ -681,16 +679,15 @@ def _align_pose_clustering(study_obj, params):
     params_oms.setValue("pairfinder:distance_RT:exponent", 2.0)
 
     aligner = oms.MapAlignmentAlgorithmPoseClustering()
-    study_obj.logger.info("Starting alignment with PoseClustering")
+    study_obj.logger.info(
+        f"Align RTs with Pose clustering: rt_tol={params.get('rt_tol')}",
+    )
 
     # Set ref_index to feature map index with largest number of features
     ref_index = [
         i[0] for i in sorted(enumerate([fm.size() for fm in fmaps]), key=lambda x: x[1])
     ][-1]
-    study_obj.logger.debug(
-        f"Reference map is {study_obj.samples_df.row(ref_index, named=True)['sample_name']}",
-    )
-
+
     aligner.setParameters(params_oms)
     aligner.setReference(fmaps[ref_index])
     study_obj.logger.debug(f"Parameters for alignment: {params}")
@@ -836,6 +833,12 @@ def _align_pose_clustering(study_obj, params):
     # Clean up temporary feature maps to release memory
     del fmaps
     study_obj.logger.debug("Temporary feature maps deleted to release memory")
+
+    # Resolve reference sample UID from the reference index
+    ref_sample_uid = sample_uid_lookup.get(ref_index)
+    study_obj.logger.success(
+        f"Alignment completed. Reference sample UID {ref_sample_uid}.",
+    )
 
 
 def _align_kd_algorithm(study_obj, params):
@@ -879,7 +882,7 @@ def _align_kd_algorithm(study_obj, params):
         _raw_mp = None
     max_points = int(_raw_mp) if _raw_mp is not None else 1000
     study_obj.logger.info(
-        f"KD align: rt_tol={params.get('rt_tol')}, max_points={max_points}",
+        f"Align RTs with KD-Tree: rt_tol={params.get('rt_tol')}, max_points={max_points}",
     )
 
     # Work directly with features_df instead of feature maps
@@ -1092,7 +1095,7 @@ def _align_kd_algorithm(study_obj, params):
     )
 
     study_obj.logger.success(
-        f"Alignment completed. Reference sample UID {ref_sample_uid} (index {ref_index}).",
+        f"Alignment completed. Reference sample UID {ref_sample_uid}.",
     )
 
 
@@ -1420,7 +1420,7 @@ wheels = [
 
 [[package]]
 name = "masster"
-version = "0.5.10"
+version = "0.5.11"
 source = { editable = "." }
 dependencies = [
     { name = "alpharaw" },