masster 0.5.28__py3-none-any.whl → 0.6.2__py3-none-any.whl

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/sample/plot.py CHANGED
@@ -1115,6 +1115,7 @@ def plot_2d(
1115
1115
  filename=None,
1116
1116
  show_features=True,
1117
1117
  show_only_features_with_ms2=False,
1118
+ show_only_features_with_id=False,
1118
1119
  show_isotopes=False,
1119
1120
  show_ms2=False,
1120
1121
  show_in_browser=False,
@@ -1134,6 +1135,7 @@ def plot_2d(
1134
1135
  rt_range=None,
1135
1136
  legend=None,
1136
1137
  colorby=None,
1138
+ tooltip=None,
1137
1139
  ):
1138
1140
  """
1139
1141
  Plot a two-dimensional visualization of MS1 survey scan data with optional overlays
@@ -1151,6 +1153,9 @@ def plot_2d(
1151
1153
  show_only_features_with_ms2 (bool, default False):
1152
1154
  If True, only display features that have associated MS2 scans. When False,
1153
1155
  features without MS2 data are also shown.
1156
+ show_only_features_with_id (bool, default False):
1157
+ If True, only display features whose id_top_name is non-null and not blank (identified features).
1158
+ When False, all features are shown. Only applies when colorby='id'.
1154
1159
  show_isotopes (bool, default False):
1155
1160
  Whether to overlay isotope information on top of the features.
1156
1161
  show_ms2 (bool, default False):
@@ -1186,6 +1191,9 @@ def plot_2d(
1186
1191
  Feature property to use for coloring. If None (default), uses current green/red scheme
1187
1192
  for features with/without MS2 data. If specified and contains categorical data, applies
1188
1193
  categorical coloring with legend support (similar to plot_2d_oracle).
1194
+ tooltip (str, optional):
1195
+ Controls the feature hover tooltip content. Use None or "ms1" (default) to display the
1196
+ full feature details, or "id" to show only rt, m/z, feature_uid, inty, and any id_* columns. Matching is case-insensitive; any other value falls back to "ms1".
1189
1197
  Behavior:
1190
1198
  - Checks for a loaded mzML file by verifying that self.file_obj is not None.
1191
1199
  - Converts internal MS1 data (a Polars DataFrame) to a Pandas DataFrame and filters out low-intensity
@@ -1376,8 +1384,104 @@ def plot_2d(
1376
1384
  # keep only iso==0, i.e. the main
1377
1385
  feats = feats[feats["iso"] == 0]
1378
1386
 
1387
+ tooltip_mode = str(tooltip).lower() if tooltip is not None else "ms1"
1388
+ if tooltip_mode not in {"ms1", "id"}:
1389
+ tooltip_mode = "ms1"
1390
+
1391
+ id_columns = [col for col in feats.columns if isinstance(col, str) and col.startswith("id_")]
1392
+
1393
+ def build_feature_tooltips(*, include_iso=True, include_iso_of=False, include_colorby=None):
1394
+ base_tooltips = [
1395
+ ("rt", "@rt"),
1396
+ ("m/z", "@mz{0.0000}"),
1397
+ ("feature_uid", "@feature_uid"),
1398
+ ("inty", "@inty"),
1399
+ ]
1400
+
1401
+ if tooltip_mode == "id":
1402
+ base_tooltips.extend((col, f"@{col}") for col in id_columns)
1403
+ return base_tooltips
1404
+
1405
+ if include_iso:
1406
+ base_tooltips.append(("iso", "@iso"))
1407
+ if include_iso_of:
1408
+ base_tooltips.append(("iso_of", "@iso_of"))
1409
+ base_tooltips.append(("adduct", "@adduct"))
1410
+ base_tooltips.append(("chrom_coherence", "@chrom_coherence"))
1411
+ base_tooltips.append(("chrom_prominence_scaled", "@chrom_prominence_scaled"))
1412
+
1413
+ if include_colorby and tooltip_mode != "id":
1414
+ base_tooltips.append((include_colorby, f"@{include_colorby}"))
1415
+
1416
+ return base_tooltips
1417
+
1418
+ handled_colorby = False
1419
+ colorby_id_mode = False
1420
+
1421
+ if colorby == "id":
1422
+ if "id_top_name" not in feats.columns:
1423
+ self.logger.warning("colorby='id' requested but 'id_top_name' column is missing; using default colors")
1424
+ else:
1425
+ handled_colorby = True
1426
+ colorby_id_mode = True
1427
+ id_values = feats["id_top_name"]
1428
+ annotated_mask = id_values.notna() & (id_values.astype(str).str.strip() != "")
1429
+
1430
+ annotated_features = feats[annotated_mask].copy()
1431
+ unannotated_features = feats[~annotated_mask].copy()
1432
+
1433
+ # Apply show_only_features_with_id filter if requested
1434
+ if show_only_features_with_id:
1435
+ # Only keep annotated features, discard unannotated
1436
+ unannotated_features = unannotated_features.iloc[0:0] # Empty dataframe
1437
+
1438
+ feature_hover_annotated = HoverTool(
1439
+ tooltips=build_feature_tooltips(),
1440
+ )
1441
+ feature_hover_unannotated = HoverTool(
1442
+ tooltips=build_feature_tooltips(),
1443
+ )
1444
+
1445
+ # Select only plottable columns for vdims (exclude complex objects like Chromatogram)
1446
+ base_vdims = ["feature_uid", "inty", "iso", "adduct", "chrom_coherence", "chrom_prominence_scaled"]
1447
+ # Add id_* columns if they exist
1448
+ id_vdims = [col for col in feats.columns if isinstance(col, str) and col.startswith("id_")]
1449
+ all_vdims = base_vdims + id_vdims
1450
+
1451
+ if len(annotated_features) > 0:
1452
+ vdims_annotated = [col for col in all_vdims if col in annotated_features.columns]
1453
+ feature_points_1 = hv.Points(
1454
+ annotated_features,
1455
+ kdims=["rt", "mz"],
1456
+ vdims=vdims_annotated,
1457
+ label="Annotated features",
1458
+ ).options(
1459
+ color="#2e7d32",
1460
+ marker=marker_type,
1461
+ size=size_1,
1462
+ tools=[feature_hover_annotated],
1463
+ hooks=hooks,
1464
+ show_legend=True,
1465
+ )
1466
+
1467
+ if len(unannotated_features) > 0:
1468
+ vdims_unannotated = [col for col in all_vdims if col in unannotated_features.columns]
1469
+ feature_points_2 = hv.Points(
1470
+ unannotated_features,
1471
+ kdims=["rt", "mz"],
1472
+ vdims=vdims_unannotated,
1473
+ label="Unannotated features",
1474
+ ).options(
1475
+ color="#9e9e9e",
1476
+ marker=marker_type,
1477
+ size=size_2,
1478
+ tools=[feature_hover_unannotated],
1479
+ hooks=hooks,
1480
+ show_legend=True,
1481
+ )
1482
+
1379
1483
  # Handle colorby parameter
1380
- if colorby is not None and colorby in feats.columns:
1484
+ if (not handled_colorby) and colorby is not None and colorby in feats.columns:
1381
1485
  # Check if colorby data is categorical (string-like)
1382
1486
  colorby_values = feats[colorby].dropna()
1383
1487
  is_categorical = feats[colorby].dtype in ["object", "string", "category"] or (
@@ -1424,17 +1528,7 @@ def plot_2d(
1424
1528
 
1425
1529
  if len(group_with_ms2) > 0:
1426
1530
  feature_hover = HoverTool(
1427
- tooltips=[
1428
- ("rt", "@rt"),
1429
- ("m/z", "@mz{0.0000}"),
1430
- ("feature_uid", "@feature_uid"),
1431
- ("inty", "@inty"),
1432
- ("iso", "@iso"),
1433
- ("adduct", "@adduct"),
1434
- ("chrom_coherence", "@chrom_coherence"),
1435
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1436
- (colorby, f"@{colorby}"),
1437
- ],
1531
+ tooltips=build_feature_tooltips(include_colorby=colorby),
1438
1532
  )
1439
1533
  group_points_ms2 = hv.Points(
1440
1534
  group_with_ms2,
@@ -1464,17 +1558,7 @@ def plot_2d(
1464
1558
 
1465
1559
  if len(group_without_ms2) > 0:
1466
1560
  feature_hover = HoverTool(
1467
- tooltips=[
1468
- ("rt", "@rt"),
1469
- ("m/z", "@mz{0.0000}"),
1470
- ("feature_uid", "@feature_uid"),
1471
- ("inty", "@inty"),
1472
- ("iso", "@iso"),
1473
- ("adduct", "@adduct"),
1474
- ("chrom_coherence", "@chrom_coherence"),
1475
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1476
- (colorby, f"@{colorby}"),
1477
- ],
1561
+ tooltips=build_feature_tooltips(include_colorby=colorby),
1478
1562
  )
1479
1563
  group_points_no_ms2 = hv.Points(
1480
1564
  group_without_ms2,
@@ -1500,22 +1584,15 @@ def plot_2d(
1500
1584
  feature_points_2 = group_points_no_ms2
1501
1585
  else:
1502
1586
  feature_points_2 = feature_points_2 * group_points_no_ms2
1503
- else:
1587
+
1588
+ # Only use default coloring if no special colorby mode was handled
1589
+ if not handled_colorby and not use_categorical_coloring:
1504
1590
  # Use original green/red coloring scheme for MS2 presence
1505
1591
  # find features with ms2_scans not None and iso==0
1506
1592
  features_df = feats[feats["ms2_scans"].notnull()]
1507
1593
  # Create feature points with proper sizing method
1508
1594
  feature_hover_1 = HoverTool(
1509
- tooltips=[
1510
- ("rt", "@rt"),
1511
- ("m/z", "@mz{0.0000}"),
1512
- ("feature_uid", "@feature_uid"),
1513
- ("inty", "@inty"),
1514
- ("iso", "@iso"),
1515
- ("adduct", "@adduct"),
1516
- ("chrom_coherence", "@chrom_coherence"),
1517
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1518
- ],
1595
+ tooltips=build_feature_tooltips(),
1519
1596
  )
1520
1597
  if len(features_df) > 0:
1521
1598
  feature_points_1 = hv.Points(
@@ -1542,16 +1619,7 @@ def plot_2d(
1542
1619
  # find features without MS2 data
1543
1620
  features_df = feats[feats["ms2_scans"].isnull()]
1544
1621
  feature_hover_2 = HoverTool(
1545
- tooltips=[
1546
- ("rt", "@rt"),
1547
- ("m/z", "@mz{0.0000}"),
1548
- ("feature_uid", "@feature_uid"),
1549
- ("inty", "@inty"),
1550
- ("iso", "@iso"),
1551
- ("adduct", "@adduct"),
1552
- ("chrom_coherence", "@chrom_coherence"),
1553
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1554
- ],
1622
+ tooltips=build_feature_tooltips(),
1555
1623
  )
1556
1624
  if len(features_df) > 0:
1557
1625
  feature_points_2 = hv.Points(
@@ -1581,17 +1649,7 @@ def plot_2d(
1581
1649
  if hasattr(features_df, "to_pandas"):
1582
1650
  features_df = features_df.to_pandas()
1583
1651
  feature_hover_iso = HoverTool(
1584
- tooltips=[
1585
- ("rt", "@rt"),
1586
- ("m/z", "@mz{0.0000}"),
1587
- ("feature_uid", "@feature_uid"),
1588
- ("inty", "@inty"),
1589
- ("iso", "@iso"),
1590
- ("iso_of", "@iso_of"),
1591
- ("adduct", "@adduct"),
1592
- ("chrom_coherence", "@chrom_coherence"),
1593
- ("chrom_prominence_scaled", "@chrom_prominence_scaled"),
1594
- ],
1652
+ tooltips=build_feature_tooltips(include_iso_of=True),
1595
1653
  )
1596
1654
  feature_points_iso = hv.Points(
1597
1655
  features_df,
@@ -1676,18 +1734,31 @@ def plot_2d(
1676
1734
  overlay = overlay * feature_points_4
1677
1735
  if feature_points_3 is not None:
1678
1736
  overlay = overlay * feature_points_3
1679
- if feature_points_1 is not None:
1680
- overlay = overlay * feature_points_1
1681
- if not show_only_features_with_ms2 and feature_points_2 is not None:
1682
- overlay = overlay * feature_points_2
1737
+
1738
+ # In colorby='id' mode, draw unannotated (grey) first, then annotated (green) on top
1739
+ if colorby_id_mode:
1740
+ # Draw grey points first (bottom layer)
1741
+ if feature_points_2 is not None:
1742
+ overlay = overlay * feature_points_2
1743
+ # Draw green points last (top layer)
1744
+ if feature_points_1 is not None:
1745
+ overlay = overlay * feature_points_1
1746
+ else:
1747
+ # Default order: green (with MS2) first, then red (without MS2)
1748
+ if feature_points_1 is not None:
1749
+ overlay = overlay * feature_points_1
1750
+ # In non-id mode, only show features without MS2 if show_only_features_with_ms2 is False
1751
+ if not show_only_features_with_ms2 and feature_points_2 is not None:
1752
+ overlay = overlay * feature_points_2
1753
+
1683
1754
  if feature_points_iso is not None:
1684
1755
  overlay = overlay * feature_points_iso
1685
1756
 
1686
1757
  if title is not None:
1687
1758
  overlay = overlay.opts(title=title)
1688
1759
 
1689
- # Handle legend positioning for categorical coloring
1690
- if legend is not None and use_categorical_coloring and len(categorical_groups) > 1:
1760
+ # Handle legend positioning for categorical coloring or colorby='id' mode
1761
+ if legend is not None and (colorby_id_mode or (use_categorical_coloring and len(categorical_groups) > 1)):
1691
1762
  # Map legend position parameter to HoloViews legend position
1692
1763
  legend_position_map = {
1693
1764
  "top_right": "top_right",
@@ -1704,8 +1775,8 @@ def plot_2d(
1704
1775
 
1705
1776
  # Apply legend configuration to the overlay
1706
1777
  overlay = overlay.opts(legend_position=hv_legend_pos, legend_opts={"title": "", "padding": 2, "spacing": 2})
1707
- elif legend is None and use_categorical_coloring:
1708
- # Explicitly hide legend when legend=None but categorical coloring is used
1778
+ elif legend is None and (colorby_id_mode or use_categorical_coloring):
1779
+ # Explicitly hide legend when legend=None but categorical coloring or id mode is used
1709
1780
  overlay = overlay.opts(show_legend=False)
1710
1781
 
1711
1782
  # Handle slider functionality
@@ -1728,12 +1799,27 @@ def plot_2d(
1728
1799
  if feature_points_3 is not None:
1729
1800
  updated_points_3 = feature_points_3.opts(size=size_val)
1730
1801
  feature_overlay = updated_points_3 if feature_overlay is None else feature_overlay * updated_points_3
1731
- if feature_points_1 is not None:
1732
- updated_points_1 = feature_points_1.opts(size=size_val)
1733
- feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
1734
- if not show_only_features_with_ms2 and feature_points_2 is not None:
1735
- updated_points_2 = feature_points_2.opts(size=size_val)
1736
- feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
1802
+
1803
+ # In colorby='id' mode, draw unannotated (grey) first, then annotated (green) on top
1804
+ if colorby_id_mode:
1805
+ # Draw grey points first (bottom layer)
1806
+ if feature_points_2 is not None:
1807
+ updated_points_2 = feature_points_2.opts(size=size_val)
1808
+ feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
1809
+ # Draw green points last (top layer)
1810
+ if feature_points_1 is not None:
1811
+ updated_points_1 = feature_points_1.opts(size=size_val)
1812
+ feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
1813
+ else:
1814
+ # Default order: green (with MS2) first, then red (without MS2)
1815
+ if feature_points_1 is not None:
1816
+ updated_points_1 = feature_points_1.opts(size=size_val)
1817
+ feature_overlay = updated_points_1 if feature_overlay is None else feature_overlay * updated_points_1
1818
+ # In non-id mode, only show features without MS2 if show_only_features_with_ms2 is False
1819
+ if not show_only_features_with_ms2 and feature_points_2 is not None:
1820
+ updated_points_2 = feature_points_2.opts(size=size_val)
1821
+ feature_overlay = updated_points_2 if feature_overlay is None else feature_overlay * updated_points_2
1822
+
1737
1823
  if feature_points_iso is not None:
1738
1824
  updated_points_iso = feature_points_iso.opts(size=size_val)
1739
1825
  feature_overlay = (
@@ -2390,10 +2476,28 @@ def plot_dda_stats(
2390
2476
  "time_ms2_to_ms2",
2391
2477
  "time_ms2_to_ms1",
2392
2478
  ]
2393
- # Ensure that 'index' and 'rt' are kept for hover along with the columns to plot
2479
+ # skip cols that are not in stats
2480
+ cols_to_plot = [col for col in cols_to_plot if col in stats.columns]
2394
2481
  stats = stats[["scan_uid", "cycle", "rt", *cols_to_plot]]
2395
2482
  # set any value < 0 to None
2396
- stats[stats < 0] = None
2483
+ # Replace negative values with nulls in a polars-friendly way
2484
+ numeric_types = {
2485
+ pl.Float32, pl.Float64,
2486
+ pl.Int8, pl.Int16, pl.Int32, pl.Int64,
2487
+ pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64
2488
+ }
2489
+ exprs = []
2490
+ for col_name, dtype in stats.schema.items():
2491
+ if dtype in numeric_types:
2492
+ exprs.append(
2493
+ pl.when(pl.col(col_name) < 0)
2494
+ .then(None)
2495
+ .otherwise(pl.col(col_name))
2496
+ .alias(col_name)
2497
+ )
2498
+ else:
2499
+ exprs.append(pl.col(col_name))
2500
+ stats = stats.select(exprs)
2397
2501
 
2398
2502
  # Create a Scatter for each column in cols_to_plot stacked vertically, with hover enabled
2399
2503
  scatter_plots = []
masster/sample/sample.py CHANGED
@@ -129,6 +129,13 @@ from masster.sample.helpers import get_eic
129
129
  from masster.sample.helpers import set_source
130
130
  from masster.sample.helpers import _recreate_feature_map
131
131
  from masster.sample.helpers import _get_feature_map
132
+ from masster.sample.id import lib_load
133
+ from masster.sample.id import identify
134
+ from masster.sample.id import get_id
135
+ from masster.sample.id import id_reset
136
+ from masster.sample.id import lib_reset
137
+ from masster.sample.importers import import_oracle
138
+ from masster.sample.importers import import_tima
132
139
  from masster.sample.load import chrom_extract
133
140
  from masster.sample.load import _index_file
134
141
  from masster.sample.load import load
@@ -164,7 +171,9 @@ from masster.sample.save import export_chrom
164
171
  from masster.sample.save import export_dda_stats
165
172
  from masster.sample.save import export_features
166
173
  from masster.sample.save import export_mgf
167
- from masster.sample.save import export_xlsx
174
+ from masster.sample.save import export_excel
175
+ from masster.sample.save import export_slaw
176
+ from masster.sample.save import export_mztab
168
177
  from masster.sample.save import save
169
178
 
170
179
 
@@ -259,9 +268,10 @@ class Sample:
259
268
  # the polars data frame with MS1 level data
260
269
  self.ms1_df = pl.DataFrame()
261
270
 
262
- # lightweight lib data for matching, targeted analyses, etc. > superseded by study methods
263
- self.lib = None
264
- self.lib_match = None
271
+ # identification DataFrames (lib_df and id_df)
272
+ self.lib_df = None # library DataFrame (from masster.lib or CSV/JSON)
273
+ self.id_df = None # identification results DataFrame
274
+ self._lib = None # reference to Lib object if loaded
265
275
  self.chrom_df = None
266
276
 
267
277
  if params.filename is not None:
@@ -292,11 +302,22 @@ class Sample:
292
302
  update_parameters = update_parameters
293
303
  get_parameters_property = get_parameters_property
294
304
  set_parameters_property = set_parameters_property
305
+ # Identification methods from id.py
306
+ lib_load = lib_load
307
+ identify = identify
308
+ get_id = get_id
309
+ id_reset = id_reset
310
+ lib_reset = lib_reset
311
+ # Importers from importers.py
312
+ import_oracle = import_oracle
313
+ import_tima = import_tima
295
314
  export_features = export_features
296
- export_xlsx = export_xlsx
315
+ export_excel = export_excel
316
+ export_slaw = export_slaw
297
317
  export_mgf = export_mgf
298
318
  export_chrom = export_chrom
299
319
  export_dda_stats = export_dda_stats
320
+ export_mztab = export_mztab
300
321
  plot_2d = plot_2d
301
322
  plot_2d_oracle = plot_2d_oracle
302
323
  plot_dda_stats = plot_dda_stats
@@ -93,10 +93,108 @@
93
93
  },
94
94
  "ms1_spec": {
95
95
  "dtype": "pl.Object"
96
+ },
97
+ "id_top_name": {
98
+ "dtype": "pl.Utf8"
99
+ },
100
+ "id_top_class": {
101
+ "dtype": "pl.Utf8"
102
+ },
103
+ "id_top_adduct": {
104
+ "dtype": "pl.Utf8"
105
+ },
106
+ "id_top_score": {
107
+ "dtype": "pl.Float64"
108
+ },
109
+ "id_source": {
110
+ "dtype": "pl.Utf8"
111
+ }
112
+ }
113
+ },
114
+ "lib_df": {
115
+ "columns": {
116
+ "lib_uid": {
117
+ "dtype": "pl.Int64"
118
+ },
119
+ "cmpd_uid": {
120
+ "dtype": "pl.Int64"
121
+ },
122
+ "name": {
123
+ "dtype": "pl.Utf8"
124
+ },
125
+ "shortname": {
126
+ "dtype": "pl.Utf8"
127
+ },
128
+ "class": {
129
+ "dtype": "pl.Utf8"
130
+ },
131
+ "formula": {
132
+ "dtype": "pl.Utf8"
133
+ },
134
+ "iso": {
135
+ "dtype": "pl.Int64"
136
+ },
137
+ "smiles": {
138
+ "dtype": "pl.Utf8"
139
+ },
140
+ "inchi": {
141
+ "dtype": "pl.Utf8"
142
+ },
143
+ "inchikey": {
144
+ "dtype": "pl.Utf8"
145
+ },
146
+ "adduct": {
147
+ "dtype": "pl.Utf8"
148
+ },
149
+ "z": {
150
+ "dtype": "pl.Int64"
151
+ },
152
+ "m": {
153
+ "dtype": "pl.Float64"
154
+ },
155
+ "mz": {
156
+ "dtype": "pl.Float64"
157
+ },
158
+ "rt": {
159
+ "dtype": "pl.Float64"
160
+ },
161
+ "quant_group": {
162
+ "dtype": "pl.Int64"
163
+ },
164
+ "probability": {
165
+ "dtype": "pl.Float64"
166
+ },
167
+ "source_id": {
168
+ "dtype": "pl.Utf8"
169
+ }
170
+ }
171
+ },
172
+ "id_df": {
173
+ "columns": {
174
+ "feature_uid": {
175
+ "dtype": "pl.Int64"
176
+ },
177
+ "lib_uid": {
178
+ "dtype": "pl.Int64"
179
+ },
180
+ "mz_delta": {
181
+ "dtype": "pl.Float64"
182
+ },
183
+ "rt_delta": {
184
+ "dtype": "pl.Float64"
185
+ },
186
+ "matcher": {
187
+ "dtype": "pl.Utf8"
188
+ },
189
+ "score": {
190
+ "dtype": "pl.Float64"
191
+ },
192
+ "iso": {
193
+ "dtype": "pl.Int64"
96
194
  }
97
195
  }
98
196
  },
99
- "generated_date": "2025-08-03",
197
+ "generated_date": "2025-10-30",
100
198
  "ms1_df": {
101
199
  "columns": {
102
200
  "cycle": {