masster 0.5.4__py3-none-any.whl → 0.5.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/data/libs/hilic.csv +4812 -0
- masster/sample/adducts.py +1 -1
- masster/sample/helpers.py +17 -1
- masster/sample/plot.py +114 -22
- masster/sample/sample.py +6 -3
- masster/sample/save.py +61 -2
- masster/spectrum.py +5 -2
- masster/study/helpers.py +2 -2
- masster/study/plot.py +144 -47
- masster/study/processing.py +0 -3
- masster/wizard/wizard.py +13 -24
- {masster-0.5.4.dist-info → masster-0.5.6.dist-info}/METADATA +1 -1
- {masster-0.5.4.dist-info → masster-0.5.6.dist-info}/RECORD +17 -16
- {masster-0.5.4.dist-info → masster-0.5.6.dist-info}/WHEEL +0 -0
- {masster-0.5.4.dist-info → masster-0.5.6.dist-info}/entry_points.txt +0 -0
- {masster-0.5.4.dist-info → masster-0.5.6.dist-info}/licenses/LICENSE +0 -0
masster/sample/adducts.py
CHANGED
|
@@ -473,7 +473,7 @@ def find_adducts(self, **kwargs):
|
|
|
473
473
|
self.logger.debug(f"Min probability threshold: {min_probability}")
|
|
474
474
|
|
|
475
475
|
# Generate comprehensive adduct specifications using the Sample method
|
|
476
|
-
adducts_df = _get_adducts(
|
|
476
|
+
adducts_df = self._get_adducts(
|
|
477
477
|
adducts_list=adducts_list,
|
|
478
478
|
charge_min=charge_min,
|
|
479
479
|
charge_max=charge_max,
|
masster/sample/helpers.py
CHANGED
|
@@ -358,7 +358,7 @@ def get_eic(self, mz, mz_tol=None):
|
|
|
358
358
|
return None
|
|
359
359
|
|
|
360
360
|
|
|
361
|
-
def select(
|
|
361
|
+
def features_select(
|
|
362
362
|
self,
|
|
363
363
|
mz=None,
|
|
364
364
|
rt=None,
|
|
@@ -372,6 +372,7 @@ def select(
|
|
|
372
372
|
height_scaled=None,
|
|
373
373
|
prominence=None,
|
|
374
374
|
height=None,
|
|
375
|
+
uids=None,
|
|
375
376
|
):
|
|
376
377
|
"""
|
|
377
378
|
Select features based on specified criteria and return the filtered DataFrame.
|
|
@@ -389,6 +390,7 @@ def select(
|
|
|
389
390
|
height_scaled: scaled height filter (tuple for range, single value for minimum)
|
|
390
391
|
prominence: prominence filter (tuple for range, single value for minimum)
|
|
391
392
|
height: height filter (tuple for range, single value for minimum)
|
|
393
|
+
uids: feature UID filter (list of feature UIDs, polars/pandas DataFrame with feature_uid/feature_id column, or None for all features)
|
|
392
394
|
|
|
393
395
|
Returns:
|
|
394
396
|
polars.DataFrame: Filtered features DataFrame
|
|
@@ -398,6 +400,20 @@ def select(
|
|
|
398
400
|
# self.logger.info("No features found. R")
|
|
399
401
|
return
|
|
400
402
|
feats = self.features_df.clone()
|
|
403
|
+
|
|
404
|
+
# Filter by feature UIDs if provided
|
|
405
|
+
if uids is not None:
|
|
406
|
+
feature_uids_to_keep = self._get_feature_uids(features=uids, verbose=True)
|
|
407
|
+
if not feature_uids_to_keep:
|
|
408
|
+
self.logger.warning("No valid feature UIDs provided.")
|
|
409
|
+
return feats.limit(0) # Return empty DataFrame with same structure
|
|
410
|
+
|
|
411
|
+
feats_len_before_filter = len(feats)
|
|
412
|
+
feats = feats.filter(pl.col("feature_uid").is_in(feature_uids_to_keep))
|
|
413
|
+
self.logger.debug(
|
|
414
|
+
f"Selected features by UIDs. Features removed: {feats_len_before_filter - len(feats)}",
|
|
415
|
+
)
|
|
416
|
+
|
|
401
417
|
if coherence is not None:
|
|
402
418
|
has_coherence = "chrom_coherence" in self.features_df.columns
|
|
403
419
|
if not has_coherence:
|
masster/sample/plot.py
CHANGED
|
@@ -57,12 +57,78 @@ from holoviews import dim
|
|
|
57
57
|
from holoviews.plotting.util import process_cmap
|
|
58
58
|
from matplotlib.colors import rgb2hex
|
|
59
59
|
|
|
60
|
+
# Import cmap for colormap handling
|
|
61
|
+
try:
|
|
62
|
+
from cmap import Colormap
|
|
63
|
+
except ImportError:
|
|
64
|
+
Colormap = None
|
|
65
|
+
|
|
60
66
|
# Parameters removed - using hardcoded defaults
|
|
61
67
|
|
|
62
68
|
|
|
63
69
|
hv.extension("bokeh")
|
|
64
70
|
|
|
65
71
|
|
|
72
|
+
def _process_cmap(cmap, fallback="viridis", logger=None):
|
|
73
|
+
"""
|
|
74
|
+
Process colormap using the cmap package, similar to study's implementation.
|
|
75
|
+
|
|
76
|
+
Parameters:
|
|
77
|
+
cmap: Colormap specification (string name, cmap.Colormap object, or None)
|
|
78
|
+
fallback: Fallback colormap name if cmap processing fails
|
|
79
|
+
logger: Logger for warnings (optional)
|
|
80
|
+
|
|
81
|
+
Returns:
|
|
82
|
+
list: List of hex color strings for the colormap
|
|
83
|
+
"""
|
|
84
|
+
# Handle None case
|
|
85
|
+
if cmap is None:
|
|
86
|
+
cmap = "viridis"
|
|
87
|
+
elif cmap == "grey":
|
|
88
|
+
cmap = "Greys256"
|
|
89
|
+
elif cmap == "iridescent":
|
|
90
|
+
cmap = "iridescent_r"
|
|
91
|
+
|
|
92
|
+
# If cmap package is not available, fall back to process_cmap
|
|
93
|
+
if Colormap is None:
|
|
94
|
+
if logger:
|
|
95
|
+
logger.warning("cmap package not available, using holoviews process_cmap")
|
|
96
|
+
return process_cmap(cmap, provider="bokeh")
|
|
97
|
+
|
|
98
|
+
try:
|
|
99
|
+
# Handle colormap using cmap.Colormap
|
|
100
|
+
if isinstance(cmap, str):
|
|
101
|
+
colormap = Colormap(cmap)
|
|
102
|
+
# Generate 256 colors and convert to hex
|
|
103
|
+
import matplotlib.colors as mcolors
|
|
104
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
105
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
106
|
+
else:
|
|
107
|
+
colormap = cmap
|
|
108
|
+
# Try to use to_bokeh() method first
|
|
109
|
+
try:
|
|
110
|
+
palette = colormap.to_bokeh()
|
|
111
|
+
# Ensure we got a color palette, not another mapper
|
|
112
|
+
if not isinstance(palette, (list, tuple)):
|
|
113
|
+
# Fall back to generating colors manually
|
|
114
|
+
import matplotlib.colors as mcolors
|
|
115
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
116
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
117
|
+
except AttributeError:
|
|
118
|
+
# Fall back to generating colors manually
|
|
119
|
+
import matplotlib.colors as mcolors
|
|
120
|
+
colors = colormap(np.linspace(0, 1, 256))
|
|
121
|
+
palette = [mcolors.rgb2hex(color) for color in colors]
|
|
122
|
+
|
|
123
|
+
return palette
|
|
124
|
+
|
|
125
|
+
except (AttributeError, ValueError, TypeError) as e:
|
|
126
|
+
# Fallback to process_cmap if cmap interpretation fails
|
|
127
|
+
if logger:
|
|
128
|
+
logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to {fallback}")
|
|
129
|
+
return process_cmap(fallback, provider="bokeh")
|
|
130
|
+
|
|
131
|
+
|
|
66
132
|
def _is_notebook_environment():
|
|
67
133
|
"""
|
|
68
134
|
Detect if code is running in a notebook environment (Jupyter, JupyterLab, or Marimo).
|
|
@@ -462,10 +528,8 @@ def plot_2d(
|
|
|
462
528
|
self.logger.error("No MS1 data available.")
|
|
463
529
|
return
|
|
464
530
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
elif cmap == "grey":
|
|
468
|
-
cmap = "Greys256"
|
|
531
|
+
# Process colormap using the cmap package
|
|
532
|
+
cmap_palette = _process_cmap(cmap, fallback="iridescent_r", logger=self.logger)
|
|
469
533
|
|
|
470
534
|
# get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
|
|
471
535
|
spectradf = self.ms1_df.select(["rt", "mz", "inty"])
|
|
@@ -585,7 +649,7 @@ def plot_2d(
|
|
|
585
649
|
dynamic=dyn, # alpha=10, min_alpha=0,
|
|
586
650
|
).opts(
|
|
587
651
|
active_tools=["box_zoom"],
|
|
588
|
-
cmap=
|
|
652
|
+
cmap=cmap_palette,
|
|
589
653
|
tools=["hover"],
|
|
590
654
|
hooks=[new_bounds_hook],
|
|
591
655
|
width=width,
|
|
@@ -1003,10 +1067,8 @@ def plot_2d_oracle(
|
|
|
1003
1067
|
print("Please load a file first.")
|
|
1004
1068
|
return
|
|
1005
1069
|
|
|
1006
|
-
|
|
1007
|
-
|
|
1008
|
-
elif cmap == "iridescent":
|
|
1009
|
-
cmap = "iridescent_r"
|
|
1070
|
+
# Process colormap using the cmap package
|
|
1071
|
+
cmap_palette = _process_cmap(cmap, fallback="Greys256", logger=self.logger)
|
|
1010
1072
|
|
|
1011
1073
|
# get columns rt, mz, inty from self.ms1_df, It's polars DataFrame
|
|
1012
1074
|
spectradf = self.ms1_df.to_pandas()
|
|
@@ -1057,7 +1119,7 @@ def plot_2d_oracle(
|
|
|
1057
1119
|
dynamic=dyn, # alpha=10, min_alpha=0,
|
|
1058
1120
|
).opts(
|
|
1059
1121
|
active_tools=["box_zoom"],
|
|
1060
|
-
cmap=
|
|
1122
|
+
cmap=cmap_palette,
|
|
1061
1123
|
tools=["hover"],
|
|
1062
1124
|
hooks=[new_bounds_hook],
|
|
1063
1125
|
width=1000,
|
|
@@ -1183,13 +1245,45 @@ def plot_2d_oracle(
|
|
|
1183
1245
|
|
|
1184
1246
|
if cvalues is not None:
|
|
1185
1247
|
num_colors = len(cvalues)
|
|
1186
|
-
|
|
1187
|
-
|
|
1188
|
-
|
|
1189
|
-
|
|
1190
|
-
|
|
1191
|
-
|
|
1192
|
-
|
|
1248
|
+
|
|
1249
|
+
# Use cmap package for categorical colormap
|
|
1250
|
+
try:
|
|
1251
|
+
if Colormap is not None:
|
|
1252
|
+
# Use rainbow colormap for categorical data
|
|
1253
|
+
colormap = Colormap("rainbow")
|
|
1254
|
+
colors = []
|
|
1255
|
+
for i in range(num_colors):
|
|
1256
|
+
# Generate evenly spaced colors across the colormap
|
|
1257
|
+
t = i / (num_colors - 1) if num_colors > 1 else 0.5
|
|
1258
|
+
color = colormap(t)
|
|
1259
|
+
# Convert to hex
|
|
1260
|
+
import matplotlib.colors as mcolors
|
|
1261
|
+
# Convert color to hex - handle different color formats
|
|
1262
|
+
if hasattr(color, '__len__') and len(color) >= 3:
|
|
1263
|
+
# It's an array-like color (RGB or RGBA)
|
|
1264
|
+
colors.append(mcolors.rgb2hex(color[:3]))
|
|
1265
|
+
else:
|
|
1266
|
+
# It's a single value, convert to RGB
|
|
1267
|
+
colors.append(mcolors.rgb2hex([color, color, color]))
|
|
1268
|
+
else:
|
|
1269
|
+
# Fallback to original method
|
|
1270
|
+
cmap = "rainbow"
|
|
1271
|
+
cmap_provider = "colorcet"
|
|
1272
|
+
cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
|
|
1273
|
+
colors = [
|
|
1274
|
+
rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
|
|
1275
|
+
for i in range(num_colors)
|
|
1276
|
+
]
|
|
1277
|
+
except Exception:
|
|
1278
|
+
# Final fallback to original method
|
|
1279
|
+
cmap = "rainbow"
|
|
1280
|
+
cmap_provider = "colorcet"
|
|
1281
|
+
cm = process_cmap(cmap, ncolors=num_colors, provider=cmap_provider)
|
|
1282
|
+
colors = [
|
|
1283
|
+
rgb2hex(cm[int(i * (len(cm) - 1) / (num_colors - 1))]) if num_colors > 1 else rgb2hex(cm[0])
|
|
1284
|
+
for i in range(num_colors)
|
|
1285
|
+
]
|
|
1286
|
+
|
|
1193
1287
|
# assign color to each row based on id_class. If id_class is null, assign 'black'
|
|
1194
1288
|
feats["color"] = "black"
|
|
1195
1289
|
|
|
@@ -1577,10 +1671,8 @@ def plot_ms2_cycle(
|
|
|
1577
1671
|
print("Cycle number not found in scans_df.")
|
|
1578
1672
|
return
|
|
1579
1673
|
|
|
1580
|
-
|
|
1581
|
-
|
|
1582
|
-
elif cmap == "grey":
|
|
1583
|
-
cmap = "Greys256"
|
|
1674
|
+
# Process colormap using the cmap package
|
|
1675
|
+
cmap_palette = _process_cmap(cmap, fallback="iridescent_r", logger=self.logger)
|
|
1584
1676
|
|
|
1585
1677
|
# find all scans in cycle
|
|
1586
1678
|
scans = self.scans_df.filter(pl.col("cycle") == cycle)
|
|
@@ -1645,7 +1737,7 @@ def plot_ms2_cycle(
|
|
|
1645
1737
|
dynamic=raster_dynamic, # alpha=10, min_alpha=0,
|
|
1646
1738
|
).opts(
|
|
1647
1739
|
active_tools=["box_zoom"],
|
|
1648
|
-
cmap=
|
|
1740
|
+
cmap=cmap_palette,
|
|
1649
1741
|
tools=["hover"],
|
|
1650
1742
|
hooks=[new_bounds_hook],
|
|
1651
1743
|
width=1000,
|
masster/sample/sample.py
CHANGED
|
@@ -57,10 +57,11 @@ from masster.sample.helpers import _get_scan_uids
|
|
|
57
57
|
from masster.sample.helpers import _get_feature_uids
|
|
58
58
|
# from masster.sample.helpers import _features_sync - made internal only
|
|
59
59
|
from masster.sample.adducts import find_adducts
|
|
60
|
+
from masster.sample.adducts import _get_adducts
|
|
60
61
|
# Removed _get_adducts - only used in study modules
|
|
61
62
|
from masster.sample.helpers import features_delete
|
|
62
63
|
from masster.sample.helpers import features_filter
|
|
63
|
-
from masster.sample.helpers import select
|
|
64
|
+
from masster.sample.helpers import features_select
|
|
64
65
|
from masster.sample.helpers import select_closest_scan
|
|
65
66
|
from masster.sample.helpers import get_dda_stats
|
|
66
67
|
from masster.sample.helpers import get_feature
|
|
@@ -110,6 +111,7 @@ from masster.sample.save import export_chrom
|
|
|
110
111
|
from masster.sample.save import export_dda_stats
|
|
111
112
|
from masster.sample.save import export_features
|
|
112
113
|
from masster.sample.save import export_mgf
|
|
114
|
+
from masster.sample.save import export_xlsx
|
|
113
115
|
from masster.sample.save import save
|
|
114
116
|
# Removed internal-only import: _save_featureXML
|
|
115
117
|
|
|
@@ -139,7 +141,6 @@ class Sample:
|
|
|
139
141
|
|
|
140
142
|
Core initialization parameters:
|
|
141
143
|
- file (str, optional): The file path or file object to be loaded
|
|
142
|
-
- ondisk (bool): Whether to keep data on disk or load into memory. Default is False
|
|
143
144
|
- label (str, optional): An optional label to identify the file or dataset
|
|
144
145
|
- log_level (str): The logging level to be set for the logger. Defaults to 'INFO'
|
|
145
146
|
- log_label (str, optional): Optional label for the logger
|
|
@@ -221,11 +222,12 @@ class Sample:
|
|
|
221
222
|
save = save
|
|
222
223
|
find_features = find_features
|
|
223
224
|
find_adducts = find_adducts
|
|
225
|
+
_get_adducts= _get_adducts
|
|
224
226
|
find_iso = find_iso
|
|
225
227
|
find_ms2 = find_ms2
|
|
226
228
|
get_spectrum = get_spectrum
|
|
227
229
|
filter = features_filter
|
|
228
|
-
select = select
|
|
230
|
+
select = features_select
|
|
229
231
|
features_filter = filter # New function that keeps only specified features
|
|
230
232
|
filter_features = filter
|
|
231
233
|
features_select = select
|
|
@@ -238,6 +240,7 @@ class Sample:
|
|
|
238
240
|
get_parameters_property = get_parameters_property
|
|
239
241
|
set_parameters_property = set_parameters_property
|
|
240
242
|
export_features = export_features
|
|
243
|
+
export_xlsx = export_xlsx
|
|
241
244
|
export_mgf = export_mgf
|
|
242
245
|
export_chrom = export_chrom
|
|
243
246
|
export_dda_stats = export_dda_stats
|
masster/sample/save.py
CHANGED
|
@@ -105,7 +105,7 @@ def save(self, filename=None):
|
|
|
105
105
|
self._save_sample5(filename=filename)
|
|
106
106
|
self.file_path = filename
|
|
107
107
|
|
|
108
|
-
|
|
108
|
+
'''
|
|
109
109
|
def _save_featureXML(self, filename="features.featureXML"):
|
|
110
110
|
if self._oms_features_map is None:
|
|
111
111
|
self.logger.warning("No features found.")
|
|
@@ -114,7 +114,7 @@ def _save_featureXML(self, filename="features.featureXML"):
|
|
|
114
114
|
fh.store(filename, self._oms_features_map)
|
|
115
115
|
self.logger.debug(f"Features Map saved to {filename}")
|
|
116
116
|
|
|
117
|
-
|
|
117
|
+
'''
|
|
118
118
|
def export_features(self, filename="features.csv"):
|
|
119
119
|
"""
|
|
120
120
|
Export the features DataFrame to a CSV or Excel file.
|
|
@@ -411,6 +411,11 @@ def export_mgf(
|
|
|
411
411
|
rt_str = f"{rt:.2f}"
|
|
412
412
|
mz_str = f"{mz:.4f}"
|
|
413
413
|
|
|
414
|
+
# Initialize charge for this feature
|
|
415
|
+
charge = preferred_charge
|
|
416
|
+
if row["charge"] is not None and row["charge"] != 0:
|
|
417
|
+
charge = row["charge"]
|
|
418
|
+
|
|
414
419
|
# Skip features without MS2 data (unless include_all_ms1 is True, but we already handled MS1 above)
|
|
415
420
|
if row["ms2_scans"] is None:
|
|
416
421
|
skip = skip + 1
|
|
@@ -822,6 +827,60 @@ def export_dda_stats(self, filename="stats.csv"):
|
|
|
822
827
|
self.logger.info(f"DDA statistics exported to {filename}")
|
|
823
828
|
|
|
824
829
|
|
|
830
|
+
def export_xlsx(self, filename="features.xlsx"):
|
|
831
|
+
"""
|
|
832
|
+
Export the features DataFrame to an Excel file.
|
|
833
|
+
|
|
834
|
+
This method exports the features DataFrame (features_df) to an Excel (.xlsx) file.
|
|
835
|
+
Columns with data types 'List' or 'Object' are excluded from the export to ensure
|
|
836
|
+
compatibility with Excel format. A boolean column 'has_ms2' is added to indicate
|
|
837
|
+
whether MS2 data is available for each feature.
|
|
838
|
+
|
|
839
|
+
Parameters:
|
|
840
|
+
filename (str): The path to the output Excel file. Must end with '.xlsx' or '.xls'.
|
|
841
|
+
Defaults to 'features.xlsx'.
|
|
842
|
+
|
|
843
|
+
Raises:
|
|
844
|
+
ValueError: If filename doesn't end with '.xlsx' or '.xls'
|
|
845
|
+
|
|
846
|
+
Side Effects:
|
|
847
|
+
Writes the exported data to the specified Excel file and logs the export operation.
|
|
848
|
+
"""
|
|
849
|
+
if self.features_df is None:
|
|
850
|
+
self.logger.warning("No features found. Cannot export to Excel.")
|
|
851
|
+
return
|
|
852
|
+
|
|
853
|
+
# Validate filename extension
|
|
854
|
+
if not filename.lower().endswith(('.xlsx', '.xls')):
|
|
855
|
+
raise ValueError("Filename must end with '.xlsx' or '.xls' for Excel export")
|
|
856
|
+
|
|
857
|
+
filename = os.path.abspath(filename)
|
|
858
|
+
|
|
859
|
+
# Clone the DataFrame to avoid modifying the original
|
|
860
|
+
clean_df = self.features_df.clone()
|
|
861
|
+
|
|
862
|
+
# Add a column has_ms2=True if column ms2_scans is not None
|
|
863
|
+
if "ms2_scans" in clean_df.columns:
|
|
864
|
+
clean_df = clean_df.with_columns(
|
|
865
|
+
(pl.col("ms2_scans").is_not_null()).alias("has_ms2")
|
|
866
|
+
)
|
|
867
|
+
|
|
868
|
+
# Filter out columns with List or Object data types that can't be exported to Excel
|
|
869
|
+
exportable_columns = [
|
|
870
|
+
col for col in clean_df.columns
|
|
871
|
+
if clean_df[col].dtype not in (pl.List, pl.Object)
|
|
872
|
+
]
|
|
873
|
+
|
|
874
|
+
clean_df = clean_df.select(exportable_columns)
|
|
875
|
+
|
|
876
|
+
# Convert to pandas and export to Excel
|
|
877
|
+
pandas_df = clean_df.to_pandas()
|
|
878
|
+
pandas_df.to_excel(filename, index=False)
|
|
879
|
+
|
|
880
|
+
self.logger.info(f"Features exported to {filename} (Excel format)")
|
|
881
|
+
self.logger.debug(f"Exported {len(clean_df)} features with {len(exportable_columns)} columns")
|
|
882
|
+
|
|
883
|
+
|
|
825
884
|
def export_chrom(self, filename="chrom.csv"):
|
|
826
885
|
# saves self.chrom_df to a csv file. Remove the scan_uid and chrom columns if the file already exists
|
|
827
886
|
if self.chrom_df is None:
|
masster/spectrum.py
CHANGED
|
@@ -827,8 +827,11 @@ class Spectrum:
|
|
|
827
827
|
inty = inty[idx]
|
|
828
828
|
p.line(mz, inty, line_color="black", legend_label=label)
|
|
829
829
|
else:
|
|
830
|
-
data
|
|
831
|
-
data = {
|
|
830
|
+
# Build data dictionary from spectrum attributes (numpy arrays)
|
|
831
|
+
data = {}
|
|
832
|
+
for key, val in self.__dict__.items():
|
|
833
|
+
if isinstance(val, np.ndarray) and val.size == mz.size:
|
|
834
|
+
data[key] = val
|
|
832
835
|
if ylog:
|
|
833
836
|
data["zeros"] = np.ones_like(mz)
|
|
834
837
|
else:
|
masster/study/helpers.py
CHANGED
|
@@ -500,7 +500,7 @@ def align_reset(self):
|
|
|
500
500
|
# TODO I don't get this param
|
|
501
501
|
def get_consensus(self, quant="chrom_area"):
|
|
502
502
|
if self.consensus_df is None:
|
|
503
|
-
self.logger.error("No consensus
|
|
503
|
+
self.logger.error("No consensus found.")
|
|
504
504
|
return None
|
|
505
505
|
|
|
506
506
|
# Convert Polars DataFrame to pandas for this operation since the result is used for export
|
|
@@ -613,7 +613,7 @@ def get_gaps_matrix(self, uids=None, samples=None):
|
|
|
613
613
|
import polars as pl
|
|
614
614
|
|
|
615
615
|
if self.consensus_df is None or self.consensus_df.is_empty():
|
|
616
|
-
self.logger.error("No consensus
|
|
616
|
+
self.logger.error("No consensus found.")
|
|
617
617
|
return None
|
|
618
618
|
|
|
619
619
|
if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
|
masster/study/plot.py
CHANGED
|
@@ -564,6 +564,10 @@ def plot_consensus_2d(
|
|
|
564
564
|
Parameters:
|
|
565
565
|
filename (str, optional): Path to save the plot
|
|
566
566
|
colorby (str): Column name to use for color mapping (default: "number_samples")
|
|
567
|
+
Automatically detects if column contains categorical (string) or
|
|
568
|
+
numeric data and applies appropriate color mapping:
|
|
569
|
+
- Categorical: Uses factor_cmap with distinct colors and legend
|
|
570
|
+
- Numeric: Uses LinearColorMapper with continuous colorbar
|
|
567
571
|
sizeby (str): Column name to use for size mapping (default: "inty_mean")
|
|
568
572
|
markersize (int): Base marker size (default: 6)
|
|
569
573
|
scaling (str): Controls whether points scale with zoom. Options:
|
|
@@ -645,12 +649,13 @@ def plot_consensus_2d(
|
|
|
645
649
|
from bokeh.models import HoverTool
|
|
646
650
|
from bokeh.models import LinearColorMapper
|
|
647
651
|
from bokeh.io.export import export_png
|
|
652
|
+
from bokeh.transform import factor_cmap
|
|
648
653
|
|
|
649
654
|
try:
|
|
650
655
|
from bokeh.models import ColorBar # type: ignore[attr-defined]
|
|
651
656
|
except ImportError:
|
|
652
657
|
from bokeh.models.annotations import ColorBar
|
|
653
|
-
from bokeh.palettes import viridis
|
|
658
|
+
from bokeh.palettes import viridis, Category20
|
|
654
659
|
|
|
655
660
|
# Import cmap for colormap handling
|
|
656
661
|
from cmap import Colormap
|
|
@@ -695,61 +700,144 @@ def plot_consensus_2d(
|
|
|
695
700
|
self.logger.warning(f"Could not interpret colormap '{cmap}': {e}, falling back to viridis")
|
|
696
701
|
palette = viridis(256)
|
|
697
702
|
|
|
698
|
-
|
|
699
|
-
|
|
700
|
-
|
|
701
|
-
|
|
703
|
+
# Check if colorby column contains categorical data (string/object)
|
|
704
|
+
colorby_values = data[colorby].to_list()
|
|
705
|
+
is_categorical = (
|
|
706
|
+
data_pd[colorby].dtype in ["object", "string", "category"] or
|
|
707
|
+
isinstance(colorby_values[0], str) if colorby_values else False
|
|
702
708
|
)
|
|
709
|
+
|
|
710
|
+
if is_categorical:
|
|
711
|
+
# Handle categorical coloring
|
|
712
|
+
# Use natural order of unique values - don't sort to preserve correct legend mapping
|
|
713
|
+
# Sorting would break the correspondence between legend labels and point colors
|
|
714
|
+
unique_values = [v for v in data_pd[colorby].unique() if v is not None]
|
|
715
|
+
|
|
716
|
+
if len(unique_values) <= 20:
|
|
717
|
+
palette = Category20[min(20, max(3, len(unique_values)))]
|
|
718
|
+
else:
|
|
719
|
+
# For many categories, use a subset of the viridis palette
|
|
720
|
+
palette = viridis(min(256, len(unique_values)))
|
|
721
|
+
|
|
722
|
+
color_mapper = factor_cmap(colorby, palette, unique_values)
|
|
723
|
+
else:
|
|
724
|
+
# Handle numeric coloring with LinearColorMapper
|
|
725
|
+
color_mapper = LinearColorMapper(
|
|
726
|
+
palette=palette,
|
|
727
|
+
low=data[colorby].min(),
|
|
728
|
+
high=data[colorby].max(),
|
|
729
|
+
)
|
|
703
730
|
# scatter plot rt vs mz
|
|
704
731
|
p = bp.figure(
|
|
705
732
|
width=width,
|
|
706
733
|
height=height,
|
|
707
|
-
title="Consensus
|
|
734
|
+
title=f"Consensus features, colored by {colorby}",
|
|
708
735
|
)
|
|
709
|
-
p.xaxis.axis_label = "
|
|
710
|
-
p.yaxis.axis_label = "m/z"
|
|
736
|
+
p.xaxis.axis_label = "RT [s]"
|
|
737
|
+
p.yaxis.axis_label = "m/z [Th]"
|
|
711
738
|
scatter_renderer: Any = None
|
|
712
|
-
if
|
|
713
|
-
#
|
|
714
|
-
|
|
715
|
-
|
|
716
|
-
|
|
717
|
-
|
|
739
|
+
if is_categorical:
|
|
740
|
+
# For categorical data, create separate renderers for each category
|
|
741
|
+
# This enables proper legend interactivity where each category can be toggled independently
|
|
742
|
+
unique_values = [v for v in data_pd[colorby].unique() if v is not None]
|
|
743
|
+
|
|
744
|
+
if len(unique_values) <= 20:
|
|
745
|
+
palette = Category20[min(20, max(3, len(unique_values)))]
|
|
746
|
+
else:
|
|
747
|
+
palette = viridis(min(256, len(unique_values)))
|
|
748
|
+
|
|
749
|
+
# Create a separate renderer for each category
|
|
750
|
+
for i, category in enumerate(unique_values):
|
|
751
|
+
# Filter data for this category
|
|
752
|
+
category_data = data.filter(pl.col(colorby) == category)
|
|
753
|
+
category_data_pd = category_data.to_pandas()
|
|
754
|
+
category_source = bp.ColumnDataSource(category_data_pd)
|
|
755
|
+
|
|
756
|
+
color = palette[i % len(palette)]
|
|
757
|
+
|
|
758
|
+
if scaling.lower() in ["dyn", "dynamic"]:
|
|
759
|
+
# Calculate appropriate radius for dynamic scaling
|
|
760
|
+
rt_range = data["rt"].max() - data["rt"].min()
|
|
761
|
+
mz_range = data["mz"].max() - data["mz"].min()
|
|
762
|
+
dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
|
|
763
|
+
|
|
764
|
+
renderer = p.circle(
|
|
765
|
+
x="rt",
|
|
766
|
+
y="mz",
|
|
767
|
+
radius=dynamic_radius,
|
|
768
|
+
fill_color=color,
|
|
769
|
+
line_color=None,
|
|
770
|
+
alpha=alpha,
|
|
771
|
+
source=category_source,
|
|
772
|
+
legend_label=str(category),
|
|
773
|
+
)
|
|
774
|
+
else:
|
|
775
|
+
renderer = p.scatter(
|
|
776
|
+
x="rt",
|
|
777
|
+
y="mz",
|
|
778
|
+
size="markersize",
|
|
779
|
+
fill_color=color,
|
|
780
|
+
line_color=None,
|
|
781
|
+
alpha=alpha,
|
|
782
|
+
source=category_source,
|
|
783
|
+
legend_label=str(category),
|
|
784
|
+
)
|
|
785
|
+
|
|
786
|
+
# No single scatter_renderer for categorical data
|
|
787
|
+
scatter_renderer = None
|
|
718
788
|
|
|
719
|
-
scatter_renderer = p.circle(
|
|
720
|
-
x="rt",
|
|
721
|
-
y="mz",
|
|
722
|
-
radius=dynamic_radius,
|
|
723
|
-
fill_color={"field": colorby, "transform": color_mapper},
|
|
724
|
-
line_color=None,
|
|
725
|
-
alpha=alpha,
|
|
726
|
-
source=source,
|
|
727
|
-
)
|
|
728
789
|
else:
|
|
729
|
-
|
|
730
|
-
|
|
731
|
-
|
|
732
|
-
|
|
733
|
-
|
|
734
|
-
|
|
735
|
-
|
|
736
|
-
|
|
737
|
-
|
|
790
|
+
# Handle numeric coloring - single renderer with color mapping
|
|
791
|
+
if scaling.lower() in ["dyn", "dynamic"]:
|
|
792
|
+
# Calculate appropriate radius for dynamic scaling
|
|
793
|
+
rt_range = data["rt"].max() - data["rt"].min()
|
|
794
|
+
mz_range = data["mz"].max() - data["mz"].min()
|
|
795
|
+
dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
|
|
796
|
+
|
|
797
|
+
scatter_renderer = p.circle(
|
|
798
|
+
x="rt",
|
|
799
|
+
y="mz",
|
|
800
|
+
radius=dynamic_radius,
|
|
801
|
+
fill_color={"field": colorby, "transform": color_mapper},
|
|
802
|
+
line_color=None,
|
|
803
|
+
alpha=alpha,
|
|
804
|
+
source=source,
|
|
805
|
+
)
|
|
806
|
+
else:
|
|
807
|
+
scatter_renderer = p.scatter(
|
|
808
|
+
x="rt",
|
|
809
|
+
y="mz",
|
|
810
|
+
size="markersize",
|
|
811
|
+
fill_color={"field": colorby, "transform": color_mapper},
|
|
812
|
+
line_color=None,
|
|
813
|
+
alpha=alpha,
|
|
814
|
+
source=source,
|
|
815
|
+
)
|
|
738
816
|
# add hover tool
|
|
739
|
-
# Start with base tooltips
|
|
817
|
+
# Start with base tooltips - rt and mz moved to top, removed consensus_id and iso_mean
|
|
740
818
|
tooltips = [
|
|
819
|
+
("rt", "@rt"),
|
|
820
|
+
("mz", "@mz"),
|
|
741
821
|
("consensus_uid", "@consensus_uid"),
|
|
742
|
-
("consensus_id", "@consensus_id"),
|
|
743
822
|
("number_samples", "@number_samples"),
|
|
744
823
|
("number_ms2", "@number_ms2"),
|
|
745
|
-
("rt", "@rt"),
|
|
746
|
-
("mz", "@mz"),
|
|
747
824
|
("inty_mean", "@inty_mean"),
|
|
748
|
-
("iso_mean", "@iso_mean"),
|
|
749
825
|
("coherence_mean", "@chrom_coherence_mean"),
|
|
750
826
|
("prominence_scaled_mean", "@chrom_prominence_scaled_mean"),
|
|
751
827
|
]
|
|
752
828
|
|
|
829
|
+
# Add adduct_top if it exists in data
|
|
830
|
+
if "adduct_top" in data.columns:
|
|
831
|
+
tooltips.append(("adduct_top", "@adduct_top"))
|
|
832
|
+
|
|
833
|
+
# Add id_top_name if it exists in data
|
|
834
|
+
if "id_top_name" in data.columns:
|
|
835
|
+
tooltips.append(("id_top_name", "@id_top_name"))
|
|
836
|
+
|
|
837
|
+
# Add id_top_adduct if it exists in data
|
|
838
|
+
if "id_top_adduct" in data.columns:
|
|
839
|
+
tooltips.append(("id_top_adduct", "@id_top_adduct"))
|
|
840
|
+
|
|
753
841
|
# Add id_top_* columns if they exist and have non-null values
|
|
754
842
|
id_top_columns = ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score"]
|
|
755
843
|
for col in id_top_columns:
|
|
@@ -764,19 +852,28 @@ def plot_consensus_2d(
|
|
|
764
852
|
|
|
765
853
|
hover = HoverTool(
|
|
766
854
|
tooltips=tooltips,
|
|
767
|
-
renderers=[scatter_renderer],
|
|
768
855
|
)
|
|
856
|
+
# For categorical data, hover will work on all renderers automatically
|
|
857
|
+
# For numeric data, specify the single renderer
|
|
858
|
+
if not is_categorical and scatter_renderer:
|
|
859
|
+
hover.renderers = [scatter_renderer]
|
|
860
|
+
|
|
769
861
|
p.add_tools(hover)
|
|
770
862
|
|
|
771
|
-
# add colorbar
|
|
772
|
-
|
|
773
|
-
|
|
774
|
-
|
|
775
|
-
|
|
776
|
-
|
|
777
|
-
|
|
778
|
-
|
|
779
|
-
|
|
863
|
+
# add colorbar only for numeric data (LinearColorMapper)
|
|
864
|
+
if not is_categorical:
|
|
865
|
+
color_bar = ColorBar(
|
|
866
|
+
color_mapper=color_mapper,
|
|
867
|
+
label_standoff=12,
|
|
868
|
+
location=(0, 0),
|
|
869
|
+
title=colorby,
|
|
870
|
+
ticker=BasicTicker(desired_num_ticks=8),
|
|
871
|
+
)
|
|
872
|
+
p.add_layout(color_bar, "right")
|
|
873
|
+
else:
|
|
874
|
+
# For categorical data, configure the legend that was automatically created
|
|
875
|
+
p.legend.location = "top_right"
|
|
876
|
+
p.legend.click_policy = "hide"
|
|
780
877
|
|
|
781
878
|
if filename is not None:
|
|
782
879
|
# Convert relative paths to absolute paths using study folder as base
|