masster 0.5.1__py3-none-any.whl → 0.5.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/load.py +5 -4
- masster/study/defaults/align_def.py +0 -204
- masster/study/defaults/fill_def.py +9 -1
- masster/study/defaults/merge_def.py +20 -69
- masster/study/export.py +25 -5
- masster/study/h5.py +160 -42
- masster/study/helpers.py +430 -53
- masster/study/load.py +986 -158
- masster/study/merge.py +683 -1076
- masster/study/plot.py +43 -38
- masster/study/processing.py +337 -280
- masster/study/study.py +58 -135
- masster/wizard/wizard.py +20 -6
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/METADATA +1 -1
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/RECORD +19 -20
- masster/study/defaults/fill_chrom_def.py +0 -260
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/WHEEL +0 -0
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/entry_points.txt +0 -0
- {masster-0.5.1.dist-info → masster-0.5.3.dist-info}/licenses/LICENSE +0 -0
masster/study/plot.py
CHANGED
|
@@ -1385,6 +1385,7 @@ def plot_rt_correction(
|
|
|
1385
1385
|
"""
|
|
1386
1386
|
Plot RT correction per sample: (rt - rt_original) vs rt overlaid for selected samples.
|
|
1387
1387
|
|
|
1388
|
+
Only features with filled==False are used for the RT correction plot.
|
|
1388
1389
|
This uses the same color mapping as `plot_bpc` so curves for the same samples match.
|
|
1389
1390
|
"""
|
|
1390
1391
|
from bokeh.plotting import figure, show, output_file
|
|
@@ -1447,29 +1448,35 @@ def plot_rt_correction(
|
|
|
1447
1448
|
if sample_feats.is_empty():
|
|
1448
1449
|
continue
|
|
1449
1450
|
|
|
1450
|
-
#
|
|
1451
|
-
|
|
1452
|
-
|
|
1453
|
-
|
|
1454
|
-
|
|
1451
|
+
# Filter to only use features with filled==False
|
|
1452
|
+
if "filled" in sample_feats.columns:
|
|
1453
|
+
sample_feats = sample_feats.filter(~pl.col("filled"))
|
|
1454
|
+
if sample_feats.is_empty():
|
|
1455
|
+
continue
|
|
1455
1456
|
|
|
1456
|
-
#
|
|
1457
|
-
if "rt" not in
|
|
1457
|
+
# Stay in Polars - much faster than pandas conversion!
|
|
1458
|
+
if "rt" not in sample_feats.columns or "rt_original" not in sample_feats.columns:
|
|
1458
1459
|
continue
|
|
1459
1460
|
|
|
1460
|
-
#
|
|
1461
|
-
|
|
1462
|
-
|
|
1463
|
-
|
|
1461
|
+
# Filter nulls and add delta column in Polars
|
|
1462
|
+
sample_feats = (
|
|
1463
|
+
sample_feats
|
|
1464
|
+
.filter(
|
|
1465
|
+
pl.col("rt").is_not_null() &
|
|
1466
|
+
pl.col("rt_original").is_not_null()
|
|
1467
|
+
)
|
|
1468
|
+
.with_columns([
|
|
1469
|
+
(pl.col("rt") - pl.col("rt_original")).alias("delta")
|
|
1470
|
+
])
|
|
1471
|
+
.sort("rt")
|
|
1472
|
+
)
|
|
1464
1473
|
|
|
1465
|
-
|
|
1466
|
-
|
|
1467
|
-
delta = rt - rt_orig
|
|
1474
|
+
if sample_feats.is_empty():
|
|
1475
|
+
continue
|
|
1468
1476
|
|
|
1469
|
-
#
|
|
1470
|
-
|
|
1471
|
-
|
|
1472
|
-
delta = delta[idx]
|
|
1477
|
+
# Extract arrays directly from Polars
|
|
1478
|
+
rt = sample_feats["rt"].to_numpy()
|
|
1479
|
+
delta = sample_feats["delta"].to_numpy()
|
|
1473
1480
|
|
|
1474
1481
|
sample_name = str(uid)
|
|
1475
1482
|
if samples_info is not None:
|
|
@@ -1759,21 +1766,26 @@ def plot_consensus_stats(
|
|
|
1759
1766
|
import polars as pl
|
|
1760
1767
|
import numpy as np
|
|
1761
1768
|
|
|
1762
|
-
#
|
|
1763
|
-
|
|
1764
|
-
|
|
1769
|
+
# Get the consensus statistics data using the new helper method
|
|
1770
|
+
data_df = self.get_consensus_stats()
|
|
1771
|
+
|
|
1772
|
+
if data_df is None or data_df.is_empty():
|
|
1773
|
+
self.logger.error("No consensus statistics data available.")
|
|
1765
1774
|
return
|
|
1766
1775
|
|
|
1767
|
-
#
|
|
1768
|
-
|
|
1776
|
+
# Remove consensus_uid column for plotting (keep only numeric columns)
|
|
1777
|
+
if "consensus_uid" in data_df.columns:
|
|
1778
|
+
data_df_clean = data_df.drop("consensus_uid")
|
|
1779
|
+
else:
|
|
1780
|
+
data_df_clean = data_df
|
|
1769
1781
|
|
|
1770
|
-
# Define specific columns to plot in the exact order requested
|
|
1782
|
+
# Define specific columns to plot in the exact order requested (excluding consensus_uid)
|
|
1771
1783
|
desired_columns = [
|
|
1772
1784
|
"rt",
|
|
1773
1785
|
"rt_delta_mean",
|
|
1774
1786
|
"mz",
|
|
1775
|
-
"mz_range", # mz_max-mz_min
|
|
1776
|
-
"log10_inty_mean", # log10(inty_mean)
|
|
1787
|
+
"mz_range", # mz_max-mz_min
|
|
1788
|
+
"log10_inty_mean", # log10(inty_mean)
|
|
1777
1789
|
"number_samples",
|
|
1778
1790
|
"number_ms2",
|
|
1779
1791
|
"charge_mean",
|
|
@@ -1783,20 +1795,13 @@ def plot_consensus_stats(
|
|
|
1783
1795
|
"chrom_prominence_scaled_mean"
|
|
1784
1796
|
]
|
|
1785
1797
|
|
|
1786
|
-
# Calculate derived columns if they don't exist
|
|
1787
|
-
if "mz_range" not in data_df.columns and "mz_max" in data_df.columns and "mz_min" in data_df.columns:
|
|
1788
|
-
data_df = data_df.with_columns((pl.col("mz_max") - pl.col("mz_min")).alias("mz_range"))
|
|
1789
|
-
|
|
1790
|
-
if "log10_inty_mean" not in data_df.columns and "inty_mean" in data_df.columns:
|
|
1791
|
-
data_df = data_df.with_columns(pl.col("inty_mean").log10().alias("log10_inty_mean"))
|
|
1792
|
-
|
|
1793
1798
|
# Filter to only include columns that exist in the dataframe, preserving order
|
|
1794
|
-
numeric_columns = [col for col in desired_columns if col in
|
|
1799
|
+
numeric_columns = [col for col in desired_columns if col in data_df_clean.columns]
|
|
1795
1800
|
|
|
1796
1801
|
# Check if the numeric columns are actually numeric
|
|
1797
1802
|
final_numeric_columns = []
|
|
1798
1803
|
for col in numeric_columns:
|
|
1799
|
-
dtype =
|
|
1804
|
+
dtype = data_df_clean[col].dtype
|
|
1800
1805
|
if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
|
|
1801
1806
|
pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
|
|
1802
1807
|
pl.Float32, pl.Float64]:
|
|
@@ -1805,13 +1810,13 @@ def plot_consensus_stats(
|
|
|
1805
1810
|
numeric_columns = final_numeric_columns
|
|
1806
1811
|
|
|
1807
1812
|
if len(numeric_columns) == 0:
|
|
1808
|
-
self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(
|
|
1813
|
+
self.logger.error(f"None of the requested consensus statistics columns were found or are numeric. Available columns: {list(data_df_clean.columns)}")
|
|
1809
1814
|
return
|
|
1810
1815
|
|
|
1811
1816
|
self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} specific consensus columns: {numeric_columns}")
|
|
1812
1817
|
|
|
1813
|
-
#
|
|
1814
|
-
data_df_clean =
|
|
1818
|
+
# Select only the numeric columns for plotting
|
|
1819
|
+
data_df_clean = data_df_clean.select(numeric_columns)
|
|
1815
1820
|
|
|
1816
1821
|
# Check if all numeric columns are empty
|
|
1817
1822
|
all_columns_empty = True
|