masster 0.5.12__tar.gz → 0.5.13__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.5.12 → masster-0.5.13}/PKG-INFO +1 -1
- {masster-0.5.12 → masster-0.5.13}/pyproject.toml +1 -1
- {masster-0.5.12 → masster-0.5.13}/src/masster/_version.py +1 -1
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/id.py +4 -3
- masster-0.5.13/src/masster/study/importers.py +222 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/merge.py +2 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/plot.py +84 -12
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/study.py +4 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/study5_schema.json +3 -0
- {masster-0.5.12 → masster-0.5.13}/uv.lock +1 -1
- {masster-0.5.12 → masster-0.5.13}/.github/workflows/publish.yml +0 -0
- {masster-0.5.12 → masster-0.5.13}/.github/workflows/security.yml +0 -0
- {masster-0.5.12 → masster-0.5.13}/.github/workflows/test.yml +0 -0
- {masster-0.5.12 → masster-0.5.13}/.gitignore +0 -0
- {masster-0.5.12 → masster-0.5.13}/.pre-commit-config.yaml +0 -0
- {masster-0.5.12 → masster-0.5.13}/LICENSE +0 -0
- {masster-0.5.12 → masster-0.5.13}/Makefile +0 -0
- {masster-0.5.12 → masster-0.5.13}/README.md +0 -0
- {masster-0.5.12 → masster-0.5.13}/TESTING.md +0 -0
- {masster-0.5.12 → masster-0.5.13}/demo/example_batch_process.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/demo/example_sample_process.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/chromatogram.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/libs/aa.csv +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/libs/ccm.csv +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/libs/hilic.csv +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/libs/urine.csv +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/lib/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/lib/lib.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/logger.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/adducts.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/h5.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/helpers.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/lib.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/load.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/parameters.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/plot.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/processing.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/quant.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/sample.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/save.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/sample/sciex.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/spectrum.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/analysis.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/export.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/h5.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/helpers.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/load.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/parameters.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/processing.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/study/save.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/wizard/README.md +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/wizard/__init__.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/wizard/example.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/src/masster/wizard/wizard.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/conftest.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_chromatogram.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_defaults.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_imports.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_integration.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_logger.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_parameters.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_sample.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_spectrum.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_study.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tests/test_version.py +0 -0
- {masster-0.5.12 → masster-0.5.13}/tox.ini +0 -0
|
@@ -661,7 +661,8 @@ def _update_consensus_id_columns(study, logger=None):
|
|
|
661
661
|
("id_top_name", pl.String),
|
|
662
662
|
("id_top_class", pl.String),
|
|
663
663
|
("id_top_adduct", pl.String),
|
|
664
|
-
("id_top_score", pl.Float64)
|
|
664
|
+
("id_top_score", pl.Float64),
|
|
665
|
+
("id_source", pl.String)
|
|
665
666
|
]:
|
|
666
667
|
if col_name not in study.consensus_df.columns:
|
|
667
668
|
study.consensus_df = study.consensus_df.with_columns(
|
|
@@ -1076,7 +1077,7 @@ def id_reset(study):
|
|
|
1076
1077
|
|
|
1077
1078
|
# Check which columns exist before trying to update them
|
|
1078
1079
|
id_columns_to_reset = []
|
|
1079
|
-
for col in ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score"]:
|
|
1080
|
+
for col in ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score", "id_source"]:
|
|
1080
1081
|
if col in study.consensus_df.columns:
|
|
1081
1082
|
if col == "id_top_score":
|
|
1082
1083
|
id_columns_to_reset.append(pl.lit(None, dtype=pl.Float64).alias(col))
|
|
@@ -1170,7 +1171,7 @@ def lib_reset(study):
|
|
|
1170
1171
|
|
|
1171
1172
|
# Check which columns exist before trying to update them
|
|
1172
1173
|
id_columns_to_reset = []
|
|
1173
|
-
for col in ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score"]:
|
|
1174
|
+
for col in ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score", "id_source"]:
|
|
1174
1175
|
if col in study.consensus_df.columns:
|
|
1175
1176
|
if col == "id_top_score":
|
|
1176
1177
|
id_columns_to_reset.append(pl.lit(None, dtype=pl.Float64).alias(col))
|
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
"""
|
|
2
|
+
importers.py
|
|
3
|
+
|
|
4
|
+
Module providing import functionality for Study class, specifically for importing
|
|
5
|
+
oracle identification data into consensus features.
|
|
6
|
+
"""
|
|
7
|
+
|
|
8
|
+
from __future__ import annotations
|
|
9
|
+
|
|
10
|
+
import os
|
|
11
|
+
import pandas as pd
|
|
12
|
+
import polars as pl
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
def import_oracle(
|
|
16
|
+
self,
|
|
17
|
+
folder,
|
|
18
|
+
min_id_level=None,
|
|
19
|
+
max_id_level=None,
|
|
20
|
+
):
|
|
21
|
+
"""
|
|
22
|
+
Import oracle identification data and map it to consensus features.
|
|
23
|
+
|
|
24
|
+
This method reads oracle identification results from folder/diag/summary_by_feature.csv
|
|
25
|
+
and maps them to consensus features using the consensus UID parsed from the 'title' column (pattern "uid:<digits>"). The oracle
|
|
26
|
+
data is used to populate identification columns in consensus_df.
|
|
27
|
+
|
|
28
|
+
Parameters:
|
|
29
|
+
folder (str): Path to oracle folder containing diag/summary_by_feature.csv
|
|
30
|
+
min_id_level (int, optional): Minimum identification level to include
|
|
31
|
+
max_id_level (int, optional): Maximum identification level to include
|
|
32
|
+
|
|
33
|
+
Returns:
|
|
34
|
+
None: Updates consensus_df in-place with oracle identification data
|
|
35
|
+
|
|
36
|
+
Raises:
|
|
37
|
+
FileNotFoundError: If the oracle summary file doesn't exist
|
|
38
|
+
ValueError: If consensus_df is empty, lacks the 'consensus_uid' column, or the oracle file is missing required columns
|
|
39
|
+
|
|
40
|
+
Example:
|
|
41
|
+
>>> study.import_oracle(
|
|
42
|
+
... folder="path/to/oracle_results",
|
|
43
|
+
... min_id_level=2,
|
|
44
|
+
... max_id_level=4
|
|
45
|
+
... )
|
|
46
|
+
"""
|
|
47
|
+
|
|
48
|
+
self.logger.info(f"Starting oracle import from folder: {folder}")
|
|
49
|
+
|
|
50
|
+
# Validate inputs
|
|
51
|
+
if self.consensus_df is None or self.consensus_df.is_empty():
|
|
52
|
+
raise ValueError("consensus_df is empty or not available. Run merge() first.")
|
|
53
|
+
|
|
54
|
+
if "consensus_uid" not in self.consensus_df.columns:
|
|
55
|
+
raise ValueError("consensus_df must contain 'consensus_uid' column")
|
|
56
|
+
|
|
57
|
+
# Check if oracle file exists
|
|
58
|
+
oracle_file_path = os.path.join(folder, "diag", "summary_by_feature.csv")
|
|
59
|
+
if not os.path.exists(oracle_file_path):
|
|
60
|
+
raise FileNotFoundError(f"Oracle summary file not found: {oracle_file_path}")
|
|
61
|
+
|
|
62
|
+
self.logger.debug(f"Loading oracle data from: {oracle_file_path}")
|
|
63
|
+
|
|
64
|
+
try:
|
|
65
|
+
# Read oracle data using pandas first for easier processing
|
|
66
|
+
oracle_data = pd.read_csv(oracle_file_path)
|
|
67
|
+
self.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")
|
|
68
|
+
except Exception as e:
|
|
69
|
+
self.logger.error(f"Could not read {oracle_file_path}: {e}")
|
|
70
|
+
raise
|
|
71
|
+
|
|
72
|
+
# Select relevant columns from oracle data
|
|
73
|
+
required_oracle_cols = ["title", "id_level", "id_label", "id_ion", "id_class", "score"]
|
|
74
|
+
missing_cols = [col for col in required_oracle_cols if col not in oracle_data.columns]
|
|
75
|
+
if missing_cols:
|
|
76
|
+
raise ValueError(f"Oracle data missing required columns: {missing_cols}")
|
|
77
|
+
|
|
78
|
+
oracle_subset = oracle_data[required_oracle_cols].copy()
|
|
79
|
+
|
|
80
|
+
# Extract consensus_uid from title column (format: "uid:XYZ, ...")
|
|
81
|
+
self.logger.debug("Extracting consensus UIDs from oracle titles using pattern 'uid:(\\d+)'")
|
|
82
|
+
oracle_subset["consensus_uid"] = oracle_subset["title"].str.extract(r"uid:(\d+)")
|
|
83
|
+
|
|
84
|
+
# Remove rows where consensus_uid extraction failed
|
|
85
|
+
oracle_subset = oracle_subset.dropna(subset=["consensus_uid"])
|
|
86
|
+
oracle_subset["consensus_uid"] = oracle_subset["consensus_uid"].astype(int)
|
|
87
|
+
|
|
88
|
+
self.logger.debug(f"Extracted consensus UIDs for {len(oracle_subset)} oracle entries")
|
|
89
|
+
|
|
90
|
+
# Apply id_level filters if specified
|
|
91
|
+
initial_count = len(oracle_subset)
|
|
92
|
+
if min_id_level is not None:
|
|
93
|
+
oracle_subset = oracle_subset[oracle_subset["id_level"] >= min_id_level]
|
|
94
|
+
self.logger.debug(f"After min_id_level filter ({min_id_level}): {len(oracle_subset)} entries")
|
|
95
|
+
|
|
96
|
+
if max_id_level is not None:
|
|
97
|
+
oracle_subset = oracle_subset[oracle_subset["id_level"] <= max_id_level]
|
|
98
|
+
self.logger.debug(f"After max_id_level filter ({max_id_level}): {len(oracle_subset)} entries")
|
|
99
|
+
|
|
100
|
+
if len(oracle_subset) == 0:
|
|
101
|
+
self.logger.warning("No oracle entries remain after filtering")
|
|
102
|
+
return
|
|
103
|
+
|
|
104
|
+
# Sort by id_level (descending) to prioritize higher confidence identifications
|
|
105
|
+
# and remove duplicates by consensus_uid, keeping the first (highest id_level)
|
|
106
|
+
oracle_subset = oracle_subset.sort_values(by=["id_level"], ascending=False)
|
|
107
|
+
oracle_subset = oracle_subset.drop_duplicates(subset=["consensus_uid"], keep="first")
|
|
108
|
+
|
|
109
|
+
self.logger.debug(f"After deduplication by consensus_uid: {len(oracle_subset)} unique identifications")
|
|
110
|
+
|
|
111
|
+
# Convert to polars for efficient joining
|
|
112
|
+
oracle_pl = pl.DataFrame(oracle_subset)
|
|
113
|
+
|
|
114
|
+
self.logger.debug(f"Oracle data ready for consensus mapping: {len(oracle_pl)} entries")
|
|
115
|
+
|
|
116
|
+
if oracle_pl.is_empty():
|
|
117
|
+
self.logger.warning("No oracle entries could be processed")
|
|
118
|
+
return
|
|
119
|
+
|
|
120
|
+
# Group by consensus_uid and select the best identification (highest id_level)
|
|
121
|
+
# In case of ties, take the first one
|
|
122
|
+
best_ids = (
|
|
123
|
+
oracle_pl
|
|
124
|
+
.group_by("consensus_uid")
|
|
125
|
+
.agg([
|
|
126
|
+
pl.col("id_level").max().alias("max_id_level")
|
|
127
|
+
])
|
|
128
|
+
.join(oracle_pl, on="consensus_uid")
|
|
129
|
+
.filter(pl.col("id_level") == pl.col("max_id_level"))
|
|
130
|
+
.group_by("consensus_uid")
|
|
131
|
+
.first() # In case of ties, take the first
|
|
132
|
+
)
|
|
133
|
+
|
|
134
|
+
self.logger.debug(f"Selected best identifications for {len(best_ids)} consensus features")
|
|
135
|
+
|
|
136
|
+
# Prepare the identification columns
|
|
137
|
+
id_columns = {
|
|
138
|
+
"id_top_name": best_ids.select("consensus_uid", "id_label"),
|
|
139
|
+
"id_top_adduct": best_ids.select("consensus_uid", "id_ion"),
|
|
140
|
+
"id_top_class": best_ids.select("consensus_uid", "id_class"),
|
|
141
|
+
"id_top_score": best_ids.select("consensus_uid", pl.col("score").round(3).alias("score")),
|
|
142
|
+
"id_source": best_ids.select(
|
|
143
|
+
"consensus_uid",
|
|
144
|
+
pl.when(pl.col("id_level") == 1)
|
|
145
|
+
.then(pl.lit("lipidoracle ms1"))
|
|
146
|
+
.otherwise(pl.lit("lipidoracle ms2"))
|
|
147
|
+
.alias("id_source")
|
|
148
|
+
)
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
# Initialize identification columns in consensus_df if they don't exist
|
|
152
|
+
for col_name in id_columns.keys():
|
|
153
|
+
if col_name not in self.consensus_df.columns:
|
|
154
|
+
if col_name == "id_top_score":
|
|
155
|
+
self.consensus_df = self.consensus_df.with_columns(
|
|
156
|
+
pl.lit(None, dtype=pl.Float64).alias(col_name)
|
|
157
|
+
)
|
|
158
|
+
else:
|
|
159
|
+
self.consensus_df = self.consensus_df.with_columns(
|
|
160
|
+
pl.lit(None, dtype=pl.String).alias(col_name)
|
|
161
|
+
)
|
|
162
|
+
|
|
163
|
+
# Update consensus_df with oracle identifications
|
|
164
|
+
for col_name, id_data in id_columns.items():
|
|
165
|
+
oracle_column = id_data.columns[1] # second column (after consensus_uid)
|
|
166
|
+
|
|
167
|
+
# Create update dataframe
|
|
168
|
+
update_data = id_data.rename({oracle_column: col_name})
|
|
169
|
+
|
|
170
|
+
# Join and update
|
|
171
|
+
self.consensus_df = (
|
|
172
|
+
self.consensus_df
|
|
173
|
+
.join(update_data, on="consensus_uid", how="left", suffix="_oracle")
|
|
174
|
+
.with_columns(
|
|
175
|
+
pl.coalesce([f"{col_name}_oracle", col_name]).alias(col_name)
|
|
176
|
+
)
|
|
177
|
+
.drop(f"{col_name}_oracle")
|
|
178
|
+
)
|
|
179
|
+
|
|
180
|
+
# Replace NaN values with None in identification columns
|
|
181
|
+
id_col_names = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score", "id_source"]
|
|
182
|
+
for col_name in id_col_names:
|
|
183
|
+
if col_name in self.consensus_df.columns:
|
|
184
|
+
# For string columns, replace empty strings and "nan" with None
|
|
185
|
+
if col_name != "id_top_score":
|
|
186
|
+
self.consensus_df = self.consensus_df.with_columns(
|
|
187
|
+
pl.when(
|
|
188
|
+
pl.col(col_name).is_null() |
|
|
189
|
+
(pl.col(col_name) == "") |
|
|
190
|
+
(pl.col(col_name) == "nan") |
|
|
191
|
+
(pl.col(col_name) == "NaN")
|
|
192
|
+
)
|
|
193
|
+
.then(None)
|
|
194
|
+
.otherwise(pl.col(col_name))
|
|
195
|
+
.alias(col_name)
|
|
196
|
+
)
|
|
197
|
+
# For numeric columns, replace NaN with None
|
|
198
|
+
else:
|
|
199
|
+
self.consensus_df = self.consensus_df.with_columns(
|
|
200
|
+
pl.when(pl.col(col_name).is_null() | pl.col(col_name).is_nan())
|
|
201
|
+
.then(None)
|
|
202
|
+
.otherwise(pl.col(col_name))
|
|
203
|
+
.alias(col_name)
|
|
204
|
+
)
|
|
205
|
+
|
|
206
|
+
# Count how many consensus features now carry an identification (includes any identified before this import)
|
|
207
|
+
updated_count = self.consensus_df.filter(pl.col("id_top_name").is_not_null()).height
|
|
208
|
+
total_consensus = len(self.consensus_df)
|
|
209
|
+
|
|
210
|
+
self.logger.info(
|
|
211
|
+
f"Oracle import complete: {updated_count}/{total_consensus} "
|
|
212
|
+
f"consensus features now have identifications ({updated_count/total_consensus*100:.1f}%)"
|
|
213
|
+
)
|
|
214
|
+
|
|
215
|
+
# Update history
|
|
216
|
+
self.update_history(["import_oracle"], {
|
|
217
|
+
"folder": folder,
|
|
218
|
+
"min_id_level": min_id_level,
|
|
219
|
+
"max_id_level": max_id_level,
|
|
220
|
+
"updated_features": updated_count,
|
|
221
|
+
"total_features": total_consensus
|
|
222
|
+
})
|
|
@@ -1792,6 +1792,7 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
|
|
|
1792
1792
|
"id_top_class": None,
|
|
1793
1793
|
"id_top_adduct": None,
|
|
1794
1794
|
"id_top_score": None,
|
|
1795
|
+
"id_source": None,
|
|
1795
1796
|
}
|
|
1796
1797
|
|
|
1797
1798
|
|
|
@@ -2194,6 +2195,7 @@ def _extract_consensus_features(study, consensus_map, min_samples, cached_adduct
|
|
|
2194
2195
|
"id_top_class": None,
|
|
2195
2196
|
"id_top_adduct": None,
|
|
2196
2197
|
"id_top_score": None,
|
|
2198
|
+
"id_source": None,
|
|
2197
2199
|
},
|
|
2198
2200
|
)
|
|
2199
2201
|
|
|
@@ -630,6 +630,7 @@ def plot_consensus_2d(
|
|
|
630
630
|
height=450,
|
|
631
631
|
mz_range=None,
|
|
632
632
|
rt_range=None,
|
|
633
|
+
legend="bottom_right",
|
|
633
634
|
):
|
|
634
635
|
"""
|
|
635
636
|
Plot consensus features in a 2D scatter plot with retention time vs m/z.
|
|
@@ -652,6 +653,9 @@ def plot_consensus_2d(
|
|
|
652
653
|
height (int): Plot height in pixels (default: 450)
|
|
653
654
|
mz_range (tuple, optional): m/z range for filtering consensus features (min_mz, max_mz)
|
|
654
655
|
rt_range (tuple, optional): Retention time range for filtering consensus features (min_rt, max_rt)
|
|
656
|
+
legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
|
|
657
|
+
'bottom_right', 'bottom_left', 'right', 'left', 'top', 'bottom'.
|
|
658
|
+
If None, legend is hidden. Only applies to categorical coloring (default: "bottom_right")
|
|
655
659
|
"""
|
|
656
660
|
if self.consensus_df is None:
|
|
657
661
|
self.logger.error("No consensus map found.")
|
|
@@ -783,13 +787,20 @@ def plot_consensus_2d(
|
|
|
783
787
|
# Sorting would break the correspondence between legend labels and point colors
|
|
784
788
|
unique_values = [v for v in data_pd[colorby].unique() if v is not None]
|
|
785
789
|
|
|
786
|
-
if
|
|
787
|
-
|
|
790
|
+
# Use the custom palette from cmap if available, otherwise fall back to defaults
|
|
791
|
+
if len(palette) >= len(unique_values):
|
|
792
|
+
# Use custom colormap palette - sample evenly across the palette
|
|
793
|
+
import numpy as np
|
|
794
|
+
indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
|
|
795
|
+
categorical_palette = [palette[i] for i in indices]
|
|
796
|
+
elif len(unique_values) <= 20:
|
|
797
|
+
# Fall back to Category20 if custom palette is too small
|
|
798
|
+
categorical_palette = Category20[min(20, max(3, len(unique_values)))]
|
|
788
799
|
else:
|
|
789
800
|
# For many categories, use a subset of the viridis palette
|
|
790
|
-
|
|
801
|
+
categorical_palette = viridis(min(256, len(unique_values)))
|
|
791
802
|
|
|
792
|
-
color_mapper = factor_cmap(colorby,
|
|
803
|
+
color_mapper = factor_cmap(colorby, categorical_palette, unique_values)
|
|
793
804
|
else:
|
|
794
805
|
# Handle numeric coloring with LinearColorMapper
|
|
795
806
|
color_mapper = LinearColorMapper(
|
|
@@ -809,21 +820,65 @@ def plot_consensus_2d(
|
|
|
809
820
|
if is_categorical:
|
|
810
821
|
# For categorical data, create separate renderers for each category
|
|
811
822
|
# This enables proper legend interactivity where each category can be toggled independently
|
|
812
|
-
|
|
823
|
+
all_unique_values = list(data_pd[colorby].unique())
|
|
824
|
+
unique_values = [v for v in all_unique_values if v is not None]
|
|
825
|
+
has_none_values = None in all_unique_values
|
|
813
826
|
|
|
814
|
-
if
|
|
815
|
-
|
|
827
|
+
# Use the custom palette from cmap if available, otherwise fall back to defaults
|
|
828
|
+
if len(palette) >= len(unique_values):
|
|
829
|
+
# Use custom colormap palette - sample evenly across the palette
|
|
830
|
+
import numpy as np
|
|
831
|
+
indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
|
|
832
|
+
categorical_palette = [palette[i] for i in indices]
|
|
833
|
+
elif len(unique_values) <= 20:
|
|
834
|
+
# Fall back to Category20 if custom palette is too small
|
|
835
|
+
categorical_palette = Category20[min(20, max(3, len(unique_values)))]
|
|
816
836
|
else:
|
|
817
|
-
|
|
837
|
+
categorical_palette = viridis(min(256, len(unique_values)))
|
|
818
838
|
|
|
819
|
-
#
|
|
839
|
+
# Handle None values with a light gray color FIRST so they appear in the background
|
|
840
|
+
if has_none_values:
|
|
841
|
+
# Filter data for None values
|
|
842
|
+
none_data = data.filter(pl.col(colorby).is_null())
|
|
843
|
+
none_data_pd = none_data.to_pandas()
|
|
844
|
+
none_source = bp.ColumnDataSource(none_data_pd)
|
|
845
|
+
|
|
846
|
+
if scaling.lower() in ["dyn", "dynamic"]:
|
|
847
|
+
# Calculate appropriate radius for dynamic scaling
|
|
848
|
+
rt_range = data["rt"].max() - data["rt"].min()
|
|
849
|
+
mz_range = data["mz"].max() - data["mz"].min()
|
|
850
|
+
dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
|
|
851
|
+
|
|
852
|
+
renderer = p.circle(
|
|
853
|
+
x="rt",
|
|
854
|
+
y="mz",
|
|
855
|
+
radius=dynamic_radius,
|
|
856
|
+
fill_color="lightgray",
|
|
857
|
+
line_color=None,
|
|
858
|
+
alpha=alpha,
|
|
859
|
+
source=none_source,
|
|
860
|
+
legend_label="None",
|
|
861
|
+
)
|
|
862
|
+
else:
|
|
863
|
+
renderer = p.scatter(
|
|
864
|
+
x="rt",
|
|
865
|
+
y="mz",
|
|
866
|
+
size="markersize",
|
|
867
|
+
fill_color="lightgray",
|
|
868
|
+
line_color=None,
|
|
869
|
+
alpha=alpha,
|
|
870
|
+
source=none_source,
|
|
871
|
+
legend_label="None",
|
|
872
|
+
)
|
|
873
|
+
|
|
874
|
+
# Create a separate renderer for each non-None category (plotted on top of None values)
|
|
820
875
|
for i, category in enumerate(unique_values):
|
|
821
876
|
# Filter data for this category
|
|
822
877
|
category_data = data.filter(pl.col(colorby) == category)
|
|
823
878
|
category_data_pd = category_data.to_pandas()
|
|
824
879
|
category_source = bp.ColumnDataSource(category_data_pd)
|
|
825
880
|
|
|
826
|
-
color =
|
|
881
|
+
color = categorical_palette[i % len(categorical_palette)]
|
|
827
882
|
|
|
828
883
|
if scaling.lower() in ["dyn", "dynamic"]:
|
|
829
884
|
# Calculate appropriate radius for dynamic scaling
|
|
@@ -942,8 +997,25 @@ def plot_consensus_2d(
|
|
|
942
997
|
p.add_layout(color_bar, "right")
|
|
943
998
|
else:
|
|
944
999
|
# For categorical data, configure the legend that was automatically created
|
|
945
|
-
|
|
946
|
-
|
|
1000
|
+
if legend is not None:
|
|
1001
|
+
# Map legend position parameter to Bokeh legend position
|
|
1002
|
+
legend_position_map = {
|
|
1003
|
+
"top_right": "top_right",
|
|
1004
|
+
"top_left": "top_left",
|
|
1005
|
+
"bottom_right": "bottom_right",
|
|
1006
|
+
"bottom_left": "bottom_left",
|
|
1007
|
+
"right": "right",
|
|
1008
|
+
"left": "left",
|
|
1009
|
+
"top": "top",
|
|
1010
|
+
"bottom": "bottom"
|
|
1011
|
+
}
|
|
1012
|
+
|
|
1013
|
+
bokeh_legend_pos = legend_position_map.get(legend, "bottom_right")
|
|
1014
|
+
p.legend.location = bokeh_legend_pos
|
|
1015
|
+
p.legend.click_policy = "hide"
|
|
1016
|
+
else:
|
|
1017
|
+
# Hide legend when legend=None
|
|
1018
|
+
p.legend.visible = False
|
|
947
1019
|
|
|
948
1020
|
if filename is not None:
|
|
949
1021
|
# Convert relative paths to absolute paths using study folder as base
|
|
@@ -109,6 +109,7 @@ from masster.study.parameters import set_parameters_property
|
|
|
109
109
|
from masster.study.save import save, save_consensus, save_samples
|
|
110
110
|
from masster.study.export import export_mgf, export_mztab, export_xlsx, export_parquet
|
|
111
111
|
from masster.study.id import lib_load, identify, get_id, id_reset, lib_reset, _get_adducts
|
|
112
|
+
from masster.study.importers import import_oracle
|
|
112
113
|
|
|
113
114
|
from masster.logger import MassterLogger
|
|
114
115
|
from masster.study.defaults.study_def import study_defaults
|
|
@@ -454,6 +455,9 @@ class Study:
|
|
|
454
455
|
reset_id = id_reset
|
|
455
456
|
lib_reset = lib_reset
|
|
456
457
|
reset_lib = lib_reset
|
|
458
|
+
|
|
459
|
+
# === Oracle Import Operations ===
|
|
460
|
+
import_oracle = import_oracle
|
|
457
461
|
|
|
458
462
|
# === Parameter Management ===
|
|
459
463
|
update_history = update_history
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|