masster-0.5.12-py3-none-any.whl → masster-0.5.14-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.
masster/study/importers.py ADDED
@@ -0,0 +1,331 @@
+ """
+ importers.py
+
+ Module providing import functionality for the Study class, specifically for importing
+ oracle identification data into consensus features.
+ """
+
+ from __future__ import annotations
+
+ import os
+ import pandas as pd
+ import polars as pl
+
+
+ def import_oracle(
+     self,
+     folder,
+     min_id_level=None,
+     max_id_level=None,
+ ):
+     """
+     Import oracle identification data and map it to consensus features.
+
+     This method reads oracle identification results from folder/diag/annotation_full.csv
+     and creates lib_df and id_df DataFrames with detailed library and identification information.
+     It also updates consensus_df with top identification results.
+
+     Parameters:
+         folder (str): Path to oracle folder containing diag/annotation_full.csv
+         min_id_level (int, optional): Minimum identification level to include
+         max_id_level (int, optional): Maximum identification level to include
+
+     Returns:
+         None: Updates consensus_df, creates lib_df and id_df in place with oracle identification data
+
+     Raises:
+         FileNotFoundError: If the oracle annotation file doesn't exist
+         ValueError: If consensus_df is empty or doesn't have required columns
+
+     Example:
+         >>> study.import_oracle(
+         ...     folder="path/to/oracle_results",
+         ...     min_id_level=2,
+         ...     max_id_level=4
+         ... )
+     """
+
+     self.logger.info(f"Starting oracle import from folder: {folder}")
+
+     # Validate inputs
+     if self.consensus_df is None or self.consensus_df.is_empty():
+         raise ValueError("consensus_df is empty or not available. Run merge() first.")
+
+     if "consensus_uid" not in self.consensus_df.columns:
+         raise ValueError("consensus_df must contain 'consensus_uid' column")
+
+     # Check if oracle file exists
+     oracle_file_path = os.path.join(folder, "diag", "annotation_full.csv")
+     if not os.path.exists(oracle_file_path):
+         raise FileNotFoundError(f"Oracle annotation file not found: {oracle_file_path}")
+
+     self.logger.debug(f"Loading oracle data from: {oracle_file_path}")
+
+     try:
+         # Read oracle data using pandas first for easier processing
+         oracle_data = pd.read_csv(oracle_file_path)
+         self.logger.info(f"Oracle data loaded successfully with {len(oracle_data)} rows")
+     except Exception as e:
+         self.logger.error(f"Could not read {oracle_file_path}: {e}")
+         raise
+
+     # Extract consensus_uid from scan_title column (format: "uid:XYZ, ...")
+     self.logger.debug("Extracting consensus UIDs from oracle scan_title using pattern 'uid:(\\d+)'")
+     oracle_data["consensus_uid"] = oracle_data["scan_title"].str.extract(r"uid:(\d+)", expand=False)
+
+     # Remove rows where consensus_uid extraction failed
+     initial_count = len(oracle_data)
+     oracle_data = oracle_data.dropna(subset=["consensus_uid"])
+     oracle_data["consensus_uid"] = oracle_data["consensus_uid"].astype(int)
+
+     self.logger.debug(f"Extracted consensus UIDs for {len(oracle_data)}/{initial_count} oracle entries")
+
+     # Apply id_level filters if specified
+     if min_id_level is not None:
+         oracle_data = oracle_data[oracle_data["level"] >= min_id_level]
+         self.logger.debug(f"After min_id_level filter ({min_id_level}): {len(oracle_data)} entries")
+
+     if max_id_level is not None:
+         oracle_data = oracle_data[oracle_data["level"] <= max_id_level]
+         self.logger.debug(f"After max_id_level filter ({max_id_level}): {len(oracle_data)} entries")
+
+     if len(oracle_data) == 0:
+         self.logger.warning("No oracle entries remain after filtering")
+         return
+
+     # === CREATE LIB_DF ===
+     self.logger.debug("Creating lib_df from Oracle annotation data")
+     self.logger.debug(f"Oracle data shape before lib_df creation: {oracle_data.shape}")
+
+     # Create unique lib_uid for each library entry
+     oracle_data["lib_uid"] = range(len(oracle_data))
+
+     # Map Oracle columns to lib_df schema
+     lib_data = []
+     for _, row in oracle_data.iterrows():
+         # Convert cmpd_uid to integer, using lib_uid as fallback
+         cmpd_uid = row["lib_uid"]  # Use lib_uid as integer compound identifier
+         try:
+             if row.get("lib_id") is not None:
+                 cmpd_uid = int(float(str(row["lib_id"])))  # Convert to int, handling potential float strings
+         except (ValueError, TypeError):
+             pass  # Keep lib_uid as fallback
+
+         lib_entry = {
+             "lib_uid": row["lib_uid"],
+             "cmpd_uid": cmpd_uid,  # Integer compound identifier
+             "source_id": "LipidOracle",  # Fixed source identifier
+             "name": row.get("name", None),
+             "shortname": row.get("species", None),
+             "class": row.get("hg", None),
+             "smiles": None,  # Not available in Oracle data
+             "inchi": None,  # Not available in Oracle data
+             "inchikey": None,  # Not available in Oracle data
+             "formula": row.get("formula", None),
+             "iso": 0,  # Fixed isotope value
+             "adduct": row.get("ion", None),
+             "probability": row.get("score", None),
+             "m": None,  # Would need to calculate from formula
+             "z": 1 if row.get("ion", "").find("+") != -1 else (-1 if row.get("ion", "").find("-") != -1 else None),
+             "mz": row.get("mz", None),  # Use mz column from annotation_full.csv
+             "rt": None,  # Set to null as requested
+             "quant_group": None,  # Set to null as requested
+             "db_id": row.get("lib_id", None),
+             "db": row.get("lib", None)
+         }
+         lib_data.append(lib_entry)
+
+     self.logger.debug(f"Created {len(lib_data)} lib_data entries")
+
+     # Create lib_df as Polars DataFrame with error handling for mixed types
+     try:
+         lib_df_temp = pl.DataFrame(lib_data)
+     except Exception as e:
+         self.logger.warning(f"Error creating lib_df with polars: {e}")
+         # Fallback: convert to pandas first, then to polars
+         lib_df_pandas = pd.DataFrame(lib_data)
+         lib_df_temp = pl.from_pandas(lib_df_pandas)
+
+     # Ensure uniqueness by name and adduct combination
+     # Sort by lib_uid and keep first occurrence (earliest in processing order)
+     self.lib_df = (
+         lib_df_temp
+         .sort("lib_uid")
+         .unique(subset=["name", "adduct"], keep="first")
+     )
+
+     self.logger.info(f"Created lib_df with {len(self.lib_df)} library entries ({len(lib_data) - len(self.lib_df)} duplicates removed)")
+
+     # === CREATE ID_DF ===
+     self.logger.debug("Creating id_df from Oracle identification matches")
+
+     # Create identification matches
+     id_data = []
+     for _, row in oracle_data.iterrows():
+         # Use dmz from annotation_full.csv directly for mz_delta
+         mz_delta = None
+         if row.get("dmz") is not None:
+             try:
+                 mz_delta = float(row["dmz"])
+             except (ValueError, TypeError):
+                 pass
+
+         # Use rt_err from annotation_full.csv for rt_delta, None if NaN
+         rt_delta = None
+         rt_err_value = row.get("rt_err")
+         if rt_err_value is not None and not (isinstance(rt_err_value, float) and pd.isna(rt_err_value)):
+             try:
+                 rt_delta = float(rt_err_value)
+             except (ValueError, TypeError):
+                 pass
+
+         # Create matcher as "lipidoracle-" + score_metric from annotation_full.csv
+         matcher = "lipidoracle"  # default fallback
+         if row.get("score_metric") is not None:
+             try:
+                 score_metric = str(row["score_metric"])
+                 matcher = f"lipidoracle-{score_metric}"
+             except (ValueError, TypeError):
+                 pass
+
+         id_entry = {
+             "consensus_uid": row["consensus_uid"],
+             "lib_uid": row["lib_uid"],
+             "mz_delta": mz_delta,
+             "rt_delta": rt_delta,
+             "matcher": matcher,
+             "score": row.get("score", None)
+         }
+         id_data.append(id_entry)
+
+     # Create id_df as Polars DataFrame with error handling
+     try:
+         id_df_temp = pl.DataFrame(id_data)
+     except Exception as e:
+         self.logger.warning(f"Error creating id_df with polars: {e}")
+         # Fallback: convert to pandas first, then to polars
+         id_df_pandas = pd.DataFrame(id_data)
+         id_df_temp = pl.from_pandas(id_df_pandas)
+
+     # Filter id_df to only include lib_uids that exist in the final unique lib_df
+     unique_lib_uids = self.lib_df.select("lib_uid").to_series()
+     self.id_df = id_df_temp.filter(pl.col("lib_uid").is_in(unique_lib_uids))
+
+     self.logger.info(f"Created id_df with {len(self.id_df)} identification matches")
+
+     # === UPDATE CONSENSUS_DF (existing functionality) ===
+     self.logger.debug("Updating consensus_df with top identification results")
+
+     # Convert to polars for efficient joining with error handling
+     try:
+         oracle_pl = pl.DataFrame(oracle_data)
+     except Exception as e:
+         self.logger.warning(f"Error converting oracle_data to polars: {e}")
+         # Convert using from_pandas properly
+         oracle_pl = pl.from_pandas(oracle_data.reset_index(drop=True))
+
+     # Group by consensus_uid and select the best identification (highest level)
+     # In case of ties, take the first one
+     best_ids = (
+         oracle_pl
+         .group_by("consensus_uid")
+         .agg([
+             pl.col("level").max().alias("max_level")
+         ])
+         .join(oracle_pl, on="consensus_uid")
+         .filter(pl.col("level") == pl.col("max_level"))
+         .group_by("consensus_uid")
+         .first()  # In case of ties, take the first
+     )
+
+     self.logger.debug(f"Selected best identifications for {len(best_ids)} consensus features")
+
+     # Prepare the identification columns
+     id_columns = {
+         "id_top_name": best_ids.select("consensus_uid", "name"),
+         "id_top_adduct": best_ids.select("consensus_uid", "ion"),
+         "id_top_class": best_ids.select("consensus_uid", "hg"),
+         "id_top_score": best_ids.select("consensus_uid", pl.col("score").round(3).alias("score")),
+         "id_source": best_ids.select(
+             "consensus_uid",
+             pl.when(pl.col("level") == 1)
+             .then(pl.lit("lipidoracle ms1"))
+             .otherwise(pl.lit("lipidoracle ms2"))
+             .alias("id_source")
+         )
+     }
+
+     # Initialize identification columns in consensus_df if they don't exist
+     for col_name in id_columns.keys():
+         if col_name not in self.consensus_df.columns:
+             if col_name == "id_top_score":
+                 self.consensus_df = self.consensus_df.with_columns(
+                     pl.lit(None, dtype=pl.Float64).alias(col_name)
+                 )
+             else:
+                 self.consensus_df = self.consensus_df.with_columns(
+                     pl.lit(None, dtype=pl.String).alias(col_name)
+                 )
+
+     # Update consensus_df with oracle identifications
+     for col_name, id_data_col in id_columns.items():
+         oracle_column = id_data_col.columns[1]  # second column (after consensus_uid)
+
+         # Create update dataframe
+         update_data = id_data_col.rename({oracle_column: col_name})
+
+         # Join and update
+         self.consensus_df = (
+             self.consensus_df
+             .join(update_data, on="consensus_uid", how="left", suffix="_oracle")
+             .with_columns(
+                 pl.coalesce([f"{col_name}_oracle", col_name]).alias(col_name)
+             )
+             .drop(f"{col_name}_oracle")
+         )
+
+     # Replace NaN values with None in identification columns
+     id_col_names = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score", "id_source"]
+     for col_name in id_col_names:
+         if col_name in self.consensus_df.columns:
+             # For string columns, replace empty strings and "nan" with None
+             if col_name != "id_top_score":
+                 self.consensus_df = self.consensus_df.with_columns(
+                     pl.when(
+                         pl.col(col_name).is_null() |
+                         (pl.col(col_name) == "") |
+                         (pl.col(col_name) == "nan") |
+                         (pl.col(col_name) == "NaN")
+                     )
+                     .then(None)
+                     .otherwise(pl.col(col_name))
+                     .alias(col_name)
+                 )
+             # For numeric columns, replace NaN with None
+             else:
+                 self.consensus_df = self.consensus_df.with_columns(
+                     pl.when(pl.col(col_name).is_null() | pl.col(col_name).is_nan())
+                     .then(None)
+                     .otherwise(pl.col(col_name))
+                     .alias(col_name)
+                 )
+
+     # Count how many consensus features were updated
+     updated_count = self.consensus_df.filter(pl.col("id_top_name").is_not_null()).height
+     total_consensus = len(self.consensus_df)
+
+     self.logger.success(
+         f"LipidOracle import completed. {updated_count}/{total_consensus} "
+         f"consensus features now have identifications ({updated_count/total_consensus*100:.1f}%)"
+     )
+
+     # Update history
+     self.update_history(["import_oracle"], {
+         "folder": folder,
+         "min_id_level": min_id_level,
+         "max_id_level": max_id_level,
+         "updated_features": updated_count,
+         "total_features": total_consensus,
+         "lib_entries": len(self.lib_df),
+         "id_matches": len(self.id_df)
+     })
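
The importer keys everything on the consensus UID embedded in the oracle scan titles. Below is a minimal sketch of the extraction step used above, with made-up titles: only the "uid:<digits>" token is guaranteed by the code's comment, so the rest of the title format is an assumption.

    import pandas as pd

    # Hypothetical scan titles; the parser relies only on the "uid:<digits>" token
    titles = pd.Series(["uid:1024, sample=QC01", "no uid in this title"])
    uids = titles.str.extract(r"uid:(\d+)", expand=False)
    print(uids.tolist())  # ['1024', nan]; unmatched rows are dropped via dropna() above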
masster/study/merge.py CHANGED
@@ -1792,6 +1792,7 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
          "id_top_class": None,
          "id_top_adduct": None,
          "id_top_score": None,
+         "id_source": None,
      }
 
 
@@ -2194,6 +2195,7 @@ def _extract_consensus_features(study, consensus_map, min_samples, cached_adduct
              "id_top_class": None,
              "id_top_adduct": None,
              "id_top_score": None,
+             "id_source": None,
          },
      )
 
@@ -3021,7 +3023,7 @@ def __identify_adduct_by_mass_shift(study, rt_tol, cached_adducts_df=None):
              pl.Series("adduct_mass_neutral_top", new_adduct_mass_neutral_top),
              pl.Series("adduct_mass_shift_top", new_adduct_mass_shift_top)
          ])
-         study.logger.success(f"Adduct information updated for {updated_count} consensus features.")
+         study.logger.info(f"Adduct information updated for {updated_count} consensus features.")
      else:
          study.logger.debug("No consensus features updated based on mass shift analysis")
 
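Downstream of these defaults, the new id_source column makes it easy to see which matcher populated each consensus feature. A small Polars sketch, assuming a merged study on which import_oracle() has been run:

    import polars as pl

    # Tally identification sources; unidentified features appear under null
    counts = (
        study.consensus_df
        .group_by("id_source")
        .len()
        .sort("len", descending=True)
    )
    print(counts)  # e.g. null, "lipidoracle ms2", "lipidoracle ms1"
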
masster/study/plot.py CHANGED
@@ -630,6 +630,8 @@ plot_consensus_2d(
      height=450,
      mz_range=None,
      rt_range=None,
+     legend="bottom_right",
+     show_none=True,
  ):
      """
      Plot consensus features in a 2D scatter plot with retention time vs m/z.
@@ -652,6 +654,10 @@
          height (int): Plot height in pixels (default: 450)
          mz_range (tuple, optional): m/z range for filtering consensus features (min_mz, max_mz)
          rt_range (tuple, optional): Retention time range for filtering consensus features (min_rt, max_rt)
+         legend (str, optional): Legend position for categorical data. Options: 'top_right', 'top_left',
+             'bottom_right', 'bottom_left', 'right', 'left', 'top', 'bottom'.
+             If None, legend is hidden. Only applies to categorical coloring (default: "bottom_right")
+         show_none (bool): Whether to display points with None values in the colorby column (default: True)
      """
      if self.consensus_df is None:
          self.logger.error("No consensus map found.")
@@ -730,6 +736,10 @@
      from bokeh.models.annotations import ColorBar
      from bokeh.palettes import viridis, Category20
 
+     # Filter out None values for the colorby column if show_none=False
+     if not show_none and colorby in data.columns:
+         data = data.filter(pl.col(colorby).is_not_null())
+
      # Convert Polars DataFrame to pandas for Bokeh compatibility
      data_pd = data.to_pandas()
      source = ColumnDataSource(data_pd)
@@ -783,13 +793,20 @@
      # Sorting would break the correspondence between legend labels and point colors
      unique_values = [v for v in data_pd[colorby].unique() if v is not None]
 
-     if len(unique_values) <= 20:
-         palette = Category20[min(20, max(3, len(unique_values)))]
+     # Use the custom palette from cmap if available, otherwise fall back to defaults
+     if len(palette) >= len(unique_values):
+         # Use custom colormap palette - sample evenly across the palette
+         import numpy as np
+         indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
+         categorical_palette = [palette[i] for i in indices]
+     elif len(unique_values) <= 20:
+         # Fall back to Category20 if the custom palette is too small
+         categorical_palette = Category20[min(20, max(3, len(unique_values)))]
      else:
          # For many categories, use a subset of the viridis palette
-         palette = viridis(min(256, len(unique_values)))
+         categorical_palette = viridis(min(256, len(unique_values)))
 
-     color_mapper = factor_cmap(colorby, palette, unique_values)
+     color_mapper = factor_cmap(colorby, categorical_palette, unique_values)
  else:
      # Handle numeric coloring with LinearColorMapper
      color_mapper = LinearColorMapper(
@@ -809,21 +826,65 @@
  if is_categorical:
      # For categorical data, create separate renderers for each category
      # This enables proper legend interactivity where each category can be toggled independently
-     unique_values = [v for v in data_pd[colorby].unique() if v is not None]
+     all_unique_values = list(data_pd[colorby].unique())
+     unique_values = [v for v in all_unique_values if v is not None]
+     has_none_values = None in all_unique_values
 
-     if len(unique_values) <= 20:
-         palette = Category20[min(20, max(3, len(unique_values)))]
+     # Use the custom palette from cmap if available, otherwise fall back to defaults
+     if len(palette) >= len(unique_values):
+         # Use custom colormap palette - sample evenly across the palette
+         import numpy as np
+         indices = np.linspace(0, len(palette) - 1, len(unique_values)).astype(int)
+         categorical_palette = [palette[i] for i in indices]
+     elif len(unique_values) <= 20:
+         # Fall back to Category20 if the custom palette is too small
+         categorical_palette = Category20[min(20, max(3, len(unique_values)))]
      else:
-         palette = viridis(min(256, len(unique_values)))
+         categorical_palette = viridis(min(256, len(unique_values)))
 
-     # Create a separate renderer for each category
+     # Handle None values with light gray FIRST so they appear in the background
+     if has_none_values and show_none:
+         # Filter data for None values
+         none_data = data.filter(pl.col(colorby).is_null())
+         none_data_pd = none_data.to_pandas()
+         none_source = bp.ColumnDataSource(none_data_pd)
+
+         if scaling.lower() in ["dyn", "dynamic"]:
+             # Calculate appropriate radius for dynamic scaling
+             rt_range = data["rt"].max() - data["rt"].min()
+             mz_range = data["mz"].max() - data["mz"].min()
+             dynamic_radius = min(rt_range, mz_range) * 0.0005 * markersize
+
+             renderer = p.circle(
+                 x="rt",
+                 y="mz",
+                 radius=dynamic_radius,
+                 fill_color="lightgray",
+                 line_color=None,
+                 alpha=alpha,
+                 source=none_source,
+                 legend_label="None",
+             )
+         else:
+             renderer = p.scatter(
+                 x="rt",
+                 y="mz",
+                 size="markersize",
+                 fill_color="lightgray",
+                 line_color=None,
+                 alpha=alpha,
+                 source=none_source,
+                 legend_label="None",
+             )
+
+     # Create a separate renderer for each non-None category (plotted on top of None values)
      for i, category in enumerate(unique_values):
          # Filter data for this category
          category_data = data.filter(pl.col(colorby) == category)
          category_data_pd = category_data.to_pandas()
          category_source = bp.ColumnDataSource(category_data_pd)
 
-         color = palette[i % len(palette)]
+         color = categorical_palette[i % len(categorical_palette)]
 
          if scaling.lower() in ["dyn", "dynamic"]:
              # Calculate appropriate radius for dynamic scaling
@@ -892,33 +953,19 @@
          ("number_samples", "@number_samples"),
          ("number_ms2", "@number_ms2"),
          ("inty_mean", "@inty_mean"),
-         ("coherence_mean", "@chrom_coherence_mean"),
-         ("prominence_scaled_mean", "@chrom_prominence_scaled_mean"),
      ]
 
-     # Add adduct_top if it exists in data
-     if "adduct_top" in data.columns:
-         tooltips.append(("adduct_top", "@adduct_top"))
-
-     # Add id_top_name if it exists in data
-     if "id_top_name" in data.columns:
-         tooltips.append(("id_top_name", "@id_top_name"))
-
-     # Add id_top_adduct if it exists in data
-     if "id_top_adduct" in data.columns:
-         tooltips.append(("id_top_adduct", "@id_top_adduct"))
-
      # Add id_top_* columns if they exist and have non-null values
-     id_top_columns = ["id_top_name", "id_top_class", "id_top_adduct", "id_top_score"]
+     id_top_columns = ["id_top_name", "id_top_adduct", "id_top_class", "id_top_score"]
      for col in id_top_columns:
          if col in data.columns:
              # Check if the column has any non-null values
              if data.filter(pl.col(col).is_not_null()).height > 0:
                  # Format score column with decimal places, others as strings
                  if col == "id_top_score":
-                     tooltips.append((col.replace("id_top_", "id_"), f"@{col}{{0.0000}}"))
+                     tooltips.append((col, f"@{col}{{0.0}}"))
                  else:
-                     tooltips.append((col.replace("id_top_", "id_"), f"@{col}"))
+                     tooltips.append((col, f"@{col}"))
 
      hover = HoverTool(
          tooltips=tooltips,
@@ -942,8 +989,25 @@
          p.add_layout(color_bar, "right")
      else:
          # For categorical data, configure the legend that was automatically created
-         p.legend.location = "top_right"
-         p.legend.click_policy = "hide"
+         if legend is not None:
+             # Map legend position parameter to Bokeh legend position
+             legend_position_map = {
+                 "top_right": "top_right",
+                 "top_left": "top_left",
+                 "bottom_right": "bottom_right",
+                 "bottom_left": "bottom_left",
+                 "right": "right",
+                 "left": "left",
+                 "top": "top",
+                 "bottom": "bottom"
+             }
+
+             bokeh_legend_pos = legend_position_map.get(legend, "bottom_right")
+             p.legend.location = bokeh_legend_pos
+             p.legend.click_policy = "hide"
+         else:
+             # Hide legend when legend=None
+             p.legend.visible = False
 
      if filename is not None:
          # Convert relative paths to absolute paths using study folder as base
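
The two new parameters combine naturally: hide unidentified points and keep the legend clear of the data. A hypothetical call follows; the colorby value assumes an identification import has populated id_top_class, and all other arguments keep their defaults.

    # Color by lipid class, drop points with no identification,
    # and place the interactive legend in the top-left corner
    study.plot_consensus_2d(
        colorby="id_top_class",
        legend="top_left",
        show_none=False,
    )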
masster/study/study.py CHANGED
@@ -109,6 +109,7 @@ from masster.study.parameters import set_parameters_property
  from masster.study.save import save, save_consensus, save_samples
  from masster.study.export import export_mgf, export_mztab, export_xlsx, export_parquet
  from masster.study.id import lib_load, identify, get_id, id_reset, lib_reset, _get_adducts
+ from masster.study.importers import import_oracle
 
  from masster.logger import MassterLogger
  from masster.study.defaults.study_def import study_defaults
@@ -454,6 +455,9 @@ class Study:
      reset_id = id_reset
      lib_reset = lib_reset
      reset_lib = lib_reset
+
+     # === Oracle Import Operations ===
+     import_oracle = import_oracle
 
      # === Parameter Management ===
      update_history = update_history
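
The `import_oracle = import_oracle` assignment follows the file's existing convention: methods are written as module-level functions with an explicit self parameter and then attached to the class as attributes, where they bind like ordinary methods. A minimal sketch of the idiom (names here are invented for illustration):

    def greet(self, name):
        # module-level function written like a method, with an explicit self
        return f"{self.prefix} {name}"

    class Greeter:
        prefix = "Hello,"
        greet = greet  # the RHS resolves to the module-level function

    print(Greeter().greet("masster"))  # Hello, masster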
@@ -114,6 +114,9 @@
          },
          "id_top_score": {
              "dtype": "pl.Float64"
+         },
+         "id_source": {
+             "dtype": "pl.String"
          }
      }
  },
@@ -318,6 +321,12 @@
      "name": {
          "dtype": "pl.String"
      },
+     "shortname": {
+         "dtype": "pl.String"
+     },
+     "class": {
+         "dtype": "pl.String"
+     },
      "smiles": {
          "dtype": "pl.String"
      },
@@ -336,6 +345,9 @@
      "adduct": {
          "dtype": "pl.String"
      },
+     "probability": {
+         "dtype": "pl.Float64"
+     },
      "m": {
          "dtype": "pl.Float64"
      },
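
These dtype entries pin the lib_df schema to Polars types. A minimal illustration of a frame matching the newly added columns (compound names and values are invented):

    import polars as pl

    lib_df = pl.DataFrame({
        "name": ["PC 34:1"],
        "shortname": ["PC 34:1"],  # new in this release: pl.String
        "class": ["PC"],           # new in this release: pl.String
        "adduct": ["[M+H]+"],
        "probability": [0.93],     # new in this release: pl.Float64
    })
    print(lib_df.schema)  # confirms String/String/Float64 for the new columns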
masster/wizard/__init__.py CHANGED
@@ -5,13 +5,13 @@ This module provides the Wizard class for fully automated processing of MS data
  from raw files to final study results, including batch conversion, assembly,
  alignment, merging, plotting, and export.
 
- The create_script() function allows immediate generation of standalone analysis
+ The create_analysis() function allows immediate generation of standalone analysis
  scripts without creating a Wizard instance first.
 
- The execute() function combines create_script() with immediate execution of the
+ The analyze() function combines create_analysis() with immediate execution of the
  generated script for fully automated processing.
  """
 
- from .wizard import Wizard, wizard_def, create_script, execute
+ from .wizard import Wizard, wizard_def, create_analysis, create_notebook, analyze
 
- __all__ = ["Wizard", "wizard_def", "create_script", "execute"]
+ __all__ = ["Wizard", "wizard_def", "create_analysis", "create_notebook", "analyze"]
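
For callers, this rename only changes the import surface; the diff does not show the functions' signatures, so only the imports are illustrated:

    # masster 0.5.12
    # from masster.wizard import create_script, execute

    # masster 0.5.14
    from masster.wizard import create_analysis, create_notebook, analyze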