masster-0.4.21-py3-none-any.whl → masster-0.4.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster has been flagged as potentially problematic.
masster/_version.py CHANGED
@@ -1,7 +1,7 @@
 from __future__ import annotations
 
 
-__version__ = "0.4.21"
+__version__ = "0.4.22"
 
 
 def get_version():
masster/sample/save.py CHANGED
@@ -344,7 +344,6 @@ def export_mgf(
     tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
 
     # First pass: Export MS1 spectra for ALL features with ms1_spec data
-    print("Exporting MS1 spectra...")
     for row in tqdm(
         features_list,
         total=len(features_list),
@@ -398,7 +397,6 @@ def export_mgf(
         ms1_fallback_count += 1
 
     # Second pass: Export MS2 spectra for features with MS2 data
-    print("Exporting MS2 spectra...")
     for row in tqdm(
         features_list,
         total=len(features_list),
masster/study/export.py CHANGED
@@ -498,7 +498,7 @@ def export_mgf(self, **kwargs):
     self.logger.info(f"Exported {len(mgf_data)} spectra to {filename}")
 
 
-def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None:
+def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs) -> None:
     """
     Export the study as a fully compliant mzTab-M file.
 
@@ -1184,7 +1184,7 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
     self.logger.info(f"Exported mzTab-M to {filename}")
 
 
-def export_xlsx(self, filename: str = None) -> None:
+def export_xlsx(self, filename: str | None = None) -> None:
     """
     Export the study data to an Excel workbook with multiple worksheets.
 
@@ -1295,7 +1295,7 @@ def export_xlsx(self, filename: str = None) -> None:
        self.logger.error(f"Error writing Excel file: {e}")
 
 
-def export_parquet(self, filename: str = None) -> None:
+def export_parquet(self, filename: str | None = None) -> None:
     """
     Export the study data to multiple Parquet files with different suffixes.
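The only change to export_mztab, export_xlsx, and export_parquet is the annotation: "str | None" is the PEP 604 spelling of an optional string and now matches the None default. A minimal sketch of the pattern (hypothetical helper, not the masster API; the fallback filename is assumed for illustration):

    def export_report(filename: str | None = None) -> None:
        # Assumed behavior for illustration: derive a default name when None is passed.
        target = filename if filename is not None else "study_report.txt"
        with open(target, "w") as fh:
            fh.write("report\n")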
masster/study/load.py CHANGED
@@ -1257,17 +1257,53 @@ def load_features(self):
         feature_map = oms.FeatureMap()
 
         # Convert DataFrame features to OpenMS Features
+        # Keep track of next available feature_id for this sample
+        next_feature_id = 1
+        used_feature_ids = set()
+
+        # First pass: collect existing feature_ids to avoid conflicts
+        for feature_row in sample_features.iter_rows(named=True):
+            if feature_row["feature_id"] is not None:
+                used_feature_ids.add(int(feature_row["feature_id"]))
+
+        # Find the next available feature_id
+        while next_feature_id in used_feature_ids:
+            next_feature_id += 1
+
         for feature_row in sample_features.iter_rows(named=True):
             feature = oms.Feature()
 
             # Set properties from DataFrame (handle missing values gracefully)
             try:
-                feature.setUniqueId(int(feature_row["feature_id"]))
+                # Skip features with missing critical data
+                if feature_row["mz"] is None:
+                    self.logger.warning("Skipping feature due to missing mz")
+                    continue
+                if feature_row["rt"] is None:
+                    self.logger.warning("Skipping feature due to missing rt")
+                    continue
+                if feature_row["inty"] is None:
+                    self.logger.warning("Skipping feature due to missing inty")
+                    continue
+
+                # Handle missing feature_id by generating a new one
+                if feature_row["feature_id"] is None:
+                    feature_id = next_feature_id
+                    next_feature_id += 1
+                    self.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID")
+                else:
+                    feature_id = int(feature_row["feature_id"])
+
+                feature.setUniqueId(feature_id)
                 feature.setMZ(float(feature_row["mz"]))
                 feature.setRT(float(feature_row["rt"]))
                 feature.setIntensity(float(feature_row["inty"]))
-                feature.setOverallQuality(float(feature_row["quality"]))
-                feature.setCharge(int(feature_row["charge"]))
+
+                # Handle optional fields that might be None
+                if feature_row.get("quality") is not None:
+                    feature.setOverallQuality(float(feature_row["quality"]))
+                if feature_row.get("charge") is not None:
+                    feature.setCharge(int(feature_row["charge"]))
 
                 # Add to feature map
                 feature_map.push_back(feature)
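The new ID handling is a two-pass scheme: collect every existing feature_id first, then hand the lowest unused integers to rows whose feature_id is null. A standalone sketch of the same technique (illustration only, not the masster API; this version additionally re-checks generated ids against the used set on each assignment):

    # Standalone sketch of two-pass feature_id allocation.
    rows = [{"feature_id": 3}, {"feature_id": None}, {"feature_id": 1}, {"feature_id": None}]

    used_ids = {int(r["feature_id"]) for r in rows if r["feature_id"] is not None}
    next_id = 1
    for r in rows:
        if r["feature_id"] is None:
            # Advance past ids that are already taken before assigning.
            while next_id in used_ids:
                next_id += 1
            r["feature_id"] = next_id
            used_ids.add(next_id)

    print([r["feature_id"] for r in rows])  # [3, 2, 1, 4]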
masster/study/plot.py CHANGED
@@ -1724,221 +1724,154 @@ def plot_consensus_stats(
     self,
     filename=None,
     width=1200,
-    height=1200,
+    height=None,
     alpha=0.6,
-    markersize=3,
+    bins=30,
+    n_cols=4,
 ):
     """
-    Plot a scatter plot matrix (SPLOM) of consensus statistics using Bokeh.
-
+    Plot histograms/distributions for all numeric columns in consensus_df.
+
     Parameters:
         filename (str, optional): Output filename for saving the plot
         width (int): Overall width of the plot (default: 1200)
-        height (int): Overall height of the plot (default: 1200)
-        alpha (float): Point transparency (default: 0.6)
-        markersize (int): Size of points (default: 5)
+        height (int, optional): Overall height of the plot (auto-calculated if None)
+        alpha (float): Histogram transparency (default: 0.6)
+        bins (int): Number of histogram bins (default: 30)
+        n_cols (int): Number of columns in the grid layout (default: 4)
     """
     from bokeh.layouts import gridplot
-    from bokeh.models import ColumnDataSource, HoverTool
-    from bokeh.plotting import figure, show, output_file
+    from bokeh.plotting import figure
+    import polars as pl
+    import numpy as np
 
     # Check if consensus_df exists and has data
     if self.consensus_df is None or self.consensus_df.is_empty():
         self.logger.error("No consensus data available. Run merge/find_consensus first.")
         return
 
-    # Define the columns to plot
-    columns = [
-        "rt",
-        "mz",
-        "number_samples",
-        "log10_quality",
-        "mz_delta_mean",
-        "rt_delta_mean",
-        "chrom_coherence_mean",
-        "chrom_prominence_scaled_mean",
-        "inty_mean",
-        "number_ms2",
-    ]
-
-    # Check which columns exist in the dataframe and compute missing ones
-    available_columns = self.consensus_df.columns
+    # Get all columns and their data types - work with original dataframe
     data_df = self.consensus_df.clone()
 
-    # Add log10_quality if quality exists
-    if "quality" in available_columns and "log10_quality" not in available_columns:
-        data_df = data_df.with_columns(
-            pl.col("quality").log10().alias("log10_quality"),
-        )
-
-    # Filter columns that actually exist
-    final_columns = [col for col in columns if col in data_df.columns]
-
-    if len(final_columns) < 2:
-        self.logger.error(f"Need at least 2 columns for SPLOM. Available: {final_columns}")
+    # Identify numeric columns (excluding ID columns that are typically strings)
+    id_columns = ["consensus_uid", "consensus_id", "uid", "id"]
+    numeric_columns = []
+
+    for col in data_df.columns:
+        if col not in id_columns:
+            dtype = data_df[col].dtype
+            # Check if column is numeric (int, float, or can be converted to numeric)
+            if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
+                         pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
+                         pl.Float32, pl.Float64]:
+                numeric_columns.append(col)
+
+    if len(numeric_columns) == 0:
+        self.logger.error("No numeric columns found in consensus_df for plotting distributions.")
         return
 
-    self.logger.debug(f"Creating SPLOM with columns: {final_columns}")
-
-    # Add important ID columns for tooltips even if not plotting them
-    tooltip_columns = []
-    for id_col in ["consensus_uid", "consensus_id"]:
-        if id_col in data_df.columns and id_col not in final_columns:
-            tooltip_columns.append(id_col)
+    self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} numeric columns: {numeric_columns}")
 
-    # Select plotting columns plus tooltip columns
-    all_columns = final_columns + tooltip_columns
-    data_pd = data_df.select(all_columns).to_pandas()
+    # Work directly with Polars - no conversion to pandas needed
+    data_df_clean = data_df.select(numeric_columns)
 
-    # Remove any infinite or NaN values
-    data_pd = data_pd.replace([np.inf, -np.inf], np.nan).dropna()
-
-    if data_pd.empty:
-        self.logger.error("No valid data after removing NaN/infinite values.")
+    # Check if all numeric columns are empty
+    all_columns_empty = True
+    for col in numeric_columns:
+        # Check if column has any non-null, finite values
+        non_null_count = data_df_clean[col].filter(
+            data_df_clean[col].is_not_null() &
+            (data_df_clean[col].is_finite() if data_df_clean[col].dtype in [pl.Float32, pl.Float64] else pl.lit(True))
+        ).len()
+
+        if non_null_count > 0:
+            all_columns_empty = False
+            break
+
+    if all_columns_empty:
+        self.logger.error("All numeric columns contain only NaN/infinite values.")
         return
 
-    source = ColumnDataSource(data_pd)
-
-    n_vars = len(final_columns)
-
-    # Fixed dimensions - override user input to ensure consistent layout
-    total_width = 1200
-    total_height = 1200
-
-    # Calculate plot sizes to ensure uniform inner plot areas
-    # First column needs extra width for y-axis labels
-    plot_width_first = 180  # Wider to account for y-axis labels
-    plot_width_others = 120  # Standard width for other columns
-    plot_height_normal = 120  # Standard height
-    plot_height_last = 155  # Taller last row to accommodate x-axis labels while keeping inner plot area same size
+    # Calculate grid dimensions
+    n_plots = len(numeric_columns)
+    n_rows = (n_plots + n_cols - 1) // n_cols  # Ceiling division
+
+    # Auto-calculate height if not provided
+    if height is None:
+        plot_height = 300
+        height = plot_height * n_rows + 100  # Add some padding
+    else:
+        plot_height = (height - 100) // n_rows  # Subtract padding and divide
+
+    plot_width = (width - 100) // n_cols  # Subtract padding and divide
 
-    # Create grid of plots with variable outer sizes but equal inner areas
+    # Create plots grid
     plots = []
-
-    for i, y_var in enumerate(final_columns):
-        row = []
-        for j, x_var in enumerate(final_columns):
-            # Determine if this plot needs axis labels
-            has_x_label = i == n_vars - 1  # bottom row
-            has_y_label = j == 0  # left column
-
-            # First column wider to accommodate y-axis labels, ensuring equal inner plot areas
-            current_width = plot_width_first if has_y_label else plot_width_others
-            current_height = plot_height_last if has_x_label else plot_height_normal
-
-            p = figure(
-                width=current_width,
-                height=current_height,
-                title=None,  # No title on any plot
-                toolbar_location=None,
-                # Adjusted borders - first column has more space, others minimal
-                min_border_left=70 if has_y_label else 15,
-                min_border_bottom=50 if has_x_label else 15,
-                min_border_right=15,
-                min_border_top=15,
-            )
-
-            # Ensure subplot background and border are explicitly white so the plot looks
-            # correct in dark and light themes.
-            p.outline_line_color = None
-            p.border_fill_color = "white"
-            p.border_fill_alpha = 1.0
-            p.background_fill_color = "white"
-
-            # Remove axis lines to eliminate black lines between plots
-            p.xaxis.axis_line_color = None
-            p.yaxis.axis_line_color = None
-
-            # Keep subtle grid lines for data reference
-            p.grid.visible = True
-            p.grid.grid_line_color = "#E0E0E0"  # Light gray grid lines
-
-            # Set axis labels and formatting
-            if has_x_label:  # bottom row
-                p.xaxis.axis_label = x_var
-                p.xaxis.axis_label_text_font_size = "12pt"
-                p.xaxis.major_label_text_font_size = "9pt"
-                p.xaxis.axis_label_standoff = 15
-            else:
-                p.xaxis.major_label_text_font_size = "0pt"
-                p.xaxis.minor_tick_line_color = None
-                p.xaxis.major_tick_line_color = None
-
-            if has_y_label:  # left column
-                p.yaxis.axis_label = y_var
-                p.yaxis.axis_label_text_font_size = "10pt"  # Smaller y-axis title
-                p.yaxis.major_label_text_font_size = "8pt"
-                p.yaxis.axis_label_standoff = 12
-            else:
-                p.yaxis.major_label_text_font_size = "0pt"
-                p.yaxis.minor_tick_line_color = None
-                p.yaxis.major_tick_line_color = None
-
-            if i == j:
-                # Diagonal: histogram
-                hist, edges = np.histogram(data_pd[x_var], bins=30)
-                p.quad(
-                    top=hist,
-                    bottom=0,
-                    left=edges[:-1],
-                    right=edges[1:],
-                    fill_color="green",
-                    line_color="white",
-                    alpha=alpha,
-                )
-            else:
-                # Off-diagonal: scatter plot
-                scatter = p.scatter(
-                    x=x_var,
-                    y=y_var,
-                    size=markersize,
-                    alpha=alpha,
-                    color="blue",
-                    source=source,
-                )
-
-                # Add hover tool
-                hover = HoverTool(
-                    tooltips=[
-                        (x_var, f"@{x_var}{{0.0000}}"),
-                        (y_var, f"@{y_var}{{0.0000}}"),
-                        (
-                            "consensus_uid",
-                            "@consensus_uid"
-                            if "consensus_uid" in data_pd.columns
-                            else "@consensus_id"
-                            if "consensus_id" in data_pd.columns
-                            else "N/A",
-                        ),
-                        ("rt", "@rt{0.00}" if "rt" in data_pd.columns else "N/A"),
-                        ("mz", "@mz{0.0000}" if "mz" in data_pd.columns else "N/A"),
-                    ],
-                    renderers=[scatter],
-                )
-                p.add_tools(hover)
-
-            row.append(p)
-        plots.append(row)
-
-    # Link axes for same variables
-    for i in range(n_vars):
-        for j in range(n_vars):
-            if i != j:  # Don't link diagonal plots
-                # Link x-axis to other plots in same column
-                for k in range(n_vars):
-                    if k != i and k != j:
-                        plots[i][j].x_range = plots[k][j].x_range
-
-                # Link y-axis to other plots in same row
-                for k in range(n_vars):
-                    if k != j and k != i:
-                        plots[i][j].y_range = plots[i][k].y_range
-
-    # Create grid layout and force overall background/border to white so the outer
-    # container doesn't show dark UI colors in night mode.
+    current_row = []
+
+    for i, col in enumerate(numeric_columns):
+        # Create histogram for this column
+        p = figure(
+            width=plot_width,
+            height=plot_height,
+            title=col,
+            toolbar_location="above",
+            tools="pan,wheel_zoom,box_zoom,reset,save"
+        )
+
+        # Set white background
+        p.background_fill_color = "white"
+        p.border_fill_color = "white"
+
+        # Calculate histogram using Polars
+        # Get valid (non-null, finite) values for this column
+        if data_df_clean[col].dtype in [pl.Float32, pl.Float64]:
+            valid_values = data_df_clean.filter(
+                data_df_clean[col].is_not_null() & data_df_clean[col].is_finite()
+            )[col]
+        else:
+            valid_values = data_df_clean.filter(data_df_clean[col].is_not_null())[col]
+
+        if valid_values.len() == 0:
+            self.logger.warning(f"No valid values for column {col}")
+            continue
+
+        # Convert to numpy for histogram calculation
+        values_array = valid_values.to_numpy()
+        hist, edges = np.histogram(values_array, bins=bins)
+
+        # Create histogram bars
+        p.quad(
+            top=hist,
+            bottom=0,
+            left=edges[:-1],
+            right=edges[1:],
+            fill_color="steelblue",
+            line_color="white",
+            alpha=alpha,
+        )
+
+        # Style the plot
+        p.title.text_font_size = "12pt"
+        p.xaxis.axis_label = col
+        p.yaxis.axis_label = "Count"
+        p.grid.visible = True
+        p.grid.grid_line_color = "#E0E0E0"
+
+        current_row.append(p)
+
+        # If we've filled a row or reached the end, add the row to plots
+        if len(current_row) == n_cols or i == n_plots - 1:
+            # Fill remaining spots in the last row with None if needed
+            while len(current_row) < n_cols and i == n_plots - 1:
+                current_row.append(None)
+            plots.append(current_row)
+            current_row = []
+
+    # Create grid layout
     grid = gridplot(plots)
-
-    # Set overall background and border to white when supported
+
+    # Set overall background to white
     if hasattr(grid, "background_fill_color"):
         grid.background_fill_color = "white"
     if hasattr(grid, "border_fill_color"):
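With the SPLOM replaced by a per-column histogram grid, a call against the new signature would look like this (hypothetical usage; assumes a Study object named study with a populated consensus_df):

    study.plot_consensus_stats(
        filename="consensus_stats.html",  # optional output file
        width=1200,   # total grid width; per-plot width is derived from n_cols
        height=None,  # auto-calculated as ~300 px per row plus padding
        alpha=0.6,    # histogram fill transparency
        bins=30,      # histogram bins per numeric column
        n_cols=4,     # plots per row; rows are ceil(n_plots / n_cols)
    )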
masster/study/processing.py CHANGED
@@ -15,6 +15,85 @@ from masster.study.defaults import (
 )
 
 
+def _generate_feature_maps_on_demand_for_align(study):
+    """
+    Generate feature maps on-demand from study.features_df for alignment operations.
+    Returns temporary feature maps that are not cached in the study.
+
+    Args:
+        study: Study object containing features_df and samples_df
+
+    Returns:
+        list: List of temporary FeatureMap objects
+    """
+    import polars as pl
+    import pyopenms as oms
+
+    if study.features_df is None or len(study.features_df) == 0:
+        study.logger.error("No features_df available for generating feature maps")
+        return []
+
+    temp_feature_maps = []
+
+    # Process each sample in order
+    for sample_index, row_dict in enumerate(study.samples_df.iter_rows(named=True)):
+        sample_uid = row_dict["sample_uid"]
+        sample_name = row_dict["sample_name"]
+
+        # Get features for this sample from features_df
+        sample_features = study.features_df.filter(pl.col("sample_uid") == sample_uid)
+
+        # Create new FeatureMap
+        feature_map = oms.FeatureMap()
+
+        # Convert DataFrame features to OpenMS Features
+        for feature_row in sample_features.iter_rows(named=True):
+            feature = oms.Feature()
+
+            # Set properties from DataFrame (handle missing values gracefully)
+            try:
+                # Skip features with missing critical data
+                if feature_row["mz"] is None:
+                    study.logger.warning("Skipping feature due to missing mz")
+                    continue
+                if feature_row["rt"] is None:
+                    study.logger.warning("Skipping feature due to missing rt")
+                    continue
+                if feature_row["inty"] is None:
+                    study.logger.warning("Skipping feature due to missing inty")
+                    continue
+
+                # Handle missing feature_id by generating a new one
+                if feature_row["feature_id"] is None:
+                    # Use a simple incremental ID for alignment purposes
+                    feature_id = len(temp_feature_maps) * 100000 + feature_map.size() + 1
+                    study.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID in sample {sample_name}")
+                else:
+                    feature_id = int(feature_row["feature_id"])
+
+                feature.setUniqueId(feature_id)
+                feature.setMZ(float(feature_row["mz"]))
+                feature.setRT(float(feature_row["rt"]))
+                feature.setIntensity(float(feature_row["inty"]))
+
+                # Handle optional fields that might be None
+                if feature_row.get("quality") is not None:
+                    feature.setOverallQuality(float(feature_row["quality"]))
+                if feature_row.get("charge") is not None:
+                    feature.setCharge(int(feature_row["charge"]))
+
+                # Add to feature map
+                feature_map.push_back(feature)
+            except (ValueError, TypeError) as e:
+                study.logger.warning(f"Skipping feature due to conversion error: {e}")
+                continue
+
+        temp_feature_maps.append(feature_map)
+
+    study.logger.debug(f"Generated {len(temp_feature_maps)} temporary feature maps from features_df for alignment")
+    return temp_feature_maps
+
+
 def align(self, **kwargs):
     """Align feature maps using pose clustering or KD algorithm and update feature RTs.
 
@@ -90,13 +169,9 @@ def align(self, **kwargs):
     self.store_history(["align"], params.to_dict())
     self.logger.debug("Parameters stored to align")
 
-    if len(self.features_maps) < len(self.samples_df):
-        self.features_maps = []
-        self.load_features()
-
-    # self.logger.debug("Starting alignment")
-
-    fmaps = self.features_maps
+    # Generate temporary feature maps on-demand from features_df instead of using cached data
+    self.logger.debug("Generating feature maps on-demand from features_df for alignment")
+    fmaps = _generate_feature_maps_on_demand_for_align(self)
 
     # Choose alignment algorithm
     algorithm = params.get("algorithm").lower()
@@ -108,6 +183,9 @@ def align(self, **kwargs):
         _align_kd_algorithm(self, fmaps, params)
     else:
         self.logger.error(f"Unknown alignment algorithm '{algorithm}'")
+        # Clean up temporary feature maps to release memory
+        del fmaps
+        return
 
     # check if rt_original exists in features_df, if not, add it after rt
     if "rt_original" not in self.features_df.columns:
@@ -256,6 +334,10 @@ def align(self, **kwargs):
     if params.get("save_features"):
         self.save_samples()
 
+    # Clean up temporary feature maps to release memory
+    del fmaps
+    self.logger.debug("Temporary feature maps deleted to release memory")
+
 
 def find_ms2(self, **kwargs):
     """
@@ -787,10 +869,22 @@ def _align_pose_clustering(study_obj, fmaps, params):
             and study_obj.samples_df.row(index, named=True)["sample_type"] == "blank"
         ):
             continue
-        trafo = oms.TransformationDescription()
-        aligner.align(fm, trafo)
-        transformer = oms.MapAlignmentTransformer()
-        transformer.transformRetentionTimes(fm, trafo, True)
+
+        # Skip feature maps with insufficient data points for alignment
+        if fm.size() < 2:
+            sample_name = study_obj.samples_df.row(index, named=True)["sample_name"]
+            study_obj.logger.warning(f"Skipping alignment for sample '{sample_name}' - insufficient features ({fm.size()} features)")
+            continue
+
+        try:
+            trafo = oms.TransformationDescription()
+            aligner.align(fm, trafo)
+            transformer = oms.MapAlignmentTransformer()
+            transformer.transformRetentionTimes(fm, trafo, True)
+        except RuntimeError as e:
+            sample_name = study_obj.samples_df.row(index, named=True)["sample_name"]
+            study_obj.logger.warning(f"Failed to align sample '{sample_name}': {e}")
+            continue
 
     study_obj.alignment_ref_index = ref_index
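Since align() now rebuilds feature maps on demand from features_df, it no longer depends on a cached self.features_maps, and an unknown algorithm name logs an error and returns early instead of falling through. A hypothetical call (the exact algorithm strings are assumptions inferred from the _align_pose_clustering and _align_kd_algorithm dispatch above):

    study.align(algorithm="pose_clustering")  # assumed value; "kd" for the KD variant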
masster/wizard/wizard.py CHANGED
@@ -127,7 +127,7 @@ class wizard_def:
 
     # === Feature Detection ===
     chrom_fwhm: float = 0.5
-    noise_threshold: float = 200.0
+    noise: float = 50.0
     chrom_peak_snr: float = 5.0
     tol_ppm: float = 10.0
     detector_type: str = "unknown"  # Detected detector type ("orbitrap", "quadrupole", "unknown")
@@ -307,15 +307,15 @@ class Wizard:
         """
         try:
             # Find first file
-            for extension in ['.wiff', '.raw', '.mzML', '.d']:
+            for extension in ['.wiff', '.raw', '.mzML']:
                 pattern = f"**/*{extension}" if True else f"*{extension}"  # search_subfolders=True
                 files = list(self.source_path.rglob(pattern))
                 if files:
                     first_file = files[0]
                     break
             else:
-                return None
-
+                return 'positive'
+
             # Only implement for .wiff files initially (most common format)
             if first_file.suffix.lower() == '.wiff':
                 from masster.sample.load import _wiff_to_dict
@@ -337,7 +337,7 @@ class Wizard:
             # Silently fall back to default if inference fails
             pass
 
-        return None
+        return 'positive'
 
     @property
     def polarity(self) -> str:
@@ -543,9 +543,9 @@ class Wizard:
            ' sample = Sample(log_label=sample_name)',
            ' sample.load(filename=str(raw_file))',
            ' sample.find_features(',
-            ' noise=PARAMS[\'noise_threshold\'],',
-            ' chrom_fwhm=PARAMS[\'smoothing_width\'],',
-            ' chrom_peak_snr=PARAMS[\'peak_threshold\']',
+            ' noise=PARAMS[\'noise\'],',
+            ' chrom_fwhm=PARAMS[\'chrom_fwhm\'],',
+            ' chrom_peak_snr=PARAMS[\'chrom_peak_snr\']',
            ' )',
            ' sample.find_adducts(adducts=PARAMS[\'adducts\'])',
            ' sample.find_ms2()',
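Because the generated script now indexes PARAMS with the same keys used by wizard_def, the dictionary must carry the renamed entries. A sketch of the relevant keys (values copied from the wizard_def defaults above; other entries such as adducts are unchanged and omitted):

    PARAMS = {
        "noise": 50.0,          # renamed from noise_threshold (200.0 in 0.4.21)
        "chrom_fwhm": 0.5,      # replaces the stale lookup key 'smoothing_width'
        "chrom_peak_snr": 5.0,  # replaces the stale lookup key 'peak_threshold'
    }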
masster-0.4.21.dist-info/METADATA → masster-0.4.22.dist-info/METADATA CHANGED
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: masster
-Version: 0.4.21
+Version: 0.4.22
 Summary: Mass spectrometry data analysis package
 Project-URL: homepage, https://github.com/zamboni-lab/masster
 Project-URL: repository, https://github.com/zamboni-lab/masster
masster-0.4.21.dist-info/RECORD → masster-0.4.22.dist-info/RECORD CHANGED
@@ -1,5 +1,5 @@
 masster/__init__.py,sha256=ueZ224WPNRRjQEYTaQUol818nwQgJwB93HbEfmtPRmg,1041
-masster/_version.py,sha256=y5IX-RaakVoxIcxnpOMo4HnIGk-qPigff8Q07QC6yJU,257
+masster/_version.py,sha256=vQFUBi9UR5DFflCbwobRTLg-jW9TsSQB1GlM1tfxYuc,257
 masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
 masster/logger.py,sha256=tR65N23zfrNpcZNbZm2ot_Aual9XrGB1MWjLrovZkMs,16749
 masster/spectrum.py,sha256=XJSUrqXZSzfpWnD8v5IMClXMRZLKLYIk014qaMOS9_k,49738
@@ -28,7 +28,7 @@ masster/sample/processing.py,sha256=A1u5u7lGG0HR_ciUhJFmmwgugher7_AZQopNnbu65Bs,
 masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
 masster/sample/sample.py,sha256=uQP5DLdsRSC2YwZZvspsL9rgl_HefB-oxrL2dpgg_fc,19788
 masster/sample/sample5_schema.json,sha256=H5e2T6rHIDzul2kp_yP-ILUUWUpW08wP2pEQjMR0nSk,3977
-masster/sample/save.py,sha256=MfgHGiR2ofLyVp_rP9FIu7mUG3A6PEZHSscMypSTH7M,36425
+masster/sample/save.py,sha256=2yQtcQcRJjgAKPImTydj7LpyyMop_Q9JKRlNEK4yU6k,36339
 masster/sample/sciex.py,sha256=vnbxsq_qnAQVuzcpziP1o3IC4kM5amGBcPmC2TAuDLw,46319
 masster/sample/defaults/__init__.py,sha256=A09AOP44cxD_oYohyt7XFUho0zndRcrzVD4DUaGnKH4,447
 masster/sample/defaults/find_adducts_def.py,sha256=Bu2KiBJRxD0SAnOPNMm_Nk-6fx6QYoRXjFNGzz-0_o0,13570
@@ -37,15 +37,15 @@ masster/sample/defaults/find_ms2_def.py,sha256=KTELMAnioGLYbhzAwOgK14TZqboPEvzeB
 masster/sample/defaults/get_spectrum_def.py,sha256=o62p31PhGd-LiIkTOzKQhwPtnO2AtQDHcPu-O-YoQPs,11460
 masster/sample/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb5iMQL0iY,14579
 masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
-masster/study/export.py,sha256=CKrmje4_cQAJnQjQsB5u3mXkvDDbxKkAJO8e_MoeB38,59270
+masster/study/export.py,sha256=c-UQPYRwNBde8E1cYOB-0ZZz2tBDTwglRMlPfSKYB0w,59291
 masster/study/h5.py,sha256=eINlVmcJuntwbkkZHwzm10c63Kg7zib49vkzLDj1PyU,84790
 masster/study/helpers.py,sha256=6nDTNlsZbZWf9L6D5qzK2TUO2y7UBq51Ftj8N4bkIAk,160260
 masster/study/id.py,sha256=6NUBBKZCFOU1wlDKM0eXQeOIStSZCRNJ_3x7ZaIHzmM,55263
-masster/study/load.py,sha256=CQQY_7BzagE3oQTdDlqNyfuMdVWIAft-M4a2WCFnxp0,70695
+masster/study/load.py,sha256=mI6UyErlj3vIzSuG93fOjsxA7IIDCaiKfcuAcc2538o,72425
 masster/study/merge.py,sha256=3R_Dg6l2mnJUu3gFVAgrAN5hFSQyfHbqYPmc2cUfJqQ,159232
 masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
-masster/study/plot.py,sha256=G9eStCUPxJTHrQk9TZpivk5rYxO1vbu5Yba6rP8NELM,90649
-masster/study/processing.py,sha256=WOcBdQ1agayASLkrtJ9GfUT0mgCJZVNnbuT77-J-KDY,52011
+masster/study/plot.py,sha256=OGUa_dDTD2QydbLg-4APRZc7Jx1kk9eXC9-GOLLgI1I,87666
+masster/study/processing.py,sha256=p0d-DyxA0YI6K9OPQZYTEs00DC6obr6-kLHPVWljEO0,56437
 masster/study/save.py,sha256=BANh9F1s-q7MclO1Mq_-v4xQyHeloEgmoPgRDVc-9aE,9037
 masster/study/study.py,sha256=rk-pJNg80N6xbROa9fqPfwVxFgzL_FLoSUNOTYeD5E0,40116
 masster/study/study5_schema.json,sha256=ghBeAXFS4a4Uavdn6TUVs9GaR1QOTnADCjQTOkN0tjU,7563
@@ -64,9 +64,9 @@ masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7
 masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,14149
 masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
 masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
-masster/wizard/wizard.py,sha256=498u3rvYNC0mb0kDG7T71E8TKwMk2s1KKzHgDBn76c4,37698
-masster-0.4.21.dist-info/METADATA,sha256=BuBxOoHsoKYPLa5Jy6a2SPIUNDv9uCnujAbB7ibMqJU,44207
-masster-0.4.21.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-masster-0.4.21.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
-masster-0.4.21.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
-masster-0.4.21.dist-info/RECORD,,
+masster/wizard/wizard.py,sha256=esgaifLRyaGxytif9qOkTy-21VxlUQxrvl47K-l-BpE,37666
+masster-0.4.22.dist-info/METADATA,sha256=CXrrzzCC5cZ_G9plLZyCtiNpTXevD0wPuUNm0mIy-a4,44207
+masster-0.4.22.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+masster-0.4.22.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+masster-0.4.22.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+masster-0.4.22.dist-info/RECORD,,