masster-0.4.20-py3-none-any.whl → masster-0.4.22-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/study/load.py CHANGED
@@ -1257,17 +1257,53 @@ def load_features(self):
1257
1257
  feature_map = oms.FeatureMap()
1258
1258
 
1259
1259
  # Convert DataFrame features to OpenMS Features
1260
+ # Keep track of next available feature_id for this sample
1261
+ next_feature_id = 1
1262
+ used_feature_ids = set()
1263
+
1264
+ # First pass: collect existing feature_ids to avoid conflicts
1265
+ for feature_row in sample_features.iter_rows(named=True):
1266
+ if feature_row["feature_id"] is not None:
1267
+ used_feature_ids.add(int(feature_row["feature_id"]))
1268
+
1269
+ # Find the next available feature_id
1270
+ while next_feature_id in used_feature_ids:
1271
+ next_feature_id += 1
1272
+
1260
1273
  for feature_row in sample_features.iter_rows(named=True):
1261
1274
  feature = oms.Feature()
1262
1275
 
1263
1276
  # Set properties from DataFrame (handle missing values gracefully)
1264
1277
  try:
1265
- feature.setUniqueId(int(feature_row["feature_id"]))
1278
+ # Skip features with missing critical data
1279
+ if feature_row["mz"] is None:
1280
+ self.logger.warning("Skipping feature due to missing mz")
1281
+ continue
1282
+ if feature_row["rt"] is None:
1283
+ self.logger.warning("Skipping feature due to missing rt")
1284
+ continue
1285
+ if feature_row["inty"] is None:
1286
+ self.logger.warning("Skipping feature due to missing inty")
1287
+ continue
1288
+
1289
+ # Handle missing feature_id by generating a new one
1290
+ if feature_row["feature_id"] is None:
1291
+ feature_id = next_feature_id
1292
+ next_feature_id += 1
1293
+ self.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID")
1294
+ else:
1295
+ feature_id = int(feature_row["feature_id"])
1296
+
1297
+ feature.setUniqueId(feature_id)
1266
1298
  feature.setMZ(float(feature_row["mz"]))
1267
1299
  feature.setRT(float(feature_row["rt"]))
1268
1300
  feature.setIntensity(float(feature_row["inty"]))
1269
- feature.setOverallQuality(float(feature_row["quality"]))
1270
- feature.setCharge(int(feature_row["charge"]))
1301
+
1302
+ # Handle optional fields that might be None
1303
+ if feature_row.get("quality") is not None:
1304
+ feature.setOverallQuality(float(feature_row["quality"]))
1305
+ if feature_row.get("charge") is not None:
1306
+ feature.setCharge(int(feature_row["charge"]))
1271
1307
 
1272
1308
  # Add to feature map
1273
1309
  feature_map.push_back(feature)
masster/study/merge.py CHANGED
@@ -400,7 +400,7 @@ def merge(self, **kwargs) -> None:
400
400
  # Feature maps will be generated on-demand within each merge method
401
401
 
402
402
  self.logger.info(
403
- f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da, min_rel_cc_size={params.min_rel_cc_size}, max_pairwise_log_fc={params.max_pairwise_log_fc}, max_nr_conflicts={params.max_nr_conflicts}"
403
+ f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
404
404
  )
405
405
 
406
406
  # Initialize
@@ -446,7 +446,7 @@ def merge(self, **kwargs) -> None:
446
446
  # Note: _merge_qt_chunked populates consensus_df directly, no need to extract
447
447
 
448
448
  # Enhanced post-clustering to merge over-segmented features (for qt and kd methods)
449
- if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked']:
449
+ if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked', 'quality']:
450
450
  self._consensus_cleanup(params.rt_tol, params.mz_tol)
451
451
 
452
452
  # Perform adduct grouping
@@ -705,11 +705,11 @@ def _merge_kd_strict(self, params: merge_defaults) -> oms.ConsensusMap:
705
705
  optimized_params = params
706
706
 
707
707
  # Phase 1: Standard KD clustering
708
- self.logger.info("Initial KD clustering")
708
+ self.logger.debug("Initial KD clustering")
709
709
  consensus_map = _merge_kd(self, optimized_params)
710
710
 
711
711
  # Phase 2: Post-processing quality control
712
- self.logger.info("Post-processing quality control")
712
+ self.logger.debug("Post-processing quality control")
713
713
  consensus_map = _apply_kd_strict_postprocessing(self, consensus_map, optimized_params)
714
714
 
715
715
  return consensus_map
@@ -911,7 +911,7 @@ def _apply_kd_strict_postprocessing(self, consensus_map: oms.ConsensusMap, param
911
911
  final_feature_count = len(self.consensus_df)
912
912
  reduction_pct = ((initial_feature_count - final_feature_count) / initial_feature_count * 100) if initial_feature_count > 0 else 0
913
913
 
914
- self.logger.info(f"Post-processing complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")
914
+ self.logger.info(f"Consensus cleanup complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")
915
915
 
916
916
  # Create a new consensus map for compatibility (the processed data is in consensus_df)
917
917
  processed_consensus_map = oms.ConsensusMap()
@@ -1691,8 +1691,12 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
1691
1691
  mz_min_local = mz_max_local = consensus_mz
1692
1692
 
1693
1693
  # Store chunk consensus with feature tracking
1694
+ # Generate unique 16-character consensus_id string
1695
+ import uuid
1696
+ consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
1697
+
1694
1698
  chunk_consensus_data = {
1695
- 'consensus_id': consensus_id_counter,
1699
+ 'consensus_id': consensus_id_str,
1696
1700
  'chunk_idx': chunk_idx,
1697
1701
  'chunk_start_idx': chunk_start_idx,
1698
1702
  'mz': consensus_mz,
@@ -1710,7 +1714,6 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
1710
1714
  }
1711
1715
 
1712
1716
  all_chunk_consensus.append(chunk_consensus_data)
1713
- consensus_id_counter += 1
1714
1717
 
1715
1718
  if not all_chunk_consensus:
1716
1719
  # No valid consensus features found
@@ -2094,9 +2097,13 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
2094
2097
  ms2_count += len(ms2_scans)
2095
2098
 
2096
2099
  # Build consensus metadata
2100
+ # Generate unique 16-character consensus_id string
2101
+ import uuid
2102
+ consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
2103
+
2097
2104
  return {
2098
2105
  "consensus_uid": int(consensus_uid),
2099
- "consensus_id": str(consensus_uid), # Use simple string ID
2106
+ "consensus_id": consensus_id_str, # Use unique 16-char string ID
2100
2107
  "quality": round(float(np.mean(quality_values)), 3) if len(quality_values) > 0 else 1.0,
2101
2108
  "number_samples": number_samples if number_samples is not None else len(feature_data_list),
2102
2109
  "rt": round(float(np.mean(rt_values)), 4) if len(rt_values) > 0 else 0.0,
@@ -2118,6 +2125,7 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
2118
2125
  "chrom_prominence_mean": round(float(np.mean(prominence_values)), 0) if len(prominence_values) > 0 else 0.0,
2119
2126
  "chrom_prominence_scaled_mean": round(float(np.mean(prominence_scaled_values)), 3) if len(prominence_scaled_values) > 0 else 0.0,
2120
2127
  "chrom_height_scaled_mean": round(float(np.mean(height_scaled_values)), 3) if len(height_scaled_values) > 0 else 0.0,
2128
+ "iso": None, # Will be filled by find_iso() function
2121
2129
  "iso_mean": round(float(np.mean(iso_values)), 2) if len(iso_values) > 0 else 0.0,
2122
2130
  "charge_mean": round(float(np.mean(charge_values)), 2) if len(charge_values) > 0 else 0.0,
2123
2131
  "number_ms2": int(ms2_count),
@@ -2509,10 +2517,14 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
2509
2517
  if ms2_scans is not None:
2510
2518
  ms2_count += len(ms2_scans)
2511
2519
 
2520
+ # Generate unique 16-character consensus_id string (UUID-based)
2521
+ import uuid
2522
+ consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
2523
+
2512
2524
  metadata_list.append(
2513
2525
  {
2514
2526
  "consensus_uid": int(i), # "consensus_id": i,
2515
- "consensus_id": str(feature.getUniqueId()),
2527
+ "consensus_id": consensus_id_str, # Use unique 16-char string ID
2516
2528
  "quality": round(float(feature.getQuality()), 3),
2517
2529
  "number_samples": len(feature_data_list),
2518
2530
  # "number_ext": int(len(features_list)),
@@ -2577,6 +2589,7 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
2577
2589
  )
2578
2590
  if len(height_scaled_values) > 0
2579
2591
  else 0.0,
2592
+ "iso": None, # Will be filled by find_iso() function
2580
2593
  "iso_mean": round(float(np.mean(iso_values)), 2)
2581
2594
  if len(iso_values) > 0
2582
2595
  else 0.0,
@@ -3325,7 +3338,9 @@ def _finalize_merge(self, link_ms2, min_samples):
3325
3338
  f"Merging completed with empty result. Consensus features: {len(self.consensus_df)}. "
3326
3339
  f"This may be due to min_samples ({min_samples}) being too high for the available data.",
3327
3340
  )
3328
-
3341
+
3342
+ # add iso data from raw files.
3343
+ self.find_iso()
3329
3344
  if link_ms2:
3330
3345
  self.find_ms2()
3331
3346
 
masster/study/plot.py CHANGED
@@ -42,9 +42,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
42
42
  from bokeh.io.export import export_png
43
43
  export_png(plot_object, filename=filename)
44
44
  logger.info(f"Plot saved to: {abs_filename}")
45
- except Exception:
45
+ except Exception as e:
46
46
  # Fall back to HTML if PNG export not available
47
47
  html_filename = filename.replace('.png', '.html')
48
+ abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.png', '.html')
48
49
  from bokeh.resources import Resources
49
50
  from bokeh.embed import file_html
50
51
 
@@ -54,7 +55,7 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
54
55
  with open(html_filename, 'w', encoding='utf-8') as f:
55
56
  f.write(html)
56
57
 
57
- logger.warning(f"PNG export not available, saved as HTML instead: {html_filename}")
58
+ logger.warning(f"PNG export not available ({str(e)}). Use export_png. Saved as HTML instead: {abs_html_filename}")
58
59
  elif filename.endswith(".pdf"):
59
60
  # Try to save as PDF, fall back to HTML if not available
60
61
  try:
@@ -74,6 +75,26 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
74
75
  f.write(html)
75
76
 
76
77
  logger.warning(f"PDF export not available, saved as HTML instead: {html_filename}")
78
+ elif filename.endswith(".svg"):
79
+ # Try to save as SVG, fall back to HTML if not available
80
+ try:
81
+ from bokeh.io.export import export_svg
82
+ export_svg(plot_object, filename=filename)
83
+ logger.info(f"Plot saved to: {abs_filename}")
84
+ except Exception as e:
85
+ # Fall back to HTML if SVG export not available
86
+ html_filename = filename.replace('.svg', '.html')
87
+ abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.svg', '.html')
88
+ from bokeh.resources import Resources
89
+ from bokeh.embed import file_html
90
+
91
+ resources = Resources(mode='cdn')
92
+ html = file_html(plot_object, resources, title=plot_title)
93
+
94
+ with open(html_filename, 'w', encoding='utf-8') as f:
95
+ f.write(html)
96
+
97
+ logger.warning(f"SVG export not available ({str(e)}). Saved as HTML instead: {abs_html_filename}")
77
98
  else:
78
99
  # Default to HTML for unknown extensions using isolated approach
79
100
  from bokeh.resources import Resources
@@ -181,6 +202,22 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
181
202
  logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
182
203
  except Exception as e:
183
204
  logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
205
+ elif filename.endswith(".svg"):
206
+ # Try to save as SVG, fall back to HTML if not available
207
+ try:
208
+ from bokeh.io.export import export_svg
209
+ bokeh_layout = panel_obj.get_root()
210
+ export_svg(bokeh_layout, filename=filename)
211
+ logger.info(f"{plot_title} saved to: {abs_filename}")
212
+ except Exception as e:
213
+ # Fall back to HTML if SVG export not available
214
+ html_filename = filename.replace('.svg', '.html')
215
+ abs_html_filename = os.path.abspath(html_filename)
216
+ try:
217
+ panel_obj.save(html_filename, embed=True)
218
+ logger.warning(f"SVG export not available ({str(e)}), saved as HTML instead: {abs_html_filename}")
219
+ except Exception as e:
220
+ logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
184
221
  else:
185
222
  # Default to HTML for unknown extensions
186
223
  try:
@@ -1687,221 +1724,154 @@ def plot_consensus_stats(
1687
1724
  self,
1688
1725
  filename=None,
1689
1726
  width=1200,
1690
- height=1200,
1727
+ height=None,
1691
1728
  alpha=0.6,
1692
- markersize=3,
1729
+ bins=30,
1730
+ n_cols=4,
1693
1731
  ):
1694
1732
  """
1695
- Plot a scatter plot matrix (SPLOM) of consensus statistics using Bokeh.
1696
-
1733
+ Plot histograms/distributions for all numeric columns in consensus_df.
1734
+
1697
1735
  Parameters:
1698
1736
  filename (str, optional): Output filename for saving the plot
1699
1737
  width (int): Overall width of the plot (default: 1200)
1700
- height (int): Overall height of the plot (default: 1200)
1701
- alpha (float): Point transparency (default: 0.6)
1702
- markersize (int): Size of points (default: 5)
1738
+ height (int, optional): Overall height of the plot (auto-calculated if None)
1739
+ alpha (float): Histogram transparency (default: 0.6)
1740
+ bins (int): Number of histogram bins (default: 30)
1741
+ n_cols (int): Number of columns in the grid layout (default: 4)
1703
1742
  """
1704
1743
  from bokeh.layouts import gridplot
1705
- from bokeh.models import ColumnDataSource, HoverTool
1706
- from bokeh.plotting import figure, show, output_file
1744
+ from bokeh.plotting import figure
1745
+ import polars as pl
1746
+ import numpy as np
1707
1747
 
1708
1748
  # Check if consensus_df exists and has data
1709
1749
  if self.consensus_df is None or self.consensus_df.is_empty():
1710
1750
  self.logger.error("No consensus data available. Run merge/find_consensus first.")
1711
1751
  return
1712
1752
 
1713
- # Define the columns to plot
1714
- columns = [
1715
- "rt",
1716
- "mz",
1717
- "number_samples",
1718
- "log10_quality",
1719
- "mz_delta_mean",
1720
- "rt_delta_mean",
1721
- "chrom_coherence_mean",
1722
- "chrom_prominence_scaled_mean",
1723
- "inty_mean",
1724
- "number_ms2",
1725
- ]
1726
-
1727
- # Check which columns exist in the dataframe and compute missing ones
1728
- available_columns = self.consensus_df.columns
1753
+ # Get all columns and their data types - work with original dataframe
1729
1754
  data_df = self.consensus_df.clone()
1730
1755
 
1731
- # Add log10_quality if quality exists
1732
- if "quality" in available_columns and "log10_quality" not in available_columns:
1733
- data_df = data_df.with_columns(
1734
- pl.col("quality").log10().alias("log10_quality"),
1735
- )
1736
-
1737
- # Filter columns that actually exist
1738
- final_columns = [col for col in columns if col in data_df.columns]
1739
-
1740
- if len(final_columns) < 2:
1741
- self.logger.error(f"Need at least 2 columns for SPLOM. Available: {final_columns}")
1756
+ # Identify numeric columns (excluding ID columns that are typically strings)
1757
+ id_columns = ["consensus_uid", "consensus_id", "uid", "id"]
1758
+ numeric_columns = []
1759
+
1760
+ for col in data_df.columns:
1761
+ if col not in id_columns:
1762
+ dtype = data_df[col].dtype
1763
+ # Check if column is numeric (int, float, or can be converted to numeric)
1764
+ if dtype in [pl.Int8, pl.Int16, pl.Int32, pl.Int64,
1765
+ pl.UInt8, pl.UInt16, pl.UInt32, pl.UInt64,
1766
+ pl.Float32, pl.Float64]:
1767
+ numeric_columns.append(col)
1768
+
1769
+ if len(numeric_columns) == 0:
1770
+ self.logger.error("No numeric columns found in consensus_df for plotting distributions.")
1742
1771
  return
1743
1772
 
1744
- self.logger.debug(f"Creating SPLOM with columns: {final_columns}")
1745
-
1746
- # Add important ID columns for tooltips even if not plotting them
1747
- tooltip_columns = []
1748
- for id_col in ["consensus_uid", "consensus_id"]:
1749
- if id_col in data_df.columns and id_col not in final_columns:
1750
- tooltip_columns.append(id_col)
1773
+ self.logger.debug(f"Creating distribution plots for {len(numeric_columns)} numeric columns: {numeric_columns}")
1751
1774
 
1752
- # Select plotting columns plus tooltip columns
1753
- all_columns = final_columns + tooltip_columns
1754
- data_pd = data_df.select(all_columns).to_pandas()
1775
+ # Work directly with Polars - no conversion to pandas needed
1776
+ data_df_clean = data_df.select(numeric_columns)
1755
1777
 
1756
- # Remove any infinite or NaN values
1757
- data_pd = data_pd.replace([np.inf, -np.inf], np.nan).dropna()
1758
-
1759
- if data_pd.empty:
1760
- self.logger.error("No valid data after removing NaN/infinite values.")
1778
+ # Check if all numeric columns are empty
1779
+ all_columns_empty = True
1780
+ for col in numeric_columns:
1781
+ # Check if column has any non-null, finite values
1782
+ non_null_count = data_df_clean[col].filter(
1783
+ data_df_clean[col].is_not_null() &
1784
+ (data_df_clean[col].is_finite() if data_df_clean[col].dtype in [pl.Float32, pl.Float64] else pl.lit(True))
1785
+ ).len()
1786
+
1787
+ if non_null_count > 0:
1788
+ all_columns_empty = False
1789
+ break
1790
+
1791
+ if all_columns_empty:
1792
+ self.logger.error("All numeric columns contain only NaN/infinite values.")
1761
1793
  return
1762
1794
 
1763
- source = ColumnDataSource(data_pd)
1764
-
1765
- n_vars = len(final_columns)
1766
-
1767
- # Fixed dimensions - override user input to ensure consistent layout
1768
- total_width = 1200
1769
- total_height = 1200
1770
-
1771
- # Calculate plot sizes to ensure uniform inner plot areas
1772
- # First column needs extra width for y-axis labels
1773
- plot_width_first = 180 # Wider to account for y-axis labels
1774
- plot_width_others = 120 # Standard width for other columns
1775
- plot_height_normal = 120 # Standard height
1776
- plot_height_last = 155 # Taller last row to accommodate x-axis labels while keeping inner plot area same size
1795
+ # Calculate grid dimensions
1796
+ n_plots = len(numeric_columns)
1797
+ n_rows = (n_plots + n_cols - 1) // n_cols # Ceiling division
1798
+
1799
+ # Auto-calculate height if not provided
1800
+ if height is None:
1801
+ plot_height = 300
1802
+ height = plot_height * n_rows + 100 # Add some padding
1803
+ else:
1804
+ plot_height = (height - 100) // n_rows # Subtract padding and divide
1805
+
1806
+ plot_width = (width - 100) // n_cols # Subtract padding and divide
1777
1807
 
1778
- # Create grid of plots with variable outer sizes but equal inner areas
1808
+ # Create plots grid
1779
1809
  plots = []
1780
-
1781
- for i, y_var in enumerate(final_columns):
1782
- row = []
1783
- for j, x_var in enumerate(final_columns):
1784
- # Determine if this plot needs axis labels
1785
- has_x_label = i == n_vars - 1 # bottom row
1786
- has_y_label = j == 0 # left column
1787
-
1788
- # First column wider to accommodate y-axis labels, ensuring equal inner plot areas
1789
- current_width = plot_width_first if has_y_label else plot_width_others
1790
- current_height = plot_height_last if has_x_label else plot_height_normal
1791
-
1792
- p = figure(
1793
- width=current_width,
1794
- height=current_height,
1795
- title=None, # No title on any plot
1796
- toolbar_location=None,
1797
- # Adjusted borders - first column has more space, others minimal
1798
- min_border_left=70 if has_y_label else 15,
1799
- min_border_bottom=50 if has_x_label else 15,
1800
- min_border_right=15,
1801
- min_border_top=15,
1802
- )
1803
-
1804
- # Ensure subplot background and border are explicitly white so the plot looks
1805
- # correct in dark and light themes.
1806
- p.outline_line_color = None
1807
- p.border_fill_color = "white"
1808
- p.border_fill_alpha = 1.0
1809
- p.background_fill_color = "white"
1810
-
1811
- # Remove axis lines to eliminate black lines between plots
1812
- p.xaxis.axis_line_color = None
1813
- p.yaxis.axis_line_color = None
1814
-
1815
- # Keep subtle grid lines for data reference
1816
- p.grid.visible = True
1817
- p.grid.grid_line_color = "#E0E0E0" # Light gray grid lines
1818
-
1819
- # Set axis labels and formatting
1820
- if has_x_label: # bottom row
1821
- p.xaxis.axis_label = x_var
1822
- p.xaxis.axis_label_text_font_size = "12pt"
1823
- p.xaxis.major_label_text_font_size = "9pt"
1824
- p.xaxis.axis_label_standoff = 15
1825
- else:
1826
- p.xaxis.major_label_text_font_size = "0pt"
1827
- p.xaxis.minor_tick_line_color = None
1828
- p.xaxis.major_tick_line_color = None
1829
-
1830
- if has_y_label: # left column
1831
- p.yaxis.axis_label = y_var
1832
- p.yaxis.axis_label_text_font_size = "10pt" # Smaller y-axis title
1833
- p.yaxis.major_label_text_font_size = "8pt"
1834
- p.yaxis.axis_label_standoff = 12
1835
- else:
1836
- p.yaxis.major_label_text_font_size = "0pt"
1837
- p.yaxis.minor_tick_line_color = None
1838
- p.yaxis.major_tick_line_color = None
1839
-
1840
- if i == j:
1841
- # Diagonal: histogram
1842
- hist, edges = np.histogram(data_pd[x_var], bins=30)
1843
- p.quad(
1844
- top=hist,
1845
- bottom=0,
1846
- left=edges[:-1],
1847
- right=edges[1:],
1848
- fill_color="green",
1849
- line_color="white",
1850
- alpha=alpha,
1851
- )
1852
- else:
1853
- # Off-diagonal: scatter plot
1854
- scatter = p.scatter(
1855
- x=x_var,
1856
- y=y_var,
1857
- size=markersize,
1858
- alpha=alpha,
1859
- color="blue",
1860
- source=source,
1861
- )
1862
-
1863
- # Add hover tool
1864
- hover = HoverTool(
1865
- tooltips=[
1866
- (x_var, f"@{x_var}{{0.0000}}"),
1867
- (y_var, f"@{y_var}{{0.0000}}"),
1868
- (
1869
- "consensus_uid",
1870
- "@consensus_uid"
1871
- if "consensus_uid" in data_pd.columns
1872
- else "@consensus_id"
1873
- if "consensus_id" in data_pd.columns
1874
- else "N/A",
1875
- ),
1876
- ("rt", "@rt{0.00}" if "rt" in data_pd.columns else "N/A"),
1877
- ("mz", "@mz{0.0000}" if "mz" in data_pd.columns else "N/A"),
1878
- ],
1879
- renderers=[scatter],
1880
- )
1881
- p.add_tools(hover)
1882
-
1883
- row.append(p)
1884
- plots.append(row)
1885
-
1886
- # Link axes for same variables
1887
- for i in range(n_vars):
1888
- for j in range(n_vars):
1889
- if i != j: # Don't link diagonal plots
1890
- # Link x-axis to other plots in same column
1891
- for k in range(n_vars):
1892
- if k != i and k != j:
1893
- plots[i][j].x_range = plots[k][j].x_range
1894
-
1895
- # Link y-axis to other plots in same row
1896
- for k in range(n_vars):
1897
- if k != j and k != i:
1898
- plots[i][j].y_range = plots[i][k].y_range
1899
-
1900
- # Create grid layout and force overall background/border to white so the outer
1901
- # container doesn't show dark UI colors in night mode.
1810
+ current_row = []
1811
+
1812
+ for i, col in enumerate(numeric_columns):
1813
+ # Create histogram for this column
1814
+ p = figure(
1815
+ width=plot_width,
1816
+ height=plot_height,
1817
+ title=col,
1818
+ toolbar_location="above",
1819
+ tools="pan,wheel_zoom,box_zoom,reset,save"
1820
+ )
1821
+
1822
+ # Set white background
1823
+ p.background_fill_color = "white"
1824
+ p.border_fill_color = "white"
1825
+
1826
+ # Calculate histogram using Polars
1827
+ # Get valid (non-null, finite) values for this column
1828
+ if data_df_clean[col].dtype in [pl.Float32, pl.Float64]:
1829
+ valid_values = data_df_clean.filter(
1830
+ data_df_clean[col].is_not_null() & data_df_clean[col].is_finite()
1831
+ )[col]
1832
+ else:
1833
+ valid_values = data_df_clean.filter(data_df_clean[col].is_not_null())[col]
1834
+
1835
+ if valid_values.len() == 0:
1836
+ self.logger.warning(f"No valid values for column {col}")
1837
+ continue
1838
+
1839
+ # Convert to numpy for histogram calculation
1840
+ values_array = valid_values.to_numpy()
1841
+ hist, edges = np.histogram(values_array, bins=bins)
1842
+
1843
+ # Create histogram bars
1844
+ p.quad(
1845
+ top=hist,
1846
+ bottom=0,
1847
+ left=edges[:-1],
1848
+ right=edges[1:],
1849
+ fill_color="steelblue",
1850
+ line_color="white",
1851
+ alpha=alpha,
1852
+ )
1853
+
1854
+ # Style the plot
1855
+ p.title.text_font_size = "12pt"
1856
+ p.xaxis.axis_label = col
1857
+ p.yaxis.axis_label = "Count"
1858
+ p.grid.visible = True
1859
+ p.grid.grid_line_color = "#E0E0E0"
1860
+
1861
+ current_row.append(p)
1862
+
1863
+ # If we've filled a row or reached the end, add the row to plots
1864
+ if len(current_row) == n_cols or i == n_plots - 1:
1865
+ # Fill remaining spots in the last row with None if needed
1866
+ while len(current_row) < n_cols and i == n_plots - 1:
1867
+ current_row.append(None)
1868
+ plots.append(current_row)
1869
+ current_row = []
1870
+
1871
+ # Create grid layout
1902
1872
  grid = gridplot(plots)
1903
-
1904
- # Set overall background and border to white when supported
1873
+
1874
+ # Set overall background to white
1905
1875
  if hasattr(grid, "background_fill_color"):
1906
1876
  grid.background_fill_color = "white"
1907
1877
  if hasattr(grid, "border_fill_color"):