masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl

This diff shows the contents of publicly available package versions as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. (The original page linked to more details here; the link target was not preserved in this text export.)

masster/study/load.py CHANGED
@@ -213,18 +213,19 @@ def load(self, filename=None):
213
213
  return
214
214
 
215
215
  # self.logger.info(f"Loading study from {filename}")
216
- self._load_study5(filename)
216
+ from masster.study.h5 import _load_study5
217
+ _load_study5(self, filename)
217
218
 
218
219
  # After loading the study, check if we have consensus features before loading consensus XML
219
- if (self.consensus_df is not None and not self.consensus_df.is_empty()):
220
- consensus_xml_path = filename.replace(".study5", ".consensusXML")
221
- if os.path.exists(consensus_xml_path):
222
- self._load_consensusXML(filename=consensus_xml_path)
220
+ #if (self.consensus_df is not None and not self.consensus_df.is_empty()):
221
+ # consensus_xml_path = filename.replace(".study5", ".consensusXML")
222
+ # if os.path.exists(consensus_xml_path):
223
+ # self._load_consensusXML(filename=consensus_xml_path)
223
224
  # self.logger.info(f"Automatically loaded consensus from {consensus_xml_path}")
224
- else:
225
- self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
226
- else:
227
- self.logger.debug("No consensus features found, skipping consensusXML loading")
225
+ # else:
226
+ # self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
227
+ #else:
228
+ # self.logger.debug("No consensus features found, skipping consensusXML loading")
228
229
 
229
230
  self.filename = filename
230
231
 
@@ -260,9 +261,14 @@ def _fill_chrom_single_impl(
260
261
  min_number_abs = 1
261
262
  if isinstance(min_samples_rel, float) and min_samples_rel > 0:
262
263
  min_number_rel = int(min_samples_rel * len(self.samples_df))
263
- if isinstance(min_samples_abs, int) and min_samples_abs > 0:
264
- min_number_abs = int(min_samples_abs)
264
+ if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
265
+ min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
265
266
  min_number = max(min_number_rel, min_number_abs)
267
+
268
+ # Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
269
+ if isinstance(min_samples_abs, int) and min_samples_abs == 0:
270
+ min_number = 0
271
+
266
272
  self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")
267
273
 
268
274
  if min_number > 0:
@@ -276,7 +282,7 @@ def _fill_chrom_single_impl(
276
282
  )
277
283
  self.logger.debug("Identifying missing features...")
278
284
  # Instead of building full chromatogram matrix, identify missing consensus/sample combinations directly
279
- missing_combinations = self._get_missing_consensus_sample_combinations(uids)
285
+ missing_combinations = _get_missing_consensus_sample_combinations(self,uids)
280
286
  if not missing_combinations:
281
287
  self.logger.info("No missing features found to fill.")
282
288
  return
@@ -559,7 +565,7 @@ def fill_single(self, **kwargs):
559
565
  # end of parameter initialization
560
566
 
561
567
  # Store parameters in the Study object
562
- self.store_history(["fill_single"], params.to_dict())
568
+ self.update_history(["fill_single"], params.to_dict())
563
569
  self.logger.debug("Parameters stored to fill_single")
564
570
 
565
571
  # Call the original fill_chrom_single function with extracted parameters
@@ -753,10 +759,14 @@ def _fill_chrom_impl(
753
759
  min_number_abs = 1
754
760
  if isinstance(min_samples_rel, float) and min_samples_rel > 0:
755
761
  min_number_rel = int(min_samples_rel * len(self.samples_df))
756
- if isinstance(min_samples_abs, int) and min_samples_abs > 0:
757
- min_number_abs = int(min_samples_abs)
762
+ if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
763
+ min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
758
764
  min_number = max(min_number_rel, min_number_abs)
759
765
 
766
+ # Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
767
+ if isinstance(min_samples_abs, int) and min_samples_abs == 0:
768
+ min_number = 0
769
+
760
770
  self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")
761
771
 
762
772
  if min_number > 0:
@@ -769,7 +779,7 @@ def _fill_chrom_impl(
769
779
 
770
780
  # Get missing consensus/sample combinations using the optimized method
771
781
  self.logger.debug("Identifying missing features...")
772
- missing_combinations = self._get_missing_consensus_sample_combinations(uids)
782
+ missing_combinations = _get_missing_consensus_sample_combinations(self, uids)
773
783
 
774
784
  if not missing_combinations or len(missing_combinations) == 0:
775
785
  self.logger.info("No missing features found to fill.")
@@ -845,7 +855,7 @@ def _fill_chrom_impl(
845
855
  future_to_sample = {}
846
856
  for sample_info in samples_to_process:
847
857
  future = executor.submit(
848
- self._process_sample_for_parallel_fill,
858
+ _process_sample_for_parallel_fill, self,
849
859
  sample_info,
850
860
  consensus_info,
851
861
  uids,
@@ -979,7 +989,7 @@ def fill(self, **kwargs):
979
989
  # end of parameter initialization
980
990
 
981
991
  # Store parameters in the Study object
982
- self.store_history(["fill"], params.to_dict())
992
+ self.update_history(["fill"], params.to_dict())
983
993
  self.logger.debug("Parameters stored to fill")
984
994
 
985
995
  # Call the original fill_chrom function with extracted parameters
@@ -1115,7 +1125,7 @@ def _get_missing_consensus_sample_combinations(self, uids):
1115
1125
  return missing_combinations
1116
1126
 
1117
1127
 
1118
- def sanitize(self):
1128
+ def _sanitize(self):
1119
1129
  """
1120
1130
  Sanitize features DataFrame to ensure all complex objects are properly typed.
1121
1131
  Convert serialized objects back to their proper types (Chromatogram, Spectrum).
@@ -1209,7 +1219,7 @@ def sanitize(self):
1209
1219
  self.logger.error(f"Failed to recreate sanitized DataFrame: {e}")
1210
1220
 
1211
1221
 
1212
- def load_features(self):
1222
+ def _load_features(self):
1213
1223
  """
1214
1224
  Load features by reconstructing FeatureMaps from the processed features_df data.
1215
1225
 
@@ -1630,7 +1640,7 @@ def _add_sample_optimized(
1630
1640
  # - No _ensure_features_df_schema_order()
1631
1641
  # - No complex column alignment
1632
1642
  # - No type casting loops
1633
- # - No sample_color_reset()
1643
+ # - No set_samples_color(by=None) call needed
1634
1644
 
1635
1645
  self.logger.debug(
1636
1646
  f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)",
@@ -1914,42 +1924,6 @@ def _add_sample_standard(
1914
1924
 
1915
1925
  def _sample_color_reset_optimized(self):
1916
1926
  """
1917
- Optimized version of sample_color_reset that caches colormap initialization.
1927
+ Optimized version of sample color reset using set_samples_color.
1918
1928
  """
1919
- if self.samples_df is None or len(self.samples_df) == 0:
1920
- self.logger.warning("No samples found in study.")
1921
- return
1922
-
1923
- # Cache the colormap if not already cached
1924
- if not hasattr(self, "_cached_colormap"):
1925
- try:
1926
- from cmap import Colormap
1927
-
1928
- self._cached_colormap = Colormap("turbo")
1929
- except ImportError:
1930
- self.logger.warning("cmap package not available, using default colors")
1931
- return
1932
-
1933
- cm = self._cached_colormap
1934
- n_samples = len(self.samples_df)
1935
-
1936
- # Pre-allocate colors list for better performance
1937
- colors = [None] * n_samples
1938
-
1939
- # Vectorized color generation
1940
- for i in range(n_samples):
1941
- normalized_value = 0.1 + ((i + 0.5) / n_samples) * 0.8
1942
- color_rgba = cm(normalized_value)
1943
-
1944
- if len(color_rgba) >= 3:
1945
- r, g, b = color_rgba[:3]
1946
- if max(color_rgba[:3]) <= 1.0:
1947
- r, g, b = int(r * 255), int(g * 255), int(b * 255)
1948
- colors[i] = f"#{r:02x}{g:02x}{b:02x}"
1949
-
1950
- # Update the sample_color column efficiently
1951
- self.samples_df = self.samples_df.with_columns(
1952
- pl.Series("sample_color", colors).alias("sample_color"),
1953
- )
1954
-
1955
- self.logger.debug(f"Reset sample colors (cached) for {n_samples} samples")
1929
+ return self.set_samples_color(by=None)