masster 0.4.22__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of masster might be problematic.
- masster/_version.py +1 -1
- masster/data/libs/aa.csv +22 -0
- masster/lib/lib.py +6 -0
- masster/sample/adducts.py +1 -1
- masster/sample/load.py +10 -9
- masster/sample/plot.py +1 -1
- masster/sample/processing.py +4 -4
- masster/sample/sample.py +29 -32
- masster/study/analysis.py +1762 -0
- masster/study/defaults/fill_def.py +1 -1
- masster/study/export.py +5 -3
- masster/study/h5.py +3 -0
- masster/study/helpers.py +153 -80
- masster/study/id.py +545 -4
- masster/study/load.py +33 -59
- masster/study/merge.py +413 -315
- masster/study/parameters.py +3 -3
- masster/study/plot.py +398 -43
- masster/study/processing.py +6 -14
- masster/study/save.py +8 -4
- masster/study/study.py +179 -139
- masster/study/study5_schema.json +9 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/METADATA +54 -14
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/RECORD +27 -25
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/WHEEL +0 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/entry_points.txt +0 -0
- {masster-0.4.22.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE +0 -0
masster/study/merge.py
CHANGED
@@ -274,7 +274,7 @@ def _serialize_feature_map(feature_map):
     return features_data


-def merge(
+def merge(study, **kwargs) -> None:
     """
     Group features across samples into consensus features using various algorithms.

@@ -342,7 +342,7 @@ def merge(self, **kwargs) -> None:
         if key in valid_params:
             setattr(params, key, value)
         else:
-
+            study.logger.warning(f"Unknown parameter '{key}' ignored")

     # Backward compatibility: Map old method names to new names
     method_mapping = {
@@ -362,18 +362,18 @@ def merge(self, **kwargs) -> None:
     if params.method in method_mapping:
         old_method = params.method
         params.method = method_mapping[old_method]
-
+        study.logger.info(f"Method '{old_method}' is deprecated. Using '{params.method}' instead.")

     # Validate method
     if params.method not in ['sensitivity', 'qt', 'nowarp', 'kd_chunked', 'qt_chunked', 'quality']:
         raise ValueError(f"Invalid method '{params.method}'. Must be one of: ['sensitivity', 'qt', 'nowarp', 'kd_chunked', 'qt_chunked', 'quality']")

     # Check if chunked method is advisable for large datasets
-    num_samples = len(
+    num_samples = len(study.samples_df) if hasattr(study, 'samples_df') and study.samples_df is not None else 0
     if num_samples > 500:
         chunked_methods = {'kd_chunked', 'qt_chunked'}
         if params.method not in chunked_methods:
-
+            study.logger.warning(
                 f"Large dataset detected ({num_samples} samples > 500). "
                 f"For better performance and memory efficiency, consider using a chunked method: "
                 f"'kd_chunked' or 'qt_chunked' instead of '{params.method}'"
@@ -381,42 +381,43 @@ def merge(self, **kwargs) -> None:

     # Persist last used params for diagnostics
     try:
-
+        study._merge_params_last = params.to_dict()
     except Exception:
-
+        study._merge_params_last = {}

     # Store merge parameters in history
     try:
-        if hasattr(
-
+        if hasattr(study, 'store_history'):
+            study.update_history(['merge'], params.to_dict())
         else:
-
+            study.logger.warning("History storage not available - parameters not saved to history")
     except Exception as e:
-
+        study.logger.warning(f"Failed to store merge parameters in history: {e}")

     # Ensure feature maps are available for merging (regenerate if needed)
-    if len(
-
+    if len(study.features_maps) < len(study.samples_df):
+        study.features_maps = []
         # Feature maps will be generated on-demand within each merge method

-
+    study.logger.info(
         f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
     )

     # Initialize
-
+    _reset_consensus_data(study)

     # Cache adducts for performance (avoid repeated _get_adducts() calls)
     cached_adducts_df = None
     cached_valid_adducts = None
     try:
-
+        from masster.study.id import _get_adducts
+        cached_adducts_df = _get_adducts(study)
         if not cached_adducts_df.is_empty():
             cached_valid_adducts = set(cached_adducts_df["name"].to_list())
         else:
             cached_valid_adducts = set()
     except Exception as e:
-
+        study.logger.warning(f"Could not retrieve study adducts: {e}")
         cached_valid_adducts = set()

     # Always allow '?' adducts
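The changes above follow one mechanical pattern: methods that previously took `self` become module-level functions whose first parameter is the `Study` object (named `study`). A minimal, self-contained sketch of that refactoring pattern in general follows; the `Pipeline`/`run` names are hypothetical and not part of masster.

# Before: behavior lives in a method and is called as obj.run(...)
class Pipeline:
    def __init__(self, name: str):
        self.name = name
        self.results = []

    def run(self, value: int) -> None:
        self.results.append(value * 2)


# After: the same behavior as a module-level function that receives the object
# explicitly, mirroring merge(study, **kwargs) replacing a bound method.
def run(pipeline: Pipeline, value: int) -> None:
    pipeline.results.append(value * 2)


if __name__ == "__main__":
    p = Pipeline("demo")
    run(p, 21)          # module-function style, as in the new merge.py
    print(p.results)    # [42]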
@@ -424,58 +425,58 @@ def merge(self, **kwargs) -> None:

     # Route to algorithm implementation
     if params.method == 'sensitivity':
-        consensus_map = _merge_kd(
+        consensus_map = _merge_kd(study, params)
         # Extract consensus features
-
+        _extract_consensus_features(study, consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
     elif params.method == 'qt':
-        consensus_map = _merge_qt(
+        consensus_map = _merge_qt(study, params)
         # Extract consensus features
-
+        _extract_consensus_features(study, consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
     elif params.method == 'nowarp':
-        consensus_map = _merge_kd_nowarp(
+        consensus_map = _merge_kd_nowarp(study, params)
         # Extract consensus features
-
+        _extract_consensus_features(study, consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
     elif params.method == 'quality':
-        consensus_map = _merge_kd_strict(
+        consensus_map = _merge_kd_strict(study, params)
         # Note: _merge_kd_strict handles both consensus_df and consensus_mapping_df directly
     elif params.method == 'kd_chunked':
-        consensus_map = _merge_kd_chunked(
+        consensus_map = _merge_kd_chunked(study, params, cached_adducts_df, cached_valid_adducts)
         # Note: _merge_kd_chunked populates consensus_df directly, no need to extract
     elif params.method == 'qt_chunked':
-        consensus_map = _merge_qt_chunked(
+        consensus_map = _merge_qt_chunked(study, params, cached_adducts_df, cached_valid_adducts)
         # Note: _merge_qt_chunked populates consensus_df directly, no need to extract

     # Enhanced post-clustering to merge over-segmented features (for qt and kd methods)
     if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked', 'quality']:
-
+        _consensus_cleanup(study, params.rt_tol, params.mz_tol)

     # Perform adduct grouping
-
+    _perform_adduct_grouping(study, params.rt_tol, params.mz_tol)

     # Identify coeluting consensus features by mass shifts and update adduct information
-
+    _identify_adduct_by_mass_shift(study, params.rt_tol, cached_adducts_df)

     # Link MS2 if requested
     if params.link_ms2:
-
+        _finalize_merge(study, params.link_ms2, params.min_samples)

     # Log completion without the misleading feature count
     elapsed = time.time() - start_time
-
+    study.logger.debug(f"Merge process completed in {elapsed:.1f}s")


-def _merge_kd(
+def _merge_kd(study, params: merge_defaults) -> oms.ConsensusMap:
     """KD-tree based merge (fast, recommended)"""

     # Generate temporary feature maps on-demand from features_df
-    temp_feature_maps = _generate_feature_maps_on_demand(
+    temp_feature_maps = _generate_feature_maps_on_demand(study)

     consensus_map = oms.ConsensusMap()
     file_descriptions = consensus_map.getColumnHeaders()

     for i, feature_map in enumerate(temp_feature_maps):
         file_description = file_descriptions.get(i, oms.ColumnHeader())
-        file_description.filename =
+        file_description.filename = study.samples_df.row(i, named=True)["sample_name"]
         file_description.size = feature_map.size()
         file_description.unique_id = feature_map.getUniqueId()
         file_descriptions[i] = file_description
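The hunk above only shows the column-header bookkeeping inside _merge_kd; the grouping call itself sits in unchanged lines. A rough, hedged sketch of how pyopenms feature grouping is typically driven once per-sample FeatureMaps and headers exist is shown below; the sample values and file names are made up, and the default FeatureGroupingAlgorithmKD parameters are assumed rather than taken from masster.

# Hedged sketch: driving pyopenms feature grouping over tiny in-memory maps.
import pyopenms as oms

def tiny_feature_map(rt_mz_pairs):
    fm = oms.FeatureMap()
    for rt, mz in rt_mz_pairs:
        f = oms.Feature()
        f.setRT(rt)
        f.setMZ(mz)
        f.setIntensity(1000.0)
        fm.push_back(f)
    return fm

maps = [tiny_feature_map([(100.0, 300.1)]), tiny_feature_map([(100.5, 300.1)])]

consensus_map = oms.ConsensusMap()
headers = consensus_map.getColumnHeaders()
for i, fm in enumerate(maps):
    h = headers.get(i, oms.ColumnHeader())
    h.filename = f"sample_{i}"          # the real code uses samples_df["sample_name"]
    h.size = fm.size()
    h.unique_id = fm.getUniqueId()
    headers[i] = h
consensus_map.setColumnHeaders(headers)

# Grouping step (outside the changed lines in merge.py); defaults assumed here.
algo = oms.FeatureGroupingAlgorithmKD()
algo.group(maps, consensus_map)
print(consensus_map.size())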
@@ -504,13 +505,99 @@ def _merge_kd(self, params: merge_defaults) -> oms.ConsensusMap:
     return consensus_map


+def _generate_feature_maps_from_samples(study):
+    """
+    Generate feature maps using Study-level features_df instead of Sample-level loading.
+    This uses the study's existing features_df which is already loaded.
+
+    Args:
+        study: Study object containing features_df
+
+    Returns:
+        list: List of temporary FeatureMap objects built from Study-level data
+    """
+    import pyopenms as oms
+
+    temp_feature_maps = []
+
+    study.logger.info(f"Building feature maps using Study-level features_df from {len(study.samples_df)} samples")
+
+    # Use the features_df from the study that's already loaded
+    if not hasattr(study, 'features_df') or study.features_df is None or study.features_df.is_empty():
+        study.logger.warning("No features_df available - features must be loaded first")
+        return temp_feature_maps
+
+    # Group features by sample
+    study.logger.info(f"Processing {len(study.features_df)} features grouped by sample")
+
+    # Get unique sample names/indices
+    if 'sample_uid' in study.features_df.columns:
+        sample_groups = study.features_df.group_by('sample_uid')
+        study.logger.debug("Grouping features by 'sample_uid' column")
+    elif 'sample_id' in study.features_df.columns:
+        sample_groups = study.features_df.group_by('sample_id')
+        study.logger.debug("Grouping features by 'sample_id' column")
+    elif 'sample' in study.features_df.columns:
+        sample_groups = study.features_df.group_by('sample')
+        study.logger.debug("Grouping features by 'sample' column")
+    else:
+        study.logger.warning("No sample grouping column found in features_df")
+        study.logger.info(f"Available columns: {study.features_df.columns}")
+        return temp_feature_maps
+
+    # Process each sample group
+    processed_samples = 0
+    for sample_key, sample_features in sample_groups:
+        try:
+            feature_map = oms.FeatureMap()
+            feature_count = 0
+
+            # Build features from this sample's features
+            for row in sample_features.iter_rows(named=True):
+                try:
+                    feature = oms.Feature()
+
+                    # Set feature properties
+                    if row.get("feature_id") is not None:
+                        feature.setUniqueId(int(row["feature_id"]))
+                    if row.get("mz") is not None:
+                        feature.setMZ(float(row["mz"]))
+                    if row.get("rt") is not None:
+                        feature.setRT(float(row["rt"]))
+                    if row.get("inty") is not None:
+                        feature.setIntensity(float(row["inty"]))
+                    if row.get("quality") is not None:
+                        feature.setOverallQuality(float(row["quality"]))
+                    if row.get("charge") is not None:
+                        feature.setCharge(int(row["charge"]))
+
+                    feature_map.push_back(feature)
+                    feature_count += 1
+
+                except (ValueError, TypeError) as e:
+                    study.logger.warning(f"Skipping feature in sample {sample_key} due to conversion error: {e}")
+                    continue
+
+            temp_feature_maps.append(feature_map)
+            processed_samples += 1
+            study.logger.debug(f"Built feature map for sample {sample_key} with {feature_count} features")
+
+        except Exception as e:
+            study.logger.warning(f"Failed to process sample group {sample_key}: {e}")
+            # Add empty feature map for failed samples to maintain sample order
+            temp_feature_maps.append(oms.FeatureMap())
+
+    study.logger.info(f"Generated {len(temp_feature_maps)} feature maps from {processed_samples} samples using Study-level features_df")
+    return temp_feature_maps
+
+
 def _generate_feature_maps_on_demand(study):
     """
-    Generate feature maps on-demand
+    Generate feature maps on-demand using Sample-level _load_ms1() for merge operations.
     Returns temporary feature maps that are not cached in the study.

     Args:
-        study: Study object containing
+        study: Study object containing samples

     Returns:
         list: List of temporary FeatureMap objects
@@ -519,6 +606,15 @@ def _generate_feature_maps_on_demand(study):
     import pyopenms as oms
     import numpy as np

+    # Check if we should use Sample-level loading instead of features_df
+    use_sample_loading = True  # Default to Sample-level loading as requested
+
+    # Use Sample-level loading if requested and samples_df is available
+    if use_sample_loading and hasattr(study, 'samples_df') and study.samples_df is not None and len(study.samples_df) > 0:
+        study.logger.debug("Building feature maps using Sample-level _load_ms1() instead of features_df")
+        return _generate_feature_maps_from_samples(study)
+
+    # Fallback to original features_df approach
     if study.features_df is None or len(study.features_df) == 0:
         study.logger.error("No features_df available for generating feature maps")
         return []
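The new helper above leans on two polars idioms: iterating a group_by() yields (key, sub-DataFrame) pairs, and iter_rows(named=True) yields plain dicts. A tiny hedged sketch with illustrative column names and values:

# Hedged sketch of the polars idioms used by _generate_feature_maps_from_samples.
import polars as pl

features_df = pl.DataFrame({
    "sample_uid": ["s1", "s1", "s2"],
    "mz":         [300.1, 450.2, 300.1],
    "rt":         [100.0, 210.5, 100.4],
    "inty":       [1.0e5, 3.2e4, 9.8e4],
})

for sample_key, sample_features in features_df.group_by("sample_uid"):
    # Depending on the polars version, sample_key is a scalar or a 1-tuple.
    rows = [row for row in sample_features.iter_rows(named=True)]
    print(sample_key, len(rows), rows[0]["mz"])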
@@ -624,22 +720,22 @@ def _generate_feature_maps_on_demand(study):
     return temp_feature_maps


-def _merge_qt(
+def _merge_qt(study, params: merge_defaults) -> oms.ConsensusMap:
     """QT (Quality Threshold) based merge"""

     # Generate temporary feature maps on-demand from features_df
-    temp_feature_maps = _generate_feature_maps_on_demand(
+    temp_feature_maps = _generate_feature_maps_on_demand(study)

     n_samples = len(temp_feature_maps)
     if n_samples > 1000:
-
+        study.logger.warning(f"QT with {n_samples} samples may be slow [O(n²)]. Consider KD [O(n log n)]")

     consensus_map = oms.ConsensusMap()
     file_descriptions = consensus_map.getColumnHeaders()

     for i, feature_map in enumerate(temp_feature_maps):
         file_description = file_descriptions.get(i, oms.ColumnHeader())
-        file_description.filename =
+        file_description.filename = study.samples_df.row(i, named=True)["sample_name"]
         file_description.size = feature_map.size()
         file_description.unique_id = feature_map.getUniqueId()
         file_descriptions[i] = file_description
@@ -665,7 +761,7 @@ def _merge_qt(self, params: merge_defaults) -> oms.ConsensusMap:
     return consensus_map


-def _merge_kd_strict(
+def _merge_kd_strict(study, params: merge_defaults) -> oms.ConsensusMap:
     """
     Quality merge: Standard KD algorithm with post-processing quality control.

@@ -695,8 +791,8 @@ def _merge_kd_strict(self, params: merge_defaults) -> oms.ConsensusMap:

     if optimize_rt_tol:
         # Optimize RT tolerance first
-        optimal_rt_tol = _optimize_rt_tolerance(
-
+        optimal_rt_tol = _optimize_rt_tolerance(study, params)
+        study.logger.info(f"RT tolerance optimization: {params.rt_tol}s → {optimal_rt_tol}s")
         # Create modified params with optimal RT tolerance
         import copy
         optimized_params = copy.deepcopy(params)
@@ -705,22 +801,22 @@ def _merge_kd_strict(self, params: merge_defaults) -> oms.ConsensusMap:
         optimized_params = params

     # Phase 1: Standard KD clustering
-
-    consensus_map = _merge_kd(
+    study.logger.debug("Initial KD clustering")
+    consensus_map = _merge_kd(study, optimized_params)

     # Phase 2: Post-processing quality control
-
-    consensus_map = _apply_kd_strict_postprocessing(
+    study.logger.debug("Post-processing quality control")
+    consensus_map = _apply_kd_strict_postprocessing(study, consensus_map, optimized_params)

     return consensus_map


-def _optimize_rt_tolerance(
+def _optimize_rt_tolerance(study, params: merge_defaults) -> float:
     """
     Optimize RT tolerance by testing different values and measuring oversegmentation.

     Args:
-
+        study: Study object
         params: Merge parameters

     Returns:
@@ -729,7 +825,7 @@ def _optimize_rt_tolerance(self, params: merge_defaults) -> float:
     rt_tol_range = getattr(params, 'rt_tol_range', (0.8, 2.0))
     rt_tol_steps = getattr(params, 'rt_tol_steps', 5)

-
+    study.logger.info(f"Optimizing RT tolerance in range {rt_tol_range} with {rt_tol_steps} steps")

     # Generate test values
     test_rt_tols = [rt_tol_range[0] + i * (rt_tol_range[1] - rt_tol_range[0]) / (rt_tol_steps - 1)
@@ -739,8 +835,8 @@ def _optimize_rt_tolerance(self, params: merge_defaults) -> float:
     best_score = float('inf')

     # Store original features for restoration
-    original_consensus_df = getattr(
-    original_consensus_mapping_df = getattr(
+    original_consensus_df = getattr(study, 'consensus_df', pl.DataFrame())
+    original_consensus_mapping_df = getattr(study, 'consensus_mapping_df', pl.DataFrame())

     for test_rt_tol in test_rt_tols:
         try:
@@ -750,18 +846,18 @@ def _optimize_rt_tolerance(self, params: merge_defaults) -> float:
             test_params.rt_tol = test_rt_tol

             # Run KD merge with test parameters
-            test_consensus_map = _merge_kd(
+            test_consensus_map = _merge_kd(study, test_params)

             # Extract consensus features temporarily for analysis
-
+            _extract_consensus_features(study, test_consensus_map, test_params.min_samples)

-            if len(
+            if len(study.consensus_df) == 0:
                 continue

             # Calculate oversegmentation metrics
-            oversegmentation_score = _calculate_oversegmentation_score(
+            oversegmentation_score = _calculate_oversegmentation_score(study, test_rt_tol)

-
+            study.logger.debug(f"RT tol {test_rt_tol:.1f}s: {len(study.consensus_df)} features, score: {oversegmentation_score:.3f}")

             # Lower score is better (less oversegmentation)
             if oversegmentation_score < best_score:
@@ -769,50 +865,50 @@ def _optimize_rt_tolerance(self, params: merge_defaults) -> float:
                 best_rt_tol = test_rt_tol

         except Exception as e:
-
+            study.logger.warning(f"RT tolerance optimization failed for {test_rt_tol}s: {e}")
             continue

     # Restore original consensus data
-
-
+    study.consensus_df = original_consensus_df
+    study.consensus_mapping_df = original_consensus_mapping_df

-
+    study.logger.info(f"Optimal RT tolerance: {best_rt_tol:.1f}s (score: {best_score:.3f})")
     return best_rt_tol


-def _calculate_oversegmentation_score(
+def _calculate_oversegmentation_score(study, rt_tol: float) -> float:
     """
     Calculate oversegmentation score based on feature density and RT spread metrics.
     Lower scores indicate less oversegmentation.

     Args:
-
+        study: Study object
         rt_tol: RT tolerance used

     Returns:
         Oversegmentation score (lower = better)
     """
-    if len(
+    if len(study.consensus_df) == 0:
         return float('inf')

     # Metric 1: Feature density (features per RT second)
-    rt_range =
+    rt_range = study.consensus_df['rt'].max() - study.consensus_df['rt'].min()
     if rt_range <= 0:
         return float('inf')

-    feature_density = len(
+    feature_density = len(study.consensus_df) / rt_range

     # Metric 2: Average RT spread relative to tolerance
-    rt_spreads = (
+    rt_spreads = (study.consensus_df['rt_max'] - study.consensus_df['rt_min'])
     avg_rt_spread_ratio = rt_spreads.mean() / rt_tol if rt_tol > 0 else float('inf')

     # Metric 3: Proportion of features with low sample counts (indicates fragmentation)
-    low_sample_features = len(
-    low_sample_ratio = low_sample_features / len(
+    low_sample_features = len(study.consensus_df.filter(pl.col('number_samples') <= 5))
+    low_sample_ratio = low_sample_features / len(study.consensus_df)

     # Metric 4: Number of features with excessive RT spread
     excessive_spread_features = len(rt_spreads.filter(rt_spreads > rt_tol * 2))
-    excessive_spread_ratio = excessive_spread_features / len(
+    excessive_spread_ratio = excessive_spread_features / len(study.consensus_df)

     # Combined score (weighted combination)
     oversegmentation_score = (
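The four metrics of the oversegmentation score appear in the hunk above, but the weighted combination itself falls in unchanged lines that the viewer does not show. The sketch below reassembles the scoring function with assumed weights purely for illustration; those weights are not taken from masster.

# Hedged sketch of the oversegmentation score; weights are assumptions.
import polars as pl

def oversegmentation_score(consensus_df: pl.DataFrame, rt_tol: float) -> float:
    if len(consensus_df) == 0 or rt_tol <= 0:
        return float("inf")

    rt_range = consensus_df["rt"].max() - consensus_df["rt"].min()
    if rt_range <= 0:
        return float("inf")

    feature_density = len(consensus_df) / rt_range                      # metric 1
    rt_spreads = consensus_df["rt_max"] - consensus_df["rt_min"]
    avg_rt_spread_ratio = rt_spreads.mean() / rt_tol                    # metric 2
    low_sample_ratio = (
        len(consensus_df.filter(pl.col("number_samples") <= 5)) / len(consensus_df)
    )                                                                   # metric 3
    excessive_spread_ratio = (
        len(rt_spreads.filter(rt_spreads > rt_tol * 2)) / len(consensus_df)
    )                                                                   # metric 4

    # Assumed weighting; lower score = less oversegmentation.
    return (
        0.1 * feature_density
        + 1.0 * avg_rt_spread_ratio
        + 2.0 * low_sample_ratio
        + 2.0 * excessive_spread_ratio
    )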
@@ -825,7 +921,7 @@ def _calculate_oversegmentation_score(self, rt_tol: float) -> float:
     return oversegmentation_score


-def _apply_kd_strict_postprocessing(
+def _apply_kd_strict_postprocessing(study, consensus_map: oms.ConsensusMap, params: merge_defaults) -> oms.ConsensusMap:
     """
     Apply post-processing quality control to KD consensus map.

@@ -837,20 +933,20 @@ def _apply_kd_strict_postprocessing(self, consensus_map: oms.ConsensusMap, param
         Processed consensus map with reduced oversegmentation
     """
     if consensus_map.size() == 0:
-
+        study.logger.warning("Empty consensus map provided to post-processing")
         return consensus_map

-
+    study.logger.debug(f"Post-processing {consensus_map.size()} initial consensus features")

     # Step 1: Extract initial consensus features
     original_min_samples = params.min_samples
     params.min_samples = 1  # Extract all features initially

-
-    initial_feature_count = len(
+    _extract_consensus_features(study, consensus_map, params.min_samples)
+    initial_feature_count = len(study.consensus_df)

     if initial_feature_count == 0:
-
+        study.logger.warning("No consensus features extracted for post-processing")
         params.min_samples = original_min_samples
         return consensus_map

@@ -858,67 +954,67 @@ def _apply_kd_strict_postprocessing(self, consensus_map: oms.ConsensusMap, param
     secondary_merge_rt_tol = getattr(params, 'secondary_merge_rt_tol', 0.5)
     secondary_merge_mz_tol = getattr(params, 'secondary_merge_mz_tol', 0.005)

-
-    merged_features = _perform_secondary_clustering(
+    study.logger.debug(f"Secondary clustering with RT≤{secondary_merge_rt_tol}s, m/z≤{secondary_merge_mz_tol}")
+    merged_features = _perform_secondary_clustering(study, secondary_merge_rt_tol, secondary_merge_mz_tol)

     # Step 3: Sample overlap validation
     min_sample_overlap = getattr(params, 'min_sample_overlap', 0.8)
     if min_sample_overlap > 0:
-
-        merged_features = _validate_sample_overlap(
+        study.logger.debug(f"Sample overlap validation (threshold: {min_sample_overlap})")
+        merged_features = _validate_sample_overlap(study, merged_features, min_sample_overlap)

     # Step 4: RT spread quality filtering
     if params.rt_tol is not None:
         max_rt_spread = getattr(params, 'max_rt_spread', params.rt_tol * 2)
         if max_rt_spread is not None:
-
-            merged_features = _filter_rt_spread(
+            study.logger.debug(f"RT spread filtering (max: {max_rt_spread:.1f}s)")
+            merged_features = _filter_rt_spread(study, merged_features, max_rt_spread)
         else:
-
+            study.logger.debug("Skipping RT spread filtering - max_rt_spread is None")
     else:
-
+        study.logger.debug("Skipping RT spread filtering - rt_tol is None")

     # Step 5: Chromatographic coherence filtering (optional)
     min_coherence = getattr(params, 'min_coherence', 0.0)
     if min_coherence > 0:
-
-        merged_features = _filter_coherence(
+        study.logger.debug(f"Chromatographic coherence filtering (min: {min_coherence})")
+        merged_features = _filter_coherence(study, merged_features, min_coherence)

     # Step 6: Rebuild consensus_df with filtered features and preserve mapping
-    original_mapping_df =
-
+    original_mapping_df = study.consensus_mapping_df.clone()  # Save original mapping
+    study.consensus_df = pl.DataFrame(merged_features, strict=False)

     # Step 7: Apply original min_samples filter
     params.min_samples = original_min_samples
     if params.min_samples > 1:
-        l1 = len(
-
+        l1 = len(study.consensus_df)
+        study.consensus_df = study.consensus_df.filter(
             pl.col("number_samples") >= params.min_samples
         )
-        filtered_count = l1 - len(
+        filtered_count = l1 - len(study.consensus_df)
         if filtered_count > 0:
-
+            study.logger.debug(f"Filtered {filtered_count} features below min_samples threshold ({params.min_samples})")

     # Step 8: Update consensus_mapping_df to match final consensus_df
-    if len(
-        valid_consensus_ids = set(
-
+    if len(study.consensus_df) > 0 and len(original_mapping_df) > 0:
+        valid_consensus_ids = set(study.consensus_df['consensus_uid'].to_list())
+        study.consensus_mapping_df = original_mapping_df.filter(
             pl.col('consensus_uid').is_in(list(valid_consensus_ids))
         )
     else:
-
+        study.consensus_mapping_df = pl.DataFrame()

-    final_feature_count = len(
+    final_feature_count = len(study.consensus_df)
     reduction_pct = ((initial_feature_count - final_feature_count) / initial_feature_count * 100) if initial_feature_count > 0 else 0

-
+    study.logger.info(f"Consensus cleanup complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")

     # Create a new consensus map for compatibility (the processed data is in consensus_df)
     processed_consensus_map = oms.ConsensusMap()
     return processed_consensus_map


-def _perform_secondary_clustering(
+def _perform_secondary_clustering(study, rt_tol: float, mz_tol: float) -> list:
     """
     Perform secondary clustering to merge very close features.

@@ -929,34 +1025,34 @@ def _perform_secondary_clustering(self, rt_tol: float, mz_tol: float) -> list:
     Returns:
         List of merged consensus feature dictionaries
     """
-    if len(
+    if len(study.consensus_df) == 0:
         return []

     # Convert consensus_df to list of dictionaries for clustering
     consensus_features = []
-    for i, row in enumerate(
+    for i, row in enumerate(study.consensus_df.iter_rows(named=True)):
         consensus_features.append(dict(row))

     # Use Union-Find for efficient clustering
     class UnionFind:
-        def __init__(
-
-
+        def __init__(study, n):
+            study.parent = list(range(n))
+            study.rank = [0] * n

-        def find(
-            if
-
-            return
+        def find(study, x):
+            if study.parent[x] != x:
+                study.parent[x] = study.find(study.parent[x])
+            return study.parent[x]

-        def union(
-            px, py =
+        def union(study, x, y):
+            px, py = study.find(x), study.find(y)
             if px == py:
                 return
-            if
+            if study.rank[px] < study.rank[py]:
                 px, py = py, px
-
-            if
-
+            study.parent[py] = px
+            if study.rank[px] == study.rank[py]:
+                study.rank[px] += 1

     n_features = len(consensus_features)
     uf = UnionFind(n_features)
@@ -992,7 +1088,7 @@ def _perform_secondary_clustering(self, rt_tol: float, mz_tol: float) -> list:
         merged_feature = _merge_feature_group(group)
         merged_features.append(merged_feature)

-
+    study.logger.debug(f"Secondary clustering: {n_features} → {len(merged_features)} features ({n_features - len(merged_features)} merged)")
     return merged_features

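Both the secondary clustering above and the chunk aggregation further down rely on the same union-find (disjoint-set) structure. Note that in the released code the first parameter of the nested class methods is literally named `study` as a side effect of the mechanical self→study rename; that is legal but unconventional Python. A conventional standalone version of the same structure, with path compression and union by rank, reads:

# Standalone union-find equivalent to the nested UnionFind/UF helpers in merge.py.
class UnionFind:
    def __init__(self, n: int):
        self.parent = list(range(n))
        self.rank = [0] * n

    def find(self, x: int) -> int:
        if self.parent[x] != x:
            self.parent[x] = self.find(self.parent[x])  # path compression
        return self.parent[x]

    def union(self, x: int, y: int) -> None:
        px, py = self.find(x), self.find(y)
        if px == py:
            return
        if self.rank[px] < self.rank[py]:
            px, py = py, px
        self.parent[py] = px
        if self.rank[px] == self.rank[py]:
            self.rank[px] += 1


uf = UnionFind(5)
uf.union(0, 1)
uf.union(1, 2)
print(uf.find(2) == uf.find(0))  # True: 0, 1 and 2 share one cluster root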
@@ -1066,7 +1162,7 @@ def _merge_feature_group(feature_group: list) -> dict:
     return merged


-def _validate_sample_overlap(
+def _validate_sample_overlap(study, features: list, min_overlap: float) -> list:
     """
     Validate that merged features have sufficient sample overlap.

@@ -1097,7 +1193,7 @@ def _validate_sample_overlap(self, features: list, min_overlap: float) -> list:
     return validated_features


-def _filter_rt_spread(
+def _filter_rt_spread(study, features: list, max_rt_spread: float) -> list:
     """
     Filter out features with excessive RT spread.

@@ -1122,12 +1218,12 @@ def _filter_rt_spread(self, features: list, max_rt_spread: float) -> list:
             filtered_count += 1

     if filtered_count > 0:
-
+        study.logger.debug(f"Filtered {filtered_count} features with excessive RT spread (>{max_rt_spread:.1f}s)")

     return filtered_features


-def _filter_coherence(
+def _filter_coherence(study, features: list, min_coherence: float) -> list:
     """
     Filter out features with low chromatographic coherence.

@@ -1150,23 +1246,23 @@ def _filter_coherence(self, features: list, min_coherence: float) -> list:
             filtered_count += 1

     if filtered_count > 0:
-
+        study.logger.debug(f"Filtered {filtered_count} features with low coherence (<{min_coherence})")

     return filtered_features


-def _merge_kd_nowarp(
+def _merge_kd_nowarp(study, params: merge_defaults) -> oms.ConsensusMap:
     """KD-tree based merge without RT warping"""

     # Generate temporary feature maps on-demand from features_df
-    temp_feature_maps = _generate_feature_maps_on_demand(
+    temp_feature_maps = _generate_feature_maps_on_demand(study)

     consensus_map = oms.ConsensusMap()
     file_descriptions = consensus_map.getColumnHeaders()

     for i, feature_map in enumerate(temp_feature_maps):
         file_description = file_descriptions.get(i, oms.ColumnHeader())
-        file_description.filename =
+        file_description.filename = study.samples_df.row(i, named=True)["sample_name"]
         file_description.size = feature_map.size()
         file_description.unique_id = feature_map.getUniqueId()
         file_descriptions[i] = file_description
@@ -1193,18 +1289,18 @@ def _merge_kd_nowarp(self, params: merge_defaults) -> oms.ConsensusMap:
     return consensus_map


-def _merge_kd_chunked(
+def _merge_kd_chunked(study, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> oms.ConsensusMap:
     """KD-based chunked merge with proper cross-chunk consensus building and optional parallel processing"""

     # Generate temporary feature maps on-demand from features_df
-    temp_feature_maps = _generate_feature_maps_on_demand(
+    temp_feature_maps = _generate_feature_maps_on_demand(study)

     n_samples = len(temp_feature_maps)
     if n_samples <= params.chunk_size:
-
-        consensus_map = _merge_kd(
+        study.logger.info(f"Dataset size ({n_samples}) ≤ chunk_size, using KD merge")
+        consensus_map = _merge_kd(study, params)
         # Extract consensus features to populate consensus_df for chunked method consistency
-
+        _extract_consensus_features(study, consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
         return consensus_map

     # Process in chunks
@@ -1213,21 +1309,21 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
         chunk_end = min(i + params.chunk_size, n_samples)
         chunks.append((i, temp_feature_maps[i:chunk_end]))

-
+    study.logger.debug(f"Processing {len(chunks)} chunks of max {params.chunk_size} samples using {params.threads or 'sequential'} thread(s)")

     # Process each chunk to create chunk consensus maps
     chunk_consensus_maps = []

     if params.threads is None:
         # Sequential processing (original behavior)
-        for chunk_idx, (chunk_start_idx, chunk_maps) in enumerate(tqdm(chunks, desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {
+        for chunk_idx, (chunk_start_idx, chunk_maps) in enumerate(tqdm(chunks, desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {study.log_label}KD Chunk", disable=study.log_level not in ["TRACE", "DEBUG", "INFO"])):
             chunk_consensus_map = oms.ConsensusMap()

             # Set up file descriptions for chunk
             file_descriptions = chunk_consensus_map.getColumnHeaders()
             for j, feature_map in enumerate(chunk_maps):
                 file_description = file_descriptions.get(j, oms.ColumnHeader())
-                file_description.filename =
+                file_description.filename = study.samples_df.row(chunk_start_idx + j, named=True)["sample_name"]
                 file_description.size = feature_map.size()
                 file_description.unique_id = feature_map.getUniqueId()
                 file_descriptions[j] = file_description
@@ -1255,7 +1351,7 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach

     else:
         # Parallel processing
-
+        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")

         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -1264,7 +1360,7 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
             chunk_sample_uids = []
             chunk_samples_df_rows = []
             for j in range(len(chunk_maps)):
-                sample_row =
+                sample_row = study.samples_df.row(chunk_start_idx + j, named=True)
                 chunk_sample_uids.append(sample_row['sample_uid'])
                 chunk_samples_df_rows.append(sample_row)

@@ -1272,7 +1368,7 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
             chunk_samples_df = pl.DataFrame(chunk_samples_df_rows)

             # Filter features_df for this chunk's samples and select only necessary columns
-            chunk_features_df =
+            chunk_features_df = study.features_df.filter(
                 pl.col('sample_uid').is_in(chunk_sample_uids)
             ).select([
                 'sample_uid', 'rt', 'mz', 'inty', 'charge', 'feature_id'
@@ -1316,22 +1412,22 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
                     serialized_chunk_results.append((chunk_start_idx, consensus_features))
                     completed_chunks += 1
                     n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-
+                    study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
                 except Exception as exc:
                     # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
                     if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
                         # Convert to RuntimeError so outer except block can catch it for fallback
                         raise RuntimeError(f"Windows multiprocessing failure: {exc}")
                     else:
-
+                        study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                         raise exc

         except (RuntimeError, OSError, BrokenProcessPool) as e:
             # Handle Windows multiprocessing issues - fallback to ThreadPoolExecutor
             if ("freeze_support" in str(e) or "spawn" in str(e) or "bootstrapping" in str(e) or
                 "process pool" in str(e).lower() or "Windows multiprocessing failure" in str(e)):
-
-
+                study.logger.warning(f"ProcessPoolExecutor failed (likely Windows multiprocessing issue): {e}")
+                study.logger.info(f"Falling back to ThreadPoolExecutor with {params.threads} threads")

                 with ThreadPoolExecutor(max_workers=params.threads) as executor:
                     # Submit all chunk processing tasks
@@ -1350,9 +1446,9 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
                             serialized_chunk_results.append((chunk_start_idx, consensus_features))
                             completed_chunks += 1
                             n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-
+                            study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
                         except Exception as exc:
-
+                            study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                             raise exc
             else:
                 # Re-raise other exceptions
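The chunked methods guard against Windows multiprocessing failures by catching BrokenProcessPool (and related spawn/freeze_support errors) and retrying the same work on a ThreadPoolExecutor. A minimal, generic sketch of that fallback pattern follows; `work` is a placeholder, not masster's chunk worker.

# Generic sketch of the ProcessPoolExecutor -> ThreadPoolExecutor fallback.
from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed
from concurrent.futures.process import BrokenProcessPool


def work(chunk: list[int]) -> int:
    return sum(chunk)


def run_chunks(chunks: list[list[int]], threads: int = 2) -> list[int]:
    try:
        with ProcessPoolExecutor(max_workers=threads) as executor:
            futures = {executor.submit(work, c): i for i, c in enumerate(chunks)}
            return [f.result() for f in as_completed(futures)]
    except (BrokenProcessPool, RuntimeError, OSError):
        # e.g. missing freeze_support() under the Windows spawn start method:
        # fall back to threads, which need no process bootstrapping.
        with ThreadPoolExecutor(max_workers=threads) as executor:
            return list(executor.map(work, chunks))


if __name__ == "__main__":
    print(sorted(run_chunks([[1, 2], [3, 4], [5]])))  # [3, 5, 7]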
@@ -1366,25 +1462,25 @@ def _merge_kd_chunked(self, params: merge_defaults, cached_adducts_df=None, cach

     # Merge chunk results with proper cross-chunk consensus building
     # _merge_chunk_results now handles both ConsensusMap objects (sequential) and serialized data (parallel)
-    _merge_chunk_results(
+    _merge_chunk_results(study, chunk_consensus_maps, params, cached_adducts_df, cached_valid_adducts)

-    # Return a dummy consensus map for compatibility (consensus features are stored in
+    # Return a dummy consensus map for compatibility (consensus features are stored in study.consensus_df)
     consensus_map = oms.ConsensusMap()
     return consensus_map


-def _merge_qt_chunked(
+def _merge_qt_chunked(study, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> oms.ConsensusMap:
     """QT-based chunked merge with proper cross-chunk consensus building and optional parallel processing"""

     # Generate temporary feature maps on-demand from features_df
-    temp_feature_maps = _generate_feature_maps_on_demand(
+    temp_feature_maps = _generate_feature_maps_on_demand(study)

     n_samples = len(temp_feature_maps)
     if n_samples <= params.chunk_size:
-
-        consensus_map = _merge_qt(
+        study.logger.info(f"Dataset size ({n_samples}) ≤ chunk_size, using QT merge")
+        consensus_map = _merge_qt(study, params)
         # Extract consensus features to populate consensus_df for chunked method consistency
-
+        _extract_consensus_features(study, consensus_map, params.min_samples, cached_adducts_df, cached_valid_adducts)
         return consensus_map

     # Process in chunks
@@ -1393,21 +1489,21 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
         chunk_end = min(i + params.chunk_size, n_samples)
         chunks.append((i, temp_feature_maps[i:chunk_end]))

-
+    study.logger.debug(f"Processing {len(chunks)} chunks of max {params.chunk_size} samples using {params.threads or 'sequential'} thread(s)")

     # Process each chunk to create chunk consensus maps
     chunk_consensus_maps = []

     if params.threads is None:
         # Sequential processing (original behavior)
-        for chunk_idx, (chunk_start_idx, chunk_maps) in enumerate(tqdm(chunks, desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {
+        for chunk_idx, (chunk_start_idx, chunk_maps) in enumerate(tqdm(chunks, desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {study.log_label}QT Chunk", disable=study.log_level not in ["TRACE", "DEBUG", "INFO"])):
             chunk_consensus_map = oms.ConsensusMap()

             # Set up file descriptions for chunk
             file_descriptions = chunk_consensus_map.getColumnHeaders()
             for j, feature_map in enumerate(chunk_maps):
                 file_description = file_descriptions.get(j, oms.ColumnHeader())
-                file_description.filename =
+                file_description.filename = study.samples_df.row(chunk_start_idx + j, named=True)["sample_name"]
                 file_description.size = feature_map.size()
                 file_description.unique_id = feature_map.getUniqueId()
                 file_descriptions[j] = file_description
@@ -1430,7 +1526,7 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach

     else:
         # Parallel processing
-
+        study.logger.info(f"Processing chunks in parallel using {params.threads} processes")

         # Prepare chunk data for parallel processing using features_df slices
         chunk_data_list = []
@@ -1439,7 +1535,7 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
             chunk_sample_uids = []
             chunk_samples_df_rows = []
             for j in range(len(chunk_maps)):
-                sample_row =
+                sample_row = study.samples_df.row(chunk_start_idx + j, named=True)
                 chunk_sample_uids.append(sample_row['sample_uid'])
                 chunk_samples_df_rows.append(sample_row)

@@ -1447,7 +1543,7 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
             chunk_samples_df = pl.DataFrame(chunk_samples_df_rows)

             # Filter features_df for this chunk's samples and select only necessary columns
-            chunk_features_df =
+            chunk_features_df = study.features_df.filter(
                 pl.col('sample_uid').is_in(chunk_sample_uids)
             ).select([
                 'sample_uid', 'rt', 'mz', 'inty', 'charge', 'feature_id'
@@ -1491,22 +1587,22 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
                     serialized_chunk_results.append((chunk_start_idx, consensus_features))
                     completed_chunks += 1
                     n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-
+                    study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
                 except Exception as exc:
                     # Check if this is a BrokenProcessPool exception from Windows multiprocessing issues
                     if isinstance(exc, BrokenProcessPool) or "process pool" in str(exc).lower():
                         # Convert to RuntimeError so outer except block can catch it for fallback
                         raise RuntimeError(f"Windows multiprocessing failure: {exc}")
                     else:
-
+                        study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                         raise exc

         except (RuntimeError, OSError, BrokenProcessPool) as e:
             # Handle Windows multiprocessing issues - fallback to ThreadPoolExecutor
             if ("freeze_support" in str(e) or "spawn" in str(e) or "bootstrapping" in str(e) or
                 "process pool" in str(e).lower() or "Windows multiprocessing failure" in str(e)):
-
-
+                study.logger.warning(f"ProcessPoolExecutor failed (likely Windows multiprocessing issue): {e}")
+                study.logger.info(f"Falling back to ThreadPoolExecutor with {params.threads} threads")

                 with ThreadPoolExecutor(max_workers=params.threads) as executor:
                     # Submit all chunk processing tasks
@@ -1525,9 +1621,9 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach
                             serialized_chunk_results.append((chunk_start_idx, consensus_features))
                             completed_chunks += 1
                             n_samples_in_chunk = len(chunk_data_list[chunk_idx]['chunk_samples_data'])
-
+                            study.logger.info(f"Completed chunk {completed_chunks}/{total_chunks} (samples {chunk_start_idx + 1}-{chunk_start_idx + n_samples_in_chunk})")
                         except Exception as exc:
-
+                            study.logger.error(f"Chunk {chunk_idx} generated an exception: {exc}")
                             raise exc
             else:
                 # Re-raise other exceptions
@@ -1541,14 +1637,14 @@ def _merge_qt_chunked(self, params: merge_defaults, cached_adducts_df=None, cach

     # Merge chunk results with proper cross-chunk consensus building
     # _merge_chunk_results now handles both ConsensusMap objects (sequential) and serialized data (parallel)
-    _merge_chunk_results(
+    _merge_chunk_results(study, chunk_consensus_maps, params, cached_adducts_df, cached_valid_adducts)

-    # Return a dummy consensus map for compatibility (consensus features are stored in
+    # Return a dummy consensus map for compatibility (consensus features are stored in study.consensus_df)
     consensus_map = oms.ConsensusMap()
     return consensus_map


-def _merge_chunk_results(
+def _merge_chunk_results(study, chunk_consensus_maps: list, params: merge_defaults, cached_adducts_df=None, cached_valid_adducts=None) -> None:
     """
     Scalable aggregation of chunk consensus maps into final consensus_df.

@@ -1561,7 +1657,8 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
     if len(chunk_consensus_maps) == 1:
         # Single chunk case - just extract using the true global min_samples.
         # No need for permissive threshold because we are not discarding singletons pre-aggregation.
-
+        _extract_consensus_features(
+            study,
             chunk_consensus_maps[0][1],
             params.min_samples,
             cached_adducts_df,
@@ -1572,10 +1669,10 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
     # Build feature_uid to feature_data lookup for fast access
     feature_uid_map = {
         row["feature_id"]: row["feature_uid"]
-        for row in
+        for row in study.features_df.iter_rows(named=True)
     }

-    features_lookup = _optimized_feature_lookup(
+    features_lookup = _optimized_feature_lookup(study, study.features_df)

     # Extract all consensus features from chunks with their feature_uids
     all_chunk_consensus = []
@@ -1717,8 +1814,8 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default

     if not all_chunk_consensus:
         # No valid consensus features found
-
-
+        study.consensus_df = pl.DataFrame()
+        study.consensus_mapping_df = pl.DataFrame()
         return

     # Perform cross-chunk clustering using optimized spatial indexing
@@ -1744,22 +1841,22 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
         features_by_bin[(rt_bin, mz_bin)].append(i)

     class UF:
-        def __init__(
-
-
-        def find(
-            if
-
-            return
-        def union(
-            pa, pb =
+        def __init__(study, n):
+            study.p = list(range(n))
+            study.r = [0]*n
+        def find(study, x):
+            if study.p[x] != x:
+                study.p[x] = study.find(study.p[x])
+            return study.p[x]
+        def union(study, a,b):
+            pa, pb = study.find(a), study.find(b)
             if pa == pb:
                 return
-            if
+            if study.r[pa] < study.r[pb]:
                 pa, pb = pb, pa
-
-            if
-
+            study.p[pb] = pa
+            if study.r[pa] == study.r[pb]:
+                study.r[pa] += 1

     uf = UF(n_features)
     checked = set()
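Cross-chunk clustering limits pair comparisons by bucketing consensus candidates into an (rt, m/z) grid, as the `features_by_bin[(rt_bin, mz_bin)].append(i)` context line above shows; the bin computation itself lies in unchanged lines. A hedged sketch of that tolerance-grid indexing, with made-up tolerances and features:

# Hedged sketch: bucket features by (rt // rt_tol, mz // mz_tol) so union-find
# only needs to check pairs within a bin and its 8 neighbours.
from collections import defaultdict

rt_tol, mz_tol = 2.0, 0.01
features = [(100.1, 300.101), (100.9, 300.104), (250.0, 512.207)]  # (rt, mz)

features_by_bin: dict[tuple[int, int], list[int]] = defaultdict(list)
for i, (rt, mz) in enumerate(features):
    rt_bin = int(rt // rt_tol)
    mz_bin = int(mz // mz_tol)
    features_by_bin[(rt_bin, mz_bin)].append(i)

def neighbour_candidates(i: int) -> list[int]:
    rt, mz = features[i]
    rt_bin, mz_bin = int(rt // rt_tol), int(mz // mz_tol)
    out = []
    for dr in (-1, 0, 1):
        for dm in (-1, 0, 1):
            out.extend(j for j in features_by_bin[(rt_bin + dr, mz_bin + dm)] if j != i)
    return out

print(neighbour_candidates(0))  # feature 1 is a candidate; feature 2 is far away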
@@ -1918,7 +2015,7 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
     # This allows proper cross-chunk consensus building before final filtering

     metadata = _calculate_consensus_statistics(
-
+        study,
         consensus_uid_counter,
         list(feature_data_acc.values()),
         rt_values_chunk,
@@ -1937,7 +2034,7 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default

     if rt_spread > max_allowed_spread:
         # Skip consensus features with excessive RT spread
-
+        study.logger.debug(f"Skipping consensus feature {consensus_uid_counter} with RT spread {rt_spread:.3f}s > {max_allowed_spread:.3f}s")
         consensus_uid_counter += 1
         continue

@@ -1969,27 +2066,27 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
         consensus_uid_counter += 1

     # Assign DataFrames
-
-
+    study.consensus_df = pl.DataFrame(consensus_metadata, strict=False)
+    study.consensus_mapping_df = pl.DataFrame(consensus_mapping_list, strict=False)

     # Ensure mapping only contains features from retained consensus_df
-    if len(
-        valid_consensus_ids = set(
-
+    if len(study.consensus_df) > 0:
+        valid_consensus_ids = set(study.consensus_df['consensus_uid'].to_list())
+        study.consensus_mapping_df = study.consensus_mapping_df.filter(
             pl.col('consensus_uid').is_in(list(valid_consensus_ids))
         )
     else:
-
+        study.consensus_mapping_df = pl.DataFrame()

     # Attach empty consensus_map placeholder for downstream compatibility
-
+    study.consensus_map = oms.ConsensusMap()
     return


 def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_list: list,
                                     rt_values: list, mz_values: list,
                                     intensity_values: list, quality_values: list,
-                                    number_features: int = None, number_samples: int = None,
+                                    number_features: int | None = None, number_samples: int | None = None,
                                     cached_adducts_df=None, cached_valid_adducts=None) -> dict:
     """
     Calculate comprehensive statistics for a consensus feature from aggregated feature data.
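The signature change from `int = None` to `int | None = None` makes the optional defaults explicit instead of pairing a non-optional annotation with a None default. A small sketch of the same pattern (PEP 604 unions in annotations require Python 3.10+ unless postponed evaluation is enabled); the function name is illustrative only:

# Illustrative only: explicit optional parameter with a PEP 604 union.
from __future__ import annotations


def count_or_default(number_features: int | None = None) -> int:
    # Treat None as "not yet computed" rather than relying on an implicit 0.
    return 0 if number_features is None else number_features


print(count_or_default(), count_or_default(7))  # 0 7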
@@ -2158,24 +2255,24 @@ def _cluster_consensus_features(features: list, rt_tol: float, mz_tol: float) ->
 
     # Use Union-Find for efficient clustering
     class UnionFind:
-        def __init__(self, n):
-            self.parent = list(range(n))
-            self.rank = [0] * n
+        def __init__(study, n):
+            study.parent = list(range(n))
+            study.rank = [0] * n
 
-        def find(self, x):
-            if self.parent[x] != x:
-                self.parent[x] = self.find(self.parent[x])
-            return self.parent[x]
+        def find(study, x):
+            if study.parent[x] != x:
+                study.parent[x] = study.find(study.parent[x])
+            return study.parent[x]
 
-        def union(self, x, y):
-            px, py = self.find(x), self.find(y)
+        def union(study, x, y):
+            px, py = study.find(x), study.find(y)
             if px == py:
                 return
-            if self.rank[px] < self.rank[py]:
+            if study.rank[px] < study.rank[py]:
                 px, py = py, px
-            self.parent[py] = px
-            if self.rank[px] == self.rank[py]:
-                self.rank[px] += 1
+            study.parent[py] = px
+            if study.rank[px] == study.rank[py]:
+                study.rank[px] += 1
 
     n_features = len(features)
     uf = UnionFind(n_features)
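`_cluster_consensus_features` unions any two features whose RT and m/z differences fall within `rt_tol` and `mz_tol`, then reads the clusters off the connected components. A rough sketch of that pairing step on toy `(rt, mz)` tuples; the real code avoids the quadratic scan, so treat this as illustration only:

    def cluster_by_tolerance(features, rt_tol, mz_tol):
        # features: list of (rt, mz) tuples; returns lists of indices per cluster.
        parent = list(range(len(features)))

        def find(x):
            while parent[x] != x:
                parent[x] = parent[parent[x]]  # path halving
                x = parent[x]
            return x

        def union(a, b):
            ra, rb = find(a), find(b)
            if ra != rb:
                parent[rb] = ra

        for i in range(len(features)):
            for j in range(i + 1, len(features)):
                if (abs(features[i][0] - features[j][0]) <= rt_tol
                        and abs(features[i][1] - features[j][1]) <= mz_tol):
                    union(i, j)

        groups = {}
        for i in range(len(features)):
            groups.setdefault(find(i), []).append(i)
        return list(groups.values())

    print(cluster_by_tolerance([(100.0, 300.10), (100.2, 300.11), (250.0, 450.2)],
                               rt_tol=0.5, mz_tol=0.02))
    # [[0, 1], [2]] -- only the first two features fall within both tolerances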
@@ -2208,39 +2305,39 @@ def _cluster_consensus_features(features: list, rt_tol: float, mz_tol: float) ->
     return list(groups_by_root.values())
 
 
-def _reset_consensus_data(self):
+def _reset_consensus_data(study):
     """Reset consensus-related DataFrames at the start of merge."""
-    self.consensus_df = pl.DataFrame()
-    self.consensus_ms2 = pl.DataFrame()
-    self.consensus_mapping_df = pl.DataFrame()
+    study.consensus_df = pl.DataFrame()
+    study.consensus_ms2 = pl.DataFrame()
+    study.consensus_mapping_df = pl.DataFrame()
 
 
-def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts_df=None, cached_valid_adducts=None):
+def _extract_consensus_features(study, consensus_map, min_samples, cached_adducts_df=None, cached_valid_adducts=None):
     """Extract consensus features and build metadata."""
-    # create a dict to map uid to feature_uid using self.features_df
+    # create a dict to map uid to feature_uid using study.features_df
     feature_uid_map = {
         row["feature_id"]: row["feature_uid"]
-        for row in self.features_df.iter_rows(named=True)
+        for row in study.features_df.iter_rows(named=True)
     }
     imax = consensus_map.size()
 
-    self.logger.debug(f"Found {imax} feature groups by clustering.")
+    study.logger.debug(f"Found {imax} feature groups by clustering.")
 
     # Pre-build fast lookup tables for features_df data using optimized approach
-    features_lookup = _optimized_feature_lookup(self, self.features_df)
+    features_lookup = _optimized_feature_lookup(study, study.features_df)
 
     # create a list to store the consensus mapping
     consensus_mapping = []
     metadata_list = []
 
-    tqdm_disable = self.log_level not in ["TRACE", "DEBUG"]
+    tqdm_disable = study.log_level not in ["TRACE", "DEBUG"]
 
     for i, feature in enumerate(
         tqdm(
             consensus_map,
             total=imax,
             disable=tqdm_disable,
-            desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Extract metadata",
+            desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {study.log_label}Extract metadata",
         ),
     ):
         # get all features in the feature map with the same unique id as the consensus feature
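The extraction loop wraps the OpenMS consensus map in `tqdm`, hides the bar unless the study log level is TRACE or DEBUG, and formats `desc` to look like the logger's timestamped prefix. The same configuration in isolation (the log level and label below are stand-ins for the study's attributes):

    from datetime import datetime
    from tqdm import tqdm

    log_level = "INFO"       # assumed study setting
    log_label = "[study] "   # placeholder for study.log_label
    tqdm_disable = log_level not in ["TRACE", "DEBUG"]

    for item in tqdm(
        range(1000),
        total=1000,
        disable=tqdm_disable,  # keep ordinary runs quiet
        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {log_label}Extract metadata",
    ):
        pass  # per-consensus-feature work would happen here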
@@ -2486,7 +2583,7 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
             adduct_mass_shift_top = 1.007825
         else:
             # No valid adducts found - assign default based on study polarity
-            study_polarity = getattr(self, "polarity", "positive")
+            study_polarity = getattr(study, "polarity", "positive")
             if study_polarity in ["negative", "neg"]:
                 # Negative mode default
                 adduct_top = "[M-?]1-"
@@ -2618,55 +2715,55 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
     )
 
     consensus_mapping_df = pl.DataFrame(consensus_mapping)
-    # remove all rows in consensus_mapping_df where consensus_id is not in self.featured_df['uid']
+    # remove all rows in consensus_mapping_df where consensus_id is not in study.featured_df['uid']
     l1 = len(consensus_mapping_df)
     consensus_mapping_df = consensus_mapping_df.filter(
-        pl.col("feature_uid").is_in(self.features_df["feature_uid"].to_list()),
+        pl.col("feature_uid").is_in(study.features_df["feature_uid"].to_list()),
     )
-    self.logger.debug(
+    study.logger.debug(
         f"Filtered {l1 - len(consensus_mapping_df)} orphan features from maps.",
     )
-    self.consensus_mapping_df = consensus_mapping_df
-    self.consensus_df = pl.DataFrame(metadata_list, strict=False)
+    study.consensus_mapping_df = consensus_mapping_df
+    study.consensus_df = pl.DataFrame(metadata_list, strict=False)
 
     if min_samples is None:
         min_samples = 1
     if min_samples < 1:
-        min_samples = int(min_samples * len(self.samples_df))
+        min_samples = int(min_samples * len(study.samples_df))
 
     # Validate that min_samples doesn't exceed the number of samples
-    if min_samples > len(self.samples_df):
-        self.logger.warning(
-            f"min_samples ({min_samples}) exceeds the number of samples ({len(self.samples_df)}). "
-            f"Setting min_samples to {len(self.samples_df)}.",
+    if min_samples > len(study.samples_df):
+        study.logger.warning(
+            f"min_samples ({min_samples}) exceeds the number of samples ({len(study.samples_df)}). "
+            f"Setting min_samples to {len(study.samples_df)}.",
         )
-        min_samples = len(self.samples_df)
+        min_samples = len(study.samples_df)
 
     # filter out consensus features with less than min_samples features
-    l1 = len(self.consensus_df)
-    self.consensus_df = self.consensus_df.filter(
+    l1 = len(study.consensus_df)
+    study.consensus_df = study.consensus_df.filter(
         pl.col("number_samples") >= min_samples,
     )
-    self.logger.debug(
-        f"Filtered {l1 - len(self.consensus_df)} consensus features with less than {min_samples} samples.",
+    study.logger.debug(
+        f"Filtered {l1 - len(study.consensus_df)} consensus features with less than {min_samples} samples.",
    )
     # filter out consensus mapping with less than min_samples features
-    self.consensus_mapping_df = self.consensus_mapping_df.filter(
-        pl.col("consensus_uid").is_in(self.consensus_df["consensus_uid"].to_list()),
+    study.consensus_mapping_df = study.consensus_mapping_df.filter(
+        pl.col("consensus_uid").is_in(study.consensus_df["consensus_uid"].to_list()),
     )
 
-    self.consensus_map = consensus_map
+    study.consensus_map = consensus_map
 
 
-def _perform_adduct_grouping(self, rt_tol, mz_tol):
+def _perform_adduct_grouping(study, rt_tol, mz_tol):
     """Perform adduct grouping on consensus features."""
     import polars as pl
 
     # Add adduct grouping and adduct_of assignment
-    if len(self.consensus_df) > 0:
+    if len(study.consensus_df) > 0:
         # Get relevant columns for grouping
         consensus_data = []
-        for row in self.consensus_df.iter_rows(named=True):
+        for row in study.consensus_df.iter_rows(named=True):
             consensus_data.append(
                 {
                     "consensus_uid": row["consensus_uid"],
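Both here and in `_finalize_merge`, `min_samples` may be given as an absolute count or as a fraction below 1; fractions are converted against the sample count and the result is clamped to the number of samples. The rule on its own:

    def resolve_min_samples(min_samples, n_samples):
        # Mirrors the validation shown in the diff, outside the Study object.
        if min_samples is None:
            min_samples = 1
        if min_samples < 1:                  # values below 1 are treated as a fraction of samples
            min_samples = int(min_samples * n_samples)
        if min_samples > n_samples:          # clamp to the available samples
            min_samples = n_samples
        return min_samples

    print(resolve_min_samples(0.5, 120))   # 60
    print(resolve_min_samples(500, 120))   # 120
    print(resolve_min_samples(None, 120))  # 1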
@@ -2679,11 +2776,11 @@ def _perform_adduct_grouping(self, rt_tol, mz_tol):
 
         # Use optimized adduct grouping
         adduct_group_list, adduct_of_list = _optimized_adduct_grouping(
-            self, consensus_data, rt_tol, mz_tol
+            study, consensus_data, rt_tol, mz_tol
         )
 
         # Add the new columns to consensus_df
-        self.consensus_df = self.consensus_df.with_columns(
+        study.consensus_df = study.consensus_df.with_columns(
             [
                 pl.Series("adduct_group", adduct_group_list, dtype=pl.Int64),
                 pl.Series("adduct_of", adduct_of_list, dtype=pl.Int64),
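The grouping results are attached to `consensus_df` as two new integer columns through `with_columns` and `pl.Series`. The same polars pattern on a toy frame (values are made up):

    import polars as pl

    consensus_df = pl.DataFrame({"consensus_uid": [1, 2, 3], "mz": [301.14, 323.12, 150.05]})
    adduct_group_list = [0, 0, 1]  # e.g. output of a grouping step
    adduct_of_list = [1, 1, 3]     # uid of each group's reference feature

    consensus_df = consensus_df.with_columns(
        [
            pl.Series("adduct_group", adduct_group_list, dtype=pl.Int64),
            pl.Series("adduct_of", adduct_of_list, dtype=pl.Int64),
        ]
    )
    print(consensus_df)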
@@ -2691,7 +2788,7 @@ def _perform_adduct_grouping(self, rt_tol, mz_tol):
         )
 
 
-def _count_tight_clusters(self, mz_tol: float = 0.04, rt_tol: float = 0.3) -> int:
+def _count_tight_clusters(study, mz_tol: float = 0.04, rt_tol: float = 0.3) -> int:
     """
     Count consensus features grouped in tight clusters.
 
@@ -2702,12 +2799,12 @@ def _count_tight_clusters(self, mz_tol: float = 0.04, rt_tol: float = 0.3) -> in
     Returns:
         Number of tight clusters found
     """
-    if len(self.consensus_df) < 2:
+    if len(study.consensus_df) < 2:
         return 0
 
     # Extract consensus feature data
     consensus_data = []
-    for row in self.consensus_df.iter_rows(named=True):
+    for row in study.consensus_df.iter_rows(named=True):
         consensus_data.append({
             'consensus_uid': row['consensus_uid'],
             'mz': row['mz'],
@@ -2768,7 +2865,7 @@ def _count_tight_clusters(self, mz_tol: float = 0.04, rt_tol: float = 0.3) -> in
     return tight_clusters_count
 
 
-def _consensus_cleanup(self, rt_tol, mz_tol):
+def _consensus_cleanup(study, rt_tol, mz_tol):
     """
     Consensus cleanup to merge over-segmented consensus features and remove isotopic features.
 
@@ -2777,20 +2874,20 @@ def _consensus_cleanup(self, rt_tol, mz_tol):
     (too many features in very tight m/z and RT windows)
     2. Performs deisotoping to remove +1 and +2 isotopic features
     """
-    if len(self.consensus_df) == 0:
+    if len(study.consensus_df) == 0:
         return
 
-    initial_count = len(self.consensus_df)
+    initial_count = len(study.consensus_df)
 
     # Only perform enhanced post-clustering if there are many features
     if initial_count < 50:
         return
 
-    self.logger.debug(f"Enhanced post-clustering: processing {initial_count} consensus features")
+    study.logger.debug(f"Enhanced post-clustering: processing {initial_count} consensus features")
 
     # Find tight clusters using spatial binning
     consensus_data = []
-    for row in self.consensus_df.iter_rows(named=True):
+    for row in study.consensus_df.iter_rows(named=True):
         consensus_data.append({
             'consensus_uid': row['consensus_uid'],
             'mz': row['mz'],
@@ -2873,7 +2970,7 @@ def _consensus_cleanup(self, rt_tol, mz_tol):
     if not merge_groups:
         return
 
-    self.logger.debug(f"Found {len(merge_groups)} over-segmented clusters to merge")
+    study.logger.debug(f"Found {len(merge_groups)} over-segmented clusters to merge")
 
     # Merge clusters by keeping the most representative feature
     uids_to_remove = set()
@@ -2892,25 +2989,25 @@ def _consensus_cleanup(self, rt_tol, mz_tol):
 
     if uids_to_remove:
         # Remove merged features from consensus_df
-        self.consensus_df = self.consensus_df.filter(
+        study.consensus_df = study.consensus_df.filter(
             ~pl.col('consensus_uid').is_in(list(uids_to_remove))
         )
 
         # Also update consensus_mapping_df if it exists
-        if hasattr(self, 'consensus_mapping_df') and not self.consensus_mapping_df.is_empty():
-            self.consensus_mapping_df = self.consensus_mapping_df.filter(
+        if hasattr(study, 'consensus_mapping_df') and not study.consensus_mapping_df.is_empty():
+            study.consensus_mapping_df = study.consensus_mapping_df.filter(
                 ~pl.col('consensus_uid').is_in(list(uids_to_remove))
             )
 
-    final_count = len(self.consensus_df)
+    final_count = len(study.consensus_df)
     reduction = initial_count - final_count
     reduction_pct = (reduction / initial_count) * 100
 
     if reduction > 0:
-        self.logger.debug(f"Enhanced post-clustering: {initial_count} → {final_count} features ({reduction_pct:.1f}% reduction)")
+        study.logger.debug(f"Enhanced post-clustering: {initial_count} → {final_count} features ({reduction_pct:.1f}% reduction)")
 
     # Step 2: Deisotoping - Remove +1 and +2 isotopic consensus features
-    pre_deisotoping_count = len(self.consensus_df)
+    pre_deisotoping_count = len(study.consensus_df)
     isotope_uids_to_remove = set()
 
     # Use strict tolerances for deisotoping (same as declustering)
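Dropping the merged (and later the isotopic) consensus features hinges on negating an `is_in` mask so that `consensus_df` and `consensus_mapping_df` are filtered with the same set of uids. In isolation, with toy data:

    import polars as pl

    consensus_df = pl.DataFrame({"consensus_uid": [1, 2, 3, 4], "mz": [300.1, 300.1, 410.2, 520.3]})
    consensus_mapping_df = pl.DataFrame({"consensus_uid": [1, 1, 2, 3, 4], "feature_uid": [10, 11, 12, 13, 14]})

    uids_to_remove = {2, 4}  # features judged redundant by the cleanup step

    consensus_df = consensus_df.filter(~pl.col("consensus_uid").is_in(list(uids_to_remove)))
    consensus_mapping_df = consensus_mapping_df.filter(~pl.col("consensus_uid").is_in(list(uids_to_remove)))

    print(consensus_df["consensus_uid"].to_list())          # [1, 3]
    print(consensus_mapping_df["consensus_uid"].to_list())  # [1, 1, 3]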
@@ -2919,7 +3016,7 @@ def _consensus_cleanup(self, rt_tol, mz_tol):
 
     # Get current consensus data for isotope detection
     current_consensus_data = []
-    for row in self.consensus_df.iter_rows(named=True):
+    for row in study.consensus_df.iter_rows(named=True):
         current_consensus_data.append({
             'consensus_uid': row['consensus_uid'],
             'mz': row['mz'],
@@ -2970,31 +3067,31 @@ def _consensus_cleanup(self, rt_tol, mz_tol):
 
     # Remove isotopic features
     if isotope_uids_to_remove:
-        self.consensus_df = self.consensus_df.filter(
+        study.consensus_df = study.consensus_df.filter(
             ~pl.col('consensus_uid').is_in(list(isotope_uids_to_remove))
         )
 
         # Also update consensus_mapping_df if it exists
-        if hasattr(self, 'consensus_mapping_df') and not self.consensus_mapping_df.is_empty():
-            self.consensus_mapping_df = self.consensus_mapping_df.filter(
+        if hasattr(study, 'consensus_mapping_df') and not study.consensus_mapping_df.is_empty():
+            study.consensus_mapping_df = study.consensus_mapping_df.filter(
                 ~pl.col('consensus_uid').is_in(list(isotope_uids_to_remove))
             )
 
-    post_deisotoping_count = len(self.consensus_df)
+    post_deisotoping_count = len(study.consensus_df)
     isotope_reduction = pre_deisotoping_count - post_deisotoping_count
 
     if isotope_reduction > 0:
-        self.logger.debug(f"Deisotoping: {pre_deisotoping_count} → {post_deisotoping_count} features ({isotope_reduction} isotopic features removed)")
+        study.logger.debug(f"Deisotoping: {pre_deisotoping_count} → {post_deisotoping_count} features ({isotope_reduction} isotopic features removed)")
 
     # Final summary
-    final_count = len(self.consensus_df)
+    final_count = len(study.consensus_df)
     total_reduction = initial_count - final_count
     if total_reduction > 0:
         total_reduction_pct = (total_reduction / initial_count) * 100
-        self.logger.debug(f"Consensus cleanup complete: {initial_count} → {final_count} features ({total_reduction_pct:.1f}% total reduction)")
+        study.logger.debug(f"Consensus cleanup complete: {initial_count} → {final_count} features ({total_reduction_pct:.1f}% total reduction)")
 
 
-def _identify_adduct_by_mass_shift(self, rt_tol, cached_adducts_df=None):
+def _identify_adduct_by_mass_shift(study, rt_tol, cached_adducts_df=None):
     """
     Identify coeluting consensus features by characteristic mass shifts between adducts
     and update their adduct information accordingly.
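Deisotoping drops consensus features that sit one or two ¹³C spacings (≈1.0034 Da divided by the charge) above a coeluting feature. The hunks above only show the bookkeeping, so the m/z test such a step can apply looks roughly like the sketch below; the tolerance and rules are illustrative, not masster's exact criteria:

    C13_SPACING = 1.0033548  # mass difference between 13C and 12C, in Da

    def looks_isotopic(mz_candidate, mz_base, charge=1, mz_tol=0.01):
        """True if mz_candidate is about 1 or 2 isotope spacings above mz_base."""
        delta = mz_candidate - mz_base
        for k in (1, 2):  # +1 and +2 isotopologues
            if abs(delta - k * C13_SPACING / charge) <= mz_tol:
                return True
        return False

    print(looks_isotopic(302.1445, 301.1411))             # True  (+1 isotope, z=1)
    print(looks_isotopic(301.6428, 301.1411, charge=2))   # True  (+1 isotope, z=2)
    print(looks_isotopic(303.5000, 301.1411))             # False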
@@ -3014,23 +3111,24 @@ def _identify_adduct_by_mass_shift(self, rt_tol, cached_adducts_df=None):
     from collections import defaultdict
 
     # Check if consensus_df exists and has features
-    if len(self.consensus_df) == 0:
-        self.logger.debug("No consensus features for adduct identification by mass shift")
+    if len(study.consensus_df) == 0:
+        study.logger.debug("No consensus features for adduct identification by mass shift")
         return
 
-    self.logger.info(f"Identifying coeluting adducts by mass shifts in {len(self.consensus_df)} consensus features...")
+    study.logger.info(f"Identifying coeluting adducts by mass shifts in {len(study.consensus_df)} consensus features...")
 
     # Get adducts DataFrame if not provided
     if cached_adducts_df is None or cached_adducts_df.is_empty():
         try:
             # Use lower min_probability for better adduct coverage in mass shift identification
-            cached_adducts_df = self._get_adducts(min_probability=0.01)
+            from masster.study.id import _get_adducts
+            cached_adducts_df = _get_adducts(study, min_probability=0.01)
         except Exception as e:
-            self.logger.warning(f"Could not retrieve adducts for mass shift identification: {e}")
+            study.logger.warning(f"Could not retrieve adducts for mass shift identification: {e}")
             return
 
     if cached_adducts_df.is_empty():
-        self.logger.debug("No adducts available for mass shift identification")
+        study.logger.debug("No adducts available for mass shift identification")
         return
 
     # Build catalogue of mass shifts between adducts
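The identification step builds a catalogue of characteristic m/z differences between adduct forms and then looks for coeluting feature pairs whose Δm/z matches an entry within tolerance. A simplified sketch of that matching idea; the adduct shift values are the usual proton/sodium/ammonium ones, while the helper and tolerances are made up:

    # Singly charged positive-mode adduct mass shifts relative to the neutral molecule (Da).
    ADDUCT_SHIFTS = {"[M+H]1+": 1.007276, "[M+Na]1+": 22.989218, "[M+NH4]1+": 18.033823}

    # Catalogue of pairwise m/z differences between adduct forms.
    mass_shift_catalog = {}
    names = list(ADDUCT_SHIFTS)
    for i, a in enumerate(names):
        for b in names[i + 1:]:
            mass_shift_catalog[round(abs(ADDUCT_SHIFTS[a] - ADDUCT_SHIFTS[b]), 4)] = (a, b)

    def match_pair(mz1, mz2, rt1, rt2, rt_tol=2.0, mz_tol=0.005):
        """Return the adduct pair whose catalogued shift explains two coeluting features, if any."""
        if abs(rt1 - rt2) > rt_tol:
            return None  # not coeluting
        dmz = abs(mz1 - mz2)
        for shift, pair in mass_shift_catalog.items():
            if abs(dmz - shift) <= mz_tol:
                return pair
        return None

    # A 180.0634 Da neutral seen as [M+H]+ and [M+Na]+ at the same retention time:
    print(match_pair(181.0707, 203.0526, rt1=301.2, rt2=301.6))  # ('[M+H]1+', '[M+Na]1+')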
@@ -3081,11 +3179,11 @@ def _identify_adduct_by_mass_shift(self, rt_tol, cached_adducts_df=None):
                 "to_charge": charge2
             })
 
-    self.logger.debug(f"Generated mass shift catalog with {len(mass_shift_catalog)} unique shifts")
+    study.logger.debug(f"Generated mass shift catalog with {len(mass_shift_catalog)} unique shifts")
 
     # Get consensus features data
     consensus_data = []
-    for i, row in enumerate(self.consensus_df.iter_rows(named=True)):
+    for i, row in enumerate(study.consensus_df.iter_rows(named=True)):
         consensus_data.append({
             "index": i,
             "consensus_uid": row["consensus_uid"],
@@ -3234,7 +3332,7 @@ def _identify_adduct_by_mass_shift(self, rt_tol, cached_adducts_df=None):
                         }
 
                         updated_count += 2
-                        self.logger.debug(
+                        study.logger.debug(
                             f"Identified adduct pair: {from_adduct_name} (m/z {from_feature['mz']:.4f}) "
                             f"<-> {to_adduct_name} (m/z {to_feature['mz']:.4f}), "
                             f"RT {rt1:.2f}s, Δm/z {mz_diff:.4f}"
@@ -3244,7 +3342,7 @@ def _identify_adduct_by_mass_shift(self, rt_tol, cached_adducts_df=None):
     # Apply updates to consensus_df
     if adduct_updates:
         # Prepare update data
-        consensus_uids = self.consensus_df["consensus_uid"].to_list()
+        consensus_uids = study.consensus_df["consensus_uid"].to_list()
 
         new_adduct_top = []
         new_adduct_charge_top = []
@@ -3261,88 +3359,88 @@ def _identify_adduct_by_mass_shift(self, rt_tol, cached_adducts_df=None):
             else:
                 # Keep existing values
                 row_idx = consensus_uids.index(uid)
-                row = self.consensus_df.row(row_idx, named=True)
+                row = study.consensus_df.row(row_idx, named=True)
                 new_adduct_top.append(row.get("adduct_top"))
                 new_adduct_charge_top.append(row.get("adduct_charge_top"))
                 new_adduct_mass_neutral_top.append(row.get("adduct_mass_neutral_top"))
                 new_adduct_mass_shift_top.append(row.get("adduct_mass_shift_top"))
 
         # Update the DataFrame
-        self.consensus_df = self.consensus_df.with_columns([
+        study.consensus_df = study.consensus_df.with_columns([
             pl.Series("adduct_top", new_adduct_top),
             pl.Series("adduct_charge_top", new_adduct_charge_top),
             pl.Series("adduct_mass_neutral_top", new_adduct_mass_neutral_top),
             pl.Series("adduct_mass_shift_top", new_adduct_mass_shift_top)
         ])
 
-        self.logger.info(f"Updated adduct assignments for {updated_count} consensus features based on mass shifts")
+        study.logger.info(f"Updated adduct assignments for {updated_count} consensus features based on mass shifts")
     else:
-        self.logger.debug("No consensus features updated based on mass shift analysis")
+        study.logger.debug("No consensus features updated based on mass shift analysis")
 
 
-def _finalize_merge(self, link_ms2, min_samples):
+def _finalize_merge(study, link_ms2, min_samples):
     """Complete the merge process with final calculations and cleanup."""
     import polars as pl
 
     # Check if consensus_df is empty or missing required columns
-    if len(self.consensus_df) == 0 or "number_samples" not in self.consensus_df.columns:
-        self.logger.debug("No consensus features found or consensus_df is empty. Skipping finalize merge.")
+    if len(study.consensus_df) == 0 or "number_samples" not in study.consensus_df.columns:
+        study.logger.debug("No consensus features found or consensus_df is empty. Skipping finalize merge.")
         return
 
     # Validate min_samples parameter
     if min_samples is None:
         min_samples = 1
     if min_samples < 1:
-        min_samples = int(min_samples * len(self.samples_df))
+        min_samples = int(min_samples * len(study.samples_df))
 
     # Validate that min_samples doesn't exceed the number of samples
-    if min_samples > len(self.samples_df):
-        self.logger.warning(
-            f"min_samples ({min_samples}) exceeds the number of samples ({len(self.samples_df)}). "
-            f"Setting min_samples to {len(self.samples_df)}.",
+    if min_samples > len(study.samples_df):
+        study.logger.warning(
+            f"min_samples ({min_samples}) exceeds the number of samples ({len(study.samples_df)}). "
+            f"Setting min_samples to {len(study.samples_df)}.",
         )
-        min_samples = len(self.samples_df)
+        min_samples = len(study.samples_df)
 
     # Filter out consensus features with less than min_samples features
-    l1 = len(self.consensus_df)
-    self.consensus_df = self.consensus_df.filter(
+    l1 = len(study.consensus_df)
+    study.consensus_df = study.consensus_df.filter(
         pl.col("number_samples") >= min_samples,
     )
-    self.logger.debug(
-        f"Filtered {l1 - len(self.consensus_df)} consensus features with less than {min_samples} samples.",
+    study.logger.debug(
+        f"Filtered {l1 - len(study.consensus_df)} consensus features with less than {min_samples} samples.",
     )
 
     # Filter out consensus mapping with less than min_samples features
-    self.consensus_mapping_df = self.consensus_mapping_df.filter(
-        pl.col("consensus_uid").is_in(self.consensus_df["consensus_uid"].to_list()),
+    study.consensus_mapping_df = study.consensus_mapping_df.filter(
+        pl.col("consensus_uid").is_in(study.consensus_df["consensus_uid"].to_list()),
     )
 
     # Calculate the completeness of the consensus map
     # Log completion with tight cluster metrics
-    if len(self.consensus_df) > 0 and len(self.samples_df) > 0:
+    if len(study.consensus_df) > 0 and len(study.samples_df) > 0:
         c = (
-            len(self.consensus_mapping_df)
-            / len(self.consensus_df)
-            / len(self.samples_df)
+            len(study.consensus_mapping_df)
+            / len(study.consensus_df)
+            / len(study.samples_df)
         )
 
         # Count tight clusters with specified thresholds
-        tight_clusters = _count_tight_clusters(self,mz_tol=0.04, rt_tol=0.3)
+        tight_clusters = _count_tight_clusters(study,mz_tol=0.04, rt_tol=0.3)
 
-        self.logger.info(
-            f"Merging completed. Consensus features: {len(self.consensus_df)}. "
+        study.logger.info(
+            f"Merging completed. Consensus features: {len(study.consensus_df)}. "
             f"Completeness: {c:.2f}. Tight clusters left: {tight_clusters}.",
         )
     else:
-        self.logger.warning(
-            f"Merging completed with empty result. Consensus features: {len(self.consensus_df)}. "
+        study.logger.warning(
+            f"Merging completed with empty result. Consensus features: {len(study.consensus_df)}. "
            f"This may be due to min_samples ({min_samples}) being too high for the available data.",
         )
 
     # add iso data from raw files.
-    self.find_iso()
+    study.find_iso()
     if link_ms2:
-        self.find_ms2()
+        study.find_ms2()
 
 
 def _optimized_feature_lookup(study_obj, features_df):
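The completeness value logged above is the number of feature-to-consensus links divided by (consensus features × samples), so 1.0 means every consensus feature was detected in every sample. With assumed numbers:

    # Toy counts, plugged into the same formula used in _finalize_merge:
    n_links = 1800      # rows in consensus_mapping_df
    n_consensus = 500   # rows in consensus_df
    n_samples = 6       # rows in samples_df

    completeness = n_links / n_consensus / n_samples
    print(f"Completeness: {completeness:.2f}")  # 0.60 -> features seen in 60% of samples on average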
@@ -3419,24 +3517,24 @@ def _optimized_adduct_grouping(study_obj, consensus_data, rt_tol, mz_tol):
 
     # Union-Find for efficient grouping
     class UnionFind:
-        def __init__(self, n):
-            self.parent = list(range(n))
-            self.rank = [0] * n
+        def __init__(study, n):
+            study.parent = list(range(n))
+            study.rank = [0] * n
 
-        def find(self, x):
-            if self.parent[x] != x:
-                self.parent[x] = self.find(self.parent[x])
-            return self.parent[x]
+        def find(study, x):
+            if study.parent[x] != x:
+                study.parent[x] = study.find(study.parent[x])
+            return study.parent[x]
 
-        def union(self, x, y):
-            px, py = self.find(x), self.find(y)
+        def union(study, x, y):
+            px, py = study.find(x), study.find(y)
             if px == py:
                 return
-            if self.rank[px] < self.rank[py]:
+            if study.rank[px] < study.rank[py]:
                 px, py = py, px
-            self.parent[py] = px
-            if self.rank[px] == self.rank[py]:
-                self.rank[px] += 1
+            study.parent[py] = px
+            if study.rank[px] == study.rank[py]:
+                study.rank[px] += 1
 
     uid_to_idx = {feature[0]: i for i, feature in enumerate(valid_features)}
     uf = UnionFind(len(valid_features))