masster-0.4.20-py3-none-any.whl → masster-0.4.21-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/__init__.py +6 -0
- masster/_version.py +1 -1
- masster/sample/h5.py +58 -1
- masster/sample/load.py +7 -1
- masster/sample/plot.py +56 -65
- masster/sample/processing.py +158 -0
- masster/sample/sample.py +2 -0
- masster/sample/sample5_schema.json +3 -0
- masster/sample/save.py +137 -59
- masster/spectrum.py +58 -9
- masster/study/export.py +238 -152
- masster/study/h5.py +65 -1
- masster/study/helpers.py +3 -3
- masster/study/merge.py +25 -10
- masster/study/plot.py +39 -2
- masster/study/processing.py +257 -1
- masster/study/save.py +48 -5
- masster/study/study.py +16 -3
- masster/study/study5_schema.json +3 -0
- masster/wizard/__init__.py +5 -2
- masster/wizard/wizard.py +430 -1866
- {masster-0.4.20.dist-info → masster-0.4.21.dist-info}/METADATA +1 -1
- {masster-0.4.20.dist-info → masster-0.4.21.dist-info}/RECORD +26 -28
- masster/wizard/test_structure.py +0 -49
- masster/wizard/test_wizard.py +0 -285
- {masster-0.4.20.dist-info → masster-0.4.21.dist-info}/WHEEL +0 -0
- {masster-0.4.20.dist-info → masster-0.4.21.dist-info}/entry_points.txt +0 -0
- {masster-0.4.20.dist-info → masster-0.4.21.dist-info}/licenses/LICENSE +0 -0
masster/study/merge.py
CHANGED
@@ -400,7 +400,7 @@ def merge(self, **kwargs) -> None:
     # Feature maps will be generated on-demand within each merge method

     self.logger.info(
-        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da
+        f"Merge: {params.method}, samples={params.min_samples}, rt_tol={params.rt_tol}s, mz_tol={params.mz_tol}Da"
     )

     # Initialize
@@ -446,7 +446,7 @@ def merge(self, **kwargs) -> None:
     # Note: _merge_qt_chunked populates consensus_df directly, no need to extract

     # Enhanced post-clustering to merge over-segmented features (for qt and kd methods)
-    if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked']:
+    if params.method in ['qt', 'sensitivity', 'qt_chunked', 'kd_chunked', 'quality']:
         self._consensus_cleanup(params.rt_tol, params.mz_tol)

     # Perform adduct grouping
@@ -705,11 +705,11 @@ def _merge_kd_strict(self, params: merge_defaults) -> oms.ConsensusMap:
     optimized_params = params

     # Phase 1: Standard KD clustering
-    self.logger.
+    self.logger.debug("Initial KD clustering")
     consensus_map = _merge_kd(self, optimized_params)

     # Phase 2: Post-processing quality control
-    self.logger.
+    self.logger.debug("Post-processing quality control")
     consensus_map = _apply_kd_strict_postprocessing(self, consensus_map, optimized_params)

     return consensus_map
@@ -911,7 +911,7 @@ def _apply_kd_strict_postprocessing(self, consensus_map: oms.ConsensusMap, param
     final_feature_count = len(self.consensus_df)
     reduction_pct = ((initial_feature_count - final_feature_count) / initial_feature_count * 100) if initial_feature_count > 0 else 0

-    self.logger.info(f"
+    self.logger.info(f"Consensus cleanup complete: {initial_feature_count} → {final_feature_count} features ({reduction_pct:.1f}% reduction)")

     # Create a new consensus map for compatibility (the processed data is in consensus_df)
     processed_consensus_map = oms.ConsensusMap()
@@ -1691,8 +1691,12 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
         mz_min_local = mz_max_local = consensus_mz

         # Store chunk consensus with feature tracking
+        # Generate unique 16-character consensus_id string
+        import uuid
+        consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
         chunk_consensus_data = {
-            'consensus_id':
+            'consensus_id': consensus_id_str,
            'chunk_idx': chunk_idx,
            'chunk_start_idx': chunk_start_idx,
            'mz': consensus_mz,
@@ -1710,7 +1714,6 @@ def _merge_chunk_results(self, chunk_consensus_maps: list, params: merge_default
         }

         all_chunk_consensus.append(chunk_consensus_data)
-        consensus_id_counter += 1

     if not all_chunk_consensus:
         # No valid consensus features found
@@ -2094,9 +2097,13 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
         ms2_count += len(ms2_scans)

     # Build consensus metadata
+    # Generate unique 16-character consensus_id string
+    import uuid
+    consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
     return {
         "consensus_uid": int(consensus_uid),
-        "consensus_id":
+        "consensus_id": consensus_id_str,  # Use unique 16-char string ID
         "quality": round(float(np.mean(quality_values)), 3) if len(quality_values) > 0 else 1.0,
         "number_samples": number_samples if number_samples is not None else len(feature_data_list),
         "rt": round(float(np.mean(rt_values)), 4) if len(rt_values) > 0 else 0.0,
@@ -2118,6 +2125,7 @@ def _calculate_consensus_statistics(study_obj, consensus_uid: int, feature_data_
         "chrom_prominence_mean": round(float(np.mean(prominence_values)), 0) if len(prominence_values) > 0 else 0.0,
         "chrom_prominence_scaled_mean": round(float(np.mean(prominence_scaled_values)), 3) if len(prominence_scaled_values) > 0 else 0.0,
         "chrom_height_scaled_mean": round(float(np.mean(height_scaled_values)), 3) if len(height_scaled_values) > 0 else 0.0,
+        "iso": None,  # Will be filled by find_iso() function
         "iso_mean": round(float(np.mean(iso_values)), 2) if len(iso_values) > 0 else 0.0,
         "charge_mean": round(float(np.mean(charge_values)), 2) if len(charge_values) > 0 else 0.0,
         "number_ms2": int(ms2_count),
@@ -2509,10 +2517,14 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
         if ms2_scans is not None:
             ms2_count += len(ms2_scans)

+        # Generate unique 16-character consensus_id string (UUID-based)
+        import uuid
+        consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
         metadata_list.append(
             {
                 "consensus_uid": int(i),  # "consensus_id": i,
-                "consensus_id":
+                "consensus_id": consensus_id_str,  # Use unique 16-char string ID
                 "quality": round(float(feature.getQuality()), 3),
                 "number_samples": len(feature_data_list),
                 # "number_ext": int(len(features_list)),
@@ -2577,6 +2589,7 @@ def _extract_consensus_features(self, consensus_map, min_samples, cached_adducts
                 )
                 if len(height_scaled_values) > 0
                 else 0.0,
+                "iso": None,  # Will be filled by find_iso() function
                 "iso_mean": round(float(np.mean(iso_values)), 2)
                 if len(iso_values) > 0
                 else 0.0,
@@ -3325,7 +3338,9 @@ def _finalize_merge(self, link_ms2, min_samples):
         f"Merging completed with empty result. Consensus features: {len(self.consensus_df)}. "
         f"This may be due to min_samples ({min_samples}) being too high for the available data.",
     )
-
+
+    # add iso data from raw files.
+    self.find_iso()
     if link_ms2:
         self.find_ms2()
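Note on the ID change above: the sequential counter (the removed consensus_id_counter += 1) is replaced by a UUID-derived string. As a standalone sketch, not masster code: a uuid4 string contains only hex digits and dashes, so the resulting 16-character ID always parses as base-16, which is what _save_consensusXML in save.py (further below) relies on.

    import uuid

    # 16-character hex string, exactly as generated in the hunks above
    consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]

    # Round-trip used later when exporting to consensusXML
    consensus_uid = int(consensus_id_str, 16)  # always valid: uuid4 is pure hex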
masster/study/plot.py
CHANGED
@@ -42,9 +42,10 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             from bokeh.io.export import export_png
             export_png(plot_object, filename=filename)
             logger.info(f"Plot saved to: {abs_filename}")
-        except Exception:
+        except Exception as e:
             # Fall back to HTML if PNG export not available
             html_filename = filename.replace('.png', '.html')
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.png', '.html')
             from bokeh.resources import Resources
             from bokeh.embed import file_html
@@ -54,7 +55,7 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
             with open(html_filename, 'w', encoding='utf-8') as f:
                 f.write(html)

-            logger.warning(f"PNG export not available
+            logger.warning(f"PNG export not available ({str(e)}). Use export_png. Saved as HTML instead: {abs_html_filename}")
     elif filename.endswith(".pdf"):
         # Try to save as PDF, fall back to HTML if not available
         try:
@@ -74,6 +75,26 @@ def _isolated_save_plot(plot_object, filename, abs_filename, logger, plot_title=
                 f.write(html)

             logger.warning(f"PDF export not available, saved as HTML instead: {html_filename}")
+    elif filename.endswith(".svg"):
+        # Try to save as SVG, fall back to HTML if not available
+        try:
+            from bokeh.io.export import export_svg
+            export_svg(plot_object, filename=filename)
+            logger.info(f"Plot saved to: {abs_filename}")
+        except Exception as e:
+            # Fall back to HTML if SVG export not available
+            html_filename = filename.replace('.svg', '.html')
+            abs_html_filename = html_filename if abs_filename == filename else abs_filename.replace('.svg', '.html')
+            from bokeh.resources import Resources
+            from bokeh.embed import file_html
+
+            resources = Resources(mode='cdn')
+            html = file_html(plot_object, resources, title=plot_title)
+
+            with open(html_filename, 'w', encoding='utf-8') as f:
+                f.write(html)
+
+            logger.warning(f"SVG export not available ({str(e)}). Saved as HTML instead: {abs_html_filename}")
     else:
         # Default to HTML for unknown extensions using isolated approach
         from bokeh.resources import Resources
@@ -181,6 +202,22 @@ def _isolated_save_panel_plot(panel_obj, filename, abs_filename, logger, plot_ti
             logger.warning(f"PDF export not available, saved as HTML instead: {abs_html_filename}")
         except Exception as e:
             logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
+    elif filename.endswith(".svg"):
+        # Try to save as SVG, fall back to HTML if not available
+        try:
+            from bokeh.io.export import export_svg
+            bokeh_layout = panel_obj.get_root()
+            export_svg(bokeh_layout, filename=filename)
+            logger.info(f"{plot_title} saved to: {abs_filename}")
+        except Exception as e:
+            # Fall back to HTML if SVG export not available
+            html_filename = filename.replace('.svg', '.html')
+            abs_html_filename = os.path.abspath(html_filename)
+            try:
+                panel_obj.save(html_filename, embed=True)
+                logger.warning(f"SVG export not available ({str(e)}), saved as HTML instead: {abs_html_filename}")
+            except Exception as e:
+                logger.error(f"Failed to save {plot_title} as HTML fallback: {e}")
     else:
         # Default to HTML for unknown extensions
         try:
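The new .svg branches mirror the existing PNG/PDF pattern: attempt a native export, and on any failure write a self-contained HTML file instead. A minimal standalone sketch of that pattern (the fig name is hypothetical; bokeh's export_svg needs selenium plus a webdriver, which is exactly the failure mode the fallback covers):

    from bokeh.plotting import figure

    fig = figure(title="demo")
    fig.line([1, 2, 3], [4, 6, 5])
    fig.output_backend = "svg"  # SVG rendering backend, required for SVG export

    try:
        from bokeh.io.export import export_svg
        export_svg(fig, filename="demo.svg")  # raises if no webdriver is available
    except Exception:
        # Degrade to an HTML file built against CDN-hosted BokehJS
        from bokeh.resources import Resources
        from bokeh.embed import file_html

        with open("demo.html", "w", encoding="utf-8") as f:
            f.write(file_html(fig, Resources(mode="cdn"), title="demo"))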
masster/study/processing.py
CHANGED
@@ -59,6 +59,17 @@ def align(self, **kwargs):
     """
     # parameters initialization
     params = align_defaults()
+
+    # Handle 'params' keyword argument specifically (like merge does)
+    if 'params' in kwargs:
+        provided_params = kwargs.pop('params')
+        if isinstance(provided_params, align_defaults):
+            params = provided_params
+            self.logger.debug("Using provided align_defaults parameters from 'params' argument")
+        else:
+            self.logger.warning("'params' argument is not an align_defaults instance, ignoring")
+
+    # Process remaining kwargs
     for key, value in kwargs.items():
         if isinstance(value, align_defaults):
             params = value
@@ -72,7 +83,7 @@ def align(self, **kwargs):
                 f"Failed to set parameter {key} = {value} (validation failed)",
             )
         else:
-            self.logger.
+            self.logger.warning(f"Unknown parameter '{key}' ignored")
     # end of parameter initialization

     # Store parameters in the Study object
@@ -825,6 +836,11 @@ def _align_kd_algorithm(study_obj, fmaps, params):
         f"Align time axes with rt_tol={params.get('rt_tol')}, min_samples={params.get('min_samples')}, max_points={max_points}",
     )

+    # Check if feature maps are empty before proceeding
+    if not fmaps:
+        study_obj.logger.error("No feature maps available for alignment. Cannot proceed with alignment.")
+        raise ValueError("No feature maps available for alignment. This usually indicates that all samples failed to load properly.")
+
     # Choose reference map (largest number of features)
     ref_index = max(range(len(fmaps)), key=lambda i: fmaps[i].size())
     ref_map = fmaps[ref_index]
@@ -1003,3 +1019,243 @@ def _align_pose_clustering_fallback(study_obj, fmaps, params):
         transformer.transformRetentionTimes(fm, trafo, True)

     study_obj.alignment_ref_index = ref_index
+
+
+def find_iso(self, rt_tol=0.1, mz_tol=0.01):
+    """
+    Find isotope patterns for consensus features by searching raw MS1 data.
+    OPTIMIZED VERSION: Each sample file is loaded only once for maximum efficiency.
+
+    For each consensus feature:
+    1. Find the associated feature with highest intensity
+    2. Load the corresponding sample5 file to access raw MS1 data
+    3. Use original_rt (before alignment) to find the correct scan
+    4. Search for isotope patterns in raw MS1 spectra
+    5. Look for isotope patterns: 0.33, 0.50, 0.66, 1.00, 1.50, 2.00, 3.00, 4.00, 5.00 Da
+    6. Store results as numpy arrays with [mz, inty] in the iso column
+
+    Parameters:
+        rt_tol (float): RT tolerance for scan matching in seconds
+        mz_tol (float): Additional m/z tolerance for isotope matching in Da
+    """
+    if self.consensus_df is None or self.consensus_df.is_empty():
+        self.logger.error("No consensus features found. Please run merge() first.")
+        return
+
+    if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
+        self.logger.error("No consensus mapping found. Please run merge() first.")
+        return
+
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.error("No features found.")
+        return
+
+    if self.samples_df is None or self.samples_df.is_empty():
+        self.logger.error("No samples found.")
+        return
+
+    # Add iso column if it doesn't exist
+    if "iso" not in self.consensus_df.columns:
+        self.consensus_df = self.consensus_df.with_columns(
+            pl.lit(None, dtype=pl.Object).alias("iso")
+        )
+
+    self.logger.info("Extracting isotopomers from raw MS1 data...")
+
+    # Isotope mass shifts to search for (up to 7x 13C isotopes)
+    isotope_shifts = [
+        0.33,
+        0.50,
+        0.66,
+        1.00335,
+        1.50502,
+        2.00670,
+        3.01005,
+        4.01340,
+        5.01675,
+        6.02010,
+        7.02345,
+    ]
+
+    consensus_iso_data = {}
+
+    # SUPER OPTIMIZATION: Vectorized pre-calculation using joins (10-100x faster)
+    self.logger.debug("Building sample-to-consensus mapping using vectorized operations...")
+
+    # Step 1: Join consensus_mapping with features to get intensities in one operation
+    consensus_with_features = self.consensus_mapping_df.join(
+        self.features_df.select(['feature_uid', 'sample_uid', 'inty', 'mz', 'rt', 'rt_original']),
+        on=['feature_uid', 'sample_uid'],
+        how='left'
+    )
+
+    # Step 2: Find the best feature (highest intensity) for each consensus using window functions
+    best_features = consensus_with_features.with_columns(
+        pl.col('inty').fill_null(0)  # Handle null intensities
+    ).with_columns(
+        pl.col('inty').max().over('consensus_uid').alias('max_inty')
+    ).filter(
+        pl.col('inty') == pl.col('max_inty')
+    ).group_by('consensus_uid').first()  # Take first if there are ties
+
+    # Step 3: Join with samples to get sample paths in one operation
+    best_features_with_paths = best_features.join(
+        self.samples_df.select(['sample_uid', 'sample_path']),
+        on='sample_uid',
+        how='left'
+    ).filter(
+        pl.col('sample_path').is_not_null()
+    )
+
+    # Step 4: Group by sample path for batch processing (much faster than nested loops)
+    sample_to_consensus = {}
+    for row in best_features_with_paths.iter_rows(named=True):
+        sample_path = row['sample_path']
+        consensus_uid = row['consensus_uid']
+
+        # Create feature data dictionary for compatibility
+        feature_data = {
+            'mz': row['mz'],
+            'rt': row['rt'],
+            'rt_original': row.get('rt_original', row['rt']),
+            'inty': row['inty']
+        }
+
+        if sample_path not in sample_to_consensus:
+            sample_to_consensus[sample_path] = []
+
+        sample_to_consensus[sample_path].append((consensus_uid, feature_data))
+
+    # Initialize failed consensus features (those not in the mapping)
+    processed_consensus_uids = set(best_features_with_paths['consensus_uid'].to_list())
+    for consensus_row in self.consensus_df.iter_rows(named=True):
+        consensus_uid = consensus_row["consensus_uid"]
+        if consensus_uid not in processed_consensus_uids:
+            consensus_iso_data[consensus_uid] = None
+
+    self.logger.debug(f"Will read {len(sample_to_consensus)} unique sample files for {len(self.consensus_df)} consensus features")
+
+    tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
+
+    # OPTIMIZATION 2: Process by sample file (load each file only once)
+    for sample_path, consensus_list in tqdm(
+        sample_to_consensus.items(),
+        desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Read files",
+        disable=tdqm_disable,
+    ):
+        try:
+            # Load MS1 data once per sample
+            ms1_df = self._load_ms1(sample_path)
+
+            if ms1_df is None or ms1_df.is_empty():
+                # Mark all consensus features from this sample as failed
+                for consensus_uid, _ in consensus_list:
+                    consensus_iso_data[consensus_uid] = None
+                continue
+
+            # Process all consensus features for this sample
+            for consensus_uid, best_feature in consensus_list:
+                # Get the original RT (before alignment correction)
+                base_mz = best_feature["mz"]
+                original_rt = best_feature.get("rt_original", best_feature["rt"])
+
+                # Find MS1 scans near the original RT
+                rt_min = original_rt - rt_tol
+                rt_max = original_rt + rt_tol
+
+                # Filter MS1 data for scans within RT window
+                ms1_window = ms1_df.filter(
+                    (pl.col("rt") >= rt_min) & (pl.col("rt") <= rt_max)
+                )
+
+                if ms1_window.is_empty():
+                    consensus_iso_data[consensus_uid] = None
+                    continue
+
+                isotope_matches = []
+
+                # Search for each isotope shift
+                for shift in isotope_shifts:
+                    target_mz = base_mz + shift
+                    mz_min_iso = target_mz - mz_tol
+                    mz_max_iso = target_mz + mz_tol
+
+                    # Find peaks in MS1 data within m/z tolerance
+                    isotope_peaks = ms1_window.filter(
+                        (pl.col("mz") >= mz_min_iso) & (pl.col("mz") <= mz_max_iso)
+                    )
+
+                    if not isotope_peaks.is_empty():
+                        # Get the peak with maximum intensity for this isotope
+                        max_peak = isotope_peaks.filter(
+                            pl.col("inty") == pl.col("inty").max()
+                        ).row(0, named=True)
+
+                        # Store as float with specific precision: m/z to 4 decimals, intensity rounded to integer
+                        mz_formatted = round(float(max_peak["mz"]), 4)
+                        inty_formatted = float(round(max_peak["inty"]))  # Round to integer, but keep as float
+                        isotope_matches.append([mz_formatted, inty_formatted])

+                # Store results as numpy array
+                if isotope_matches:
+                    consensus_iso_data[consensus_uid] = np.array(isotope_matches)
+                else:
+                    consensus_iso_data[consensus_uid] = None
+
+        except Exception as e:
+            self.logger.warning(f"Failed to load MS1 data from {sample_path}: {e}")
+            # Mark all consensus features from this sample as failed
+            for consensus_uid, _ in consensus_list:
+                consensus_iso_data[consensus_uid] = None
+            continue
+
+    # Update consensus_df with isotope data
+    # Create mapping function for update
+    def get_iso_data(uid):
+        return consensus_iso_data.get(uid, None)
+
+    # Update the iso column
+    self.consensus_df = self.consensus_df.with_columns(
+        pl.col("consensus_uid").map_elements(
+            lambda uid: get_iso_data(uid),
+            return_dtype=pl.Object
+        ).alias("iso")
+    )
+
+    # Count how many consensus features have isotope data
+    iso_count = sum(1 for data in consensus_iso_data.values() if data is not None and len(data) > 0)
+
+    self.logger.info(f"Optimized isotope detection completed. Found isotope patterns for {iso_count}/{len(self.consensus_df)} consensus features.")
+
+
+def reset_iso(self):
+    """
+    Reset the iso column in consensus_df to None, clearing all isotope data.
+
+    This function clears any previously computed isotope patterns from the
+    consensus_df, setting the 'iso' column to None for all features. This
+    is useful before re-running isotope detection with different parameters
+    or to clear isotope data entirely.
+
+    Returns:
+        None
+    """
+    if self.consensus_df is None:
+        self.logger.warning("No consensus_df found. Nothing to reset.")
+        return
+
+    if "iso" not in self.consensus_df.columns:
+        self.logger.warning("No 'iso' column found in consensus_df. Nothing to reset.")
+        return
+
+    # Count how many features currently have isotope data
+    iso_count = self.consensus_df.select(
+        pl.col("iso").is_not_null().sum().alias("count")
+    ).item(0, "count")
+
+    # Reset the iso column to None
+    self.consensus_df = self.consensus_df.with_columns(
+        pl.lit(None, dtype=pl.Object).alias("iso")
+    )
+
+    self.logger.info(f"Reset isotope data for {iso_count} features. All 'iso' values set to None.")
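Taken together with the _finalize_merge hunk above (merge() now ends by calling find_iso()), a typical use of the new methods might look like the sketch below; the Study setup is a placeholder and the tolerances are only illustrative:

    from masster import Study  # assumed public export; see masster/__init__.py

    study = Study()            # placeholder: a Study that has been loaded and aligned
    study.merge()              # builds consensus_df and now also runs find_iso()

    # Re-run isotope extraction with wider tolerances
    study.reset_iso()                        # clear the 'iso' column first
    study.find_iso(rt_tol=0.2, mz_tol=0.02)  # search raw MS1 data again

    # Each non-null 'iso' cell holds a numpy array of [mz, inty] rows
    print(study.consensus_df.select(["consensus_uid", "iso"]).head())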
masster/study/save.py
CHANGED
@@ -154,13 +154,56 @@ def save_samples(self, samples=None):


 def _save_consensusXML(self, filename: str):
-    if self.
-        self.logger.error("No consensus
+    if self.consensus_df is None or self.consensus_df.is_empty():
+        self.logger.error("No consensus features found.")
         return
-
+
+    # Build consensus map from consensus_df with proper consensus_id values
+    import pyopenms as oms
+    consensus_map = oms.ConsensusMap()
+
+    # Set up file descriptions for all samples
+    file_descriptions = consensus_map.getColumnHeaders()
+    if hasattr(self, 'samples_df') and not self.samples_df.is_empty():
+        for i, sample_row in enumerate(self.samples_df.iter_rows(named=True)):
+            file_description = file_descriptions.get(i, oms.ColumnHeader())
+            file_description.filename = sample_row.get("sample_name", f"sample_{i}")
+            file_description.size = 0  # Will be updated if needed
+            file_description.unique_id = i + 1
+            file_descriptions[i] = file_description
+        consensus_map.setColumnHeaders(file_descriptions)
+
+    # Add consensus features to the map (simplified version without individual features)
+    for consensus_row in self.consensus_df.iter_rows(named=True):
+        consensus_feature = oms.ConsensusFeature()
+
+        # Set basic properties
+        consensus_feature.setRT(float(consensus_row.get("rt", 0.0)))
+        consensus_feature.setMZ(float(consensus_row.get("mz", 0.0)))
+        consensus_feature.setIntensity(float(consensus_row.get("inty_mean", 0.0)))
+        consensus_feature.setQuality(float(consensus_row.get("quality", 1.0)))
+
+        # Set the unique consensus_id as the unique ID
+        consensus_id_str = consensus_row.get("consensus_id", "")
+        if consensus_id_str and len(consensus_id_str) == 16:
+            try:
+                # Convert 16-character hex string to integer for OpenMS
+                consensus_uid = int(consensus_id_str, 16)
+                consensus_feature.setUniqueId(consensus_uid)
+            except ValueError:
+                # Fallback to hash if not hex
+                consensus_feature.setUniqueId(hash(consensus_id_str) & 0x7FFFFFFFFFFFFFFF)
+        else:
+            # Fallback to consensus_uid
+            consensus_feature.setUniqueId(consensus_row.get("consensus_uid", 0))
+
+        consensus_map.push_back(consensus_feature)
+
+    # Save the consensus map
     fh = oms.ConsensusXMLFile()
-    fh.store(filename,
-    self.logger.debug(f"Saved consensus map to {filename}")
+    fh.store(filename, consensus_map)
+    self.logger.debug(f"Saved consensus map with {len(self.consensus_df)} features to {filename}")
+    self.logger.debug("Features use unique 16-character consensus_id strings")


 def save_consensus(self, **kwargs):
masster/study/study.py
CHANGED
@@ -55,6 +55,7 @@ import polars as pl
 from masster.study.h5 import _load_study5
 from masster.study.h5 import _save_study5
 from masster.study.h5 import _save_study5_compressed
+from masster.study.h5 import _load_ms1
 from masster.study.helpers import _get_consensus_uids
 from masster.study.helpers import _get_feature_uids
 from masster.study.helpers import _get_sample_uids
@@ -126,6 +127,8 @@ from masster.study.merge import _finalize_merge
 from masster.study.merge import _count_tight_clusters
 from masster.study.processing import integrate
 from masster.study.processing import find_ms2
+from masster.study.processing import find_iso
+from masster.study.processing import reset_iso
 from masster.study.parameters import store_history
 from masster.study.parameters import get_parameters
 from masster.study.parameters import update_parameters
@@ -385,6 +388,9 @@ class Study:
     merge = merge
     find_consensus = merge  # Backward compatibility alias
     find_ms2 = find_ms2
+    find_iso = find_iso
+    reset_iso = reset_iso
+    iso_reset = reset_iso
     integrate = integrate
     integrate_chrom = integrate  # Backward compatibility alias
     fill = fill
@@ -421,9 +427,11 @@ class Study:
     set_source = set_source
     sample_color = sample_color
     sample_color_reset = sample_color_reset
+    reset_sample_color = sample_color_reset
     name_replace = sample_name_replace
     name_reset = sample_name_reset
-
+    reset_name = sample_name_reset
+
     # === Data Compression and Storage ===
     compress = compress
     compress_features = compress_features
@@ -436,8 +444,10 @@ class Study:

     # === Reset Operations ===
     fill_reset = fill_reset
+    reset_fill = fill_reset
     align_reset = align_reset
-
+    reset_align = align_reset
+
     # === Plotting and Visualization ===
     plot_alignment = plot_alignment
     plot_chrom = plot_chrom
@@ -461,8 +471,10 @@ class Study:
     identify = identify
     get_id = get_id
     id_reset = id_reset
+    reset_id = id_reset
     lib_reset = lib_reset
-
+    reset_lib = lib_reset
+
     # === Parameter Management ===
     store_history = store_history
     get_parameters = get_parameters
@@ -478,6 +490,7 @@ class Study:
     _load_study5 = _load_study5
     _save_study5 = _save_study5
     _save_study5_compressed = _save_study5_compressed
+    _load_ms1 = _load_ms1
     _get_consensus_uids = _get_consensus_uids
     _get_feature_uids = _get_feature_uids
     _get_sample_uids = _get_sample_uids
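The new reset_* names are plain class-level aliases: assigning an already-defined function to a second attribute binds both names to the same underlying method. A minimal illustration of the pattern (not masster code):

    class Example:
        def fill_reset(self):
            return "reset"

        reset_fill = fill_reset  # verb-first alias; same function object

    e = Example()
    assert e.fill_reset() == e.reset_fill() == "reset"
    assert Example.reset_fill is Example.fill_reset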
masster/study/study5_schema.json
CHANGED
masster/wizard/__init__.py
CHANGED
@@ -7,8 +7,11 @@ alignment, merging, plotting, and export.

 The create_script() function allows immediate generation of standalone analysis
 scripts without creating a Wizard instance first.
+
+The execute() function combines create_script() with immediate execution of the
+generated script for fully automated processing.
 """

-from .wizard import Wizard, wizard_def, create_script
+from .wizard import Wizard, wizard_def, create_script, execute

-__all__ = ["Wizard", "wizard_def", "create_script"]
+__all__ = ["Wizard", "wizard_def", "create_script", "execute"]
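With the new export, execute() is importable directly from the subpackage. Its signature is not shown in this diff, so only the import below is verifiable; consult masster/wizard/wizard.py for the accepted arguments:

    from masster.wizard import create_script, execute  # both public as of 0.4.21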