masster 0.5.17__py3-none-any.whl → 0.5.18__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster might be problematic.

masster/study/load.py CHANGED
@@ -161,7 +161,8 @@ def add_sample(self, file, type=None, reset=False, adducts=None):
         bool: True if successful, False otherwise.
     """
 
-    success = self._add_sample_optimized(
+    success = _add_sample_noms1(
+        self,
         file,
         type=type,
         reset=reset,
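
Note: the hunk above reroutes add_sample from the removed self._add_sample_optimized wrapper to the module-level _add_sample_noms1 helper, passing the study object explicitly as the first argument. A minimal sketch of that delegation pattern, with simplified signatures and a placeholder body rather than masster's actual logic:

# Sketch only: the helper body below is illustrative, not the real implementation.
def _add_sample_noms1(study, file, type=None, reset=False, adducts=None):
    """Add `file` to `study` without materializing the full MS1 table."""
    # ... real loading/feature-finding logic lives in masster/study/load.py ...
    return True

def add_sample(self, file, type=None, reset=False, adducts=None):
    # The public entry point now forwards to the module-level helper and
    # passes the study instance explicitly as the first argument.
    success = _add_sample_noms1(self, file, type=type, reset=reset, adducts=adducts)
    return success
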
@@ -1031,511 +1032,6 @@ def _process_sample_for_parallel_fill(
 
     return new_features, new_mapping, counter
 
-'''
-def _load_ms1_optimized(self, sample_path, mz_ranges, rt_ranges):
-    """
-    OPTIMIZED: Load only the MS1 data we actually need instead of the entire file.
-    Pre-filter by m/z and RT ranges to reduce memory usage and processing time.
-    """
-    try:
-        # Load full MS1 data (we'll optimize this further later)
-        ms1_data = self._load_ms1(filename=sample_path)
-        if ms1_data is None or ms1_data.is_empty():
-            return ms1_data
-
-        # OPTIMIZATION: Pre-filter to only relevant m/z ranges to reduce data size
-        if mz_ranges:
-            # Build comprehensive m/z filter covering all ranges
-            mz_min = min(r[0] for r in mz_ranges)
-            mz_max = max(r[1] for r in mz_ranges)
-
-            # Pre-filter by broad m/z range first (much faster than multiple OR conditions)
-            ms1_filtered = ms1_data.filter(
-                (pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max)
-            )
-
-            # If we have RT ranges, also pre-filter by RT
-            if rt_ranges and len(rt_ranges) > 0:
-                rt_min = min(r[0] for r in rt_ranges)
-                rt_max = max(r[1] for r in rt_ranges)
-                ms1_filtered = ms1_filtered.filter(
-                    (pl.col("rt") >= rt_min) & (pl.col("rt") <= rt_max)
-                )
-
-            return ms1_filtered
-
-        return ms1_data
-
-    except Exception:
-        return pl.DataFrame()
-'''
-
-'''
-def _create_empty_features(self, consensus_uids, consensus_info, sample_uid, features_df_max_uid):
-    """Create empty features for consensus UIDs when no MS1 data is available."""
-    new_features = []
-    new_mapping = []
-
-    for i, consensus_uid in enumerate(consensus_uids):
-        cons = consensus_info[consensus_uid]
-        feature_uid = features_df_max_uid + i + 1
-
-        # Create minimal empty feature
-        empty_eic = Chromatogram(
-            rt=np.array([cons["rt_start_mean"], cons["rt_end_mean"]]),
-            inty=np.array([0.0, 0.0]),
-            label=f"EIC mz={cons['mz']:.4f}",
-            file="",
-            mz=cons["mz"],
-            feature_start=cons["rt_start_mean"],
-            feature_end=cons["rt_end_mean"],
-            feature_apex=cons["rt"],
-        )
-
-        new_feature = {
-            "sample_uid": sample_uid,
-            "feature_uid": feature_uid,
-            "feature_id": None,
-            "mz": cons["mz"],
-            "rt": cons["rt"],
-            "rt_original": 0.0 if cons["rt"] == 0.0 else None,
-            "rt_start": cons["rt_start_mean"],
-            "rt_end": cons["rt_end_mean"],
-            "rt_delta": cons["rt_end_mean"] - cons["rt_start_mean"],
-            "mz_start": None,
-            "mz_end": None,
-            "inty": 0.0,
-            "quality": None,
-            "charge": None,
-            "iso": None,
-            "iso_of": None,
-            "adduct": None,
-            "adduct_mass": None,
-            "adduct_group": None,
-            "chrom": empty_eic,
-            "filled": True,
-            "chrom_area": 0.0,
-            "chrom_coherence": None,
-            "chrom_prominence": None,
-            "chrom_prominence_scaled": None,
-            "chrom_height_scaled": None,
-            "ms2_scans": None,
-            "ms2_specs": None,
-        }
-
-        new_features.append(new_feature)
-        new_mapping.append({
-            "consensus_uid": consensus_uid,
-            "sample_uid": sample_uid,
-            "feature_uid": feature_uid,
-        })
-
-    return new_features, new_mapping, len(new_features)
-'''
-
-'''
-def _create_feature_fast(self, consensus_uid, sample_uid, features_df_max_uid, consensus_info):
-    """
-    OPTIMIZED: Create a minimal empty feature quickly without expensive operations.
-    Used for RT=0 features and other cases where we just need a placeholder feature.
-    """
-    cons = consensus_info[consensus_uid]
-    feature_uid = features_df_max_uid
-
-    # Create minimal empty feature
-    empty_eic = Chromatogram(
-        rt=np.array([cons["rt_start_mean"], cons["rt_end_mean"]]),
-        inty=np.array([0.0, 0.0]),
-        label=f"EIC mz={cons['mz']:.4f}",
-        file="",
-        mz=cons["mz"],
-        feature_start=cons["rt_start_mean"],
-        feature_end=cons["rt_end_mean"]
-    )
-
-    new_feature = {
-        "uid": feature_uid,
-        "sample_uid": sample_uid,
-        "mz": cons["mz"],
-        "rt": cons["rt"],
-        "mz_centroid": None,
-        "rt_centroid": None,
-        "iso": None,
-        "iso_of": None,
-        "adduct": None,
-        "adduct_mass": None,
-        "adduct_group": None,
-        "chrom": empty_eic,
-        "filled": True,
-        "chrom_area": 0.0,
-        "chrom_coherence": None,
-        "chrom_prominence": None,
-        "chrom_prominence_scaled": None,
-        "chrom_height_scaled": None,
-        "ms2_scans": None,
-        "ms2_specs": None,
-    }
-
-    new_features = [new_feature]
-    new_mapping = [{
-        "consensus_uid": consensus_uid,
-        "sample_uid": sample_uid,
-        "feature_uid": feature_uid,
-    }]
-
-    return new_features, new_mapping, 1
-'''
-
-'''
-def _process_rt_zero_features_batch(self, rt_zero_consensus_uids, consensus_info, sample_uid,
-                                    features_df_max_uid, rt_zero_features):
-    """
-    OPTIMIZED: Process all RT=0 features in a batch since they share similar characteristics.
-    RT=0 features are typically not real peaks but artifacts or noise.
-    """
-    new_features = []
-    new_mapping = []
-
-    for consensus_uid in rt_zero_consensus_uids:
-        new_features_batch, new_mapping_batch, _ = self._create_feature_fast(
-            consensus_uid, sample_uid, features_df_max_uid, consensus_info
-        )
-        new_features.extend(new_features_batch)
-        new_mapping.extend(new_mapping_batch)
-        features_df_max_uid += 1
-
-        # Track RT=0 features for statistics
-        rt_zero_features.append(1)
-
-    return new_features, new_mapping, features_df_max_uid
-'''
-
-'''
-def _process_normal_rt_features_batch(self, normal_rt_consensus_uids, consensus_info, ms1_data,
-                                      sample_uid, sample_path, mz_tol, rt_tol, features_df_max_uid):
-    """
-    OPTIMIZED: Process normal RT features in batch with pre-filtered MS1 data.
-    Only loads chromatograms once per batch instead of per feature.
-    """
-    new_features = []
-    new_mapping = []
-
-    if len(normal_rt_consensus_uids) == 0:
-        return new_features, new_mapping, features_df_max_uid
-
-    # OPTIMIZATION: Pre-filter MS1 data by m/z range to reduce data size
-    all_mzs = [consensus_info[cuid]["mz"] for cuid in normal_rt_consensus_uids]
-    mz_min = min(all_mzs) - max(0.01, min(all_mzs) * mz_tol / 1e6)
-    mz_max = max(all_mzs) + max(0.01, max(all_mzs) * mz_tol / 1e6)
-
-    # Pre-filter MS1 data once for all features
-    ms1_filtered = ms1_data.filter(
-        (pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max)
-    )
-
-    # Early exit if no data in m/z range
-    if ms1_filtered.shape[0] == 0:
-        # Create empty features for all consensus UIDs
-        for consensus_uid in normal_rt_consensus_uids:
-            new_features_batch, new_mapping_batch, _ = self._create_feature_fast(
-                consensus_uid, sample_uid, features_df_max_uid, consensus_info
-            )
-            new_features.extend(new_features_batch)
-            new_mapping.extend(new_mapping_batch)
-            features_df_max_uid += 1
-        return new_features, new_mapping, features_df_max_uid
-
-    # Process each feature with pre-filtered data
-    for consensus_uid in normal_rt_consensus_uids:
-        info = consensus_info[consensus_uid]
-        mz, rt = info["mz"], info["rt"]
-
-        # Extract chromatogram using pre-loaded MS1 data (FIXED!)
-        sample_obj = self._load_ms1(sample_path)  # Get the sample object for extract_eic method
-        eic = sample_obj.extract_eic(
-            mz, mz_tol, rt, rt_tol, ms1_data=ms1_filtered  # Use the pre-filtered data!
-        )
-
-        # Find best peak
-        best_peak = self._find_best_peak_in_eic(eic, rt, rt_tol)
-
-        # Create feature
-        new_feature = {
-            "uid": features_df_max_uid,
-            "sample_uid": sample_uid,
-            "mz": mz,
-            "rt": rt,
-            "mz_centroid": None,
-            "rt_centroid": None,
-            "iso": None,
-            "iso_of": None,
-            "adduct": None,
-            "adduct_mass": None,
-            "adduct_group": None,
-            "chrom": eic if best_peak else Chromatogram(
-                rt=np.array([rt, rt]),
-                inty=np.array([0.0, 0.0]),
-                label=f"EIC mz={mz:.4f}",
-                file="",
-                mz=mz,
-                feature_start=rt,
-                feature_end=rt
-            ),
-            "filled": True,
-            "chrom_area": best_peak.get("area", 0.0) if best_peak else 0.0,
-            "chrom_coherence": best_peak.get("coherence") if best_peak else None,
-            "chrom_prominence": best_peak.get("prominence") if best_peak else None,
-            "chrom_prominence_scaled": best_peak.get("prominence_scaled") if best_peak else None,
-            "chrom_height_scaled": best_peak.get("height_scaled") if best_peak else None,
-            "ms2_scans": None,
-            "ms2_specs": None,
-        }
-
-        new_features.append(new_feature)
-        new_mapping.append({
-            "consensus_uid": consensus_uid,
-            "sample_uid": sample_uid,
-            "feature_uid": features_df_max_uid,
-        })
-        features_df_max_uid += 1
-
-    return new_features, new_mapping, features_df_max_uid
-'''
-
-'''def _batch_process_features(self, consensus_uids, consensus_info, ms1_data, sample_uid, sample_path,
-                              mz_tol, rt_tol, features_df_max_uid, rt_zero_features):
-    """
-    OPTIMIZED: Process all missing features for a sample in a single batch operation.
-    This avoids repeated filtering of the MS1 dataframe.
-    """
-    new_features = []
-    new_mapping = []
-
-    # OPTIMIZATION: Process RT=0 features separately (they need special handling)
-    rt_zero_data = {}
-    if rt_zero_features:
-        rt_zero_data = self._process_rt_zero_features_batch(
-            rt_zero_features, consensus_info, ms1_data, mz_tol, rt_tol
-        )
-
-    # OPTIMIZATION: Build comprehensive filter for all normal RT features at once
-    normal_rt_features = [uid for uid in consensus_uids if uid not in rt_zero_features]
-    normal_rt_data = {}
-    if normal_rt_features:
-        normal_rt_data = self._process_normal_rt_features_batch(
-            normal_rt_features, consensus_info, ms1_data, mz_tol, rt_tol
-        )
-
-    # Combine results and create features
-    all_feature_data = {**rt_zero_data, **normal_rt_data}
-
-    for i, consensus_uid in enumerate(consensus_uids):
-        feature_uid = features_df_max_uid + i + 1
-        cons = consensus_info[consensus_uid]
-
-        # Get pre-processed data for this feature
-        feature_ms1_data = all_feature_data.get(consensus_uid, pl.DataFrame())
-
-        # Create feature using optimized chromatogram creation
-        new_feature, area = self._create_feature_fast(
-            consensus_uid, cons, feature_ms1_data, sample_uid, sample_path,
-            feature_uid, mz_tol, rt_tol
-        )
-
-        new_features.append(new_feature)
-        new_mapping.append({
-            "consensus_uid": consensus_uid,
-            "sample_uid": sample_uid,
-            "feature_uid": feature_uid,
-        })
-
-    return new_features, new_mapping, len(new_features)
-
-    # Process each missing feature
-    for consensus_uid in sample_missing:
-        cons = consensus_info[consensus_uid]
-        mz = cons["mz"]
-        rt = cons["rt"]
-        rt_start_mean = cons["rt_start_mean"]
-        rt_end_mean = cons["rt_end_mean"]
-
-        # Filter MS1 data for this feature
-        if hasattr(file, "ms1_df") and not file.ms1_df.is_empty():
-            # Special handling for RT=0 (library-derived features)
-            if rt == 0.0:
-                # Simple RT=0 processing: find max intensity across full m/z range
-                d_full = file.ms1_df.filter(
-                    (pl.col("mz") >= mz - mz_tol) & (pl.col("mz") <= mz + mz_tol)
-                )
-
-                if not d_full.is_empty():
-                    max_inty = d_full["inty"].max()
-                    if max_inty > 0:
-                        max_rt = d_full.filter(pl.col("inty") == max_inty)["rt"].min()
-
-                        # Use default rt_tol for RT=0 features
-                        eic_rt_tol = rt_tol
-
-                        # Filter around max RT
-                        d = d_full.filter(
-                            (pl.col("rt") >= max_rt - eic_rt_tol) &
-                            (pl.col("rt") <= max_rt + eic_rt_tol)
-                        )
-
-                        # Update consensus RT info
-                        rt = max_rt
-                        rt_start_mean = max_rt - eic_rt_tol
-                        rt_end_mean = max_rt + eic_rt_tol
-                    else:
-                        d = pl.DataFrame()
-                else:
-                    d = pl.DataFrame()
-            else:
-                # Normal RT-based filtering for non-zero RT
-                d = file.ms1_df.filter(
-                    (pl.col("mz") >= mz - mz_tol)
-                    & (pl.col("mz") <= mz + mz_tol)
-                    & (pl.col("rt") >= rt_start_mean - rt_tol)
-                    & (pl.col("rt") <= rt_end_mean + rt_tol),
-                )
-        else:
-            d = pl.DataFrame()
-
-        # Create chromatogram
-        if d.is_empty():
-            eic = Chromatogram(
-                rt=np.array([rt_start_mean, rt_end_mean]),
-                inty=np.array([0.0, 0.0]),
-                label=f"EIC mz={mz:.4f}",
-                file=sample_path,
-                mz=mz,
-                mz_tol=mz_tol,
-                feature_start=rt_start_mean,
-                feature_end=rt_end_mean,
-                feature_apex=rt,
-            )
-            max_inty = 0.0
-            area = 0.0
-            chrom_coherence = None
-            chrom_prominence = None
-            chrom_prominence_scaled = None
-            chrom_height_scaled = None
-            peak_rt_start = rt_start_mean
-            peak_rt_end = rt_end_mean
-            peak_rt_delta = rt_end_mean - rt_start_mean
-        else:
-            eic_rt = d.group_by("rt").agg(pl.col("inty").max()).sort("rt")
-
-            if len(eic_rt) > 4:
-                eic = Chromatogram(
-                    eic_rt["rt"].to_numpy(),
-                    eic_rt["inty"].to_numpy(),
-                    label=f"EIC mz={mz:.4f}",
-                    file=sample_path,
-                    mz=mz,
-                    mz_tol=mz_tol,
-                    feature_start=rt_start_mean,
-                    feature_end=rt_end_mean,
-                    feature_apex=rt,
-                ).find_peaks()
-                max_inty = np.max(eic.inty)
-                area = eic.feature_area
-
-                # Extract chromatogram peak properties from first peak (if available)
-                if len(eic.peak_rts) > 0 and eic.feature_start is not None and eic.feature_end is not None:
-                    chrom_coherence = round(eic.feature_coherence, 3) if eic.feature_coherence is not None else None
-                    chrom_prominence = round(eic.peak_prominences[0], 3) if len(eic.peak_prominences) > 0 else None
-                    chrom_prominence_scaled = round(eic.peak_prominences[0] / (np.mean(eic.inty) + 1e-10), 3) if len(eic.peak_prominences) > 0 else None
-                    chrom_height_scaled = round(eic.peak_heights[0] / (np.mean(eic.inty) + 1e-10), 3) if len(eic.peak_heights) > 0 else None
-                    peak_rt_start = eic.feature_start
-                    peak_rt_end = eic.feature_end
-                    peak_rt_delta = peak_rt_end - peak_rt_start
-                else:
-                    chrom_coherence = None
-                    chrom_prominence = None
-                    chrom_prominence_scaled = None
-                    chrom_height_scaled = None
-                    peak_rt_start = rt_start_mean
-                    peak_rt_end = rt_end_mean
-                    peak_rt_delta = rt_end_mean - rt_start_mean
-            else:
-                eic = Chromatogram(
-                    eic_rt["rt"].to_numpy(),
-                    eic_rt["inty"].to_numpy(),
-                    label=f"EIC mz={mz:.4f}",
-                    file=sample_path,
-                    mz=mz,
-                    mz_tol=mz_tol,
-                    feature_start=rt_start_mean,
-                    feature_end=rt_end_mean,
-                    feature_apex=rt,
-                )
-                max_inty = 0.0
-                area = 0.0
-                chrom_coherence = None
-                chrom_prominence = None
-                chrom_prominence_scaled = None
-                chrom_height_scaled = None
-                peak_rt_start = rt_start_mean
-                peak_rt_end = rt_end_mean
-                peak_rt_delta = rt_end_mean - rt_start_mean
-
-        # Generate feature UID (will be adjusted later to ensure global uniqueness)
-        feature_uid = features_df_max_uid + len(new_features) + 1
-
-        # Handle rt_original: for RT=0 features, set to 0; otherwise estimate from closest feature
-        if rt == 0.0 or (hasattr(cons, 'get') and cons.get("rt") == 0.0):
-            estimated_rt_original = 0.0
-        else:
-            estimated_rt_original = _estimate_rt_original_for_filled_feature(
-                self, sample_uid, rt, logger=self.logger if hasattr(self, 'logger') else None
-            )
-
-        # Create new feature entry with updated chromatogram properties
-        new_feature = {
-            "sample_uid": sample_uid,
-            "feature_uid": feature_uid,
-            "feature_id": None,
-            "mz": mz,
-            "rt": rt,
-            "rt_original": estimated_rt_original,
-            "rt_start": peak_rt_start,
-            "rt_end": peak_rt_end,
-            "rt_delta": peak_rt_delta,
-            "mz_start": None,
-            "mz_end": None,
-            "inty": max_inty,
-            "quality": None,
-            "charge": None,
-            "iso": None,
-            "iso_of": None,
-            "adduct": None,
-            "adduct_mass": None,
-            "adduct_group": None,
-            "chrom": eic,
-            "filled": True,
-            "chrom_area": area,
-            "chrom_coherence": chrom_coherence,
-            "chrom_prominence": chrom_prominence,
-            "chrom_prominence_scaled": chrom_prominence_scaled,
-            "chrom_height_scaled": chrom_height_scaled,
-            "ms2_scans": None,
-            "ms2_specs": None,
-        }
-
-        new_features.append(new_feature)
-        new_mapping.append(
-            {
-                "consensus_uid": consensus_uid,
-                "sample_uid": sample_uid,
-                "feature_uid": feature_uid,
-            },
-        )
-        counter += 1
-
-    return new_features, new_mapping, counter
-'''
-
 def _fill_chrom_impl(
     self,
     uids=None,
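
Note: the deleted blocks above repeatedly pre-filter the MS1 table to a broad m/z window (and optionally an RT window) before any per-feature work, using polars expressions on the mz, rt, and inty columns. A self-contained sketch of that pre-filtering step, with made-up toy data and the same column names as the removed code:

import polars as pl

# Toy MS1 table; real tables come from Sample._load_ms1 in masster.
ms1_data = pl.DataFrame({
    "mz": [150.05, 300.10, 450.20],
    "rt": [12.0, 55.0, 120.0],
    "inty": [1.0e4, 5.0e5, 2.0e3],
})

mz_ranges = [(299.9, 300.3), (450.0, 450.5)]
rt_ranges = [(50.0, 130.0)]

# One broad filter over the union of all m/z windows, as in the removed helper.
mz_min = min(r[0] for r in mz_ranges)
mz_max = max(r[1] for r in mz_ranges)
ms1_filtered = ms1_data.filter((pl.col("mz") >= mz_min) & (pl.col("mz") <= mz_max))

# Optionally narrow further by the union of the RT windows.
if rt_ranges:
    rt_min = min(r[0] for r in rt_ranges)
    rt_max = max(r[1] for r in rt_ranges)
    ms1_filtered = ms1_filtered.filter((pl.col("rt") >= rt_min) & (pl.col("rt") <= rt_max))

print(ms1_filtered)  # only rows inside both broad windows remain
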
@@ -2198,10 +1694,19 @@ def _add_sample_noms1(
             self.logger.warning(f"Failed to add sample {file}: {e}")
             return False
 
-    # Check if features map was created successfully
-    #if ddaobj._oms_features_map is None:
-    #    self.logger.warning(f"Failed to add sample {file}: No features map created")
-    #    return False
+    # Check polarity compatibility
+    sample_polarity = getattr(ddaobj, 'polarity', None)
+    study_polarity = getattr(self, 'polarity', None)
+
+    if sample_polarity is not None and study_polarity is not None:
+        # Normalize polarity names for comparison
+        sample_pol_norm = "positive" if sample_polarity in ["pos", "positive"] else "negative" if sample_polarity in ["neg", "negative"] else sample_polarity
+        study_pol_norm = "positive" if study_polarity in ["pos", "positive"] else "negative" if study_polarity in ["neg", "negative"] else study_polarity
+
+        if sample_pol_norm != study_pol_norm:
+            self.logger.warning(f"Sample {sample_name} polarity ({sample_polarity}) differs from study polarity ({study_polarity}). Skipping sample.")
+            return False
+
 
     #self.features_maps.append(ddaobj._oms_features_map)
 
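
Note: the added check normalizes the "pos"/"positive" and "neg"/"negative" spellings before comparing sample and study polarity, and skips the sample on a mismatch. A small standalone sketch of the same normalization; the helper name is illustrative, not part of masster:

def _normalize_polarity(polarity):
    # Map common spellings onto a canonical value; anything else passes through unchanged.
    if polarity in ("pos", "positive"):
        return "positive"
    if polarity in ("neg", "negative"):
        return "negative"
    return polarity

# A mismatch such as "pos" vs. "negative" is only detected after normalization.
assert _normalize_polarity("pos") == _normalize_polarity("positive")
assert _normalize_polarity("pos") != _normalize_polarity("neg")
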
@@ -2310,285 +1815,3 @@ def _add_sample_noms1(
         f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)",
     )
     return True
-
-'''
-def _add_sample_standard(
-    self,
-    file,
-    type=None,
-    reset=False,
-    adducts=None,
-    skip_color_reset=True,
-    skip_schema_check=True,
-):
-    """
-    Standard add_sample method that uses full sample loading (includes ms1_df).
-
-    This method uses the standard sample.load() method which loads all data
-    including ms1_df, providing full functionality but potentially slower performance
-    for large MS1 datasets.
-
-    Returns True if successful, False otherwise.
-    """
-    self.logger.debug(f"Adding (standard): {file}")
-
-    # Basic validation
-    basename = os.path.basename(file)
-    sample_name = os.path.splitext(basename)[0]
-
-    if sample_name in self.samples_df["sample_name"].to_list():
-        self.logger.warning(f"Sample {sample_name} already exists. Skipping.")
-        return False
-
-    if not os.path.exists(file):
-        self.logger.error(f"File {file} does not exist.")
-        return False
-
-    if not file.endswith((".sample5", ".wiff", ".raw", ".mzML")):
-        self.logger.error(f"Unsupported file type: {file}")
-        return False
-
-    # Load sample using standard method (includes ms1_df)
-    ddaobj = Sample()
-    ddaobj.logger_update(level="WARNING", label=os.path.basename(file))
-    # Use standard loading method that loads all data including ms1_df
-
-    if file.endswith(".sample5"):
-        ddaobj.load(file)
-        # restore _oms_features_map
-        ddaobj._get_feature_map()
-    else:
-        try:
-            ddaobj.load(file)
-            ddaobj.find_features()
-            ddaobj.find_adducts(adducts=adducts)
-            ddaobj.find_ms2()
-        except Exception as e:
-            self.logger.warning(f"Failed to add sample {file}: {e}")
-            return False
-
-    # Check if features map was created successfully
-    if ddaobj._oms_features_map is None:
-        self.logger.warning(f"Failed to add sample {file}: No features map created")
-        return False
-
-    self.features_maps.append(ddaobj._oms_features_map)
-
-    # Determine sample type
-    sample_type = "sample" if type is None else type
-    if "qc" in sample_name.lower():
-        sample_type = "qc"
-    if "blank" in sample_name.lower():
-        sample_type = "blank"
-
-    map_id_value = len(self.features_maps) - 1
-
-    # Handle file paths
-    if file.endswith(".sample5"):
-        final_sample_path = file
-        # self.logger.trace(f"Using existing .sample5 file: {final_sample_path}")
-    else:
-        if self.folder is not None:
-            if not os.path.exists(self.folder):
-                os.makedirs(self.folder)
-            final_sample_path = os.path.join(self.folder, sample_name + ".sample5")
-        else:
-            final_sample_path = os.path.join(os.getcwd(), sample_name + ".sample5")
-        ddaobj.save(final_sample_path)
-        self.logger.debug(f"Saved converted sample: {final_sample_path}")
-
-    # Efficient scan counting
-    ms1_count = ms2_count = 0
-    if (
-        hasattr(ddaobj, "scans_df")
-        and ddaobj.scans_df is not None
-        and not ddaobj.scans_df.is_empty()
-    ):
-        scan_counts = (
-            ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
-        )
-        ms_levels = scan_counts.get("ms_level", [])
-        counts = scan_counts.get("len", [])
-        for level, count in zip(ms_levels, counts):
-            if level == 1:
-                ms1_count = count
-            elif level == 2:
-                ms2_count = count
-
-    # Create sample entry
-    next_sequence = len(self.samples_df) + 1 if not self.samples_df.is_empty() else 1
-    new_sample = pl.DataFrame(
-        {
-            "sample_uid": [int(len(self.samples_df) + 1)],
-            "sample_name": [sample_name],
-            "sample_path": [final_sample_path],
-            "sample_type": [sample_type],
-            "map_id": [map_id_value],
-            "sample_source": [getattr(ddaobj, "file_source", file)],
-            "sample_color": [None],  # Will be set in batch at end
-            "sample_group": [""],
-            "sample_batch": [1],
-            "sample_sequence": [next_sequence],
-            "num_features": [int(ddaobj._oms_features_map.size())],
-            "num_ms1": [ms1_count],
-            "num_ms2": [ms2_count],
-        },
-    )
-
-    self.samples_df = pl.concat([self.samples_df, new_sample])
-
-    # SIMPLIFIED feature processing
-    current_sample_uid = len(self.samples_df)
-
-    # Add required columns with minimal operations
-    columns_to_add = [
-        pl.lit(current_sample_uid).alias("sample_uid"),
-        pl.lit(False).alias("filled"),
-        pl.lit(-1.0).alias("chrom_area"),
-    ]
-
-    # Only add rt_original if it doesn't exist
-    if "rt_original" not in ddaobj.features_df.columns:
-        columns_to_add.append(pl.col("rt").alias("rt_original"))
-
-    f_df = ddaobj.features_df.with_columns(columns_to_add)
-
-    if self.features_df.is_empty():
-        # First sample
-        self.features_df = f_df.with_columns(
-            pl.int_range(pl.len()).add(1).alias("feature_uid"),
-        )
-    else:
-        # Subsequent samples - minimal overhead
-        offset = self.features_df["feature_uid"].max() + 1
-        f_df = f_df.with_columns(
-            pl.int_range(pl.len()).add(offset).alias("feature_uid"),
-        )
-
-    # Use diagonal concatenation for flexibility
-    self.features_df = pl.concat([self.features_df, f_df], how="diagonal")
-
-    self.logger.debug(
-        f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (standard)",
-    )
-    return True
-    ## COMMENT AR: Is this intentional?
-    # Use standard loading method that loads all data including ms1_df
-    ddaobj.load(file)
-
-    if ddaobj.features_df is None and not reset:
-        ddaobj._oms_features_map = None
-
-    if ddaobj._oms_features_map is None or reset:
-        ddaobj.find_features()
-        ddaobj.find_adducts(adducts=adducts)
-        ddaobj.find_ms2()
-
-    self.features_maps.append(ddaobj._oms_features_map)
-
-    # Determine sample type
-    sample_type = "sample" if type is None else type
-    if "qc" in sample_name.lower():
-        sample_type = "qc"
-    if "blank" in sample_name.lower():
-        sample_type = "blank"
-
-    map_id_value = len(self.features_maps) - 1
-
-    # Handle file paths
-    if file.endswith(".sample5"):
-        final_sample_path = file
-        # self.logger.trace(f"Using existing .sample5 file: {final_sample_path}")
-    else:
-        if self.folder is not None:
-            if not os.path.exists(self.folder):
-                os.makedirs(self.folder)
-            final_sample_path = os.path.join(self.folder, sample_name + ".sample5")
-        else:
-            final_sample_path = os.path.join(os.getcwd(), sample_name + ".sample5")
-        ddaobj.save(final_sample_path)
-        self.logger.debug(f"Saved converted sample: {final_sample_path}")
-
-    # Efficient scan counting
-    ms1_count = ms2_count = 0
-    if (
-        hasattr(ddaobj, "scans_df")
-        and ddaobj.scans_df is not None
-        and not ddaobj.scans_df.is_empty()
-    ):
-        scan_counts = (
-            ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
-        )
-        ms_levels = scan_counts.get("ms_level", [])
-        counts = scan_counts.get("len", [])
-        for level, count in zip(ms_levels, counts):
-            if level == 1:
-                ms1_count = count
-            elif level == 2:
-                ms2_count = count
-
-    # Create sample entry
-    next_sequence = len(self.samples_df) + 1 if not self.samples_df.is_empty() else 1
-    new_sample = pl.DataFrame(
-        {
-            "sample_uid": [int(len(self.samples_df) + 1)],
-            "sample_name": [sample_name],
-            "sample_path": [final_sample_path],
-            "sample_type": [sample_type],
-            "map_id": [map_id_value],
-            "sample_source": [getattr(ddaobj, "file_source", file)],
-            "sample_color": [None],  # Will be set in batch at end
-            "sample_group": [""],
-            "sample_batch": [1],
-            "sample_sequence": [next_sequence],
-            "num_features": [int(ddaobj._oms_features_map.size())],
-            "num_ms1": [ms1_count],
-            "num_ms2": [ms2_count],
-        },
-    )
-
-    self.samples_df = pl.concat([self.samples_df, new_sample])
-
-    # SIMPLIFIED feature processing
-    current_sample_uid = len(self.samples_df)
-
-    # Add required columns with minimal operations
-    columns_to_add = [
-        pl.lit(current_sample_uid).alias("sample_uid"),
-        pl.lit(False).alias("filled"),
-        pl.lit(-1.0).alias("chrom_area"),
-    ]
-
-    # Only add rt_original if it doesn't exist
-    if "rt_original" not in ddaobj.features_df.columns:
-        columns_to_add.append(pl.col("rt").alias("rt_original"))
-
-    f_df = ddaobj.features_df.with_columns(columns_to_add)
-
-    if self.features_df.is_empty():
-        # First sample
-        self.features_df = f_df.with_columns(
-            pl.int_range(pl.len()).add(1).alias("feature_uid"),
-        )
-    else:
-        # Subsequent samples - minimal overhead
-        offset = self.features_df["feature_uid"].max() + 1
-        f_df = f_df.with_columns(
-            pl.int_range(pl.len()).add(offset).alias("feature_uid"),
-        )
-
-    # Use diagonal concatenation for flexibility
-    self.features_df = pl.concat([self.features_df, f_df], how="diagonal")
-
-    self.logger.debug(
-        f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (standard)",
-    )
-    return True
-
-'''
-'''def _sample_color_reset_optimized(self):
-    """
-    Optimized version of sample color reset using set_samples_color.
-    """
-    return self.set_samples_color(by=None)
-'''
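
Note: the removed _add_sample_standard body assigned study-wide feature_uid values by offsetting a per-sample integer range and then appending the new rows with a diagonal concat, so samples with differing column sets can still be stacked. A self-contained sketch of that pattern with polars; the toy columns and values are illustrative only:

import polars as pl

# Existing study-level feature table and a newly loaded sample's features (toy data).
study_features = pl.DataFrame({"feature_uid": [1, 2, 3], "mz": [100.0, 200.0, 300.0]})
sample_features = pl.DataFrame({"mz": [150.0, 250.0], "rt": [10.0, 20.0]})

# Continue numbering after the highest uid already present in the study.
offset = study_features["feature_uid"].max() + 1
sample_features = sample_features.with_columns(
    pl.int_range(pl.len()).add(offset).alias("feature_uid"),
)

# Diagonal concat tolerates the differing column sets; missing columns become null.
study_features = pl.concat([study_features, sample_features], how="diagonal")
print(study_features)
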