masster-0.4.0-py3-none-any.whl → masster-0.4.1-py3-none-any.whl
This diff compares the contents of two publicly released versions of the package, as published to a supported registry. It is provided for informational purposes only and reflects the package versions exactly as they appear in their public registries.
- masster/__init__.py +8 -8
- masster/_version.py +1 -1
- masster/chromatogram.py +3 -9
- masster/data/libs/README.md +1 -1
- masster/data/libs/ccm.csv +120 -120
- masster/data/libs/ccm.py +116 -62
- masster/data/libs/central_carbon_README.md +1 -1
- masster/data/libs/urine.py +161 -65
- masster/data/libs/urine_metabolites.csv +4693 -4693
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +2 -2
- masster/logger.py +43 -78
- masster/sample/__init__.py +1 -1
- masster/sample/adducts.py +264 -338
- masster/sample/defaults/find_adducts_def.py +8 -21
- masster/sample/defaults/find_features_def.py +1 -6
- masster/sample/defaults/get_spectrum_def.py +1 -5
- masster/sample/defaults/sample_def.py +1 -5
- masster/sample/h5.py +282 -561
- masster/sample/helpers.py +75 -131
- masster/sample/lib.py +17 -42
- masster/sample/load.py +17 -31
- masster/sample/parameters.py +2 -6
- masster/sample/plot.py +27 -88
- masster/sample/processing.py +87 -117
- masster/sample/quant.py +51 -57
- masster/sample/sample.py +90 -103
- masster/sample/sample5_schema.json +44 -44
- masster/sample/save.py +12 -35
- masster/sample/sciex.py +19 -66
- masster/spectrum.py +20 -58
- masster/study/__init__.py +1 -1
- masster/study/defaults/align_def.py +1 -5
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/fill_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/integrate_def.py +1 -5
- masster/study/defaults/study_def.py +25 -58
- masster/study/export.py +207 -233
- masster/study/h5.py +136 -470
- masster/study/helpers.py +202 -495
- masster/study/helpers_optimized.py +13 -40
- masster/study/id.py +110 -213
- masster/study/load.py +143 -230
- masster/study/plot.py +257 -518
- masster/study/processing.py +257 -469
- masster/study/save.py +5 -15
- masster/study/study.py +276 -379
- masster/study/study5_schema.json +96 -96
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/METADATA +1 -1
- masster-0.4.1.dist-info/RECORD +67 -0
- masster-0.4.0.dist-info/RECORD +0 -67
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/WHEEL +0 -0
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/entry_points.txt +0 -0
- {masster-0.4.0.dist-info → masster-0.4.1.dist-info}/licenses/LICENSE +0 -0
masster/study/load.py
CHANGED
@@ -10,10 +10,10 @@ import pyopenms as oms

 from tqdm import tqdm

-from
-from
-from
-from
+from masster.chromatogram import Chromatogram
+from masster.study.defaults import fill_defaults
+from masster.sample.sample import Sample
+from masster.spectrum import Spectrum


 # Pre-import heavy modules to avoid repeated loading in add_sample()
@@ -94,9 +94,7 @@ def add(

     if len(files) > 0:
         # Limit files if max_files is specified
-        remaining_slots = (
-            max_files - counter if max_files is not None else len(files)
-        )
+        remaining_slots = max_files - counter if max_files is not None else len(files)
         files = files[:remaining_slots]

         self.logger.debug(f"Found {len(files)} {ext} files")
@@ -119,9 +117,7 @@ def add(

     # Batch process all files of this extension using ultra-optimized method
     if files_to_process:
-        self.logger.debug(
-            f"Batch processing {len(files_to_process)} {ext} files",
-        )
+        self.logger.debug(f"Batch processing {len(files_to_process)} {ext} files")
         successful = self._add_samples_batch(
             files_to_process,
             reset=reset,
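For orientation, the collapsed ternary above caps each pass at the remaining file quota. A minimal runnable sketch with made-up values (`files`, `max_files`, and `counter` mirror the names in the hunk; the data is illustrative):

```python
# Illustrative values only; in masster these come from the study's add() call.
files = ["a.mzML", "b.mzML", "c.mzML"]
max_files = 2   # None would mean "no cap"
counter = 0     # files already added in earlier passes

remaining_slots = max_files - counter if max_files is not None else len(files)
files = files[:remaining_slots]
print(files)  # ['a.mzML', 'b.mzML']
```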
@@ -260,8 +256,7 @@ def _fill_chrom_single_impl(
     if min_number > 0:
         original_count = len(uids)
         uids = self.consensus_df.filter(
-            (pl.col("number_samples") >= min_number)
-            & (pl.col("consensus_uid").is_in(uids)),
+            (pl.col("number_samples") >= min_number) & (pl.col("consensus_uid").is_in(uids)),
         )["consensus_uid"].to_list()
         self.logger.debug(
             f"Features to fill: {original_count} -> {len(uids)}",
@@ -276,15 +271,13 @@ def _fill_chrom_single_impl(
     # Build lookup dictionaries
     self.logger.debug("Building lookup dictionaries...")
     consensus_info = {}
-    consensus_subset = self.consensus_df.select(
-        [
-            "consensus_uid",
-            "rt_start_mean",
-            "rt_end_mean",
-            "mz",
-            "rt",
-        ],
-    ).filter(pl.col("consensus_uid").is_in(uids))
+    consensus_subset = self.consensus_df.select([
+        "consensus_uid",
+        "rt_start_mean",
+        "rt_end_mean",
+        "mz",
+        "rt",
+    ]).filter(pl.col("consensus_uid").is_in(uids))

     for row in consensus_subset.iter_rows(named=True):
         consensus_info[row["consensus_uid"]] = {
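Several hunks in this file reshape the same Polars idiom: project a handful of columns, filter by UID membership, then iterate rows as dicts. A self-contained sketch with toy data (the column names follow the hunk; the values are invented):

```python
import polars as pl

consensus_df = pl.DataFrame({
    "consensus_uid": [1, 2, 3],
    "mz": [101.1, 202.2, 303.3],
    "rt": [10.0, 20.0, 30.0],
})

uids = [1, 3]
subset = consensus_df.select(["consensus_uid", "mz", "rt"]).filter(
    pl.col("consensus_uid").is_in(uids)
)

# iter_rows(named=True) yields one dict per row, as in the hunk above
consensus_info = {row["consensus_uid"]: row for row in subset.iter_rows(named=True)}
print(consensus_info)  # {1: {...}, 3: {...}}
```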
@@ -451,13 +444,11 @@ def _fill_chrom_single_impl(
             }

             new_features.append(new_feature)
-            new_mapping.append(
-                {
-                    "consensus_uid": consensus_uid,
-                    "sample_uid": sample_uid,
-                    "feature_uid": feature_uid,
-                },
-            )
+            new_mapping.append({
+                "consensus_uid": consensus_uid,
+                "sample_uid": sample_uid,
+                "feature_uid": feature_uid,
+            })
             counter += 1

     # Add new features to DataFrames
@@ -480,10 +471,7 @@ def _fill_chrom_single_impl(
     for row in rows_to_add:
         # Cast numeric columns to ensure consistency
         for key, value in row.items():
-            if (
-                key in ["mz", "rt", "intensity", "area", "height"]
-                and value is not None
-            ):
+            if key in ["mz", "rt", "intensity", "area", "height"] and value is not None:
                 row[key] = float(value)
             elif key in ["sample_id", "feature_id"] and value is not None:
                 row[key] = int(value)
@@ -530,7 +518,7 @@ def fill_single(self, **kwargs):
         min_samples_abs: Absolute minimum sample threshold (default: 2)
     """
     # parameters initialization
-    from
+    from masster.study.defaults import fill_defaults

     params = fill_defaults()

@@ -702,13 +690,11 @@ def _process_sample_for_parallel_fill(
         }

         new_features.append(new_feature)
-        new_mapping.append(
-            {
-                "consensus_uid": consensus_uid,
-                "sample_uid": sample_uid,
-                "feature_uid": feature_uid,
-            },
-        )
+        new_mapping.append({
+            "consensus_uid": consensus_uid,
+            "sample_uid": sample_uid,
+            "feature_uid": feature_uid,
+        })
         counter += 1

     return new_features, new_mapping, counter
@@ -754,8 +740,7 @@ def _fill_chrom_impl(
     if min_number > 0:
         original_count = len(uids)
         uids = self.consensus_df.filter(
-            (pl.col("number_samples") >= min_number)
-            & (pl.col("consensus_uid").is_in(uids)),
+            (pl.col("number_samples") >= min_number) & (pl.col("consensus_uid").is_in(uids)),
         )["consensus_uid"].to_list()
         self.logger.debug(f"Features to fill: {original_count} -> {len(uids)}")

@@ -782,15 +767,13 @@ def _fill_chrom_impl(
     # Build lookup dictionaries
     self.logger.debug("Building lookup dictionaries...")
     consensus_info = {}
-    consensus_subset = self.consensus_df.select(
-        [
-            "consensus_uid",
-            "rt_start_mean",
-            "rt_end_mean",
-            "mz",
-            "rt",
-        ],
-    ).filter(pl.col("consensus_uid").is_in(uids))
+    consensus_subset = self.consensus_df.select([
+        "consensus_uid",
+        "rt_start_mean",
+        "rt_end_mean",
+        "mz",
+        "rt",
+    ]).filter(pl.col("consensus_uid").is_in(uids))

     for row in consensus_subset.iter_rows(named=True):
         consensus_info[row["consensus_uid"]] = {
@@ -807,13 +790,11 @@ def _fill_chrom_impl(
     for row in self.samples_df.filter(
         pl.col("sample_uid").is_in(unique_sample_uids),
     ).iter_rows(named=True):
-        samples_to_process.append(
-            {
-                "sample_name": row["sample_name"],
-                "sample_uid": row["sample_uid"],
-                "sample_path": row["sample_path"],
-            },
-        )
+        samples_to_process.append({
+            "sample_name": row["sample_name"],
+            "sample_uid": row["sample_uid"],
+            "sample_path": row["sample_path"],
+        })

     total_missing = len(missing_combinations_df)
     self.logger.debug(
@@ -821,9 +802,7 @@ def _fill_chrom_impl(
     )

     # Calculate current max feature_uid to avoid conflicts
-    features_df_max_uid = (
-        self.features_df["feature_uid"].max() if not self.features_df.is_empty() else 0
-    )
+    features_df_max_uid = self.features_df["feature_uid"].max() if not self.features_df.is_empty() else 0

     # Process samples in parallel
     all_new_features: list[dict] = []
@@ -897,10 +876,7 @@ def _fill_chrom_impl(
     for row in rows_to_add:
         # Cast numeric columns to ensure consistency
         for key, value in row.items():
-            if (
-                key in ["mz", "rt", "intensity", "area", "height"]
-                and value is not None
-            ):
+            if key in ["mz", "rt", "intensity", "area", "height"] and value is not None:
                 row[key] = float(value)
             elif key in ["sample_id", "feature_id"] and value is not None:
                 row[key] = int(value)
@@ -949,10 +925,7 @@ def fill(self, **kwargs):
     """
     # parameters initialization
     params = fill_defaults()
-    num_workers = kwargs.get(
-        "num_workers",
-        4,
-    )  # Default parameter not in defaults class
+    num_workers = kwargs.get("num_workers", 4)  # Default parameter not in defaults class

     for key, value in kwargs.items():
         if isinstance(value, fill_defaults):
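fill() seeds a defaults object and then lets keyword arguments override it, with num_workers handled outside the defaults class. A rough sketch of that pattern, assuming a dataclass-like fill_defaults and an attribute-based override (the field names and the setattr branch are illustrative, not masster's exact implementation):

```python
from dataclasses import dataclass

@dataclass
class fill_defaults:
    min_samples_rel: float = 0.5
    min_samples_abs: int = 2

def fill(**kwargs):
    params = fill_defaults()
    num_workers = kwargs.get("num_workers", 4)  # not part of the defaults object
    for key, value in kwargs.items():
        if isinstance(value, fill_defaults):
            params = value  # a complete defaults object replaces params wholesale
        elif hasattr(params, key):
            setattr(params, key, value)  # individual keys override single fields
    return params, num_workers

print(fill(min_samples_abs=3, num_workers=8))
```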
@@ -1015,9 +988,7 @@ def _get_missing_consensus_sample_combinations(self, uids):
         .agg(pl.count("feature_uid").alias("count"))
     )

-    total_existing = (
-        consensus_counts["count"].sum() if not consensus_counts.is_empty() else 0
-    )
+    total_existing = consensus_counts["count"].sum() if not consensus_counts.is_empty() else 0

     # If >95% filled, likely no gaps (common case)
     if total_existing >= total_possible * 0.95:
@@ -1036,12 +1007,10 @@ def _get_missing_consensus_sample_combinations(self, uids):

         # Get existing combinations for target UIDs only
         existing_by_consensus = {}
-        for consensus_uid, feature_uid in self.consensus_mapping_df.select(
-            [
-                "consensus_uid",
-                "feature_uid",
-            ],
-        ).iter_rows():
+        for consensus_uid, feature_uid in self.consensus_mapping_df.select([
+            "consensus_uid",
+            "feature_uid",
+        ]).iter_rows():
             if consensus_uid in uids_set and feature_uid in feature_to_sample:
                 if consensus_uid not in existing_by_consensus:
                     existing_by_consensus[consensus_uid] = set()
@@ -1049,9 +1018,7 @@ def _get_missing_consensus_sample_combinations(self, uids):

         # Get sample info once
         all_samples = list(
-            self.samples_df.select(
-                ["sample_uid", "sample_name", "sample_path"],
-            ).iter_rows(),
+            self.samples_df.select(["sample_uid", "sample_name", "sample_path"]).iter_rows(),
         )

         # Check for missing combinations
@@ -1059,17 +1026,13 @@ def _get_missing_consensus_sample_combinations(self, uids):
             existing_samples = existing_by_consensus.get(consensus_uid, set())
             for sample_uid, sample_name, sample_path in all_samples:
                 if sample_uid not in existing_samples:
-                    missing_combinations.append(
-                        (consensus_uid, sample_uid, sample_name, sample_path),
-                    )
+                    missing_combinations.append((consensus_uid, sample_uid, sample_name, sample_path))

         return missing_combinations

     else:
         # For studies with many gaps, use bulk operations
-        self.logger.debug(
-            f"Study {total_existing / total_possible * 100:.1f}% filled, using bulk optimization",
-        )
+        self.logger.debug(f"Study {total_existing / total_possible * 100:.1f}% filled, using bulk optimization")

         # Build efficient lookups
         uids_set = set(uids)
@@ -1080,20 +1043,16 @@ def _get_missing_consensus_sample_combinations(self, uids):
         # Build existing combinations set
         existing_combinations = {
             (consensus_uid, feature_to_sample[feature_uid])
-            for consensus_uid, feature_uid in self.consensus_mapping_df.select(
-                [
-                    "consensus_uid",
-                    "feature_uid",
-                ],
-            ).iter_rows()
+            for consensus_uid, feature_uid in self.consensus_mapping_df.select([
+                "consensus_uid",
+                "feature_uid",
+            ]).iter_rows()
             if consensus_uid in uids_set and feature_uid in feature_to_sample
         }

         # Get all sample info
         all_samples = list(
-            self.samples_df.select(
-                ["sample_uid", "sample_name", "sample_path"],
-            ).iter_rows(),
+            self.samples_df.select(["sample_uid", "sample_name", "sample_path"]).iter_rows(),
         )

         # Generate all missing combinations
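Both branches above compute the same thing: every (consensus, sample) pair with no mapped feature is a gap that fill() must close. A self-contained sketch of the bulk variant with toy data:

```python
# feature_uid -> sample_uid, plus (consensus_uid, feature_uid) mappings; toy data
feature_to_sample = {11: "s1", 12: "s2", 13: "s1"}
mapping = [(1, 11), (1, 12), (2, 13)]
all_samples = ["s1", "s2"]
uids = [1, 2]

existing_combinations = {
    (consensus_uid, feature_to_sample[feature_uid])
    for consensus_uid, feature_uid in mapping
    if feature_uid in feature_to_sample
}
missing = [
    (uid, sample)
    for uid in uids
    for sample in all_samples
    if (uid, sample) not in existing_combinations
]
print(missing)  # [(2, 's2')]: consensus 2 has no feature in sample s2
```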
@@ -1162,10 +1121,7 @@ def sanitize(self):
         for ms2_specs in row_data["ms2_specs"]:
             if not isinstance(ms2_specs, Spectrum):
                 try:
-                    new_ms2_specs = Spectrum(
-                        mz=np.array([0]),
-                        inty=np.array([0]),
-                    )
+                    new_ms2_specs = Spectrum(mz=np.array([0]), inty=np.array([0]))
                     if hasattr(ms2_specs, "__dict__"):
                         new_ms2_specs.from_dict(ms2_specs.__dict__)
                     else:
@@ -1204,8 +1160,8 @@ def sanitize(self):
 def load_features(self):
     """
     Load features by reconstructing FeatureMaps from the processed features_df data.
-
-    This ensures that the loaded FeatureMaps contain the same processed features
+
+    This ensures that the loaded FeatureMaps contain the same processed features
     as stored in features_df, rather than loading raw features from .featureXML files
     which may not match the processed data after filtering, alignment, etc.
     """
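sanitize() swaps anything that is not a proper Spectrum for a fresh placeholder and copies the old state across. A sketch of that repair pattern with a stand-in Spectrum class; only the behavior visible in the hunk is modeled, and from_dict is assumed to copy attributes:

```python
import numpy as np

class Spectrum:  # stand-in for masster.spectrum.Spectrum
    def __init__(self, mz, inty):
        self.mz = mz
        self.inty = inty

    def from_dict(self, d):
        self.__dict__.update(d)  # assumed behavior: bulk-copy attributes

class LegacySpectrum:  # a deserialized object of the wrong type
    def __init__(self):
        self.mz = np.array([100.0, 200.0])
        self.inty = np.array([1.0, 2.0])

ms2_specs = LegacySpectrum()
if not isinstance(ms2_specs, Spectrum):
    new_ms2_specs = Spectrum(mz=np.array([0]), inty=np.array([0]))
    if hasattr(ms2_specs, "__dict__"):
        new_ms2_specs.from_dict(ms2_specs.__dict__)
    ms2_specs = new_ms2_specs

print(ms2_specs.mz)  # the legacy payload, now carried by a proper Spectrum
```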
@@ -1213,25 +1169,25 @@ def load_features(self):
     import pyopenms as oms
     from tqdm import tqdm
     from datetime import datetime
-
+
     self.features_maps = []
-
+
     # Check if features_df exists and is not empty
     if self.features_df is None:
         self.logger.warning("features_df is None. Falling back to XML loading.")
         self._load_features_from_xml()
         return
-
+
     if len(self.features_df) == 0:
         self.logger.warning("features_df is empty. Falling back to XML loading.")
         self._load_features_from_xml()
         return
-
+
     # If we get here, we should use the new method
     self.logger.debug("Reconstructing FeatureMaps from features_df.")
-
+
     tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
-
+
     # Process each sample in order
     for sample_index, row_dict in tqdm(
         enumerate(self.samples_df.iter_rows(named=True)),
@@ -1239,39 +1195,37 @@ def load_features(self):
         desc=f"{datetime.now().strftime('%Y-%m-%d %H:%M:%S.%f')[:-3]} | INFO | {self.log_label}Reconstruct FeatureMaps from DataFrame",
         disable=tdqm_disable,
     ):
-        sample_uid = row_dict["sample_uid"]
-        sample_name = row_dict["sample_name"]
-
+        sample_uid = row_dict['sample_uid']
+        sample_name = row_dict['sample_name']
+
         # Get features for this sample from features_df
-        sample_features = self.features_df.filter(pl.col("sample_uid") == sample_uid)
-
+        sample_features = self.features_df.filter(pl.col('sample_uid') == sample_uid)
+
         # Create new FeatureMap
         feature_map = oms.FeatureMap()
-
+
         # Convert DataFrame features to OpenMS Features
         for feature_row in sample_features.iter_rows(named=True):
             feature = oms.Feature()
-
+
             # Set properties from DataFrame (handle missing values gracefully)
             try:
-                feature.setUniqueId(int(feature_row["feature_id"]))
-                feature.setMZ(float(feature_row["mz"]))
-                feature.setRT(float(feature_row["rt"]))
-                feature.setIntensity(float(feature_row["inty"]))
-                feature.setOverallQuality(float(feature_row["quality"]))
-                feature.setCharge(int(feature_row["charge"]))
-
+                feature.setUniqueId(int(feature_row['feature_id']))
+                feature.setMZ(float(feature_row['mz']))
+                feature.setRT(float(feature_row['rt']))
+                feature.setIntensity(float(feature_row['inty']))
+                feature.setOverallQuality(float(feature_row['quality']))
+                feature.setCharge(int(feature_row['charge']))
+
                 # Add to feature map
                 feature_map.push_back(feature)
             except (ValueError, TypeError) as e:
                 self.logger.warning(f"Skipping feature due to conversion error: {e}")
                 continue
-
+
         self.features_maps.append(feature_map)
-
-        self.logger.debug(
-            f"Successfully reconstructed {len(self.features_maps)} FeatureMaps from features_df.",
-        )
+
+    self.logger.debug(f"Successfully reconstructed {len(self.features_maps)} FeatureMaps from features_df.")
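The loop above maps DataFrame rows onto pyopenms Feature objects. A runnable sketch against a tiny hand-made table (column names follow the hunk; the values are invented):

```python
import polars as pl
import pyopenms as oms

features_df = pl.DataFrame({
    "feature_id": [1, 2],
    "mz": [150.05, 320.12],
    "rt": [12.3, 45.6],
    "inty": [1.0e5, 2.0e4],
    "quality": [0.9, 0.7],
    "charge": [1, 2],
})

feature_map = oms.FeatureMap()
for feature_row in features_df.iter_rows(named=True):
    feature = oms.Feature()
    feature.setUniqueId(int(feature_row["feature_id"]))
    feature.setMZ(float(feature_row["mz"]))
    feature.setRT(float(feature_row["rt"]))
    feature.setIntensity(float(feature_row["inty"]))
    feature.setOverallQuality(float(feature_row["quality"]))
    feature.setCharge(int(feature_row["charge"]))
    feature_map.push_back(feature)

print(feature_map.size())  # 2
```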
@@ -1326,14 +1280,7 @@ def _load_consensusXML(self, filename="alignment.consensusXML"):
     self.logger.debug(f"Loaded consensus map from {filename}.")


-def _add_samples_batch(
-    self,
-    files,
-    reset=False,
-    adducts=None,
-    blacklist=None,
-    fast=True,
-):
+def _add_samples_batch(self, files, reset=False, adducts=None, blacklist=None, fast=True):
     """
     Optimized batch addition of samples.

@@ -1356,9 +1303,7 @@ def _add_samples_batch(
     if blacklist is None:
         blacklist = set()

-    self.logger.debug(
-        f"Starting batch addition of {len(files)} samples (skip_ms1={fast})...",
-    )
+    self.logger.debug(f"Starting batch addition of {len(files)} samples (skip_ms1={fast})...")

     successful_additions = 0
     failed_additions = 0
@@ -1415,9 +1360,7 @@ def _add_samples_batch(
     # Color assignment done once for all samples
     self._sample_color_reset_optimized()

-    self.logger.debug(
-        f"Add samples complete: {successful_additions} successful, {failed_additions} failed",
-    )
+    self.logger.debug(f"Add samples complete: {successful_additions} successful, {failed_additions} failed")

     return successful_additions

@@ -1463,7 +1406,7 @@ def _add_sample_optimized(
     # Load sample
     ddaobj = Sample()
     ddaobj.logger_update(level="WARNING", label=os.path.basename(file))
-
+
     # Try optimized loading first (study-specific, skips ms1_df for better performance)

     if file.endswith(".sample5"):
@@ -1499,7 +1442,7 @@ def _add_sample_optimized(
     # Handle file paths
     if file.endswith(".sample5"):
         final_sample_path = file
-        #
+        #self.logger.debug(f"Using existing .sample5 file: {final_sample_path}")
     else:
         if self.folder is not None:
             if not os.path.exists(self.folder):
@@ -1512,14 +1455,8 @@ def _add_sample_optimized(

     # Efficient scan counting
     ms1_count = ms2_count = 0
-    if (
-        hasattr(ddaobj, "scans_df")
-        and ddaobj.scans_df is not None
-        and not ddaobj.scans_df.is_empty()
-    ):
-        scan_counts = (
-            ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
-        )
+    if hasattr(ddaobj, "scans_df") and ddaobj.scans_df is not None and not ddaobj.scans_df.is_empty():
+        scan_counts = ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
         ms_levels = scan_counts.get("ms_level", [])
         counts = scan_counts.get("len", [])
         for level, count in zip(ms_levels, counts):
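The flattened scan-counting idiom groups scans by MS level and reads the counts back as plain lists. A minimal sketch; the level-to-counter assignment in the loop body is an assumption, since the hunk ends at the zip():

```python
import polars as pl

scans_df = pl.DataFrame({"ms_level": [1, 2, 2, 1, 2]})

# group_by(...).len() yields columns "ms_level" and "len"
scan_counts = scans_df.group_by("ms_level").len().to_dict(as_series=False)

ms1_count = ms2_count = 0
for level, count in zip(scan_counts.get("ms_level", []), scan_counts.get("len", [])):
    if level == 1:
        ms1_count = count
    elif level == 2:
        ms2_count = count

print(ms1_count, ms2_count)  # 2 3
```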
@@ -1530,23 +1467,21 @@ def _add_sample_optimized(

     # Create sample entry
     next_sequence = len(self.samples_df) + 1 if not self.samples_df.is_empty() else 1
-    new_sample = pl.DataFrame(
-        {
-            "sample_uid": [int(len(self.samples_df) + 1)],
-            "sample_name": [sample_name],
-            "sample_path": [final_sample_path],
-            "sample_type": [sample_type],
-            "map_id": [map_id_value],
-            "sample_source": [getattr(ddaobj, "file_source", file)],
-            "sample_color": [None],  # Will be set in batch at end
-            "sample_group": [""],
-            "sample_batch": [1],
-            "sample_sequence": [next_sequence],
-            "num_features": [int(ddaobj._oms_features_map.size())],
-            "num_ms1": [ms1_count],
-            "num_ms2": [ms2_count],
-        },
-    )
+    new_sample = pl.DataFrame({
+        "sample_uid": [int(len(self.samples_df) + 1)],
+        "sample_name": [sample_name],
+        "sample_path": [final_sample_path],
+        "sample_type": [sample_type],
+        "map_id": [map_id_value],
+        "sample_source": [getattr(ddaobj, "file_source", file)],
+        "sample_color": [None],  # Will be set in batch at end
+        "sample_group": [""],
+        "sample_batch": [1],
+        "sample_sequence": [next_sequence],
+        "num_features": [int(ddaobj._oms_features_map.size())],
+        "num_ms1": [ms1_count],
+        "num_ms2": [ms2_count],
+    })

     self.samples_df = pl.concat([self.samples_df, new_sample])

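New samples are registered by building a one-row DataFrame and concatenating it onto samples_df; the feature tables later use how="diagonal" so frames with differing columns are unioned and padded with nulls. A small sketch of the diagonal append:

```python
import polars as pl

samples_df = pl.DataFrame({"sample_uid": [1], "sample_name": ["blank"]})

new_sample = pl.DataFrame({
    "sample_uid": [2],
    "sample_name": ["QC_01"],
    "num_ms2": [1500],  # a column the existing table does not have yet
})

# how="diagonal" keeps the union of columns; missing cells become null
samples_df = pl.concat([samples_df, new_sample], how="diagonal")
print(samples_df)
```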
@@ -1588,9 +1523,7 @@ def _add_sample_optimized(
     # - No type casting loops
     # - No sample_color_reset()

-    self.logger.debug(
-        f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)",
-    )
+    self.logger.debug(f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (optimized)")
     return True

@@ -1634,7 +1567,7 @@ def _add_sample_standard(
     ddaobj = Sample()
     ddaobj.logger_update(level="WARNING", label=os.path.basename(file))
     # Use standard loading method that loads all data including ms1_df
-
+
     if file.endswith(".sample5"):
         ddaobj.load(file)
         # restore _oms_features_map
@@ -1668,7 +1601,7 @@ def _add_sample_standard(
     # Handle file paths
     if file.endswith(".sample5"):
         final_sample_path = file
-        #
+        #self.logger.trace(f"Using existing .sample5 file: {final_sample_path}")
     else:
         if self.folder is not None:
             if not os.path.exists(self.folder):
@@ -1681,14 +1614,8 @@ def _add_sample_standard(

     # Efficient scan counting
     ms1_count = ms2_count = 0
-    if (
-        hasattr(ddaobj, "scans_df")
-        and ddaobj.scans_df is not None
-        and not ddaobj.scans_df.is_empty()
-    ):
-        scan_counts = (
-            ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
-        )
+    if hasattr(ddaobj, "scans_df") and ddaobj.scans_df is not None and not ddaobj.scans_df.is_empty():
+        scan_counts = ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
         ms_levels = scan_counts.get("ms_level", [])
         counts = scan_counts.get("len", [])
         for level, count in zip(ms_levels, counts):
@@ -1699,23 +1626,21 @@ def _add_sample_standard(

     # Create sample entry
     next_sequence = len(self.samples_df) + 1 if not self.samples_df.is_empty() else 1
-    new_sample = pl.DataFrame(
-        {
-            "sample_uid": [int(len(self.samples_df) + 1)],
-            "sample_name": [sample_name],
-            "sample_path": [final_sample_path],
-            "sample_type": [sample_type],
-            "map_id": [map_id_value],
-            "sample_source": [getattr(ddaobj, "file_source", file)],
-            "sample_color": [None],  # Will be set in batch at end
-            "sample_group": [""],
-            "sample_batch": [1],
-            "sample_sequence": [next_sequence],
-            "num_features": [int(ddaobj._oms_features_map.size())],
-            "num_ms1": [ms1_count],
-            "num_ms2": [ms2_count],
-        },
-    )
+    new_sample = pl.DataFrame({
+        "sample_uid": [int(len(self.samples_df) + 1)],
+        "sample_name": [sample_name],
+        "sample_path": [final_sample_path],
+        "sample_type": [sample_type],
+        "map_id": [map_id_value],
+        "sample_source": [getattr(ddaobj, "file_source", file)],
+        "sample_color": [None],  # Will be set in batch at end
+        "sample_group": [""],
+        "sample_batch": [1],
+        "sample_sequence": [next_sequence],
+        "num_features": [int(ddaobj._oms_features_map.size())],
+        "num_ms1": [ms1_count],
+        "num_ms2": [ms2_count],
+    })

     self.samples_df = pl.concat([self.samples_df, new_sample])

@@ -1750,9 +1675,7 @@ def _add_sample_standard(
     # Use diagonal concatenation for flexibility
     self.features_df = pl.concat([self.features_df, f_df], how="diagonal")

-    self.logger.debug(
-        f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (standard)",
-    )
+    self.logger.debug(f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (standard)")
     return True
     ## COMMENT AR: Is this intentional?
     # Use standard loading method that loads all data including ms1_df
@@ -1780,7 +1703,7 @@ def _add_sample_standard(
     # Handle file paths
     if file.endswith(".sample5"):
         final_sample_path = file
-        #
+        #self.logger.trace(f"Using existing .sample5 file: {final_sample_path}")
     else:
         if self.folder is not None:
             if not os.path.exists(self.folder):
@@ -1793,14 +1716,8 @@ def _add_sample_standard(

     # Efficient scan counting
     ms1_count = ms2_count = 0
-    if (
-        hasattr(ddaobj, "scans_df")
-        and ddaobj.scans_df is not None
-        and not ddaobj.scans_df.is_empty()
-    ):
-        scan_counts = (
-            ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
-        )
+    if hasattr(ddaobj, "scans_df") and ddaobj.scans_df is not None and not ddaobj.scans_df.is_empty():
+        scan_counts = ddaobj.scans_df.group_by("ms_level").len().to_dict(as_series=False)
         ms_levels = scan_counts.get("ms_level", [])
         counts = scan_counts.get("len", [])
         for level, count in zip(ms_levels, counts):
@@ -1811,23 +1728,21 @@ def _add_sample_standard(

     # Create sample entry
     next_sequence = len(self.samples_df) + 1 if not self.samples_df.is_empty() else 1
-    new_sample = pl.DataFrame(
-        {
-            "sample_uid": [int(len(self.samples_df) + 1)],
-            "sample_name": [sample_name],
-            "sample_path": [final_sample_path],
-            "sample_type": [sample_type],
-            "map_id": [map_id_value],
-            "sample_source": [getattr(ddaobj, "file_source", file)],
-            "sample_color": [None],  # Will be set in batch at end
-            "sample_group": [""],
-            "sample_batch": [1],
-            "sample_sequence": [next_sequence],
-            "num_features": [int(ddaobj._oms_features_map.size())],
-            "num_ms1": [ms1_count],
-            "num_ms2": [ms2_count],
-        },
-    )
+    new_sample = pl.DataFrame({
+        "sample_uid": [int(len(self.samples_df) + 1)],
+        "sample_name": [sample_name],
+        "sample_path": [final_sample_path],
+        "sample_type": [sample_type],
+        "map_id": [map_id_value],
+        "sample_source": [getattr(ddaobj, "file_source", file)],
+        "sample_color": [None],  # Will be set in batch at end
+        "sample_group": [""],
+        "sample_batch": [1],
+        "sample_sequence": [next_sequence],
+        "num_features": [int(ddaobj._oms_features_map.size())],
+        "num_ms1": [ms1_count],
+        "num_ms2": [ms2_count],
+    })

     self.samples_df = pl.concat([self.samples_df, new_sample])

@@ -1862,9 +1777,7 @@ def _add_sample_standard(
     # Use diagonal concatenation for flexibility
     self.features_df = pl.concat([self.features_df, f_df], how="diagonal")

-    self.logger.debug(
-        f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (standard)",
-    )
+    self.logger.debug(f"Added sample {sample_name} with {ddaobj._oms_features_map.size()} features (standard)")
     return True
