PyPI - masster - Versions diffs - 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl - Mend - Supply Chain Defender

masster 0.4.2 (py3-none-any.whl) → 0.4.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (23) hide show

masster/study/helpers.py CHANGED Viewed

@@ -1599,11 +1599,12 @@ def features_select(
     Returns:
         polars.DataFrame: Filtered features DataFrame
     """
+    # Consolidated optimized implementation (previously in helpers_optimized.py)
     if self.features_df is None or self.features_df.is_empty():
         self.logger.warning("No features found in study.")
         return pl.DataFrame()
-    # Early return if no filters provided - performance optimization
+    # Early return if no filters provided
     filter_params = [
         mz,
         rt,
@@ -1624,10 +1625,10 @@ def features_select(
     initial_count = len(self.features_df)
-    # Pre-check available columns once for efficiency
+    # Pre-check available columns once
     available_columns = set(self.features_df.columns)
-    # Build all filter conditions first, then apply them all at once
+    # Build all filter conditions
     filter_conditions = []
     warnings = []
@@ -1786,18 +1787,18 @@ def features_select(
         else:
             warnings.append("'chrom_height_scaled' column not found in features_df")
-    # Log all warnings once at the end for efficiency
+    # Log warnings once at the end
     for warning in warnings:
         self.logger.warning(warning)
-    # Apply all filters at once using lazy evaluation for optimal performance
+    # Apply all filters at once if any exist
     if filter_conditions:
         # Combine all conditions with AND
         combined_filter = filter_conditions[0]
         for condition in filter_conditions[1:]:
             combined_filter = combined_filter & condition
-        # Apply the combined filter using lazy evaluation
+        # Apply the combined filter using lazy evaluation for better performance
         feats = self.features_df.lazy().filter(combined_filter).collect()
     else:
         feats = self.features_df.clone()
@@ -1807,12 +1808,106 @@ def features_select(
     if final_count == 0:
         self.logger.warning("No features remaining after applying selection criteria.")
     else:
-        # removed_count = initial_count - final_count
-        self.logger.info(f"Features selected: {final_count} (out of {initial_count})")
+        removed_count = initial_count - final_count
+        self.logger.info(f"Features selected: {final_count} (removed: {removed_count})")
     return feats
+def features_select_benchmarked(
+    self,
+    mz=None,
+    rt=None,
+    inty=None,
+    sample_uid=None,
+    sample_name=None,
+    consensus_uid=None,
+    feature_uid=None,
+    filled=None,
+    quality=None,
+    chrom_coherence=None,
+    chrom_prominence=None,
+    chrom_prominence_scaled=None,
+    chrom_height_scaled=None,
+):
+    """
+    Run `self.features_select` with the given filters and log how long the call took.
+    If the instance exposes a callable `features_select_original` attribute, that
+    implementation is called first with the same keyword arguments and timed as well;
+    the log line then reports both durations and the ratio original / optimized.
+    Otherwise only the `self.features_select` call is timed and logged.
+    Args:
+        mz, rt, inty, sample_uid, sample_name, consensus_uid, feature_uid, filled,
+        quality, chrom_coherence, chrom_prominence, chrom_prominence_scaled,
+        chrom_height_scaled: Filter arguments, all defaulting to None. They are not
+            inspected here; each is forwarded unchanged, by keyword, to the
+            implementation(s) being timed.
+    Returns:
+        The value returned by `self.features_select`. The result of the original
+        implementation, when one is run, is discarded.
+    Note:
+        Each implementation is timed over a single call with `time.perf_counter`,
+        and the two results are not compared with each other.
+    """
+    import time  # function-scope import
+    original_time = None  # stays None when no original implementation is found
+    # Look up an optional reference implementation; getattr's default avoids an
+    # AttributeError when none has been stored
+    original_impl = getattr(self, "features_select_original", None)
+    if callable(original_impl):
+        start_time = time.perf_counter()
+        _ = original_impl(  # return value is intentionally ignored; only the duration matters
+            mz=mz,
+            rt=rt,
+            inty=inty,
+            sample_uid=sample_uid,
+            sample_name=sample_name,
+            consensus_uid=consensus_uid,
+            feature_uid=feature_uid,
+            filled=filled,
+            quality=quality,
+            chrom_coherence=chrom_coherence,
+            chrom_prominence=chrom_prominence,
+            chrom_prominence_scaled=chrom_prominence_scaled,
+            chrom_height_scaled=chrom_height_scaled,
+        )
+        original_time = time.perf_counter() - start_time
+    # Time the current features_select with exactly the same arguments
+    start_time = time.perf_counter()
+    result_optimized = self.features_select(
+        mz=mz,
+        rt=rt,
+        inty=inty,
+        sample_uid=sample_uid,
+        sample_name=sample_name,
+        consensus_uid=consensus_uid,
+        feature_uid=feature_uid,
+        filled=filled,
+        quality=quality,
+        chrom_coherence=chrom_coherence,
+        chrom_prominence=chrom_prominence,
+        chrom_prominence_scaled=chrom_prominence_scaled,
+        chrom_height_scaled=chrom_height_scaled,
+    )
+    optimized_time = time.perf_counter() - start_time
+    # Report both timings when a reference run happened, otherwise just the one timing
+    if original_time is not None:
+        speedup = original_time / optimized_time if optimized_time > 0 else float("inf")  # guard against dividing by a zero duration
+        self.logger.info(
+            f"Performance comparison - Original: {original_time:.4f}s, Optimized: {optimized_time:.4f}s, Speedup: {speedup:.2f}x",
+        )
+    else:
+        self.logger.info(f"Optimized features_select executed in {optimized_time:.4f}s")
+    return result_optimized
+def monkey_patch_study():
+    """
+    (Optional) Monkey-patch helper for Study. Stores the current Study.features_select
+    as `Study.features_select_original` if that attribute is not already set, then
+    assigns this module's `features_select` to Study.features_select and
+    `features_select_benchmarked` to Study.features_select_benchmarked.
+    Repeated calls keep the first stored original and reassign the same two
+    functions, so the class ends up in the same state each time. A confirmation
+    line is printed to stdout on every call. Takes no arguments and returns None.
+    NOTE(review): if Study.features_select is already this module's `features_select`
+    on the first call, `features_select_original` will reference that same function
+    and the benchmark will compare the implementation with itself - confirm how
+    Study binds features_select.
+    """
+    from masster.study.study import Study  # imported inside the function; presumably avoids a circular import - TODO confirm
+    # Preserve the first-seen implementation; later calls must not overwrite it
+    if not hasattr(Study, "features_select_original"):
+        Study.features_select_original = Study.features_select
+    Study.features_select = features_select
+    Study.features_select_benchmarked = features_select_benchmarked
+    print("Patched Study.features_select with consolidated optimized implementation")  # written to stdout directly, not through a logger
 def features_filter(self, features):
     """
     Filter features_df by keeping only features that match the given criteria.