masster 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl

This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

Files changed (23)
  1. masster/_version.py +1 -1
  2. masster/lib/__init__.py +9 -0
  3. masster/lib/lib.py +598 -0
  4. masster/study/helpers.py +103 -8
  5. {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/METADATA +791 -789
  6. {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/RECORD +10 -20
  7. {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/WHEEL +2 -1
  8. masster-0.4.3.dist-info/top_level.txt +1 -0
  9. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
  10. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
  11. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
  12. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
  13. masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
  14. masster/data/libs/ccm.csv +0 -120
  15. masster/data/libs/urine.csv +0 -4693
  16. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
  17. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
  18. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
  19. masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
  20. masster/sample/sample5_schema.json +0 -196
  21. masster/study/study5_schema.json +0 -360
  22. {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/entry_points.txt +0 -0
  23. {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/licenses/LICENSE +0 -0
masster/study/helpers.py CHANGED
@@ -1599,11 +1599,12 @@ def features_select(
1599
1599
  Returns:
1600
1600
  polars.DataFrame: Filtered features DataFrame
1601
1601
  """
1602
+ # Consolidated optimized implementation (previously in helpers_optimized.py)
1602
1603
  if self.features_df is None or self.features_df.is_empty():
1603
1604
  self.logger.warning("No features found in study.")
1604
1605
  return pl.DataFrame()
1605
1606
 
1606
- # Early return if no filters provided - performance optimization
1607
+ # Early return if no filters provided
1607
1608
  filter_params = [
1608
1609
  mz,
1609
1610
  rt,
@@ -1624,10 +1625,10 @@ def features_select(
1624
1625
 
1625
1626
  initial_count = len(self.features_df)
1626
1627
 
1627
- # Pre-check available columns once for efficiency
1628
+ # Pre-check available columns once
1628
1629
  available_columns = set(self.features_df.columns)
1629
1630
 
1630
- # Build all filter conditions first, then apply them all at once
1631
+ # Build all filter conditions
1631
1632
  filter_conditions = []
1632
1633
  warnings = []
1633
1634
 
@@ -1786,18 +1787,18 @@ def features_select(
1786
1787
  else:
1787
1788
  warnings.append("'chrom_height_scaled' column not found in features_df")
1788
1789
 
1789
- # Log all warnings once at the end for efficiency
1790
+ # Log warnings once at the end
1790
1791
  for warning in warnings:
1791
1792
  self.logger.warning(warning)
1792
1793
 
1793
- # Apply all filters at once using lazy evaluation for optimal performance
1794
+ # Apply all filters at once if any exist
1794
1795
  if filter_conditions:
1795
1796
  # Combine all conditions with AND
1796
1797
  combined_filter = filter_conditions[0]
1797
1798
  for condition in filter_conditions[1:]:
1798
1799
  combined_filter = combined_filter & condition
1799
1800
 
1800
- # Apply the combined filter using lazy evaluation
1801
+ # Apply the combined filter using lazy evaluation for better performance
1801
1802
  feats = self.features_df.lazy().filter(combined_filter).collect()
1802
1803
  else:
1803
1804
  feats = self.features_df.clone()
@@ -1807,12 +1808,106 @@ def features_select(
1807
1808
  if final_count == 0:
1808
1809
  self.logger.warning("No features remaining after applying selection criteria.")
1809
1810
  else:
1810
- # removed_count = initial_count - final_count
1811
- self.logger.info(f"Features selected: {final_count} (out of {initial_count})")
1811
+ removed_count = initial_count - final_count
1812
+ self.logger.info(f"Features selected: {final_count} (removed: {removed_count})")
1812
1813
 
1813
1814
  return feats
1814
1815
 
1815
1816
 
1817
+ def features_select_benchmarked(
1818
+ self,
1819
+ mz=None,
1820
+ rt=None,
1821
+ inty=None,
1822
+ sample_uid=None,
1823
+ sample_name=None,
1824
+ consensus_uid=None,
1825
+ feature_uid=None,
1826
+ filled=None,
1827
+ quality=None,
1828
+ chrom_coherence=None,
1829
+ chrom_prominence=None,
1830
+ chrom_prominence_scaled=None,
1831
+ chrom_height_scaled=None,
1832
+ ):
1833
+ """
1834
+ Benchmarked version that compares old vs new implementation performance.
1835
+ If an original implementation is available as `features_select_original` on the Study
1836
+ instance, it will be used for comparison; otherwise only the optimized run is timed.
1837
+ """
1838
+ import time
1839
+
1840
+ original_time = None
1841
+ # If an original implementation was stored, call it for comparison
1842
+ original_impl = getattr(self, "features_select_original", None)
1843
+ if callable(original_impl):
1844
+ start_time = time.perf_counter()
1845
+ _ = original_impl(
1846
+ mz=mz,
1847
+ rt=rt,
1848
+ inty=inty,
1849
+ sample_uid=sample_uid,
1850
+ sample_name=sample_name,
1851
+ consensus_uid=consensus_uid,
1852
+ feature_uid=feature_uid,
1853
+ filled=filled,
1854
+ quality=quality,
1855
+ chrom_coherence=chrom_coherence,
1856
+ chrom_prominence=chrom_prominence,
1857
+ chrom_prominence_scaled=chrom_prominence_scaled,
1858
+ chrom_height_scaled=chrom_height_scaled,
1859
+ )
1860
+ original_time = time.perf_counter() - start_time
1861
+
1862
+ # Call the optimized method
1863
+ start_time = time.perf_counter()
1864
+ result_optimized = self.features_select(
1865
+ mz=mz,
1866
+ rt=rt,
1867
+ inty=inty,
1868
+ sample_uid=sample_uid,
1869
+ sample_name=sample_name,
1870
+ consensus_uid=consensus_uid,
1871
+ feature_uid=feature_uid,
1872
+ filled=filled,
1873
+ quality=quality,
1874
+ chrom_coherence=chrom_coherence,
1875
+ chrom_prominence=chrom_prominence,
1876
+ chrom_prominence_scaled=chrom_prominence_scaled,
1877
+ chrom_height_scaled=chrom_height_scaled,
1878
+ )
1879
+ optimized_time = time.perf_counter() - start_time
1880
+
1881
+ # Log performance comparison when possible
1882
+ if original_time is not None:
1883
+ speedup = original_time / optimized_time if optimized_time > 0 else float("inf")
1884
+ self.logger.info(
1885
+ f"Performance comparison - Original: {original_time:.4f}s, Optimized: {optimized_time:.4f}s, Speedup: {speedup:.2f}x",
1886
+ )
1887
+ else:
1888
+ self.logger.info(f"Optimized features_select executed in {optimized_time:.4f}s")
1889
+
1890
+ return result_optimized
1891
+
1892
+
1893
+ def monkey_patch_study():
1894
+ """
1895
+ (Optional) Monkey-patch helper for Study. Stores the current Study.features_select
1896
+ as `features_select_original` if not already set, then replaces Study.features_select
1897
+ with the optimized `features_select` defined above. This function is idempotent.
1898
+ """
1899
+ from masster.study.study import Study
1900
+
1901
+ # Only set original if it doesn't exist yet
1902
+ if not hasattr(Study, "features_select_original"):
1903
+ Study.features_select_original = Study.features_select
1904
+
1905
+ Study.features_select = features_select
1906
+ Study.features_select_benchmarked = features_select_benchmarked
1907
+
1908
+ print("Patched Study.features_select with consolidated optimized implementation")
1909
+
1910
+
1816
1911
  def features_filter(self, features):
1817
1912
  """
1818
1913
  Filter features_df by keeping only features that match the given criteria.