masster 0.4.2__py3-none-any.whl → 0.4.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/lib/__init__.py +9 -0
- masster/lib/lib.py +598 -0
- masster/study/helpers.py +103 -8
- {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/METADATA +791 -789
- {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/RECORD +10 -20
- {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/WHEEL +2 -1
- masster-0.4.3.dist-info/top_level.txt +1 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil2_01_20250602151849.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_QC_dil3_01_20250602150634.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5 +0 -0
- masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5 +0 -0
- masster/data/libs/ccm.csv +0 -120
- masster/data/libs/urine.csv +0 -4693
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- masster/sample/sample5_schema.json +0 -196
- masster/study/study5_schema.json +0 -360
- {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/entry_points.txt +0 -0
- {masster-0.4.2.dist-info → masster-0.4.3.dist-info}/licenses/LICENSE +0 -0
masster/study/helpers.py
CHANGED
|
@@ -1599,11 +1599,12 @@ def features_select(
|
|
|
1599
1599
|
Returns:
|
|
1600
1600
|
polars.DataFrame: Filtered features DataFrame
|
|
1601
1601
|
"""
|
|
1602
|
+
# Consolidated optimized implementation (previously in helpers_optimized.py)
|
|
1602
1603
|
if self.features_df is None or self.features_df.is_empty():
|
|
1603
1604
|
self.logger.warning("No features found in study.")
|
|
1604
1605
|
return pl.DataFrame()
|
|
1605
1606
|
|
|
1606
|
-
# Early return if no filters provided
|
|
1607
|
+
# Early return if no filters provided
|
|
1607
1608
|
filter_params = [
|
|
1608
1609
|
mz,
|
|
1609
1610
|
rt,
|
|
@@ -1624,10 +1625,10 @@ def features_select(
|
|
|
1624
1625
|
|
|
1625
1626
|
initial_count = len(self.features_df)
|
|
1626
1627
|
|
|
1627
|
-
# Pre-check available columns once
|
|
1628
|
+
# Pre-check available columns once
|
|
1628
1629
|
available_columns = set(self.features_df.columns)
|
|
1629
1630
|
|
|
1630
|
-
# Build all filter conditions
|
|
1631
|
+
# Build all filter conditions
|
|
1631
1632
|
filter_conditions = []
|
|
1632
1633
|
warnings = []
|
|
1633
1634
|
|
|
@@ -1786,18 +1787,18 @@ def features_select(
|
|
|
1786
1787
|
else:
|
|
1787
1788
|
warnings.append("'chrom_height_scaled' column not found in features_df")
|
|
1788
1789
|
|
|
1789
|
-
# Log
|
|
1790
|
+
# Log warnings once at the end
|
|
1790
1791
|
for warning in warnings:
|
|
1791
1792
|
self.logger.warning(warning)
|
|
1792
1793
|
|
|
1793
|
-
# Apply all filters at once
|
|
1794
|
+
# Apply all filters at once if any exist
|
|
1794
1795
|
if filter_conditions:
|
|
1795
1796
|
# Combine all conditions with AND
|
|
1796
1797
|
combined_filter = filter_conditions[0]
|
|
1797
1798
|
for condition in filter_conditions[1:]:
|
|
1798
1799
|
combined_filter = combined_filter & condition
|
|
1799
1800
|
|
|
1800
|
-
# Apply the combined filter using lazy evaluation
|
|
1801
|
+
# Apply the combined filter using lazy evaluation for better performance
|
|
1801
1802
|
feats = self.features_df.lazy().filter(combined_filter).collect()
|
|
1802
1803
|
else:
|
|
1803
1804
|
feats = self.features_df.clone()
|
|
@@ -1807,12 +1808,106 @@ def features_select(
|
|
|
1807
1808
|
if final_count == 0:
|
|
1808
1809
|
self.logger.warning("No features remaining after applying selection criteria.")
|
|
1809
1810
|
else:
|
|
1810
|
-
|
|
1811
|
-
self.logger.info(f"Features selected: {final_count} (
|
|
1811
|
+
removed_count = initial_count - final_count
|
|
1812
|
+
self.logger.info(f"Features selected: {final_count} (removed: {removed_count})")
|
|
1812
1813
|
|
|
1813
1814
|
return feats
|
|
1814
1815
|
|
|
1815
1816
|
|
|
1817
|
+
def features_select_benchmarked(
    self,
    mz=None,
    rt=None,
    inty=None,
    sample_uid=None,
    sample_name=None,
    consensus_uid=None,
    feature_uid=None,
    filled=None,
    quality=None,
    chrom_coherence=None,
    chrom_prominence=None,
    chrom_prominence_scaled=None,
    chrom_height_scaled=None,
):
    """
    Run ``features_select`` under a timer and log how long it took.

    If the instance exposes a callable ``features_select_original`` attribute,
    that implementation is run first with exactly the same filters; both
    timings and the resulting speedup are then logged. Otherwise only the
    timing of ``self.features_select`` is logged.

    Parameters:
        mz, rt, inty, sample_uid, sample_name, consensus_uid, feature_uid,
        filled, quality, chrom_coherence, chrom_prominence,
        chrom_prominence_scaled, chrom_height_scaled:
            Filter criteria. They are not interpreted here; each one is
            forwarded unchanged, by keyword, to the implementation(s) being
            timed.

    Returns:
        The value returned by ``self.features_select`` for the given filters.
        The result of ``features_select_original`` is never returned; it is
        only used for the shape sanity check described below.
    """
    import time

    # Build the forwarded arguments once so both implementations are
    # guaranteed to receive identical filters (previously the list was
    # duplicated by hand and could drift).
    select_kwargs = {
        "mz": mz,
        "rt": rt,
        "inty": inty,
        "sample_uid": sample_uid,
        "sample_name": sample_name,
        "consensus_uid": consensus_uid,
        "feature_uid": feature_uid,
        "filled": filled,
        "quality": quality,
        "chrom_coherence": chrom_coherence,
        "chrom_prominence": chrom_prominence,
        "chrom_prominence_scaled": chrom_prominence_scaled,
        "chrom_height_scaled": chrom_height_scaled,
    }

    original_time = None
    original_result = None
    # If an original implementation was stored, time it for comparison.
    original_impl = getattr(self, "features_select_original", None)
    if callable(original_impl):
        start_time = time.perf_counter()
        original_result = original_impl(**select_kwargs)
        original_time = time.perf_counter() - start_time

    # Time the current (optimized) implementation.
    start_time = time.perf_counter()
    result_optimized = self.features_select(**select_kwargs)
    optimized_time = time.perf_counter() - start_time

    if original_time is not None:
        # Guard against a zero-duration optimized run to avoid ZeroDivisionError.
        speedup = original_time / optimized_time if optimized_time > 0 else float("inf")
        self.logger.info(
            f"Performance comparison - Original: {original_time:.4f}s, Optimized: {optimized_time:.4f}s, Speedup: {speedup:.2f}x",
        )

        # A timing comparison is only meaningful if both implementations
        # select the same data. Compare shapes when both results expose one
        # (e.g. DataFrames); anything without a ``shape`` is left unchecked.
        original_shape = getattr(original_result, "shape", None)
        optimized_shape = getattr(result_optimized, "shape", None)
        if (
            original_shape is not None
            and optimized_shape is not None
            and original_shape != optimized_shape
        ):
            self.logger.warning(
                f"Benchmark results differ in shape - Original: {original_shape}, Optimized: {optimized_shape}",
            )
    else:
        self.logger.info(f"Optimized features_select executed in {optimized_time:.4f}s")

    return result_optimized
|
|
1891
|
+
|
|
1892
|
+
|
|
1893
|
+
def monkey_patch_study():
    """
    Install this module's ``features_select`` implementations on ``Study``.

    On the first call the method currently bound as ``Study.features_select``
    is preserved as ``Study.features_select_original``; later calls leave that
    saved reference untouched. ``Study.features_select`` and
    ``Study.features_select_benchmarked`` are then (re)assigned to the
    module-level functions of the same names, and a confirmation line is
    printed to stdout.
    """
    # Imported inside the function rather than at module level
    # (presumably to avoid a circular import with the study module - confirm).
    from masster.study.study import Study

    # Keep the very first implementation only; never overwrite a saved one.
    already_saved = hasattr(Study, "features_select_original")
    if not already_saved:
        setattr(Study, "features_select_original", Study.features_select)

    replacements = (
        ("features_select", features_select),
        ("features_select_benchmarked", features_select_benchmarked),
    )
    for attr_name, implementation in replacements:
        setattr(Study, attr_name, implementation)

    print("Patched Study.features_select with consolidated optimized implementation")
|
|
1909
|
+
|
|
1910
|
+
|
|
1816
1911
|
def features_filter(self, features):
|
|
1817
1912
|
"""
|
|
1818
1913
|
Filter features_df by keeping only features that match the given criteria.
|