masster 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/__init__.py +6 -1
- masster/_version.py +1 -1
- masster/logger.py +42 -0
- masster/sample/h5.py +58 -1
- masster/sample/load.py +12 -5
- masster/sample/plot.py +56 -65
- masster/sample/processing.py +158 -0
- masster/sample/sample.py +2 -9
- masster/sample/sample5_schema.json +3 -0
- masster/sample/save.py +137 -59
- masster/spectrum.py +58 -9
- masster/study/export.py +238 -152
- masster/study/h5.py +65 -1
- masster/study/helpers.py +55 -14
- masster/study/merge.py +910 -67
- masster/study/plot.py +50 -7
- masster/study/processing.py +257 -1
- masster/study/save.py +48 -5
- masster/study/study.py +34 -3
- masster/study/study5_schema.json +3 -0
- masster/wizard/__init__.py +8 -2
- masster/wizard/wizard.py +612 -876
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/METADATA +1 -1
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/RECORD +27 -30
- masster/wizard/test_structure.py +0 -49
- masster/wizard/test_wizard.py +0 -285
- masster/wizard.py +0 -1175
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/WHEEL +0 -0
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/entry_points.txt +0 -0
- {masster-0.4.19.dist-info → masster-0.4.21.dist-info}/licenses/LICENSE +0 -0
masster/study/helpers.py
CHANGED
|
@@ -509,8 +509,9 @@ def get_consensus(self, quant="chrom_area"):
|
|
|
509
509
|
# Convert Polars DataFrame to pandas for this operation since the result is used for export
|
|
510
510
|
df1 = self.consensus_df.to_pandas().copy()
|
|
511
511
|
|
|
512
|
-
#
|
|
513
|
-
|
|
512
|
+
# Keep consensus_id as string (UUID format)
|
|
513
|
+
# Note: consensus_id is now a 16-character UUID string, not an integer
|
|
514
|
+
df1["consensus_id"] = df1["consensus_id"].astype("string")
|
|
514
515
|
# set consensus_uid as index
|
|
515
516
|
df1.set_index("consensus_uid", inplace=True)
|
|
516
517
|
# sort by consensus_id
|
|
@@ -640,21 +641,61 @@ def get_gaps_stats(self, uids=None):
|
|
|
640
641
|
return gaps_stats
|
|
641
642
|
|
|
642
643
|
|
|
643
|
-
|
|
644
|
-
|
|
644
|
+
def get_consensus_matches(self, uids=None, filled=True):
|
|
645
|
+
"""
|
|
646
|
+
Get feature matches for consensus UIDs with optimized join operation.
|
|
647
|
+
|
|
648
|
+
Parameters:
|
|
649
|
+
uids: Consensus UID(s) to get matches for. Can be:
|
|
650
|
+
- None: get matches for all consensus features
|
|
651
|
+
- int: single consensus UID (converted to list)
|
|
652
|
+
- list: multiple consensus UIDs
|
|
653
|
+
filled (bool): Whether to include filled rows (True) or exclude them (False).
|
|
654
|
+
Default is True to maintain backward compatibility.
|
|
655
|
+
|
|
656
|
+
Returns:
|
|
657
|
+
pl.DataFrame: Feature matches for the specified consensus UIDs
|
|
658
|
+
"""
|
|
659
|
+
# Handle single int by converting to list
|
|
660
|
+
if isinstance(uids, int):
|
|
661
|
+
uids = [uids]
|
|
662
|
+
|
|
645
663
|
uids = self._get_consensus_uids(uids)
|
|
646
|
-
|
|
647
|
-
|
|
648
|
-
|
|
649
|
-
|
|
650
|
-
|
|
664
|
+
|
|
665
|
+
if not uids:
|
|
666
|
+
return pl.DataFrame()
|
|
667
|
+
|
|
668
|
+
# Early validation checks
|
|
669
|
+
if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
|
|
670
|
+
self.logger.warning("No consensus mapping data available")
|
|
671
|
+
return pl.DataFrame()
|
|
672
|
+
|
|
673
|
+
if self.features_df is None or self.features_df.is_empty():
|
|
674
|
+
self.logger.warning("No feature data available")
|
|
675
|
+
return pl.DataFrame()
|
|
676
|
+
|
|
677
|
+
# Build the query with optional filled filter
|
|
678
|
+
features_query = self.features_df.lazy()
|
|
679
|
+
|
|
680
|
+
# Apply filled filter if specified
|
|
681
|
+
if not filled and "filled" in self.features_df.columns:
|
|
682
|
+
features_query = features_query.filter(~pl.col("filled"))
|
|
683
|
+
|
|
684
|
+
# Optimized single-pass operation using join instead of two separate filters
|
|
685
|
+
# This avoids creating intermediate Python lists and leverages Polars' optimized joins
|
|
686
|
+
matches = (
|
|
687
|
+
features_query
|
|
688
|
+
.join(
|
|
689
|
+
self.consensus_mapping_df
|
|
690
|
+
.lazy()
|
|
691
|
+
.filter(pl.col("consensus_uid").is_in(uids))
|
|
692
|
+
.select("feature_uid"), # Only select what we need for the join
|
|
693
|
+
on="feature_uid",
|
|
694
|
+
how="inner"
|
|
651
695
|
)
|
|
652
|
-
.
|
|
653
|
-
.to_series()
|
|
654
|
-
.to_list()
|
|
696
|
+
.collect(streaming=True) # Use streaming for memory efficiency with large datasets
|
|
655
697
|
)
|
|
656
|
-
|
|
657
|
-
matches = self.features_df.filter(pl.col("feature_uid").is_in(fid)).clone()
|
|
698
|
+
|
|
658
699
|
return matches
|
|
659
700
|
|
|
660
701
|
|