PyPI - masster - Versions diffs - 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl - Mend

masster 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of masster might be problematic. Click here for more details.

Files changed (30) hide show

masster/__init__.py +6 -1
masster/_version.py +1 -1
masster/logger.py +42 -0
masster/sample/h5.py +58 -1
masster/sample/load.py +12 -5
masster/sample/plot.py +56 -65
masster/sample/processing.py +158 -0
masster/sample/sample.py +2 -9
masster/sample/sample5_schema.json +3 -0
masster/sample/save.py +137 -59
masster/spectrum.py +58 -9
masster/study/export.py +238 -152
masster/study/h5.py +65 -1
masster/study/helpers.py +55 -14
masster/study/merge.py +910 -67
masster/study/plot.py +50 -7
masster/study/processing.py +257 -1
masster/study/save.py +48 -5
masster/study/study.py +34 -3
masster/study/study5_schema.json +3 -0
masster/wizard/__init__.py +8 -2
masster/wizard/wizard.py +612 -876
{masster-0.4.19.dist-info → masster-0.4.21.dist-info}/METADATA +1 -1
{masster-0.4.19.dist-info → masster-0.4.21.dist-info}/RECORD +27 -30
masster/wizard/test_structure.py +0 -49
masster/wizard/test_wizard.py +0 -285
masster/wizard.py +0 -1175
{masster-0.4.19.dist-info → masster-0.4.21.dist-info}/WHEEL +0 -0
{masster-0.4.19.dist-info → masster-0.4.21.dist-info}/entry_points.txt +0 -0
{masster-0.4.19.dist-info → masster-0.4.21.dist-info}/licenses/LICENSE +0 -0

masster/study/helpers.py CHANGED Viewed

@@ -509,8 +509,9 @@ def get_consensus(self, quant="chrom_area"):
     # Convert Polars DataFrame to pandas for this operation since the result is used for export
     df1 = self.consensus_df.to_pandas().copy()
-    # set consensus_id as uint64
-    df1["consensus_id"] = df1["consensus_id"].astype("uint64")
+    # Keep consensus_id as string (UUID format)
+    # Note: consensus_id is now a 16-character UUID string, not an integer
+    df1["consensus_id"] = df1["consensus_id"].astype("string")
     # set consensus_id as index
     df1.set_index("consensus_uid", inplace=True)
     # sort by consensus_id
@@ -640,21 +641,61 @@ def get_gaps_stats(self, uids=None):
     return gaps_stats
-# TODO is uid not supposed to be a list anymore?
-def get_consensus_matches(self, uids=None):
+def get_consensus_matches(self, uids=None, filled=True):
+    """
+    Get feature matches for consensus UIDs with optimized join operation.
+    Parameters:
+        uids: Consensus UID(s) to get matches for. Can be:
+              - None: get matches for all consensus features
+              - int: single consensus UID (converted to list)
+              - list: multiple consensus UIDs
+        filled (bool): Whether to include filled rows (True) or exclude them (False).
+                      Default is True to maintain backward compatibility.
+    Returns:
+        pl.DataFrame: Feature matches for the specified consensus UIDs
+    """
+    # Handle single int by converting to list
+    if isinstance(uids, int):
+        uids = [uids]
     uids = self._get_consensus_uids(uids)
-    # find all rows in consensus_mapping_df with consensus_id=id - use Polars filtering
-    fid = (
-        self.consensus_mapping_df.filter(
-            pl.col("consensus_uid").is_in(uids),
+    if not uids:
+        return pl.DataFrame()
+    # Early validation checks
+    if self.consensus_mapping_df is None or self.consensus_mapping_df.is_empty():
+        self.logger.warning("No consensus mapping data available")
+        return pl.DataFrame()
+    if self.features_df is None or self.features_df.is_empty():
+        self.logger.warning("No feature data available")
+        return pl.DataFrame()
+    # Build the query with optional filled filter
+    features_query = self.features_df.lazy()
+    # Apply filled filter if specified
+    if not filled and "filled" in self.features_df.columns:
+        features_query = features_query.filter(~pl.col("filled"))
+    # Optimized single-pass operation using join instead of two separate filters
+    # This avoids creating intermediate Python lists and leverages Polars' optimized joins
+    matches = (
+        features_query
+        .join(
+            self.consensus_mapping_df
+            .lazy()
+            .filter(pl.col("consensus_uid").is_in(uids))
+            .select("feature_uid"),  # Only select what we need for the join
+            on="feature_uid",
+            how="inner"
         )
-        .select("feature_uid")
-        .to_series()
-        .to_list()
+        .collect(streaming=True)  # Use streaming for memory efficiency with large datasets
     )
-    # select all rows in features_df with uid in fid
-    matches = self.features_df.filter(pl.col("feature_uid").is_in(fid)).clone()
     return matches

masster 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl

Potentially problematic release.

masster 0.4.19__py3-none-any.whl → 0.4.21__py3-none-any.whl