masster 0.5.27-py3-none-any.whl → 0.6.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of masster has been flagged as possibly problematic.

masster/sample/sample.py CHANGED
@@ -129,6 +129,12 @@ from masster.sample.helpers import get_eic
 from masster.sample.helpers import set_source
 from masster.sample.helpers import _recreate_feature_map
 from masster.sample.helpers import _get_feature_map
+from masster.sample.id import lib_load
+from masster.sample.id import identify
+from masster.sample.id import get_id
+from masster.sample.id import id_reset
+from masster.sample.id import lib_reset
+from masster.sample.importers import import_oracle
 from masster.sample.load import chrom_extract
 from masster.sample.load import _index_file
 from masster.sample.load import load
@@ -259,9 +265,10 @@ class Sample:
         # the polars data frame with MS1 level data
         self.ms1_df = pl.DataFrame()

-        # lightweight lib data for matching, targeted analyses, etc. > superseded by study methods
-        self.lib = None
-        self.lib_match = None
+        # identification DataFrames (lib_df and id_df)
+        self.lib_df = None  # library DataFrame (from masster.lib or CSV/JSON)
+        self.id_df = None  # identification results DataFrame
+        self._lib = None  # reference to Lib object if loaded
         self.chrom_df = None

         if params.filename is not None:
@@ -292,6 +299,14 @@ class Sample:
     update_parameters = update_parameters
     get_parameters_property = get_parameters_property
     set_parameters_property = set_parameters_property
+    # Identification methods from id.py
+    lib_load = lib_load
+    identify = identify
+    get_id = get_id
+    id_reset = id_reset
+    lib_reset = lib_reset
+    # Importers from importers.py
+    import_oracle = import_oracle
     export_features = export_features
     export_xlsx = export_xlsx
     export_mgf = export_mgf
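The bindings above expose the new identification workflow directly on `Sample`. A minimal usage sketch, assuming the method names from the bindings; the import path, constructor arguments, and file names are placeholders, not confirmed by this diff:

```python
from masster import Sample  # import path is an assumption

sample = Sample(filename="example.mzML")  # placeholder input file
sample.lib_load("library.csv")  # populates sample.lib_df from a CSV/JSON library
sample.identify()               # fills sample.id_df with match results
hits = sample.get_id()          # retrieve identification results
sample.id_reset()               # clear id_df
sample.lib_reset()              # clear lib_df and the cached Lib object
```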
@@ -93,10 +93,108 @@
       },
       "ms1_spec": {
         "dtype": "pl.Object"
+      },
+      "id_top_name": {
+        "dtype": "pl.Utf8"
+      },
+      "id_top_class": {
+        "dtype": "pl.Utf8"
+      },
+      "id_top_adduct": {
+        "dtype": "pl.Utf8"
+      },
+      "id_top_score": {
+        "dtype": "pl.Float64"
+      },
+      "id_source": {
+        "dtype": "pl.Utf8"
+      }
+    }
+  },
+  "lib_df": {
+    "columns": {
+      "lib_uid": {
+        "dtype": "pl.Int64"
+      },
+      "cmpd_uid": {
+        "dtype": "pl.Int64"
+      },
+      "name": {
+        "dtype": "pl.Utf8"
+      },
+      "shortname": {
+        "dtype": "pl.Utf8"
+      },
+      "class": {
+        "dtype": "pl.Utf8"
+      },
+      "formula": {
+        "dtype": "pl.Utf8"
+      },
+      "iso": {
+        "dtype": "pl.Int64"
+      },
+      "smiles": {
+        "dtype": "pl.Utf8"
+      },
+      "inchi": {
+        "dtype": "pl.Utf8"
+      },
+      "inchikey": {
+        "dtype": "pl.Utf8"
+      },
+      "adduct": {
+        "dtype": "pl.Utf8"
+      },
+      "z": {
+        "dtype": "pl.Int64"
+      },
+      "m": {
+        "dtype": "pl.Float64"
+      },
+      "mz": {
+        "dtype": "pl.Float64"
+      },
+      "rt": {
+        "dtype": "pl.Float64"
+      },
+      "quant_group": {
+        "dtype": "pl.Int64"
+      },
+      "probability": {
+        "dtype": "pl.Float64"
+      },
+      "source_id": {
+        "dtype": "pl.Utf8"
+      }
+    }
+  },
+  "id_df": {
+    "columns": {
+      "feature_uid": {
+        "dtype": "pl.Int64"
+      },
+      "lib_uid": {
+        "dtype": "pl.Int64"
+      },
+      "mz_delta": {
+        "dtype": "pl.Float64"
+      },
+      "rt_delta": {
+        "dtype": "pl.Float64"
+      },
+      "matcher": {
+        "dtype": "pl.Utf8"
+      },
+      "score": {
+        "dtype": "pl.Float64"
+      },
+      "iso": {
+        "dtype": "pl.Int64"
       }
     }
   },
-  "generated_date": "2025-08-03",
+  "generated_date": "2025-10-30",
   "ms1_df": {
     "columns": {
       "cycle": {
@@ -96,19 +96,15 @@ class study_defaults:
     "adducts": {
         "dtype": "list[str]",
         "description": "List of adduct specifications in OpenMS format (element:charge:probability). Charged adduct probabilities must sum to 1.0.",
-        "default": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
+        "default": ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05"],
         "examples": {
-            "positive": ["H:+:0.8", "Na:+:0.1", "NH4:+:0.1"],
-            "negative": [
-                "H-1:-:0.95",
-                "Cl:-:0.05",
-                "CH2O2:0:0.2",
-                "H-2-O:0:0.2",
-            ],
+            "positive": ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"],
+            "negative": ["-H:-1:0.95", "+Cl:-1:0.05", "+CH2O2:0:0.2", "-H2O:0:0.2"],
         },
         "validation_rules": [
-            "Format: element:charge:probability",
-            "Charge must be +, -, or 0 (neutral)",
+            "Format: formula:charge:probability (e.g., '+H:1:0.65', '-H:-1:0.95', '-H2O:0:0.15')",
+            "Formula must start with + or - to indicate gain/loss (e.g., '+H', '-H', '+Na', '-H2O')",
+            "Charge must be an integer (positive, negative, or 0 for neutral)",
             "Probability must be between 0.0 and 1.0",
             "Sum of all charged adduct probabilities must equal 1.0",
         ],
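A sketch of a validator implementing the rewritten validation_rules above (`formula:charge:probability`, signed formula prefix, charged probabilities summing to 1.0); the function is illustrative and not part of masster's API:

```python
def validate_adducts(specs: list[str]) -> None:
    """Check adduct specs against the rules listed in the schema above."""
    charged_total = 0.0
    for spec in specs:
        formula, charge_s, prob_s = spec.split(":")
        if formula[0] not in "+-":
            raise ValueError(f"{formula!r} must start with '+' or '-'")
        charge, prob = int(charge_s), float(prob_s)
        if not 0.0 <= prob <= 1.0:
            raise ValueError(f"probability {prob} out of range in {spec!r}")
        if charge != 0:  # neutral losses/gains are excluded from the sum
            charged_total += prob
    if abs(charged_total - 1.0) > 1e-9:
        raise ValueError(f"charged probabilities sum to {charged_total}, expected 1.0")

# The new positive-mode default plus a neutral water loss passes:
validate_adducts(["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05", "-H2O:0:0.15"])
```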
@@ -128,7 +124,7 @@ class study_defaults:
         """Set polarity-specific defaults for adducts if not explicitly provided."""
         # If adducts is None, set based on polarity
         if self.adducts is None:
-            if self.polarity.lower() in ["positive", "pos"]:
+            if self.polarity.lower() in ["positive", "pos", "+"]:
                 self.adducts = [
                     "+H:1:0.65",
                     "+Na:1:0.15",
@@ -136,7 +132,7 @@ class study_defaults:
                     "+K:1:0.05",
                     "-H2O:0:0.15",
                 ]
-            elif self.polarity.lower() in ["negative", "neg"]:
+            elif self.polarity.lower() in ["negative", "neg", "-"]:
                 self.adducts = [
                     "-H:-1:0.9",
                     "+Cl:-1:0.1",
masster/study/id.py CHANGED
@@ -24,7 +24,8 @@ def lib_load(
         lib_source: either a CSV/JSON file path (str) or a Lib instance
         polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV/JSON path.
             If None, uses study.polarity automatically.
-        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path
+        adducts: specific adducts to generate - used when lib_source is a CSV/JSON path.
+            If None, uses study.parameters.adducts if available.
         iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)

     Side effects:
@@ -51,6 +52,18 @@ def lib_load(
         else:
             polarity = "positive"  # Default fallback
         study.logger.debug(f"Using study polarity: {polarity}")
+
+    # Use study.parameters.adducts if adducts not explicitly provided
+    # If study.parameters.adducts is also None, lib will use its default adducts for the polarity
+    if adducts is None:
+        if hasattr(study, "parameters") and hasattr(study.parameters, "adducts"):
+            adducts = study.parameters.adducts
+            if adducts:
+                study.logger.debug(f"Using study.parameters.adducts: {adducts}")
+            else:
+                study.logger.debug(f"study.parameters.adducts is None, lib will use default adducts for {polarity} mode")
+        else:
+            study.logger.debug(f"study.parameters.adducts not found, lib will use default adducts for {polarity} mode")

     # Handle string input (CSV or JSON file path)
     if isinstance(lib_source, str):
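A sketch of the new fallback behavior; it assumes `study` is an existing masster study object with a `parameters` attribute, as in the diff, and the file paths are placeholders:

```python
def load_library(study):
    # Study-level adducts now act as the default for lib_load:
    study.parameters.adducts = ["+H:1:0.65", "+Na:1:0.15", "+NH4:1:0.15", "+K:1:0.05"]
    study.lib_load("compounds.csv")  # adducts=None -> falls back to study.parameters.adducts
    study.lib_load("compounds.csv", adducts=["+H:1:1.0"])  # explicit list still overrides
```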
@@ -403,42 +416,64 @@ def _find_matches_vectorized(lib_df, cons_mz, cons_rt, mz_tol, rt_tol, logger, c
     """
     Find library matches using optimized vectorized operations.

-    FIXED VERSION: Prevents incorrect matching of same compound to different m/z values.
+    Automatically skips RT filtering if library has no RT data for the matched entries.
     """
     # Filter by m/z tolerance using vectorized operations
     matches = lib_df.filter((pl.col("mz") >= cons_mz - mz_tol) & (pl.col("mz") <= cons_mz + mz_tol))

     initial_match_count = len(matches)

-    # Apply RT filter if available - STRICT VERSION (no fallback)
+    # Apply RT filter if requested AND if data is available
+    # Strategy: Handle mixed RT/no-RT entries properly by treating them separately
     if rt_tol is not None and cons_rt is not None and not matches.is_empty():
-        # First, check if any m/z matches have RT data
+        # Separate entries with and without RT data
         rt_candidates = matches.filter(pl.col("rt").is_not_null())
+        no_rt_entries = matches.filter(pl.col("rt").is_null())

         if not rt_candidates.is_empty():
             # Apply RT filtering to candidates with RT data
             rt_matches = rt_candidates.filter((pl.col("rt") >= cons_rt - rt_tol) & (pl.col("rt") <= cons_rt + rt_tol))

-            if not rt_matches.is_empty():
+            # Combine RT-filtered matches with entries that have no RT data
+            # Rationale: Entries without RT can't be filtered by RT, so include them
+            if not rt_matches.is_empty() and not no_rt_entries.is_empty():
+                # Both RT matches and no-RT entries exist
+                matches = pl.concat([rt_matches, no_rt_entries])
+                if logger:
+                    logger.debug(
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, "
+                        f"{len(rt_matches)} passed RT filter, {len(no_rt_entries)} with no RT → {len(matches)} total matches"
+                    )
+            elif not rt_matches.is_empty():
+                # Only RT matches, no entries without RT
                 matches = rt_matches
                 if logger:
                     logger.debug(
-                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, {len(matches)} after RT filter"
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, "
+                        f"{len(matches)} passed RT filter"
+                    )
+            elif not no_rt_entries.is_empty():
+                # No RT matches passed filter, but there are entries without RT
+                matches = no_rt_entries
+                if logger:
+                    logger.debug(
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT but none passed RT filter, "
+                        f"using {len(matches)} entries with no RT data"
                     )
             else:
-                # NO FALLBACK - if RT filtering finds no matches, return empty
-                matches = rt_matches  # This is empty
+                # No RT matches and no entries without RT - return empty
+                matches = pl.DataFrame()
                 if logger:
                     logger.debug(
                         f"Consensus {cons_uid}: RT filtering eliminated all {len(rt_candidates)} candidates (rt_tol={rt_tol}s) - no matches returned"
                     )
         else:
-            # No RT data in library matches - return empty if strict RT filtering requested
+            # All m/z matches have no RT data - keep all m/z matches
             if logger:
                 logger.debug(
-                    f"Consensus {cons_uid}: {initial_match_count} m/z matches but none have library RT data - no matches returned due to RT filtering"
+                    f"Consensus {cons_uid}: {initial_match_count} m/z matches, all have no RT data - using m/z matches only"
                 )
-            matches = pl.DataFrame()  # Return empty DataFrame
+            # matches already contains the m/z-filtered results (which are all no_rt_entries)

     # FIX 1: Add stricter m/z validation - prioritize more accurate matches
     if not matches.is_empty():
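A standalone toy run of the new partition logic, using the same column names as `lib_df` but invented values; entry "A" passes the RT filter, "B" is kept because it has no RT to filter on, and "C" fails RT and is dropped:

```python
import polars as pl

lib = pl.DataFrame({
    "name": ["A", "B", "C"],
    "mz":   [200.05, 200.06, 200.04],
    "rt":   [120.0, None, 500.0],
})
cons_mz, cons_rt, mz_tol, rt_tol = 200.05, 121.0, 0.02, 5.0

# m/z window keeps all three, then RT handling splits them:
matches = lib.filter((pl.col("mz") >= cons_mz - mz_tol) & (pl.col("mz") <= cons_mz + mz_tol))
rt_candidates = matches.filter(pl.col("rt").is_not_null())
no_rt_entries = matches.filter(pl.col("rt").is_null())
rt_matches = rt_candidates.filter(
    (pl.col("rt") >= cons_rt - rt_tol) & (pl.col("rt") <= cons_rt + rt_tol)
)
print(pl.concat([rt_matches, no_rt_entries])["name"].to_list())  # ['A', 'B']
```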
@@ -884,6 +919,18 @@ def identify(study, features=None, params=None, **kwargs):
     effective_mz_tol = getattr(params, "mz_tol", 0.01)
     effective_rt_tol = getattr(params, "rt_tol", 2.0)

+    # Check if library has RT data - if not, disable RT filtering
+    if effective_rt_tol is not None and hasattr(study, "lib_df") and study.lib_df is not None:
+        if "rt" in study.lib_df.columns:
+            # Check if library has any non-null RT values
+            rt_count = study.lib_df.filter(pl.col("rt").is_not_null()).shape[0]
+            if rt_count == 0:
+                if logger:
+                    logger.info(
+                        f"Library has no retention time data - disabling RT filtering (was rt_tol={effective_rt_tol})"
+                    )
+                effective_rt_tol = None
+
     if logger:
         logger.debug(
             f"Starting identification with mz_tolerance={effective_mz_tol}, rt_tolerance={effective_rt_tol}",
@@ -1483,7 +1530,7 @@ def _get_adducts(study, adducts_list: list | None = None, **kwargs):
             if charge_min <= abs(total_charge) <= charge_max and total_charge != 0:
                 components = [spec] * multiplier
                 formatted_name = _format_adduct_name(components)
-                probability_multiplied = float(spec["probability"]) ** multiplier
+                probability_multiplied = (float(spec["probability"]) ** multiplier) / 2.0
                 combinations_list.append(
                     {
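Worked numbers for the changed line: for a spec with probability 0.65, the old estimate was `0.65 ** multiplier`, while the new one halves it. The halving factor is the diff's choice; its rationale is not stated there. A quick check:

```python
# Old vs. new multimer probability for a base probability of 0.65.
for multiplier in (1, 2, 3):
    old = 0.65 ** multiplier
    new = (0.65 ** multiplier) / 2.0
    print(multiplier, round(old, 5), round(new, 5))
# 1 0.65    0.325
# 2 0.4225  0.21125
# 3 0.27463 0.13731
```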
masster/study/load.py CHANGED
@@ -191,17 +191,6 @@ def load(self, filename=None):

     _load_study5(self, filename)

-    # After loading the study, check if we have consensus features before loading consensus XML
-    # if (self.consensus_df is not None and not self.consensus_df.is_empty()):
-    #     consensus_xml_path = filename.replace(".study5", ".consensusXML")
-    #     if os.path.exists(consensus_xml_path):
-    #         self._load_consensusXML(filename=consensus_xml_path)
-    #         self.logger.info(f"Automatically loaded consensus from {consensus_xml_path}")
-    #     else:
-    #         self.logger.warning(f"No consensus XML file found at {consensus_xml_path}")
-    # else:
-    #     self.logger.debug("No consensus features found, skipping consensusXML loading")
-
     self.filename = filename

masster/study/merge.py CHANGED
@@ -441,9 +441,15 @@ def merge(study, **kwargs) -> None:
     cached_valid_adducts = None
     try:
         cached_adducts_df = study._get_adducts()
+        # Remove all adducts with wrong polarity
+        if study.polarity == "positive":
+            cached_adducts_df = cached_adducts_df.filter(pl.col("charge") >= 0)
+        else:
+            cached_adducts_df = cached_adducts_df.filter(pl.col("charge") <= 0)
         if not cached_adducts_df.is_empty():
             cached_valid_adducts = set(cached_adducts_df["name"].to_list())
         else:
+            study.logger.warning(f"No valid adducts found for polarity '{study.polarity}'")
             cached_valid_adducts = set()
     except Exception as e:
         study.logger.warning(f"Could not retrieve study adducts: {e}")
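A toy illustration of the polarity filter added above, using the same `charge` column as the cached adducts DataFrame; names and values are invented. Note that neutral adducts (charge 0) survive both filters:

```python
import polars as pl

adducts = pl.DataFrame({
    "name":   ["[M+H]1+", "[M+Na]1+", "[M-H]1-", "[M-H2O]"],
    "charge": [1, 1, -1, 0],
})
positive = adducts.filter(pl.col("charge") >= 0)  # keeps +1 and neutral
negative = adducts.filter(pl.col("charge") <= 0)  # keeps -1 and neutral
print(positive["name"].to_list())  # ['[M+H]1+', '[M+Na]1+', '[M-H2O]']
print(negative["name"].to_list())  # ['[M-H]1-', '[M-H2O]']
```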
@@ -452,6 +458,13 @@ def merge(study, **kwargs) -> None:
     # Always allow '?' adducts
     cached_valid_adducts.add("?")

+    # Bypass for single sample case
+    if len(study.samples_df) == 1:
+        study.logger.info("Single sample detected - bypassing merge algorithm and using direct feature mapping")
+        _handle_single_sample_merge(study, cached_adducts_df, cached_valid_adducts)
+        # Skip all post-processing for single sample case
+        return
+
     # Route to algorithm implementation
     if params.method == "kd":
         consensus_map = _merge_kd(study, params)
@@ -1719,6 +1732,10 @@ def _calculate_consensus_statistics(
         mz_values: m/z values from chunk consensus features
         intensity_values: Intensity values from chunk consensus features
         quality_values: Quality values from chunk consensus features
+        number_features: Number of unique features contributing
+        number_samples: Number of unique samples contributing
+        cached_adducts_df: Cached DataFrame of valid adducts for the study
+        cached_valid_adducts: Cached set of valid adduct names for the study

     Returns:
         Dictionary with consensus feature metadata
@@ -3612,6 +3629,142 @@ def __merge_adduct_grouping(study, consensus_data, rt_tol, mz_tol):
     return adduct_group_list, adduct_of_list


+def _handle_single_sample_merge(study, cached_adducts_df=None, cached_valid_adducts=None):
+    """
+    Handle merge for the special case of a single sample.
+    Directly populate consensus_df from the sample's features_df without any filtering.
+
+    Args:
+        study: Study object with single sample
+        cached_adducts_df: Pre-computed adducts DataFrame (optional)
+        cached_valid_adducts: Set of valid adduct names (optional)
+    """
+    import polars as pl
+    import uuid
+
+    if len(study.samples_df) != 1:
+        raise ValueError("_handle_single_sample_merge should only be called with exactly one sample")
+
+    # Get the single sample's features
+    sample_row = study.samples_df.row(0, named=True)
+    sample_uid = sample_row["sample_uid"]
+
+    # Filter features for this sample
+    sample_features = study.features_df.filter(pl.col("sample_uid") == sample_uid)
+
+    if len(sample_features) == 0:
+        study.logger.warning("No features found for single sample")
+        study.consensus_df = pl.DataFrame()
+        study.consensus_mapping_df = pl.DataFrame()
+        return
+
+    study.logger.info(f"Creating consensus from {len(sample_features)} features in single sample")
+
+    # Create consensus features directly from sample features
+    consensus_list = []
+    mapping_list = []
+
+    # Cache valid adducts
+    valid_adducts = cached_valid_adducts if cached_valid_adducts is not None else set()
+    valid_adducts.add("?")  # Always allow '?' adducts
+
+    for i, feature_row in enumerate(sample_features.iter_rows(named=True)):
+        # Generate unique consensus ID
+        consensus_id_str = str(uuid.uuid4()).replace("-", "")[:16]
+
+        # Handle adduct information
+        adduct = feature_row.get("adduct")
+        if adduct is None or adduct not in valid_adducts:
+            # Set default adduct based on study polarity
+            study_polarity = getattr(study, "polarity", "positive")
+            if study_polarity in ["negative", "neg"]:
+                adduct = "[M-?]1-"
+                adduct_charge = -1
+                adduct_mass_shift = -1.007825
+            else:
+                adduct = "[M+?]1+"
+                adduct_charge = 1
+                adduct_mass_shift = 1.007825
+        else:
+            # Try to get charge and mass shift from cached adducts
+            adduct_charge = 1
+            adduct_mass_shift = 1.007825
+            if cached_adducts_df is not None and not cached_adducts_df.is_empty():
+                matching_adduct = cached_adducts_df.filter(pl.col("name") == adduct)
+                if not matching_adduct.is_empty():
+                    adduct_row = matching_adduct.row(0, named=True)
+                    adduct_charge = adduct_row["charge"]
+                    adduct_mass_shift = adduct_row["mass_shift"]
+
+        # Calculate neutral mass
+        mz = feature_row.get("mz", 0.0)
+        if adduct_charge and adduct_mass_shift is not None:
+            adduct_mass_neutral = mz * abs(adduct_charge) - adduct_mass_shift
+        else:
+            adduct_mass_neutral = None
+
+        # Count MS2 scans
+        ms2_scans = feature_row.get("ms2_scans", [])
+        ms2_count = len(ms2_scans) if ms2_scans else 0
+
+        # Create consensus feature metadata
+        consensus_feature = {
+            "consensus_uid": i,
+            "consensus_id": consensus_id_str,
+            "quality": feature_row.get("quality", 1.0),
+            "number_samples": 1,  # Always 1 for single sample
+            "rt": feature_row.get("rt", 0.0),
+            "mz": mz,
+            "rt_min": feature_row.get("rt", 0.0),
+            "rt_max": feature_row.get("rt", 0.0),
+            "rt_mean": feature_row.get("rt", 0.0),
+            "rt_start_mean": feature_row.get("rt_start", 0.0),
+            "rt_end_mean": feature_row.get("rt_end", 0.0),
+            "rt_delta_mean": feature_row.get("rt_delta", 0.0),
+            "mz_min": mz,
+            "mz_max": mz,
+            "mz_mean": mz,
+            "mz_start_mean": feature_row.get("mz_start", 0.0),
+            "mz_end_mean": feature_row.get("mz_end", 0.0),
+            "inty_mean": feature_row.get("inty", 0.0),
+            "bl": -1.0,
+            "chrom_coherence_mean": feature_row.get("chrom_coherence", 0.0),
+            "chrom_prominence_mean": feature_row.get("chrom_prominence", 0.0),
+            "chrom_prominence_scaled_mean": feature_row.get("chrom_prominence_scaled", 0.0),
+            "chrom_height_scaled_mean": feature_row.get("chrom_height_scaled", 0.0),
+            "iso": None,  # Will be filled by find_iso() function
+            "iso_mean": feature_row.get("iso", 0.0),
+            "charge_mean": feature_row.get("charge", 0.0),
+            "number_ms2": ms2_count,
+            "adducts": [[adduct, 1, 100.0]],  # Single adduct with 100% frequency
+            "adduct_top": adduct,
+            "adduct_charge_top": adduct_charge,
+            "adduct_mass_neutral_top": adduct_mass_neutral,
+            "adduct_mass_shift_top": adduct_mass_shift,
+            "id_top_name": None,
+            "id_top_class": None,
+            "id_top_adduct": None,
+            "id_top_score": None,
+            "id_source": None,
+        }
+
+        consensus_list.append(consensus_feature)
+
+        # Create mapping entry
+        mapping_entry = {
+            "consensus_uid": i,
+            "sample_uid": sample_uid,
+            "feature_uid": feature_row.get("feature_uid"),
+        }
+        mapping_list.append(mapping_entry)
+
+    # Create DataFrames
+    study.consensus_df = pl.DataFrame(consensus_list, strict=False)
+    study.consensus_mapping_df = pl.DataFrame(mapping_list, strict=False)
+
+    study.logger.info(f"Created {len(consensus_list)} consensus features from single sample")
+
+
 def _fast_correlation(x, y):
     """
     Fast correlation coefficient calculation for consensus matrix data.
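A hedged end-to-end sketch of the new single-sample bypass. Only `merge()` and the bypass behavior come from this diff; the import path, `Study()` constructor, and `add()` call are placeholders for however a one-sample study is normally built:

```python
from masster import Study  # import path is an assumption

study = Study()
study.add("only_sample.mzML")  # hypothetical single-sample input

# With exactly one row in samples_df, merge() logs
# "Single sample detected - bypassing merge algorithm ..." and returns early
# via _handle_single_sample_merge(), so consensus_df mirrors features_df 1:1.
study.merge()
print(study.consensus_df["number_samples"].unique().to_list())  # [1]
```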