masster 0.5.13-py3-none-any.whl → 0.5.14-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic.
- masster/_version.py +1 -1
- masster/lib/lib.py +371 -57
- masster/study/helpers.py +1 -0
- masster/study/id.py +233 -36
- masster/study/importers.py +161 -52
- masster/study/merge.py +1 -1
- masster/study/plot.py +10 -18
- masster/study/study5_schema.json +9 -0
- masster/wizard/__init__.py +4 -4
- masster/wizard/wizard.py +437 -19
- {masster-0.5.13.dist-info → masster-0.5.14.dist-info}/METADATA +1 -1
- {masster-0.5.13.dist-info → masster-0.5.14.dist-info}/RECORD +15 -15
- {masster-0.5.13.dist-info → masster-0.5.14.dist-info}/WHEEL +0 -0
- {masster-0.5.13.dist-info → masster-0.5.14.dist-info}/entry_points.txt +0 -0
- {masster-0.5.13.dist-info → masster-0.5.14.dist-info}/licenses/LICENSE +0 -0
masster/study/id.py
CHANGED
@@ -145,16 +145,61 @@ def lib_load(
             column_order.append("quant_group")
         elif col == "formula" and "iso" in columns_list and "iso" not in column_order:
             column_order.append("iso")
-
-    # Apply the column ordering
-    filtered_lf = filtered_lf.select(column_order)
 
+
     # Add to existing lib_df instead of replacing
     if (
         hasattr(study, "lib_df")
         and study.lib_df is not None
        and not study.lib_df.is_empty()
     ):
+        # Check for schema compatibility and handle mismatches
+        existing_cols = set(study.lib_df.columns)
+        new_cols = set(filtered_lf.columns)
+
+        # If schemas don't match, we need to align them
+        if existing_cols != new_cols:
+            # Get union of all columns
+            all_cols = existing_cols.union(new_cols)
+
+            # Add missing columns to existing data with appropriate defaults
+            for col in new_cols - existing_cols:
+                if col == "probability":
+                    # Add probability column to existing data - try to calculate from adduct
+                    if "adduct" in study.lib_df.columns:
+                        try:
+                            adduct_prob_map = _get_adduct_probabilities(study)
+                            study.lib_df = study.lib_df.with_columns(
+                                pl.col("adduct").map_elements(
+                                    lambda adduct: adduct_prob_map.get(adduct, 1.0) if adduct is not None else 1.0,
+                                    return_dtype=pl.Float64
+                                ).alias("probability")
+                            )
+                        except Exception:
+                            study.lib_df = study.lib_df.with_columns(pl.lit(1.0).alias("probability"))
+                    else:
+                        study.lib_df = study.lib_df.with_columns(pl.lit(1.0).alias("probability"))
+                elif col == "iso":
+                    study.lib_df = study.lib_df.with_columns(pl.lit(0).cast(pl.Int64).alias("iso"))
+                elif col == "quant_group":
+                    # Set quant_group using cmpd_uid or lib_uid
+                    if "cmpd_uid" in study.lib_df.columns:
+                        study.lib_df = study.lib_df.with_columns(pl.col("cmpd_uid").cast(pl.Int64).alias("quant_group"))
+                    else:
+                        study.lib_df = study.lib_df.with_columns(pl.col("lib_uid").cast(pl.Int64).alias("quant_group"))
+                else:
+                    # Default to null for other columns
+                    study.lib_df = study.lib_df.with_columns(pl.lit(None).alias(col))
+
+            # Add missing columns to new data with appropriate defaults
+            for col in existing_cols - new_cols:
+                if col not in ["probability", "iso", "quant_group"]:  # These should already be handled
+                    filtered_lf = filtered_lf.with_columns(pl.lit(None).alias(col))
+
+            # Ensure column order matches for concatenation - use existing column order
+            existing_column_order = list(study.lib_df.columns)
+            filtered_lf = filtered_lf.select(existing_column_order)
+
         # Concatenate with existing data
         study.lib_df = pl.concat([study.lib_df, filtered_lf])
     else:
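Note: the heart of this hunk is aligning two polars frames whose schemas have drifted apart before `pl.concat`, which requires identical columns in identical order. A minimal, self-contained sketch of that pattern, on toy frames rather than masster's actual `lib_df` (column names here are illustrative):

```python
import polars as pl

# Toy stand-ins for study.lib_df (existing) and the freshly loaded filtered_lf (incoming).
existing = pl.DataFrame({"lib_uid": [1, 2], "name": ["a", "b"]})
incoming = pl.DataFrame({"lib_uid": [3], "name": ["c"], "probability": [0.8]})

# Pad each side with the columns only the other side has, then force the
# incoming frame into the existing column order before concatenating.
for col in set(incoming.columns) - set(existing.columns):
    existing = existing.with_columns(pl.lit(1.0).alias(col))  # defaulted, as the hunk does for "probability"
for col in set(existing.columns) - set(incoming.columns):
    incoming = incoming.with_columns(pl.lit(None).alias(col))
incoming = incoming.select(existing.columns)

print(pl.concat([existing, incoming]))
# pl.concat(..., how="diagonal") performs a similar alignment automatically,
# but it can only fill with nulls, not the computed defaults this hunk wants.
```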
@@ -209,8 +254,19 @@ def _setup_identify_parameters(params, kwargs):
 
     # Override parameters with any provided kwargs
     if kwargs:
+        # Handle parameter name mapping for backwards compatibility
+        param_mapping = {
+            'rt_tolerance': 'rt_tol',
+            'mz_tolerance': 'mz_tol'
+        }
+
         for param_name, value in kwargs.items():
-            if hasattr(params, param_name):
+            # Check if we need to map the parameter name
+            mapped_name = param_mapping.get(param_name, param_name)
+
+            if hasattr(params, mapped_name):
+                setattr(params, mapped_name, value)
+            elif hasattr(params, param_name):
                 setattr(params, param_name, value)
 
     return params
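The kwargs handling now routes legacy names through a translation table before falling back to the literal name. The same logic in isolation, with a hypothetical `IdentifyParams` stand-in for masster's actual parameter object:

```python
# Hypothetical parameter object; masster's real class has more fields.
class IdentifyParams:
    def __init__(self):
        self.mz_tol = 0.01
        self.rt_tol = 10.0

PARAM_MAPPING = {"rt_tolerance": "rt_tol", "mz_tolerance": "mz_tol"}

def apply_kwargs(params, **kwargs):
    """Set attributes from kwargs, translating legacy names first."""
    for name, value in kwargs.items():
        mapped = PARAM_MAPPING.get(name, name)
        if hasattr(params, mapped):
            setattr(params, mapped, value)
        elif hasattr(params, name):
            setattr(params, name, value)
    return params

p = apply_kwargs(IdentifyParams(), rt_tolerance=5.0)  # legacy spelling still works
assert p.rt_tol == 5.0
```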
@@ -319,9 +375,13 @@ def _perform_identification_matching(consensus_to_process, study, effective_mz_t
                 else None
             )
 
-            # Get
+            # Get library probability as base score, then multiply by adduct probability
+            lib_probability = match_row.get("probability", 1.0) if match_row.get("probability") is not None else 1.0
             adduct = match_row.get("adduct")
-
+            adduct_probability = adduct_prob_map.get(adduct, 1.0) if adduct else 1.0
+            score = lib_probability * adduct_probability
+            # Scale to 0-100 and round to 1 decimal place
+            score = round(score * 100.0, 1)
 
             match_results.append({
                 "lib_uid": match_row.get("lib_uid"),
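The new score is simply the product of the library-entry probability and the adduct probability, rescaled to 0-100. A tiny sketch of that arithmetic (the function name is illustrative, not masster's API):

```python
def score_match(lib_probability, adduct_probability):
    """Combine library and adduct probabilities into a 0-100 score."""
    base = lib_probability if lib_probability is not None else 1.0
    return round(base * adduct_probability * 100.0, 1)

print(score_match(0.9, 0.5))   # 45.0
print(score_match(None, 0.8))  # 80.0: a missing library probability defaults to 1.0
```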
@@ -337,7 +397,11 @@ def _perform_identification_matching(consensus_to_process, study, effective_mz_t
 
 
 def _find_matches_vectorized(lib_df, cons_mz, cons_rt, mz_tol, rt_tol, logger, cons_uid):
-    """
+    """
+    Find library matches using optimized vectorized operations.
+
+    FIXED VERSION: Prevents incorrect matching of same compound to different m/z values.
+    """
     # Filter by m/z tolerance using vectorized operations
     matches = lib_df.filter(
         (pl.col("mz") >= cons_mz - mz_tol) & (pl.col("mz") <= cons_mz + mz_tol)
@@ -345,43 +409,78 @@ def _find_matches_vectorized(lib_df, cons_mz, cons_rt, mz_tol, rt_tol, logger, c
 
     initial_match_count = len(matches)
 
-    # Apply RT filter if available
+    # Apply RT filter if available - STRICT VERSION (no fallback)
     if rt_tol is not None and cons_rt is not None and not matches.is_empty():
-
-        rt_matches = matches.filter(
-            (pl.col("rt") >= cons_rt - rt_tol) &
-            (pl.col("rt") <= cons_rt + rt_tol)
-        )
+        # First, check if any m/z matches have RT data
+        rt_candidates = matches.filter(pl.col("rt").is_not_null())
 
-        if not rt_matches.is_empty():
-            matches = rt_matches
+        if not rt_candidates.is_empty():
+            # Apply RT filtering to candidates with RT data
+            rt_matches = rt_candidates.filter(
+                (pl.col("rt") >= cons_rt - rt_tol) &
+                (pl.col("rt") <= cons_rt + rt_tol)
+            )
+
+            if not rt_matches.is_empty():
+                matches = rt_matches
+                if logger:
+                    logger.debug(
+                        f"Consensus {cons_uid}: {initial_match_count} m/z matches, {len(rt_candidates)} with RT, {len(matches)} after RT filter"
+                    )
+            else:
+                # NO FALLBACK - if RT filtering finds no matches, return empty
+                matches = rt_matches  # This is empty
+                if logger:
+                    logger.debug(
+                        f"Consensus {cons_uid}: RT filtering eliminated all {len(rt_candidates)} candidates (rt_tol={rt_tol}s) - no matches returned"
+                    )
+        else:
+            # No RT data in library matches - return empty if strict RT filtering requested
             if logger:
                 logger.debug(
-                    f"Consensus {cons_uid}: {initial_match_count} m/z matches
+                    f"Consensus {cons_uid}: {initial_match_count} m/z matches but none have library RT data - no matches returned due to RT filtering"
                 )
+            matches = pl.DataFrame()  # Return empty DataFrame
+
+    # FIX 1: Add stricter m/z validation - prioritize more accurate matches
+    if not matches.is_empty():
+        strict_mz_tol = mz_tol * 0.5  # Use 50% of tolerance as strict threshold
+        strict_matches = matches.filter(
+            (pl.col("mz") >= cons_mz - strict_mz_tol) & (pl.col("mz") <= cons_mz + strict_mz_tol)
+        )
+
+        if not strict_matches.is_empty():
+            # Use strict matches if available
+            matches = strict_matches
+            if logger:
+                logger.debug(f"Consensus {cons_uid}: Using {len(matches)} strict m/z matches (within {strict_mz_tol:.6f} Da)")
         else:
             if logger:
-                logger.debug(
-                    f"Consensus {cons_uid}: {initial_match_count} m/z matches, 0 after RT filter - using m/z matches only"
-                )
+                logger.debug(f"Consensus {cons_uid}: No strict matches, using {len(matches)} loose matches")
 
-    #
+    # FIX 2: Improved deduplication - prioritize by m/z accuracy
     if not matches.is_empty() and len(matches) > 1:
         if "formula" in matches.columns and "adduct" in matches.columns:
             pre_dedup_count = len(matches)
 
-            #
+            # Calculate m/z error for sorting
+            matches = matches.with_columns([
+                (pl.col("mz") - cons_mz).abs().alias("mz_error_abs")
+            ])
+
+            # Group by formula and adduct, but keep the most accurate m/z match
             matches = (
                 matches
-                .sort("lib_uid")  #
+                .sort(["mz_error_abs", "lib_uid"])  # Sort by m/z accuracy first, then lib_uid for consistency
                 .group_by(["formula", "adduct"], maintain_order=True)
                 .first()
+                .drop("mz_error_abs")  # Remove the temporary column
             )
 
             post_dedup_count = len(matches)
             if logger and post_dedup_count < pre_dedup_count:
                 logger.debug(
-                    f"Consensus {cons_uid}: deduplicated {pre_dedup_count} to {post_dedup_count} matches"
+                    f"Consensus {cons_uid}: deduplicated {pre_dedup_count} to {post_dedup_count} matches (m/z accuracy prioritized)"
                 )
 
     return matches
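Two ideas carry this hunk: a strict-then-loose m/z filter (prefer hits within half the tolerance, fall back to the full window), and deduplication that keeps the closest m/z per (formula, adduct) pair instead of an arbitrary row. A self-contained sketch of both on toy library data (not masster's code):

```python
import polars as pl

def pick_matches(lib: pl.DataFrame, cons_mz: float, mz_tol: float) -> pl.DataFrame:
    """Strict-then-loose m/z filter, then keep the most accurate hit per (formula, adduct)."""
    matches = lib.filter((pl.col("mz") - cons_mz).abs() <= mz_tol)
    strict = matches.filter((pl.col("mz") - cons_mz).abs() <= mz_tol * 0.5)
    if not strict.is_empty():
        matches = strict  # prefer the tighter window when it is non-empty
    if len(matches) > 1:
        matches = (
            matches
            .with_columns((pl.col("mz") - cons_mz).abs().alias("mz_error_abs"))
            .sort(["mz_error_abs", "lib_uid"])  # accuracy first, lib_uid as tiebreaker
            .group_by(["formula", "adduct"], maintain_order=True)
            .first()
            .drop("mz_error_abs")
        )
    return matches

lib = pl.DataFrame({
    "lib_uid": [1, 2, 3],
    "formula": ["C6H12O6", "C6H12O6", "C5H10O5"],
    "adduct": ["[M+H]+", "[M+H]+", "[M+H]+"],
    "mz": [181.070, 181.075, 151.060],
})
print(pick_matches(lib, cons_mz=181.071, mz_tol=0.01))  # keeps lib_uid 1 only
```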
@@ -617,7 +716,11 @@ def _apply_scoring_adjustments(study, params):
 
 
 def _update_consensus_id_columns(study, logger=None):
-    """
+    """
+    Update consensus_df with top-scoring identification results using safe in-place updates.
+
+    FIXED VERSION: Prevents same compound from being assigned to vastly different m/z values.
+    """
     try:
         if not hasattr(study, "id_df") or study.id_df is None or study.id_df.is_empty():
             if logger:
@@ -634,14 +737,47 @@ def _update_consensus_id_columns(study, logger=None):
                 logger.debug("No consensus data available")
             return
 
-        # Get library columns we need
-        lib_columns = ["lib_uid", "name", "adduct"]
+        # Get library columns we need (include mz for validation)
+        lib_columns = ["lib_uid", "name", "adduct", "mz"]
         if "class" in study.lib_df.columns:
             lib_columns.append("class")
 
-        #
+        # FIX 1: Join identification results with consensus m/z for validation
+        id_with_consensus = study.id_df.join(
+            study.consensus_df.select(["consensus_uid", "mz"]),
+            on="consensus_uid",
+            how="left",
+            suffix="_consensus"
+        )
+
+        # FIX 2: Validate m/z accuracy - filter out poor matches
+        id_with_lib = id_with_consensus.join(
+            study.lib_df.select(["lib_uid", "mz"]),
+            on="lib_uid",
+            how="left",
+            suffix="_lib"
+        )
+
+        # Calculate actual m/z error and filter out excessive errors
+        id_validated = id_with_lib.with_columns([
+            (pl.col("mz") - pl.col("mz_lib")).abs().alias("actual_mz_error")
+        ])
+
+        # Filter out matches with excessive m/z error
+        max_reasonable_error = 0.02  # 20 millidalton maximum error
+        id_validated = id_validated.filter(
+            (pl.col("actual_mz_error") <= max_reasonable_error) | pl.col("actual_mz_error").is_null()
+        )
+
+        if logger:
+            original_count = len(id_with_consensus)
+            validated_count = len(id_validated)
+            if validated_count < original_count:
+                logger.warning(f"Filtered out {original_count - validated_count} identifications with excessive m/z error (>{max_reasonable_error:.3f} Da)")
+
+        # Get top-scoring identification for each consensus feature (from validated results)
         top_ids = (
-            study.id_df
+            id_validated
            .sort(["consensus_uid", "score"], descending=[False, True])
            .group_by("consensus_uid", maintain_order=True)
            .first()
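The validation step is two left joins (consensus m/z onto the identifications, then library m/z) followed by an error filter; rows whose observed and library m/z disagree by more than 0.02 Da are dropped. The same pipeline on toy frames:

```python
import polars as pl

id_df = pl.DataFrame({"consensus_uid": [1, 2], "lib_uid": [10, 11], "score": [90.0, 85.0]})
consensus_df = pl.DataFrame({"consensus_uid": [1, 2], "mz": [181.071, 200.000]})
lib_df = pl.DataFrame({"lib_uid": [10, 11], "mz": [181.070, 200.150]})

validated = (
    id_df
    .join(consensus_df.select(["consensus_uid", "mz"]), on="consensus_uid", how="left")
    .join(lib_df.select(["lib_uid", "mz"]), on="lib_uid", how="left", suffix="_lib")
    .with_columns((pl.col("mz") - pl.col("mz_lib")).abs().alias("actual_mz_error"))
    .filter((pl.col("actual_mz_error") <= 0.02) | pl.col("actual_mz_error").is_null())
)
print(validated)  # the 0.15 Da mismatch on consensus_uid 2 is filtered out
```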
@@ -656,6 +792,37 @@ def _update_consensus_id_columns(study, logger=None):
             .rename({"name": "id_top_name"})
         )
 
+        # FIX 3: Check for conflicts where same compound+adduct assigned to very different m/z
+        if not top_ids.is_empty():
+            compound_groups = (
+                top_ids
+                .join(study.consensus_df.select(["consensus_uid", "mz"]), on="consensus_uid", how="left")
+                .group_by(["id_top_name", "id_top_adduct"])
+                .agg([
+                    pl.col("consensus_uid").count().alias("count"),
+                    pl.col("mz").min().alias("mz_min"),
+                    pl.col("mz").max().alias("mz_max")
+                ])
+                .with_columns([
+                    (pl.col("mz_max") - pl.col("mz_min")).alias("mz_range")
+                ])
+            )
+
+            # Find problematic assignments (same compound+adduct with >0.1 Da m/z range)
+            problematic = compound_groups.filter(
+                (pl.col("count") > 1) & (pl.col("mz_range") > 0.1)
+            )
+
+            if not problematic.is_empty() and logger:
+                for row in problematic.iter_rows(named=True):
+                    name = row["id_top_name"]
+                    adduct = row["id_top_adduct"]
+                    count = row["count"]
+                    mz_range = row["mz_range"]
+                    logger.warning(
+                        f"Identification conflict detected: '{name}' ({adduct}) assigned to {count} features with {mz_range:.4f} Da m/z range"
+                    )
+
         # Ensure we have the id_top columns in consensus_df
         for col_name, dtype in [
             ("id_top_name", pl.String),
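Conflict detection groups the winning assignments by compound and adduct, then flags any group whose members span more than 0.1 Da, since one compound+adduct pair should not explain features at very different m/z. In isolation, on toy data:

```python
import polars as pl

assignments = pl.DataFrame({
    "consensus_uid": [1, 2, 3],
    "id_top_name": ["glucose", "glucose", "ribose"],
    "id_top_adduct": ["[M+H]+", "[M+H]+", "[M+H]+"],
    "mz": [181.071, 181.570, 151.060],
})

conflicts = (
    assignments
    .group_by(["id_top_name", "id_top_adduct"])
    .agg(
        pl.col("consensus_uid").count().alias("count"),
        (pl.col("mz").max() - pl.col("mz").min()).alias("mz_range"),
    )
    .filter((pl.col("count") > 1) & (pl.col("mz_range") > 0.1))
)
print(conflicts)  # flags glucose/[M+H]+ with a ~0.5 Da spread across 2 features
```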
@@ -783,7 +950,7 @@ def identify(study, features=None, params=None, **kwargs):
     if logger:
         features_with_matches = len([r for r in results if len(r["matches"]) > 0])
         total_matches = sum(len(r["matches"]) for r in results)
-        logger.
+        logger.success(
             f"Identification completed: {features_with_matches}/{consensus_count} features matched, {total_matches} total identifications",
         )
 
@@ -806,6 +973,8 @@ def get_id(study, features=None) -> pl.DataFrame:
         - mz (consensus feature m/z)
         - rt (consensus feature RT)
         - name (compound name from library)
+        - shortname (short name from library, if available)
+        - class (compound class from library, if available)
         - formula (molecular formula from library)
         - adduct (adduct type from library)
         - smiles (SMILES notation from library)
@@ -873,6 +1042,8 @@ def get_id(study, features=None) -> pl.DataFrame:
     lib_cols = [
         "lib_uid",
         "name",
+        "shortname",
+        "class",
         "formula",
         "adduct",
         "smiles",
@@ -901,6 +1072,8 @@ def get_id(study, features=None) -> pl.DataFrame:
         "cmpd_uid" if "cmpd_uid" in result_df.columns else None,
         "lib_uid",
         "name" if "name" in result_df.columns else None,
+        "shortname" if "shortname" in result_df.columns else None,
+        "class" if "class" in result_df.columns else None,
         "formula" if "formula" in result_df.columns else None,
         "adduct" if "adduct" in result_df.columns else None,
         "mz" if "mz" in result_df.columns else None,
@@ -952,6 +1125,8 @@ def get_id(study, features=None) -> pl.DataFrame:
         "cmpd_uid",
         "lib_uid",
         "name",
+        "shortname",
+        "class",
         "formula",
         "adduct",
         "mz",
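All three get_id hunks follow the same defensive pattern: request the new optional columns (`shortname`, `class`) only when the frame at hand actually has them, so libraries built before 0.5.14 keep working. A sketch of the guard:

```python
import polars as pl

# A pre-0.5.14 library without the new optional columns.
lib_df = pl.DataFrame({"lib_uid": [1], "name": ["glucose"], "formula": ["C6H12O6"]})

wanted = ["lib_uid", "name", "shortname", "class", "formula", "adduct", "smiles"]
present = [c for c in wanted if c in lib_df.columns]  # drop what this frame lacks
print(lib_df.select(present))  # selects lib_uid, name, formula only
```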
@@ -1094,7 +1269,7 @@ def id_reset(study):
         del study.history["identify"]
 
     if logger:
-        logger.
+        logger.info("Identification data reset completed")
 
 
 def lib_reset(study):
@@ -1123,11 +1298,33 @@ def lib_reset(study):
         logger.debug("Checking for consensus features created by lib_to_consensus()")
 
     try:
-        # Filter for features
-        #
-
-
-
+        # Filter for features created by lib_to_consensus()
+        # These can be identified by:
+        # 1. number_samples < 1 (set to 0.0 by lib_to_consensus)
+        # 2. AND have corresponding entries in consensus_mapping_df with sample_uid = 0 (virtual sample)
+
+        # First check if we have any features with number_samples < 1
+        potential_lib_features = study.consensus_df.filter(pl.col("number_samples") < 1)
+
+        if potential_lib_features is not None and not potential_lib_features.is_empty():
+            # Further filter by checking if they have sample_uid = 0 in consensus_mapping_df
+            # This ensures we only remove library-derived features, not legitimate features with 0 samples
+            if hasattr(study, "consensus_mapping_df") and not study.consensus_mapping_df.is_empty():
+                lib_consensus_uids = study.consensus_mapping_df.filter(
+                    pl.col("sample_uid") == 0
+                )["consensus_uid"].unique().to_list()
+
+                if lib_consensus_uids:
+                    lib_consensus_features = potential_lib_features.filter(
+                        pl.col("consensus_uid").is_in(lib_consensus_uids)
+                    )
+                else:
+                    lib_consensus_features = pl.DataFrame()  # No library features found
+            else:
+                # If no consensus_mapping_df, fall back to number_samples < 1 only
+                lib_consensus_features = potential_lib_features
+        else:
+            lib_consensus_features = pl.DataFrame()  # No features with number_samples < 1
 
         if lib_consensus_features is not None and not lib_consensus_features.is_empty():
             num_lib_features = len(lib_consensus_features)
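Library-derived consensus features are recognized by two criteria together: they appear in no real sample (`number_samples < 1`) and they map to the virtual sample `sample_uid == 0`. A compact sketch of that double filter on toy frames:

```python
import polars as pl

consensus_df = pl.DataFrame({"consensus_uid": [1, 2, 3], "number_samples": [5, 0, 0]})
mapping_df = pl.DataFrame({"consensus_uid": [2, 3, 3], "sample_uid": [0, 7, 8]})

candidates = consensus_df.filter(pl.col("number_samples") < 1)
lib_uids = mapping_df.filter(pl.col("sample_uid") == 0)["consensus_uid"].unique().to_list()
lib_features = candidates.filter(pl.col("consensus_uid").is_in(lib_uids))
print(lib_features)  # only consensus_uid 2 meets both criteria
```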
@@ -1199,7 +1396,7 @@ def lib_reset(study):
         del study.history["lib_to_consensus"]
 
     if logger:
-        logger.
+        logger.info("Library and identification data reset completed")
 
 
 def _get_adducts(study, adducts_list: list | None = None, **kwargs):