masster 0.4.13__py3-none-any.whl → 0.4.14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/sample.py +41 -0
- masster/study/processing.py +79 -51
- masster/study/study.py +61 -0
- {masster-0.4.13.dist-info → masster-0.4.14.dist-info}/METADATA +1 -1
- {masster-0.4.13.dist-info → masster-0.4.14.dist-info}/RECORD +9 -9
- {masster-0.4.13.dist-info → masster-0.4.14.dist-info}/WHEEL +0 -0
- {masster-0.4.13.dist-info → masster-0.4.14.dist-info}/entry_points.txt +0 -0
- {masster-0.4.13.dist-info → masster-0.4.14.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/sample/sample.py
CHANGED
|
@@ -299,6 +299,47 @@ class Sample:
|
|
|
299
299
|
find_ms2_defaults = find_ms2_defaults
|
|
300
300
|
get_spectrum_defaults = get_spectrum_defaults
|
|
301
301
|
|
|
302
|
+
def __dir__(self):
    """
    Return the attribute names listed by ``dir()`` and tab completion.

    Names starting with a single underscore are left out, dunder names such
    as ``__init__`` or ``__str__`` are kept, and the backward compatibility
    aliases listed below are left out as well. Only the listing is affected:
    this method does not change attribute lookup, so the omitted names can
    still be accessed normally.

    Returns:
        list: Sorted names of public attributes and methods (excluding
        internal names and backward compatibility aliases).
    """
    # Backward compatibility aliases to hide from the listing
    hidden_aliases = {
        'load_study',  # deprecated alias for load_noms1
        'filter_features',  # alias for filter (deprecated naming)
        'select_features',  # alias for select (deprecated naming)
        'features_filter',  # confusing duplicate of filter
        'features_select',  # confusing duplicate of select
        'merge_defaults',  # alias for find_features_defaults (confusing)
    }

    # Collect the names defined on the instance, on its class and on every
    # base class in the MRO. getattr() keeps this working for instances
    # that have no __dict__.
    names = set(getattr(self, '__dict__', ()))
    for cls in self.__class__.__mro__:
        names.update(cls.__dict__)

    def _is_listed(name):
        # Explicit grouping: keep a name when it is public or a dunder,
        # and it is not one of the hidden aliases.
        is_dunder = name.startswith('__') and name.endswith('__')
        return (not name.startswith('_') or is_dunder) and name not in hidden_aliases

    return sorted(name for name in names if _is_listed(name))
|
|
342
|
+
|
|
302
343
|
def logger_update(
|
|
303
344
|
self,
|
|
304
345
|
level: str | None = None,
|
masster/study/processing.py
CHANGED
|
@@ -290,6 +290,85 @@ def merge(self, **kwargs):
|
|
|
290
290
|
# Complete merge process
|
|
291
291
|
self._finalize_merge(link_ms2, min_samples)
|
|
292
292
|
|
|
293
|
+
def _perform_adduct_grouping(self, rt_tol, mz_tol):
    """
    Perform adduct grouping on consensus features.

    Builds one record per row of ``self.consensus_df``, passes the records to
    ``_optimized_adduct_grouping`` and stores the two returned lists on
    ``self.consensus_df`` as the Int64 columns ``adduct_group`` and
    ``adduct_of``. Nothing happens when ``self.consensus_df`` has no rows.

    Args:
        rt_tol: Tolerance value forwarded unchanged to
            ``_optimized_adduct_grouping``.
        mz_tol: Tolerance value forwarded unchanged to
            ``_optimized_adduct_grouping``.

    Returns:
        None. ``self.consensus_df`` is replaced by a frame that carries the
        two additional columns.
    """
    # Nothing to group when there are no consensus features
    if len(self.consensus_df) == 0:
        return

    # Get relevant columns for grouping; .get() tolerates frames that lack
    # the optional adduct / intensity columns.
    consensus_data = [
        {
            "consensus_uid": row["consensus_uid"],
            "rt": row["rt"],
            "adduct_mass_neutral_top": row.get("adduct_mass_neutral_top"),
            "adduct_top": row.get("adduct_top"),
            "inty_mean": row.get("inty_mean", 0),
        }
        for row in self.consensus_df.iter_rows(named=True)
    ]

    # Use optimized adduct grouping
    adduct_group_list, adduct_of_list = _optimized_adduct_grouping(
        self, consensus_data, rt_tol, mz_tol
    )

    # Add the new columns to consensus_df
    self.consensus_df = self.consensus_df.with_columns(
        [
            pl.Series("adduct_group", adduct_group_list, dtype=pl.Int64),
            pl.Series("adduct_of", adduct_of_list, dtype=pl.Int64),
        ],
    )
|
|
322
|
+
|
|
323
|
+
def _finalize_merge(self, link_ms2, min_samples):
    """
    Complete the merge process with final filtering, logging and MS2 linking.

    Steps, in order:
      1. Resolve ``min_samples`` to an absolute threshold.
      2. Keep only rows of ``self.consensus_df`` whose ``number_samples`` is
         at least that threshold.
      3. Keep only rows of ``self.consensus_mapping_df`` whose
         ``consensus_uid`` is still present in ``self.consensus_df``.
      4. Log the completeness, or a warning when the result is empty.
      5. Call ``self.find_ms2()`` when ``link_ms2`` is truthy.

    Args:
        link_ms2: When truthy, ``self.find_ms2()`` is called at the end.
        min_samples: Minimum ``number_samples`` required to keep a consensus
            feature. ``None`` means 1. A value below 1 is treated as a
            fraction of the number of rows in ``self.samples_df`` and is
            never resolved to less than 1. A value above the number of
            samples is reduced to the number of samples with a warning.

    Returns:
        None. ``self.consensus_df`` and ``self.consensus_mapping_df`` are
        replaced by their filtered versions.
    """
    n_samples = len(self.samples_df)

    # Validate min_samples parameter
    if min_samples is None:
        min_samples = 1
    if min_samples < 1:
        # Fractional threshold. int() truncates, so a small fraction (or a
        # zero / negative value) would otherwise give a threshold <= 0;
        # clamp it to one sample.
        min_samples = max(1, int(min_samples * n_samples))

    # Validate that min_samples doesn't exceed the number of samples
    if min_samples > n_samples:
        self.logger.warning(
            f"min_samples ({min_samples}) exceeds the number of samples ({n_samples}). "
            f"Setting min_samples to {n_samples}.",
        )
        min_samples = n_samples

    # Filter out consensus features with less than min_samples features
    n_before = len(self.consensus_df)
    self.consensus_df = self.consensus_df.filter(
        pl.col("number_samples") >= min_samples,
    )
    self.logger.debug(
        f"Filtered {n_before - len(self.consensus_df)} consensus features with less than {min_samples} samples.",
    )

    # Drop mapping rows that point at consensus features removed above
    self.consensus_mapping_df = self.consensus_mapping_df.filter(
        pl.col("consensus_uid").is_in(self.consensus_df["consensus_uid"].to_list()),
    )

    # Calculate the completeness of the consensus map: mapping rows divided
    # by (consensus features x samples)
    n_consensus = len(self.consensus_df)
    if n_consensus > 0 and n_samples > 0:
        c = len(self.consensus_mapping_df) / n_consensus / n_samples
        self.logger.info(
            f"Merging completed. Consensus features: {n_consensus}. Completeness: {c:.2f}.",
        )
    else:
        self.logger.warning(
            f"Merging completed with empty result. Consensus features: {n_consensus}. "
            f"This may be due to min_samples ({min_samples}) being too high for the available data.",
        )

    if link_ms2:
        self.find_ms2()
|
|
371
|
+
|
|
293
372
|
def _reset_consensus_data(self):
|
|
294
373
|
"""Reset consensus-related DataFrames at the start of merge."""
|
|
295
374
|
self.consensus_df = pl.DataFrame()
|
|
@@ -605,7 +684,6 @@ def _extract_consensus_features(self, consensus_map, min_samples):
|
|
|
605
684
|
total_count = sum(adduct_counts.values())
|
|
606
685
|
for adduct, count in adduct_counts.items():
|
|
607
686
|
percentage = (count / total_count) * 100 if total_count > 0 else 0
|
|
608
|
-
mass = adduct_masses.get(adduct, None)
|
|
609
687
|
# Store as list with [name, num, %] format for the adducts column
|
|
610
688
|
adduct_values.append(
|
|
611
689
|
[
|
|
@@ -872,56 +950,6 @@ def _extract_consensus_features(self, consensus_map, min_samples):
|
|
|
872
950
|
|
|
873
951
|
self.consensus_map = consensus_map
|
|
874
952
|
|
|
875
|
-
# Add adduct grouping and adduct_of assignment
|
|
876
|
-
if len(self.consensus_df) > 0:
|
|
877
|
-
# Get rt_tol and mz_tol from kwargs or use defaults from merge_defaults
|
|
878
|
-
adduct_rt_tol = rt_tol # Use the same rt_tol from merge parameters
|
|
879
|
-
adduct_mz_tol = mz_tol # Use the same mz_tol from merge parameters
|
|
880
|
-
|
|
881
|
-
# Get relevant columns for grouping
|
|
882
|
-
consensus_data = []
|
|
883
|
-
for row in self.consensus_df.iter_rows(named=True):
|
|
884
|
-
consensus_data.append(
|
|
885
|
-
{
|
|
886
|
-
"consensus_uid": row["consensus_uid"],
|
|
887
|
-
"rt": row["rt"],
|
|
888
|
-
"adduct_mass_neutral_top": row.get("adduct_mass_neutral_top"),
|
|
889
|
-
"adduct_top": row.get("adduct_top"),
|
|
890
|
-
"inty_mean": row.get("inty_mean", 0),
|
|
891
|
-
},
|
|
892
|
-
)
|
|
893
|
-
|
|
894
|
-
# Use optimized adduct grouping
|
|
895
|
-
adduct_group_list, adduct_of_list = _optimized_adduct_grouping(
|
|
896
|
-
self, consensus_data, adduct_rt_tol, adduct_mz_tol
|
|
897
|
-
)
|
|
898
|
-
|
|
899
|
-
# Add the new columns to consensus_df
|
|
900
|
-
self.consensus_df = self.consensus_df.with_columns(
|
|
901
|
-
[
|
|
902
|
-
pl.Series("adduct_group", adduct_group_list, dtype=pl.Int64),
|
|
903
|
-
pl.Series("adduct_of", adduct_of_list, dtype=pl.Int64),
|
|
904
|
-
],
|
|
905
|
-
)
|
|
906
|
-
|
|
907
|
-
# calculate the completeness of the consensus map
|
|
908
|
-
if len(self.consensus_df) > 0 and len(self.samples_df) > 0:
|
|
909
|
-
c = (
|
|
910
|
-
len(self.consensus_mapping_df)
|
|
911
|
-
/ len(self.consensus_df)
|
|
912
|
-
/ len(self.samples_df)
|
|
913
|
-
)
|
|
914
|
-
self.logger.info(
|
|
915
|
-
f"Merging completed. Consensus features: {len(self.consensus_df)}. Completeness: {c:.2f}.",
|
|
916
|
-
)
|
|
917
|
-
else:
|
|
918
|
-
self.logger.warning(
|
|
919
|
-
f"Merging completed with empty result. Consensus features: {len(self.consensus_df)}. "
|
|
920
|
-
f"This may be due to min_samples ({min_samples}) being too high for the available data.",
|
|
921
|
-
)
|
|
922
|
-
if link_ms2:
|
|
923
|
-
self.find_ms2()
|
|
924
|
-
|
|
925
953
|
|
|
926
954
|
def _optimized_feature_lookup(study_obj, features_df):
|
|
927
955
|
"""
|
masster/study/study.py
CHANGED
|
@@ -119,6 +119,13 @@ from masster.study.processing import align
|
|
|
119
119
|
from masster.study.processing import merge
|
|
120
120
|
from masster.study.processing import integrate
|
|
121
121
|
from masster.study.processing import find_ms2
|
|
122
|
+
from masster.study.processing import _reset_consensus_data
|
|
123
|
+
from masster.study.processing import _process_merge_parameters
|
|
124
|
+
from masster.study.processing import _validate_merge_inputs
|
|
125
|
+
from masster.study.processing import _perform_feature_grouping
|
|
126
|
+
from masster.study.processing import _extract_consensus_features
|
|
127
|
+
from masster.study.processing import _perform_adduct_grouping
|
|
128
|
+
from masster.study.processing import _finalize_merge
|
|
122
129
|
from masster.study.parameters import store_history
|
|
123
130
|
from masster.study.parameters import get_parameters
|
|
124
131
|
from masster.study.parameters import update_parameters
|
|
@@ -490,6 +497,15 @@ class Study:
|
|
|
490
497
|
_format_adduct_name = _format_adduct_name
|
|
491
498
|
_parse_element_counts = _parse_element_counts
|
|
492
499
|
|
|
500
|
+
# === Merge Helper Methods ===
|
|
501
|
+
_reset_consensus_data = _reset_consensus_data
|
|
502
|
+
_process_merge_parameters = _process_merge_parameters
|
|
503
|
+
_validate_merge_inputs = _validate_merge_inputs
|
|
504
|
+
_perform_feature_grouping = _perform_feature_grouping
|
|
505
|
+
_extract_consensus_features = _extract_consensus_features
|
|
506
|
+
_perform_adduct_grouping = _perform_adduct_grouping
|
|
507
|
+
_finalize_merge = _finalize_merge
|
|
508
|
+
|
|
493
509
|
# === Default Parameters ===
|
|
494
510
|
study_defaults = study_defaults
|
|
495
511
|
align_defaults = align_defaults
|
|
@@ -587,6 +603,51 @@ class Study:
|
|
|
587
603
|
except Exception as e:
|
|
588
604
|
self.logger.error(f"Failed to reload current module {current_module}: {e}")
|
|
589
605
|
|
|
606
|
+
def __dir__(self):
    """
    Return the attribute names listed by ``dir()`` and tab completion.

    Names starting with a single underscore are left out, dunder names such
    as ``__init__`` or ``__str__`` are kept, and the backward compatibility
    aliases listed below are left out as well. Only the listing is affected:
    this method does not change attribute lookup, so the omitted names can
    still be accessed normally.

    Returns:
        list: Sorted names of public attributes and methods (excluding
        internal names and backward compatibility aliases).
    """
    # Backward compatibility aliases to hide from the listing
    hidden_aliases = {
        'add_folder',  # alias for add
        'find_consensus',  # alias for merge
        'integrate_chrom',  # alias for integrate
        'fill_chrom',  # alias for fill
        'fill_chrom_single',  # alias for fill_single
        'filter_consensus',  # alias for consensus_filter
        'select_consensus',  # alias for consensus_select
        'filter_features',  # alias for features_filter
        'select_features',  # alias for features_select
        'consensus_find',  # alias for merge
    }

    # Collect the names defined on the instance, on its class and on every
    # base class in the MRO. getattr() keeps this working for instances
    # that have no __dict__.
    names = set(getattr(self, '__dict__', ()))
    for cls in self.__class__.__mro__:
        names.update(cls.__dict__)

    def _is_listed(name):
        # Explicit grouping: keep a name when it is public or a dunder,
        # and it is not one of the hidden aliases.
        is_dunder = name.startswith('__') and name.endswith('__')
        return (not name.startswith('_') or is_dunder) and name not in hidden_aliases

    return sorted(name for name in names if _is_listed(name))
|
|
650
|
+
|
|
590
651
|
def __str__(self):
|
|
591
652
|
"""
|
|
592
653
|
Returns a string representation of the study.
|
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
masster/__init__.py,sha256=8U4cIteNlYyHDrxWSbB_MsDKCX9tds07SJG8-vh8Oa8,738
|
|
2
|
-
masster/_version.py,sha256=
|
|
2
|
+
masster/_version.py,sha256=I-X6v7vJXEOjIJ56DR0aMg6dzSc0_uJOd8vE_X3gQ64,257
|
|
3
3
|
masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
|
|
4
4
|
masster/logger.py,sha256=W50V_uh8RSYwGxDrDFhOuj5jpu2tKJyt_16lMw9kQwA,14755
|
|
5
5
|
masster/spectrum.py,sha256=_upC_g2N9gwTaflXAugs9pSXpKUmzbIehofDordk7WI,47718
|
|
@@ -26,7 +26,7 @@ masster/sample/parameters.py,sha256=Gg2KcuNbV_wZ_Wwv93QlM5J19ji0oSIvZLPV1NoBmq0,
|
|
|
26
26
|
masster/sample/plot.py,sha256=abLnG0Bk75vqSGQz6uA3uTK3IE9N-s687ZH-n8Mhdzg,82757
|
|
27
27
|
masster/sample/processing.py,sha256=lCHRv290oAFOxe_zR5GMi4FdxodjJh1rj2uLWy_wHnc,49771
|
|
28
28
|
masster/sample/quant.py,sha256=tHNjvUFTdehKR31BXBZnVsBxMD9XJHgaltITOjr71uE,7562
|
|
29
|
-
masster/sample/sample.py,sha256=
|
|
29
|
+
masster/sample/sample.py,sha256=689fPI3JmMDrprv_q_JVt5CsCwcRgSR7MxS3OmHK2tA,20234
|
|
30
30
|
masster/sample/sample5_schema.json,sha256=voVB6z0TaIJwU-_SPUEYWKH7mKC16ycTe1nW6gODYP8,3916
|
|
31
31
|
masster/sample/save.py,sha256=XZl5ITYdOjojYFOoUZ-0ygVSPH1kT5Va6e8NyuTRNAI,32500
|
|
32
32
|
masster/sample/sciex.py,sha256=vnbxsq_qnAQVuzcpziP1o3IC4kM5amGBcPmC2TAuDLw,46319
|
|
@@ -44,9 +44,9 @@ masster/study/id.py,sha256=V2R2L3NtiPvl1STDOonBbYGtHikeN0VGH78ruUNhgNE,55263
|
|
|
44
44
|
masster/study/load.py,sha256=tK3ueWxrauyveZukyh1YaV8h8fSFDytLnTmrEpFvcwU,70458
|
|
45
45
|
masster/study/parameters.py,sha256=0elaF7YspTsB7qyajWAbRNL2VfKlGz5GJLifmO8IGkk,3276
|
|
46
46
|
masster/study/plot.py,sha256=Wp48DH5x1t8w6R67AMjxLaUIKZpDa82fnUoAgEeNY5E,87564
|
|
47
|
-
masster/study/processing.py,sha256=
|
|
47
|
+
masster/study/processing.py,sha256=czPoKNnLFwuFEMQtYtcc_7_0O1SxzNvoiu32a7aT9wc,78222
|
|
48
48
|
masster/study/save.py,sha256=F_H34zmvxV54Ds64ju90JJLy_F4hg6nRdHhJ9ssWKLA,6704
|
|
49
|
-
masster/study/study.py,sha256=
|
|
49
|
+
masster/study/study.py,sha256=s5QtO805nWuxIHcVFZ21tJScbIsNnX-Tj_WMTl-F6pE,38653
|
|
50
50
|
masster/study/study5_schema.json,sha256=c0w24QdHak01m04I1VPu97KvF2468FcaqROhf6pmLk4,7507
|
|
51
51
|
masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
|
|
52
52
|
masster/study/defaults/align_def.py,sha256=hHQbGgsOqMRHHr0Wn8Onr8XeaRz3-fFE0qGE-OMst80,20324
|
|
@@ -60,8 +60,8 @@ masster/study/defaults/integrate_chrom_def.py,sha256=0MNIWGTjty-Zu-NTQsIweuj3UVq
|
|
|
60
60
|
masster/study/defaults/integrate_def.py,sha256=Vf4SAzdBfnsSZ3IRaF0qZvWu3gMDPHdgPfMYoPKeWv8,7246
|
|
61
61
|
masster/study/defaults/merge_def.py,sha256=EBsKE3hsAkTEzN9dpdRD5W3_suTKy_WZ_96rwS0uBuE,8572
|
|
62
62
|
masster/study/defaults/study_def.py,sha256=h8dYbi9xv0sesCSQik49Z53IkskMmNtW6ixl7it5pL0,16033
|
|
63
|
-
masster-0.4.
|
|
64
|
-
masster-0.4.
|
|
65
|
-
masster-0.4.
|
|
66
|
-
masster-0.4.
|
|
67
|
-
masster-0.4.
|
|
63
|
+
masster-0.4.14.dist-info/METADATA,sha256=a4qQBBc3QxE9j56BQYQmNjz6ndG5-_FSf0A4BwrDDmk,44189
|
|
64
|
+
masster-0.4.14.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
65
|
+
masster-0.4.14.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
|
|
66
|
+
masster-0.4.14.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
|
|
67
|
+
masster-0.4.14.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|