masster 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +90 -94
- masster/sample/defaults/sample_def.py +15 -0
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/lib.py +11 -11
- masster/sample/load.py +13 -9
- masster/sample/plot.py +167 -60
- masster/sample/processing.py +150 -153
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +197 -123
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/RECORD +34 -32
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
masster/sample/processing.py
CHANGED
|
@@ -19,55 +19,34 @@ from .defaults.get_spectrum_def import get_spectrum_defaults
|
|
|
19
19
|
|
|
20
20
|
|
|
21
21
|
def get_spectrum(self, scan, **kwargs):
|
|
22
|
-
"""
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
centroid (bool, optional): Flag indicating whether the spectrum should be centroided.
|
|
43
|
-
If True and the spectrum is not already centroided, the method
|
|
44
|
-
applies denoising followed by centroiding using parameters from self.parameters.
|
|
45
|
-
Default is True.
|
|
46
|
-
deisotope (bool, optional): Flag indicating whether deisotoping should be performed. Default is False.
|
|
47
|
-
dia_stats (optional): Flag or parameter for processing DIA (data-independent acquisition)
|
|
48
|
-
statistics. If provided (and if applicable to the file type), additional
|
|
49
|
-
statistics will be computed for 'ztscan' files. Default is None.
|
|
50
|
-
feature (optional): An optional identifier used when computing DIA statistics. Default is None.
|
|
51
|
-
label (str, optional): Optional label to assign to the spectrum. If not provided,
|
|
52
|
-
a default name is generated based on the MS level and retention time.
|
|
53
|
-
Default is None.
|
|
54
|
-
centroid_algo (str, optional): Algorithm to use for centroiding. Default is None.
|
|
22
|
+
"""Retrieve a single spectrum and optionally post-process it.
|
|
23
|
+
|
|
24
|
+
The function locates the requested scan in ``self.scans_df`` and returns a
|
|
25
|
+
:class:`Spectrum` object. Processing steps (centroiding, deisotoping,
|
|
26
|
+
trimming and optional DIA statistics) are controlled by parameters defined
|
|
27
|
+
in :class:`get_spectrum_defaults`. Pass an instance of that class via
|
|
28
|
+
``**kwargs`` or override individual parameters (they will be validated
|
|
29
|
+
against the defaults class).
|
|
30
|
+
|
|
31
|
+
Main parameters (from ``get_spectrum_defaults``):
|
|
32
|
+
|
|
33
|
+
- scan (int | list[int]): Scan id(s) to retrieve. A single integer or a list of integers is accepted.
|
|
34
|
+
- precursor_trim (int): m/z window used to trim precursor region for MS2 (default: -10).
|
|
35
|
+
- max_peaks (int | None): Maximum number of peaks to keep; ``None`` keeps all.
|
|
36
|
+
- centroid (bool): Whether to centroid the spectrum (default: True).
|
|
37
|
+
- deisotope (bool): Whether to apply deisotoping (default: True).
|
|
38
|
+
- dia_stats (bool | None): Collect DIA/ztscan statistics when applicable (default: False).
|
|
39
|
+
- feature (int | None): Optional feature id used for computing DIA statistics.
|
|
40
|
+
- label (str | None): Optional label to assign to the returned Spectrum.
|
|
41
|
+
- centroid_algo (str | None): Centroiding algorithm to use (allowed: 'lmp', 'cwt', 'gaussian').
|
|
55
42
|
|
|
56
43
|
Returns:
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
- metadata such as MS level, retention time, energy, and an assigned label
|
|
60
|
-
Depending on the processing steps (centroiding, trimming, deisotoping, etc.), the
|
|
61
|
-
returned spectrum is modified accordingly.
|
|
62
|
-
Returns None or an empty spectrum if the scan is not found or if an error occurs.
|
|
44
|
+
Spectrum or None: Processed spectrum object (may be an empty Spectrum if
|
|
45
|
+
the scan is missing or on error).
|
|
63
46
|
|
|
64
47
|
Notes:
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
- For the 'alpharaw' file interface, the method uses internal DataFrame attributes to locate the
|
|
68
|
-
scan and its associated peaks.
|
|
69
|
-
- The method applies additional processing (denoising, centroiding, deisotoping, trimming) based on
|
|
70
|
-
the input flags and the MS level of the spectrum.
|
|
48
|
+
This wrapper validates provided parameters against ``get_spectrum_defaults``.
|
|
49
|
+
Use the defaults class to discover parameter constraints and allowed values.
|
|
71
50
|
"""
|
|
72
51
|
|
|
73
52
|
# parameters initialization
|
|
@@ -510,40 +489,51 @@ def _spec_to_mat(
|
|
|
510
489
|
|
|
511
490
|
|
|
512
491
|
def find_features(self, **kwargs):
|
|
513
|
-
"""
|
|
514
|
-
Detect features in mass spectrometry data by processing MS1 spectra, performing mass trace detection,
|
|
515
|
-
elution peak detection, and feature detection. Optionally, deisotope features and remove low-quality peaks.
|
|
492
|
+
"""Detect features from MS1 data (mass-trace detection, peak deconvolution, feature assembly).
|
|
516
493
|
|
|
517
|
-
|
|
518
|
-
|
|
519
|
-
|
|
520
|
-
resulting feature map is cleaned, deisotoped (if enabled), and assigned unique IDs before being stored.
|
|
494
|
+
The method converts internal MS1 data into an MSExperiment (one MSSpectrum per cycle), runs mass-trace
|
|
495
|
+
detection, deconvolutes mass traces to find chromatographic peaks, and assembles features. Results are
|
|
496
|
+
cleaned, optionally deisotoped, assigned unique IDs and stored in ``self.features`` / ``self.features_df``.
|
|
521
497
|
|
|
522
498
|
Parameters:
|
|
523
|
-
**kwargs: Keyword
|
|
524
|
-
|
|
525
|
-
|
|
526
|
-
|
|
527
|
-
|
|
528
|
-
|
|
529
|
-
|
|
530
|
-
|
|
531
|
-
|
|
532
|
-
|
|
533
|
-
|
|
534
|
-
|
|
535
|
-
|
|
499
|
+
**kwargs: Keyword overrides for any parameter available in :class:`find_features_defaults`.
|
|
500
|
+
You may pass a full ``find_features_defaults`` instance or individual parameter values.
|
|
501
|
+
|
|
502
|
+
Main parameters (what they mean, units and tuning guidance):
|
|
503
|
+
|
|
504
|
+
- chrom_fwhm (float, seconds):
|
|
505
|
+
Expected chromatographic peak full-width at half-maximum (FWHM) in seconds. This guides smoothing,
|
|
506
|
+
peak-finding window sizes and RT-based tolerances. Choose a value that matches your LC peak widths:
|
|
507
|
+
small values (e.g. 0.2–0.8 s) for sharp/fast separations, larger values (several seconds) for broad peaks.
|
|
508
|
+
Default: 1.0 s.
|
|
509
|
+
|
|
510
|
+
- noise (float, intensity units):
|
|
511
|
+
Intensity threshold used to ignore background points before mass-trace and peak detection. Raising
|
|
512
|
+
``noise`` reduces false positives from baseline fluctuations but may discard low-abundance true signals;
|
|
513
|
+
lowering it increases sensitivity but raises the false-positive rate. Set this to a conservative estimate of
|
|
514
|
+
your instrument baseline (default: 200.0, instrument-dependent).
|
|
515
|
+
|
|
516
|
+
- chrom_peak_snr (float, unitless):
|
|
517
|
+
Minimum signal-to-noise ratio required to accept an elution peak during peak deconvolution. SNR is usually
|
|
518
|
+
computed as peak height divided by a local noise estimate. Higher values make detection stricter (fewer
|
|
519
|
+
low-quality peaks), lower values make it more permissive. Typical tuning range: ~3 (relaxed) to >10
|
|
520
|
+
(stringent). Default: 10.0.
|
|
521
|
+
|
|
522
|
+
- isotope_filtering_model (str):
|
|
523
|
+
Isotope filtering model ('metabolites (2% RMS)', 'metabolites (5% RMS)', 'peptides', 'none').
|
|
524
|
+
Default: 'metabolites (5% RMS)'.
|
|
525
|
+
|
|
526
|
+
Tuning recommendation: first set ``chrom_fwhm`` to match your LC peak shape, then set ``noise`` to a baseline
|
|
527
|
+
intensity filter for your data, and finally adjust ``chrom_peak_snr`` to reach the desired balance between
|
|
528
|
+
sensitivity and specificity.
|
|
536
529
|
|
|
537
530
|
Attributes set:
|
|
538
|
-
self.features:
|
|
539
|
-
self.features_df:
|
|
540
|
-
detected features.
|
|
531
|
+
self.features: OpenMS FeatureMap produced by the routine (after ensureUniqueId).
|
|
532
|
+
self.features_df: cleaned polars DataFrame of detected features (zero-quality peaks removed).
|
|
541
533
|
|
|
542
534
|
Notes:
|
|
543
|
-
|
|
544
|
-
|
|
545
|
-
FeatureFindingMetabo) are used throughout the processing.
|
|
546
|
-
- After feature detection, additional cleaning is performed via internal helper methods.
|
|
535
|
+
The implementation relies on OpenMS components (MassTraceDetection, ElutionPeakDetection,
|
|
536
|
+
FeatureFindingMetabo). See ``find_features_defaults`` for the full list of adjustable parameters.
|
|
547
537
|
"""
|
|
548
538
|
if self.ms1_df is None:
|
|
549
539
|
self.logger.error("No MS1 data found. Please load a file first.")
|
|
@@ -570,24 +560,25 @@ def find_features(self, **kwargs):
|
|
|
570
560
|
self.logger.warning(f"Unknown parameter {key} ignored")
|
|
571
561
|
|
|
572
562
|
# Set global parameters
|
|
573
|
-
if hasattr(params,
|
|
563
|
+
if hasattr(params, "threads") and params.threads is not None:
|
|
574
564
|
try:
|
|
575
565
|
# Try setting via OpenMP environment variable first (newer approach)
|
|
576
566
|
import os
|
|
577
|
-
|
|
567
|
+
|
|
568
|
+
os.environ["OMP_NUM_THREADS"] = str(params.threads)
|
|
578
569
|
self.logger.debug(f"Set thread count to {params.threads} via OMP_NUM_THREADS")
|
|
579
570
|
except Exception:
|
|
580
571
|
self.logger.warning(f"Could not set thread count to {params.threads} - using default")
|
|
581
|
-
|
|
572
|
+
|
|
582
573
|
# Set debug mode if enabled
|
|
583
|
-
if hasattr(params,
|
|
574
|
+
if hasattr(params, "debug") and params.debug:
|
|
584
575
|
self.logger.debug("Debug mode enabled")
|
|
585
|
-
elif hasattr(params,
|
|
576
|
+
elif hasattr(params, "no_progress") and params.no_progress:
|
|
586
577
|
self.logger.debug("No progress mode enabled")
|
|
587
|
-
|
|
578
|
+
|
|
588
579
|
self.logger.info("Starting feature detection...")
|
|
589
580
|
self.logger.debug(
|
|
590
|
-
f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}",
|
|
581
|
+
f"Parameters: chrom_fwhm={params.get('chrom_fwhm')}, noise={params.get('noise')}, tol_ppm={params.get('tol_ppm')}, isotope_filtering_model={params.get('isotope_filtering_model')}",
|
|
591
582
|
)
|
|
592
583
|
|
|
593
584
|
exp = oms.MSExperiment()
|
|
@@ -625,7 +616,7 @@ def find_features(self, **kwargs):
|
|
|
625
616
|
int(params.get("trace_termination_outliers")),
|
|
626
617
|
)
|
|
627
618
|
mtd_par.setValue("chrom_peak_snr", float(params.get("chrom_peak_snr")))
|
|
628
|
-
|
|
619
|
+
|
|
629
620
|
# Additional MTD parameters
|
|
630
621
|
mtd_par.setValue("min_sample_rate", float(params.get("min_sample_rate")))
|
|
631
622
|
mtd_par.setValue("min_trace_length", float(params.get("min_trace_length")))
|
|
@@ -644,19 +635,16 @@ def find_features(self, **kwargs):
|
|
|
644
635
|
# Apply EPD parameters using our parameter class
|
|
645
636
|
epd_par.setValue("width_filtering", params.get("width_filtering"))
|
|
646
637
|
epd_par.setValue("min_fwhm", float(params.get("chrom_fwhm_min")))
|
|
638
|
+
epd_par.setValue("max_fwhm", float(params.get("chrom_fwhm_max")))
|
|
647
639
|
epd_par.setValue("chrom_fwhm", float(params.get("chrom_fwhm")))
|
|
648
640
|
epd_par.setValue("chrom_peak_snr", float(params.get("chrom_peak_snr")))
|
|
649
641
|
if params.get("masstrace_snr_filtering"):
|
|
650
642
|
epd_par.setValue("masstrace_snr_filtering", "true")
|
|
651
643
|
if params.get("mz_scoring_13C"):
|
|
652
644
|
epd_par.setValue("mz_scoring_13C", "true")
|
|
653
|
-
|
|
645
|
+
|
|
654
646
|
# Additional EPD parameters
|
|
655
647
|
epd_par.setValue("enabled", "true" if params.get("enabled") else "false")
|
|
656
|
-
|
|
657
|
-
# Set min/max FWHM parameters
|
|
658
|
-
epd_par.setValue("min_fwhm", float(params.get("min_fwhm")))
|
|
659
|
-
epd_par.setValue("max_fwhm", float(params.get("max_fwhm")))
|
|
660
648
|
|
|
661
649
|
epd.setParameters(epd_par)
|
|
662
650
|
epd.detectPeaks(mass_traces, mass_traces_deconvol)
|
|
@@ -684,18 +672,19 @@ def find_features(self, **kwargs):
|
|
|
684
672
|
"report_chromatograms",
|
|
685
673
|
"true" if params.get("report_chromatograms") else "false",
|
|
686
674
|
)
|
|
687
|
-
|
|
675
|
+
ffm_par.setValue(
|
|
676
|
+
"report_smoothed_intensities",
|
|
677
|
+
"true" if params.get("report_smoothed_intensities") else "false",
|
|
678
|
+
)
|
|
688
679
|
# Additional FFM parameters
|
|
689
680
|
ffm_par.setValue("local_rt_range", float(params.get("local_rt_range")))
|
|
690
681
|
ffm_par.setValue("local_mz_range", float(params.get("local_mz_range")))
|
|
691
682
|
ffm_par.setValue("charge_lower_bound", int(params.get("charge_lower_bound")))
|
|
692
683
|
ffm_par.setValue("charge_upper_bound", int(params.get("charge_upper_bound")))
|
|
693
|
-
ffm_par.setValue(
|
|
694
|
-
"report_smoothed_intensities",
|
|
695
|
-
"true" if params.get("report_smoothed_intensities") else "false",
|
|
696
|
-
)
|
|
684
|
+
ffm_par.setValue("isotope_filtering_model", params.get("isotope_filtering_model"))
|
|
697
685
|
|
|
698
686
|
ffm.setParameters(ffm_par)
|
|
687
|
+
|
|
699
688
|
self.logger.debug("Running feature finding with parameters:")
|
|
700
689
|
self.logger.debug(ffm_par)
|
|
701
690
|
ffm.run(mass_traces_deconvol, feature_map, chrom_out)
|
|
@@ -712,7 +701,7 @@ def find_features(self, **kwargs):
|
|
|
712
701
|
df = self._features_deisotope(
|
|
713
702
|
df,
|
|
714
703
|
mz_tol=params.get("deisotope_mz_tol"),
|
|
715
|
-
rt_tol=params.get("
|
|
704
|
+
rt_tol=params.get("chrom_fwhm") * params.get("deisotope_rt_tol_factor"),
|
|
716
705
|
)
|
|
717
706
|
if params.get("deisotope"):
|
|
718
707
|
# record size before deisotoping
|
|
@@ -729,8 +718,8 @@ def find_features(self, **kwargs):
|
|
|
729
718
|
prominence_scaleds: list[float] = []
|
|
730
719
|
height_scaleds: list[float] = []
|
|
731
720
|
|
|
732
|
-
mz_tol =
|
|
733
|
-
rt_tol =
|
|
721
|
+
mz_tol = self.parameters.get("eic_mz_tol")
|
|
722
|
+
rt_tol = self.parameters.get("eic_rt_tol")
|
|
734
723
|
|
|
735
724
|
# iterate over all rows in df using polars iteration
|
|
736
725
|
self.logger.debug("Extracting EICs...")
|
|
@@ -807,27 +796,44 @@ def find_features(self, **kwargs):
|
|
|
807
796
|
|
|
808
797
|
|
|
809
798
|
def find_adducts(self, **kwargs):
|
|
810
|
-
"""
|
|
811
|
-
Detect adducts in mass spectrometry features using OpenMS MetaboliteFeatureDeconvolution.
|
|
799
|
+
"""Detect adduct relationships among detected features.
|
|
812
800
|
|
|
813
|
-
This method
|
|
814
|
-
|
|
815
|
-
|
|
801
|
+
This method groups features that are likely adducts of the same molecular entity
|
|
802
|
+
using OpenMS MetaboliteFeatureDeconvolution. Parameters are taken from the
|
|
803
|
+
:class:`find_adducts_defaults` dataclass; you can pass an instance of that class
|
|
804
|
+
via ``**kwargs`` or override individual parameter names (they will be validated
|
|
805
|
+
against the defaults class).
|
|
816
806
|
|
|
817
|
-
|
|
818
|
-
**kwargs: Keyword arguments for adduct detection parameters. Can include:
|
|
819
|
-
- A find_adducts_defaults instance to set all parameters at once
|
|
820
|
-
- Individual parameter names and values (see find_adducts_defaults for details)
|
|
807
|
+
Main parameters (from ``find_adducts_defaults``):
|
|
821
808
|
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
|
|
826
|
-
retention_max_diff (float): Maximum retention time difference for grouping (default: 1.0).
|
|
809
|
+
- adducts (list[str] | str | None):
|
|
810
|
+
List of potential adduct strings formatted for OpenMS, or a short ionization
|
|
811
|
+
mode string (``'pos'``/``'neg'``). When ``None`` a sensible positive-mode
|
|
812
|
+
default set is used.
|
|
827
813
|
|
|
828
|
-
|
|
829
|
-
|
|
830
|
-
|
|
814
|
+
- charge_min (int):
|
|
815
|
+
Minimum allowed charge state for grouping (default: 1).
|
|
816
|
+
|
|
817
|
+
- charge_max (int):
|
|
818
|
+
Maximum allowed charge state for grouping (default: 2).
|
|
819
|
+
|
|
820
|
+
- charge_span_max (int):
|
|
821
|
+
Maximum span between different charge states within the same adduct group
|
|
822
|
+
(default: 2).
|
|
823
|
+
|
|
824
|
+
- retention_max_diff (float, minutes):
|
|
825
|
+
Global maximum retention-time difference allowed for grouping (default: 1.0).
|
|
826
|
+
|
|
827
|
+
- retention_max_diff_local (float, minutes):
|
|
828
|
+
A tighter, local RT tolerance used for fine-grained grouping (default: 1.0).
|
|
829
|
+
|
|
830
|
+
Side effects:
|
|
831
|
+
Updates ``self.features_df`` with columns ``adduct``, ``adduct_mass`` and
|
|
832
|
+
``adduct_group`` populated from the OpenMS results.
|
|
833
|
+
|
|
834
|
+
Notes:
|
|
835
|
+
Use ``find_adducts_defaults`` to inspect available parameters and their
|
|
836
|
+
canonical descriptions/constraints.
|
|
831
837
|
"""
|
|
832
838
|
params = find_adducts_defaults()
|
|
833
839
|
for key, value in kwargs.items():
|
|
@@ -1177,54 +1183,44 @@ def analyze_dda(self):
|
|
|
1177
1183
|
|
|
1178
1184
|
|
|
1179
1185
|
def find_ms2(self, **kwargs):
|
|
1180
|
-
"""
|
|
1181
|
-
Link MS2 spectra to features in the dataset.
|
|
1182
|
-
This method matches MS2 spectra from the scans dataframe with features in the features dataframe
|
|
1183
|
-
based on retention time (RT) and precursor m/z tolerance criteria. For each feature in the provided
|
|
1184
|
-
or inferred list of feature ids (feature_uid), it computes the RT difference between the feature and available
|
|
1185
|
-
MS2 spectra. It then selects MS2 spectra that fall within a computed RT radius (based on the feature's
|
|
1186
|
-
start and end times) and a specified m/z tolerance. For each feature, it chooses one MS2 spectrum per
|
|
1187
|
-
unique cycle based on the closest RT difference, and it updates the feature with the list of matched
|
|
1188
|
-
scan ids and the spectrum corresponding to the first matching scan id. Additionally, the scan dataframe
|
|
1189
|
-
is updated to associate matched scan ids with the corresponding feature id.
|
|
1186
|
+
"""Link MS2 spectra to detected features.
|
|
1190
1187
|
|
|
1191
|
-
|
|
1192
|
-
|
|
1193
|
-
|
|
1194
|
-
|
|
1195
|
-
|
|
1196
|
-
Key Parameters:
|
|
1197
|
-
features (int or list of int, optional): A specific feature id or a list of feature ids to process.
|
|
1198
|
-
If an individual feature_uid is provided and equals -1, all features with no associated MS2 data will be processed.
|
|
1199
|
-
If None, all features in the features dataframe are processed.
|
|
1200
|
-
mz_tol (float, optional): The precursor m/z tolerance to consider when matching MS2 spectra. If not provided,
|
|
1201
|
-
it defaults to 0.5, except for certain file types ('ztscan' or 'dia') which set it to 4.
|
|
1202
|
-
centroid (bool, optional): If True, the returned spectrum will be centroided. Default is True.
|
|
1203
|
-
deisotope (bool, optional): Flag indicating whether deisotoping should be performed. Default is False.
|
|
1204
|
-
dia_stats (bool, optional): A flag to collect additional DIA-related statistics when retrieving a spectrum.
|
|
1205
|
-
Default is False.
|
|
1188
|
+
Matches MS2 scans from ``self.scans_df`` to features in ``self.features_df`` using
|
|
1189
|
+
retention time and precursor m/z criteria. Parameters are defined in
|
|
1190
|
+
:class:`find_ms2_defaults`; pass an instance via ``**kwargs`` or override
|
|
1191
|
+
individual parameters (they will be validated against the defaults class).
|
|
1206
1192
|
|
|
1207
|
-
|
|
1208
|
-
|
|
1193
|
+
Main parameters (from ``find_ms2_defaults``):
|
|
1194
|
+
|
|
1195
|
+
- mz_tol (float):
|
|
1196
|
+
Precursor m/z tolerance used for matching. The effective tolerance may be
|
|
1197
|
+
adjusted by file type (the defaults class provides ``get_mz_tolerance(file_type)``).
|
|
1198
|
+
Default: 0.5 (for ztscan/DIA file types ``mz_tol_ztscan`` is used instead; default 4.0).
|
|
1199
|
+
|
|
1200
|
+
- centroid (bool):
|
|
1201
|
+
If True, retrieved spectra will be centroided (default: True).
|
|
1209
1202
|
|
|
1210
|
-
|
|
1211
|
-
|
|
1212
|
-
|
|
1213
|
-
|
|
1203
|
+
- deisotope (bool):
|
|
1204
|
+
If True, spectra will be deisotoped before returning (default: False).
|
|
1205
|
+
|
|
1206
|
+
- dia_stats (bool):
|
|
1207
|
+
Collect additional DIA/ztscan statistics when retrieving spectra (default: False).
|
|
1208
|
+
|
|
1209
|
+
- features (int | list[int] | None):
|
|
1210
|
+
Specific feature uid or list of uids to process. Use ``None`` to process all
|
|
1211
|
+
features. An empty list is treated as ``None``.
|
|
1212
|
+
|
|
1213
|
+
- mz_tol_ztscan (float):
|
|
1214
|
+
m/z tolerance used for ztscan/DIA file types (default: 4.0).
|
|
1215
|
+
|
|
1216
|
+
Side effects:
|
|
1217
|
+
Updates ``self.features_df`` with columns ``ms2_scans`` and ``ms2_specs`` and
|
|
1218
|
+
updates ``self.scans_df`` to set the ``feature_uid`` for matched scans.
|
|
1214
1219
|
|
|
1215
1220
|
Notes:
|
|
1216
|
-
|
|
1217
|
-
|
|
1218
|
-
|
|
1219
|
-
- The function assumes that self.features_df and self.scans_df are already set up and contain the expected
|
|
1220
|
-
columns ('feature_uid', 'rt', 'rt_start', 'rt_end', 'mz' for features and 'scan_uid', 'rt', 'prec_mz', 'cycle', 'ms_level'
|
|
1221
|
-
for scans).
|
|
1222
|
-
|
|
1223
|
-
Examples:
|
|
1224
|
-
Assume the current instance has features and scans data loaded, then to link MS2 spectra for all features:
|
|
1225
|
-
instance.find_ms2()
|
|
1226
|
-
To link MS2 spectra for a specific list of feature ids:
|
|
1227
|
-
instance.find_ms2(feature_uid=[1, 3, 5])
|
|
1221
|
+
The function is implemented to be efficient by vectorizing the matching
|
|
1222
|
+
and performing batch updates. Use ``find_ms2_defaults`` to inspect all
|
|
1223
|
+
available parameters and their canonical descriptions.
|
|
1228
1224
|
"""
|
|
1229
1225
|
|
|
1230
1226
|
# parameters initialization
|
|
@@ -1374,6 +1370,7 @@ def find_ms2(self, **kwargs):
|
|
|
1374
1370
|
|
|
1375
1371
|
self.logger.debug("Update features.")
|
|
1376
1372
|
# Convert to polars if needed and batch update features_df
|
|
1373
|
+
# (pl.from_pandas is applied only when features_df is not already a polars DataFrame)
|
|
1377
1374
|
if not isinstance(features_df, pl.DataFrame):
|
|
1378
1375
|
features_df = pl.from_pandas(features_df)
|
|
1379
1376
|
|
masster/sample/sample.py
CHANGED
|
@@ -333,15 +333,15 @@ class Sample:
|
|
|
333
333
|
if module_name.startswith(study_module_prefix) and module_name != current_module:
|
|
334
334
|
study_modules.append(module_name)
|
|
335
335
|
|
|
336
|
-
|
|
336
|
+
""" # Add parameters submodules
|
|
337
337
|
parameters_modules = []
|
|
338
338
|
parameters_module_prefix = f"{base_modname}.parameters."
|
|
339
339
|
for module_name in sys.modules:
|
|
340
340
|
if module_name.startswith(parameters_module_prefix) and module_name != current_module:
|
|
341
341
|
parameters_modules.append(module_name)
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
all_modules_to_reload = core_modules + sample_modules + study_modules
|
|
342
|
+
"""
|
|
343
|
+
|
|
344
|
+
all_modules_to_reload = core_modules + sample_modules + study_modules # + parameters_modules
|
|
345
345
|
|
|
346
346
|
# Reload all discovered modules
|
|
347
347
|
for full_module_name in all_modules_to_reload:
|