masster 0.6.0__tar.gz → 0.6.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.6.0 → masster-0.6.1}/PKG-INFO +3 -2
- {masster-0.6.0 → masster-0.6.1}/README.md +1 -0
- {masster-0.6.0 → masster-0.6.1}/pyproject.toml +2 -2
- {masster-0.6.0 → masster-0.6.1}/src/masster/_version.py +1 -1
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/export.py +62 -62
- {masster-0.6.0 → masster-0.6.1}/.gitignore +0 -0
- {masster-0.6.0 → masster-0.6.1}/LICENSE +0 -0
- {masster-0.6.0 → masster-0.6.1}/THIRD_PARTY_NOTICES.md +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/chromatogram.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/data/libs/aa_nort.json +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/data/libs/ccm_nort.json +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/lib/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/lib/lib.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/logger.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/adducts.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/h5.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/helpers.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/id.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/importers.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/lib.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/load.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/parameters.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/plot.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/processing.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/quant.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/sample.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/sample5_schema.json +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/save.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/sciex.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/sample/thermo.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/spectrum.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/analysis.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/fill_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/identify_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/integrate_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/merge_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/h5.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/helpers.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/id.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/importers.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/load.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/merge.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/parameters.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/plot.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/processing.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/save.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/study.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/study/study5_schema.json +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/wizard/__init__.py +0 -0
- {masster-0.6.0 → masster-0.6.1}/src/masster/wizard/wizard.py +0 -0
|
@@ -1,12 +1,12 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: masster
|
|
3
|
-
Version: 0.6.0
|
|
3
|
+
Version: 0.6.1
|
|
4
4
|
Summary: Mass spectrometry data analysis package
|
|
5
5
|
Project-URL: homepage, https://github.com/zamboni-lab/masster
|
|
6
6
|
Project-URL: repository, https://github.com/zamboni-lab/masster
|
|
7
7
|
Project-URL: documentation, https://github.com/zamboni-lab/masster#readme
|
|
8
8
|
Project-URL: Third-Party Licenses, https://github.com/zamboni-lab/masster/blob/main/THIRD_PARTY_NOTICES.md
|
|
9
|
-
Author: Zamboni Lab
|
|
9
|
+
Author: Zamboni Lab, ETH Zurich
|
|
10
10
|
License: GNU AFFERO GENERAL PUBLIC LICENSE
|
|
11
11
|
Version 3, 19 November 2007
|
|
12
12
|
|
|
@@ -833,6 +833,7 @@ study.save()
|
|
|
833
833
|
study.plot_samples_pca()
|
|
834
834
|
study.plot_samples_umap()
|
|
835
835
|
study.plot_samples_2d()
|
|
836
|
+
study.plot_heatmap()
|
|
836
837
|
|
|
837
838
|
# To know more about the available methods...
|
|
838
839
|
dir(study)
|
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
|
|
2
2
|
[project]
|
|
3
3
|
name = "masster"
|
|
4
|
-
version = "0.6.0"
|
|
4
|
+
version = "0.6.1"
|
|
5
5
|
description = "Mass spectrometry data analysis package"
|
|
6
6
|
authors = [
|
|
7
|
-
{ name = "Zamboni Lab" }
|
|
7
|
+
{ name = "Zamboni Lab, ETH Zurich" }
|
|
8
8
|
]
|
|
9
9
|
license = { file = "LICENSE" }
|
|
10
10
|
readme = "README.md"
|
|
@@ -524,7 +524,7 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
524
524
|
# Import here to avoid circular imports
|
|
525
525
|
from masster.study.id import get_id
|
|
526
526
|
|
|
527
|
-
# Get full enriched identification data for
|
|
527
|
+
# Get full enriched identification data for SME section
|
|
528
528
|
full_id_data = get_id(self)
|
|
529
529
|
if full_id_data is not None and not full_id_data.is_empty():
|
|
530
530
|
# Get top scoring identification for each consensus_uid for SML section
|
|
@@ -828,8 +828,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
828
828
|
smf_header = [
|
|
829
829
|
"SFH",
|
|
830
830
|
"SMF_ID",
|
|
831
|
-
"
|
|
832
|
-
"
|
|
831
|
+
"SME_ID_REFS",
|
|
832
|
+
"SME_ID_REF_ambiguity_code",
|
|
833
833
|
"adduct_ion",
|
|
834
834
|
"isotopomer",
|
|
835
835
|
"exp_mass_to_charge",
|
|
@@ -847,40 +847,40 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
847
847
|
|
|
848
848
|
# SMF table uses the same consensus features as SML, just different metadata
|
|
849
849
|
for idx, row in enumerate(self.consensus_df.iter_rows(named=True), 1):
|
|
850
|
-
# References to
|
|
851
|
-
|
|
852
|
-
|
|
850
|
+
# References to SME entries - each SMF can reference multiple SME entries for the same consensus_uid
|
|
851
|
+
SME_refs = "null"
|
|
852
|
+
SME_ambiguity = "null"
|
|
853
853
|
consensus_uid = row["consensus_uid"]
|
|
854
854
|
|
|
855
855
|
if full_id_data is not None:
|
|
856
|
-
# Find all
|
|
857
|
-
|
|
858
|
-
if
|
|
859
|
-
# Generate
|
|
856
|
+
# Find all SME entries for this consensus_uid
|
|
857
|
+
SME_matches = full_id_data.filter(pl.col("consensus_uid") == consensus_uid)
|
|
858
|
+
if SME_matches.height > 0:
|
|
859
|
+
# Generate SME IDs - we'll create a mapping in the SME section
|
|
860
860
|
# For now, use a simple approach based on consensus_uid and lib_uid
|
|
861
|
-
|
|
862
|
-
for i,
|
|
863
|
-
# Create a unique
|
|
864
|
-
|
|
865
|
-
|
|
866
|
-
|
|
867
|
-
|
|
868
|
-
if
|
|
869
|
-
|
|
861
|
+
SME_ids = []
|
|
862
|
+
for i, SME_row in enumerate(SME_matches.iter_rows(named=True)):
|
|
863
|
+
# Create a unique SME ID based on consensus_uid and position
|
|
864
|
+
SME_id_base = consensus_uid * 1000 # Ensure uniqueness across consensus features
|
|
865
|
+
SME_id = SME_id_base + i + 1
|
|
866
|
+
SME_ids.append(str(SME_id))
|
|
867
|
+
|
|
868
|
+
if SME_ids:
|
|
869
|
+
SME_refs = "|".join(SME_ids)
|
|
870
870
|
# Set ambiguity code: 1=ambiguous identification, 2=multiple evidence same molecule, 3=both
|
|
871
|
-
if len(
|
|
871
|
+
if len(SME_ids) > 1:
|
|
872
872
|
# Check if all identifications point to the same compound
|
|
873
873
|
unique_cmpds = {
|
|
874
874
|
match["cmpd_uid"]
|
|
875
|
-
for match in
|
|
875
|
+
for match in SME_matches.iter_rows(named=True)
|
|
876
876
|
if match.get("cmpd_uid") is not None
|
|
877
877
|
}
|
|
878
878
|
if len(unique_cmpds) > 1:
|
|
879
|
-
|
|
879
|
+
SME_ambiguity = "1" # Ambiguous identification
|
|
880
880
|
else:
|
|
881
|
-
|
|
881
|
+
SME_ambiguity = "2" # Multiple evidence for same molecule
|
|
882
882
|
else:
|
|
883
|
-
|
|
883
|
+
SME_ambiguity = "null"
|
|
884
884
|
|
|
885
885
|
# Format isotopomer according to mzTab-M specification
|
|
886
886
|
iso_value = row.get("iso_mean", 0)
|
|
@@ -892,8 +892,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
892
892
|
smf_row = [
|
|
893
893
|
"SMF",
|
|
894
894
|
str(idx),
|
|
895
|
-
|
|
896
|
-
|
|
895
|
+
SME_refs,
|
|
896
|
+
SME_ambiguity,
|
|
897
897
|
adduct_list[idx - 1], # adduct_ion
|
|
898
898
|
isotopomer, # isotopomer formatted according to mzTab-M specification
|
|
899
899
|
safe_str(row.get("mz", "null")), # exp_mass_to_charge
|
|
@@ -943,16 +943,16 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
943
943
|
for line in smf_lines:
|
|
944
944
|
f.write(line + "\n")
|
|
945
945
|
|
|
946
|
-
# ---
|
|
946
|
+
# --- SME (Small Molecule Evidence) table ---
|
|
947
947
|
if full_id_data is not None and not full_id_data.is_empty():
|
|
948
|
-
|
|
948
|
+
SME_lines = []
|
|
949
949
|
# Add comment about spectra_ref being dummy placeholders
|
|
950
|
-
|
|
950
|
+
SME_lines.append(
|
|
951
951
|
"COM\tThe spectra_ref are dummy placeholders, as the annotation was based on aggregated data",
|
|
952
952
|
)
|
|
953
|
-
|
|
954
|
-
"
|
|
955
|
-
"
|
|
953
|
+
SME_header = [
|
|
954
|
+
"SEH",
|
|
955
|
+
"SME_ID",
|
|
956
956
|
"evidence_input_id",
|
|
957
957
|
"database_identifier",
|
|
958
958
|
"chemical_formula",
|
|
@@ -971,9 +971,9 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
971
971
|
"id_confidence_measure[1]",
|
|
972
972
|
"rank",
|
|
973
973
|
]
|
|
974
|
-
|
|
974
|
+
SME_lines.append("\t".join(SME_header))
|
|
975
975
|
|
|
976
|
-
# Create
|
|
976
|
+
# Create SME entries for all identification results using enriched data
|
|
977
977
|
for consensus_uid in self.consensus_df.select("consensus_uid").to_series().unique():
|
|
978
978
|
# Get consensus feature data for this consensus_uid
|
|
979
979
|
consensus_feature_data = self.consensus_df.filter(
|
|
@@ -984,16 +984,16 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
984
984
|
consensus_row = consensus_feature_data.row(0, named=True)
|
|
985
985
|
|
|
986
986
|
# Get all identification results for this consensus feature from enriched data
|
|
987
|
-
|
|
987
|
+
SME_matches = full_id_data.filter(pl.col("consensus_uid") == consensus_uid)
|
|
988
988
|
|
|
989
|
-
if
|
|
989
|
+
if SME_matches.height > 0:
|
|
990
990
|
# Sort by score descending to maintain rank order
|
|
991
|
-
|
|
991
|
+
SME_matches = SME_matches.sort("score", descending=True)
|
|
992
992
|
|
|
993
|
-
for i,
|
|
994
|
-
# Generate unique
|
|
995
|
-
|
|
996
|
-
|
|
993
|
+
for i, SME_row in enumerate(SME_matches.iter_rows(named=True)):
|
|
994
|
+
# Generate unique SME_ID
|
|
995
|
+
SME_id_base = consensus_uid * 1000
|
|
996
|
+
SME_id = SME_id_base + i + 1
|
|
997
997
|
|
|
998
998
|
# Create evidence input ID using consensus_uid:mz:rt format
|
|
999
999
|
consensus_mz = consensus_row.get("mz", 0)
|
|
@@ -1002,15 +1002,15 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
1002
1002
|
|
|
1003
1003
|
# Database identifier - use db_id if available, otherwise fallback to cmpd_uid
|
|
1004
1004
|
db_id = "null"
|
|
1005
|
-
if
|
|
1006
|
-
db_id = safe_str(
|
|
1007
|
-
elif
|
|
1008
|
-
db_id = f"cmpd:{
|
|
1005
|
+
if SME_row.get("db_id") is not None and SME_row["db_id"] != "":
|
|
1006
|
+
db_id = safe_str(SME_row["db_id"])
|
|
1007
|
+
elif SME_row.get("cmpd_uid") is not None:
|
|
1008
|
+
db_id = f"cmpd:{SME_row['cmpd_uid']}"
|
|
1009
1009
|
|
|
1010
1010
|
# Get adduct information
|
|
1011
1011
|
adduct_ion = "null"
|
|
1012
|
-
if
|
|
1013
|
-
adduct_ion = safe_str(
|
|
1012
|
+
if SME_row.get("adduct") is not None and SME_row["adduct"] != "":
|
|
1013
|
+
adduct_ion = safe_str(SME_row["adduct"])
|
|
1014
1014
|
# Replace ? with H for better mzTab compatibility
|
|
1015
1015
|
adduct_ion = adduct_ion.replace("?", "H")
|
|
1016
1016
|
|
|
@@ -1019,8 +1019,8 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
1019
1019
|
|
|
1020
1020
|
# Identification method
|
|
1021
1021
|
id_method = "[MS, MS:1002888, small molecule confidence measure, ]"
|
|
1022
|
-
if
|
|
1023
|
-
id_method = f"[MS, MS:1002888, {
|
|
1022
|
+
if SME_row.get("matcher") is not None:
|
|
1023
|
+
id_method = f"[MS, MS:1002888, {SME_row['matcher']}, ]"
|
|
1024
1024
|
|
|
1025
1025
|
# MS level - assume MS1 for now
|
|
1026
1026
|
ms_level = "[MS, MS:1000511, ms level, 1]"
|
|
@@ -1030,18 +1030,18 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
1030
1030
|
|
|
1031
1031
|
# Theoretical mass-to-charge from lib_df
|
|
1032
1032
|
theoretical_mz = "null"
|
|
1033
|
-
if
|
|
1034
|
-
theoretical_mz = safe_str(
|
|
1033
|
+
if SME_row.get("mz") is not None: # This comes from lib_df via get_id() join
|
|
1034
|
+
theoretical_mz = safe_str(SME_row["mz"])
|
|
1035
1035
|
|
|
1036
|
-
|
|
1037
|
-
"
|
|
1038
|
-
str(
|
|
1036
|
+
SME_line = [
|
|
1037
|
+
"SME",
|
|
1038
|
+
str(SME_id),
|
|
1039
1039
|
evidence_id,
|
|
1040
1040
|
db_id,
|
|
1041
|
-
safe_str(
|
|
1042
|
-
safe_str(
|
|
1043
|
-
safe_str(
|
|
1044
|
-
safe_str(
|
|
1041
|
+
safe_str(SME_row.get("formula", "null")),
|
|
1042
|
+
safe_str(SME_row.get("smiles", "null")),
|
|
1043
|
+
safe_str(SME_row.get("inchi", "null")),
|
|
1044
|
+
safe_str(SME_row.get("name", "null")),
|
|
1045
1045
|
"null", # uri - not available in current data
|
|
1046
1046
|
"null", # derivatized_form
|
|
1047
1047
|
adduct_ion,
|
|
@@ -1053,15 +1053,15 @@ def export_mztab(self, filename: str | None = None, include_mgf=True, **kwargs)
|
|
|
1053
1053
|
spectra_ref,
|
|
1054
1054
|
id_method,
|
|
1055
1055
|
ms_level,
|
|
1056
|
-
safe_str(
|
|
1056
|
+
safe_str(SME_row.get("score", "null")),
|
|
1057
1057
|
str(i + 1), # rank within this consensus feature
|
|
1058
1058
|
]
|
|
1059
|
-
|
|
1059
|
+
SME_lines.append("\t".join(SME_line))
|
|
1060
1060
|
|
|
1061
|
-
# Write
|
|
1061
|
+
# Write SME table
|
|
1062
1062
|
with open(filename, "a", encoding="utf-8") as f:
|
|
1063
1063
|
f.write("\n")
|
|
1064
|
-
for line in
|
|
1064
|
+
for line in SME_lines:
|
|
1065
1065
|
f.write(line + "\n")
|
|
1066
1066
|
|
|
1067
1067
|
# --- MGF table ---
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|