masster 0.3.9__py3-none-any.whl → 0.3.11__py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/docs/SCX_API_Documentation.md +0 -0
- masster/docs/SCX_DLL_Analysis.md +0 -0
- masster/logger.py +92 -78
- masster/sample/defaults/find_features_def.py +90 -94
- masster/sample/defaults/sample_def.py +15 -0
- masster/sample/h5.py +2 -2
- masster/sample/helpers.py +137 -136
- masster/sample/lib.py +11 -11
- masster/sample/load.py +13 -9
- masster/sample/plot.py +167 -60
- masster/sample/processing.py +150 -153
- masster/sample/sample.py +4 -4
- masster/sample/sample5_schema.json +62 -62
- masster/sample/save.py +16 -13
- masster/sample/sciex.py +187 -176
- masster/study/defaults/align_def.py +224 -6
- masster/study/defaults/fill_chrom_def.py +1 -5
- masster/study/defaults/integrate_chrom_def.py +1 -5
- masster/study/defaults/study_def.py +2 -2
- masster/study/export.py +144 -131
- masster/study/h5.py +193 -133
- masster/study/helpers.py +293 -245
- masster/study/helpers_optimized.py +99 -57
- masster/study/load.py +51 -25
- masster/study/plot.py +453 -17
- masster/study/processing.py +197 -123
- masster/study/save.py +7 -7
- masster/study/study.py +97 -88
- masster/study/study5_schema.json +82 -82
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/METADATA +1 -1
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/RECORD +34 -32
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/WHEEL +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/entry_points.txt +0 -0
- {masster-0.3.9.dist-info → masster-0.3.11.dist-info}/licenses/LICENSE +0 -0
masster/study/export.py
CHANGED
|
@@ -18,13 +18,13 @@ from masster._version import get_version
|
|
|
18
18
|
def _get_mgf_df(self, **kwargs):
|
|
19
19
|
"""
|
|
20
20
|
Generate MGF data as a Polars DataFrame.
|
|
21
|
-
|
|
21
|
+
|
|
22
22
|
This is the core data generation function used by export_mgf().
|
|
23
|
-
|
|
23
|
+
|
|
24
24
|
Parameters:
|
|
25
|
-
**kwargs: Keyword arguments for export parameters. Same as export_mgf()
|
|
25
|
+
**kwargs: Keyword arguments for export parameters. Same as export_mgf()
|
|
26
26
|
except return_data is not relevant here.
|
|
27
|
-
|
|
27
|
+
|
|
28
28
|
Returns:
|
|
29
29
|
pl.DataFrame: DataFrame with columns:
|
|
30
30
|
- mgf_index: MGF index
|
|
@@ -115,37 +115,37 @@ def _get_mgf_df(self, **kwargs):
|
|
|
115
115
|
"""Create a dictionary representing an ion for the DataFrame."""
|
|
116
116
|
if spect is None:
|
|
117
117
|
return None
|
|
118
|
-
|
|
118
|
+
|
|
119
119
|
# Prepare spectrum data
|
|
120
|
-
spectrum_mz = spect.mz.tolist() if hasattr(spect.mz,
|
|
121
|
-
spectrum_inty = spect.inty.tolist() if hasattr(spect.inty,
|
|
122
|
-
|
|
120
|
+
spectrum_mz = spect.mz.tolist() if hasattr(spect.mz, "tolist") else list(spect.mz)
|
|
121
|
+
spectrum_inty = spect.inty.tolist() if hasattr(spect.inty, "tolist") else list(spect.inty)
|
|
122
|
+
|
|
123
123
|
# Determine MS level
|
|
124
124
|
ms_level = spect.ms_level if spect.ms_level is not None else 1
|
|
125
|
-
|
|
125
|
+
|
|
126
126
|
# Get energy if available
|
|
127
|
-
energy = getattr(spect,
|
|
128
|
-
|
|
127
|
+
energy = getattr(spect, "energy", None)
|
|
128
|
+
|
|
129
129
|
# Determine spectrum type based on MS level
|
|
130
130
|
spec_type = f"MS{ms_level}" if ms_level > 1 else "MS1"
|
|
131
|
-
|
|
131
|
+
|
|
132
132
|
# Calculate spectrum length
|
|
133
133
|
spec_len = len(spectrum_mz)
|
|
134
|
-
|
|
134
|
+
|
|
135
135
|
return {
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
136
|
+
"mgf_index": mgf_id,
|
|
137
|
+
"title": title,
|
|
138
|
+
"feature_id": id,
|
|
139
|
+
"feature_uid": uid,
|
|
140
|
+
"charge": charge,
|
|
141
|
+
"pepmass": mz,
|
|
142
|
+
"rtinseconds": rt,
|
|
143
|
+
"mslevel": ms_level,
|
|
144
|
+
"type": spec_type,
|
|
145
|
+
"energy": energy,
|
|
146
|
+
"spec_len": spec_len,
|
|
147
|
+
"spec_mz": spectrum_mz,
|
|
148
|
+
"spec_int": spectrum_inty,
|
|
149
149
|
}
|
|
150
150
|
|
|
151
151
|
# Collect all ion data
|
|
@@ -153,7 +153,7 @@ def _get_mgf_df(self, **kwargs):
|
|
|
153
153
|
skip = 0
|
|
154
154
|
mgf_counter = 0
|
|
155
155
|
self.logger.info(f"Generating MGF data for {len(grouped)} consensus features...")
|
|
156
|
-
|
|
156
|
+
|
|
157
157
|
tdqm_disable = self.log_level not in ["TRACE", "DEBUG", "INFO"]
|
|
158
158
|
for _consensus_uid, cons_ms2 in tqdm(
|
|
159
159
|
grouped,
|
|
@@ -308,7 +308,7 @@ def _get_mgf_df(self, **kwargs):
|
|
|
308
308
|
# Convert to Polars DataFrame
|
|
309
309
|
if not ion_data:
|
|
310
310
|
return pl.DataFrame()
|
|
311
|
-
|
|
311
|
+
|
|
312
312
|
return pl.DataFrame(ion_data)
|
|
313
313
|
|
|
314
314
|
|
|
@@ -336,13 +336,13 @@ def export_mgf(self, **kwargs):
|
|
|
336
336
|
verbose (bool): Enable verbose logging (default: False).
|
|
337
337
|
precursor_trim (float): Precursor trimming value (default: -10).
|
|
338
338
|
centroid_algo (str): Centroiding algorithm (default: "lmp").
|
|
339
|
-
|
|
339
|
+
|
|
340
340
|
Returns:
|
|
341
341
|
None: Writes MGF file to disk.
|
|
342
342
|
"""
|
|
343
343
|
# Get mgf data as DataFrame
|
|
344
344
|
mgf_data = self._get_mgf_df(**kwargs)
|
|
345
|
-
|
|
345
|
+
|
|
346
346
|
if mgf_data is None or len(mgf_data) == 0:
|
|
347
347
|
self.logger.warning("No MGF data generated.")
|
|
348
348
|
return
|
|
@@ -355,9 +355,9 @@ def export_mgf(self, **kwargs):
|
|
|
355
355
|
else:
|
|
356
356
|
if hasattr(params, key):
|
|
357
357
|
params.set(key, value, validate=True)
|
|
358
|
-
|
|
358
|
+
|
|
359
359
|
filename = params.get("filename")
|
|
360
|
-
|
|
360
|
+
|
|
361
361
|
# Prepare output path
|
|
362
362
|
if not os.path.isabs(filename):
|
|
363
363
|
if self.folder is not None:
|
|
@@ -370,7 +370,7 @@ def export_mgf(self, **kwargs):
|
|
|
370
370
|
for row in mgf_data.iter_rows(named=True):
|
|
371
371
|
# Write BEGIN IONS
|
|
372
372
|
f.write("BEGIN IONS\n")
|
|
373
|
-
|
|
373
|
+
|
|
374
374
|
# Write metadata
|
|
375
375
|
if row["mgf_index"] is not None:
|
|
376
376
|
f.write(f"INDEX={row['mgf_index']}\n")
|
|
@@ -381,19 +381,19 @@ def export_mgf(self, **kwargs):
|
|
|
381
381
|
f.write(f"PEPMASS={row['pepmass']}\n")
|
|
382
382
|
f.write(f"RTINSECONDS={row['rtinseconds']}\n")
|
|
383
383
|
f.write(f"MSLEVEL={row['mslevel']}\n")
|
|
384
|
-
|
|
384
|
+
|
|
385
385
|
if row["energy"] is not None:
|
|
386
386
|
f.write(f"ENERGY={row['energy']}\n")
|
|
387
|
-
|
|
387
|
+
|
|
388
388
|
# Write spectrum data
|
|
389
389
|
spectrum_mz = row["spec_mz"]
|
|
390
390
|
spectrum_inty = row["spec_int"]
|
|
391
391
|
for mz_val, inty in zip(spectrum_mz, spectrum_inty, strict=False):
|
|
392
392
|
f.write(f"{mz_val:.5f} {inty:.0f}\n")
|
|
393
|
-
|
|
393
|
+
|
|
394
394
|
# Write END IONS
|
|
395
395
|
f.write("END IONS\n\n")
|
|
396
|
-
|
|
396
|
+
|
|
397
397
|
self.logger.info(f"Exported {len(mgf_data)} spectra to {filename}")
|
|
398
398
|
|
|
399
399
|
|
|
@@ -414,45 +414,47 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
414
414
|
filename = os.path.join(self.folder, filename)
|
|
415
415
|
else:
|
|
416
416
|
filename = os.path.join(os.getcwd(), filename)
|
|
417
|
-
|
|
417
|
+
|
|
418
418
|
# get mgf data
|
|
419
419
|
mgf_data = self._get_mgf_df(**kwargs)
|
|
420
420
|
# Create mapping from feature_uid to MGF indexes
|
|
421
421
|
mgf_mapping: dict[str, list[int]] = {}
|
|
422
422
|
if mgf_data is not None and len(mgf_data) > 0:
|
|
423
423
|
for row in mgf_data.iter_rows(named=True):
|
|
424
|
-
feature_uid = row[
|
|
425
|
-
mgf_index = row[
|
|
424
|
+
feature_uid = row["feature_uid"]
|
|
425
|
+
mgf_index = row["mgf_index"]
|
|
426
426
|
if feature_uid not in mgf_mapping:
|
|
427
427
|
mgf_mapping[feature_uid] = []
|
|
428
428
|
mgf_mapping[feature_uid].append(mgf_index)
|
|
429
|
-
|
|
429
|
+
|
|
430
430
|
# --- Prepare MTD (metadata) section ---
|
|
431
431
|
mtd_lines = []
|
|
432
432
|
mtd_lines.append(f"COM file generated by MASSter on {datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
|
|
433
433
|
mtd_lines.append("\nMTD\tmzTab-version\t2.2.0-M")
|
|
434
434
|
id = self.label if self.label else self.folder
|
|
435
435
|
mtd_lines.append(f"MTD\tmzTab-id\t{id}")
|
|
436
|
-
mtd_lines.append(
|
|
436
|
+
mtd_lines.append("")
|
|
437
437
|
mtd_lines.append("MTD\tcv[1]-label\tMS")
|
|
438
438
|
mtd_lines.append("MTD\tcv[1]-full_name\tPSI-MS controlled vocabulary")
|
|
439
439
|
mtd_lines.append("MTD\tcv[1]-version\t4.1.199")
|
|
440
440
|
mtd_lines.append("MTD\tcv[1]-uri\thttps://raw.githubusercontent.com/HUPO-PSI/psi-ms-CV/master/psi-ms.obo")
|
|
441
|
-
mtd_lines.append(
|
|
441
|
+
mtd_lines.append("")
|
|
442
442
|
mtd_lines.append("MTD\tsmall_molecule-quantification_unit\t[MS, MS:1001844, MS1 feature area, ]")
|
|
443
443
|
mtd_lines.append("MTD\tsmall_molecule_feature-quantification_unit\t[MS, MS:1001844, MS1 feature area, ]")
|
|
444
|
-
mtd_lines.append(
|
|
444
|
+
mtd_lines.append(
|
|
445
|
+
"MTD\tsmall_molecule-identification_reliability\t[MS, MS:1002955, hr-ms compound identification confidence level, ]"
|
|
446
|
+
)
|
|
445
447
|
mtd_lines.append("MTD\tid_confidence_measure[1]\t[MS, MS:1002888, small molecule confidence measure, ]")
|
|
446
|
-
mtd_lines.append(
|
|
448
|
+
mtd_lines.append("")
|
|
447
449
|
mtd_lines.append("MTD\tsoftware[1]\t[MS, MS:1003430, OpenMS, unknown]")
|
|
448
450
|
mtd_lines.append(f"MTD\tsoftware[2]\t[MS, MS:1002878, MASSter, {get_version()}]")
|
|
449
451
|
mtd_lines.append("MTD\tquantification_method\t[MS, MS:1001834, LC-MS label-free quantitation analysis, ]")
|
|
450
|
-
mtd_lines.append(
|
|
451
|
-
mtd_lines.append(
|
|
452
|
+
mtd_lines.append("")
|
|
453
|
+
mtd_lines.append('MTD\tdatabase[1]\t[, , "no database", null]')
|
|
452
454
|
mtd_lines.append("MTD\tdatabase[1]-prefix\tnull")
|
|
453
455
|
mtd_lines.append("MTD\tdatabase[1]-version\tUnknown")
|
|
454
456
|
mtd_lines.append("MTD\tdatabase[1]-uri\tnull")
|
|
455
|
-
#mtd_lines.append('')
|
|
457
|
+
# mtd_lines.append('')
|
|
456
458
|
for i, row in enumerate(self.samples_df.iter_rows(named=True), 1):
|
|
457
459
|
mtd_lines.append(f"\nMTD\tsample[{i}]\t{row.get('sample_uid', f'sample_{i}')}")
|
|
458
460
|
mtd_lines.append(f"MTD\tsample[{i}]-description\t{row.get('sample_name', 'unknown')}")
|
|
@@ -460,15 +462,15 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
460
462
|
mtd_lines.append(f"MTD\tassay[{i}]\tAssay_{i}")
|
|
461
463
|
mtd_lines.append(f"MTD\tassay[{i}]-sample_ref\tsample[{i}]")
|
|
462
464
|
mtd_lines.append(f"MTD\tassay[{i}]-ms_run_ref\tms_run[{i}]")
|
|
463
|
-
mtd_lines.append(
|
|
465
|
+
mtd_lines.append("")
|
|
464
466
|
mtd_lines.append("MTD\tstudy_variable[1]\tundefined")
|
|
465
467
|
mtd_lines.append("MTD\tstudy_variable[1]_refs\tundefined")
|
|
466
|
-
#assay_refs = '|'.join([f"assay[{i}]" for i in range(1, len(self.samples_df)+1)])
|
|
467
|
-
#mtd_lines.append(f"MTD\tstudy_variable[1]-assay_refs\t{assay_refs}")
|
|
468
|
-
#mtd_lines.append("MTD\tstudy_variable[1]-description\tAll assays grouped (default)")
|
|
469
|
-
with open(filename,
|
|
468
|
+
# assay_refs = '|'.join([f"assay[{i}]" for i in range(1, len(self.samples_df)+1)])
|
|
469
|
+
# mtd_lines.append(f"MTD\tstudy_variable[1]-assay_refs\t{assay_refs}")
|
|
470
|
+
# mtd_lines.append("MTD\tstudy_variable[1]-description\tAll assays grouped (default)")
|
|
471
|
+
with open(filename, "w", encoding="utf-8") as f:
|
|
470
472
|
for line in mtd_lines:
|
|
471
|
-
f.write(line +
|
|
473
|
+
f.write(line + "\n")
|
|
472
474
|
|
|
473
475
|
# --- SML (Small Molecule) table ---
|
|
474
476
|
sml_lines = []
|
|
@@ -487,43 +489,54 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
487
489
|
"reliability",
|
|
488
490
|
"best_id_confidence_measure",
|
|
489
491
|
"best_id_confidence_value",
|
|
490
|
-
"opt_global_mgf_index",
|
|
492
|
+
"opt_global_mgf_index",
|
|
491
493
|
]
|
|
492
|
-
|
|
494
|
+
|
|
493
495
|
abundance_matrix = self.get_consensus_matrix()
|
|
494
496
|
# Use the matrix as-is since it already has the correct sample columns
|
|
495
497
|
# The matrix columns are sample names, which is what we want for the assay columns
|
|
496
|
-
|
|
498
|
+
|
|
497
499
|
# round to int
|
|
498
500
|
abundance_matrix = abundance_matrix.round(0)
|
|
499
501
|
|
|
500
502
|
# Use actual number of samples from the abundance matrix
|
|
501
503
|
n_assays = len(abundance_matrix.columns)
|
|
502
|
-
sml_header += [f"abundance_assay[{i}]" for i in range(1, n_assays+1)]
|
|
504
|
+
sml_header += [f"abundance_assay[{i}]" for i in range(1, n_assays + 1)]
|
|
503
505
|
sml_header += ["abundance_study_variable[1]", "abundance_variation_study_variable[1]"]
|
|
504
|
-
sml_lines.append(
|
|
506
|
+
sml_lines.append("\t".join(sml_header))
|
|
505
507
|
|
|
506
508
|
# get adducts from consensus_df['adducts']. If value is None or [], use 'null'. If there is, take the first element and the first string
|
|
507
509
|
adduct_list = []
|
|
508
|
-
mapping = {
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
|
|
515
|
-
|
|
516
|
-
|
|
517
|
-
|
|
510
|
+
mapping = {
|
|
511
|
+
"H1": "[M+H]+",
|
|
512
|
+
"H2": "[M+2H]2+",
|
|
513
|
+
"Na1": "[M+Na]+",
|
|
514
|
+
"Na2": "[M+2Na]2+",
|
|
515
|
+
"NH4": "[M+NH4]+",
|
|
516
|
+
"HCOO": "[M+HCOO]-",
|
|
517
|
+
"CH3COO": "[M+CH3COO]-",
|
|
518
|
+
"H2O": "[M+H2O]+",
|
|
519
|
+
"HCO2": "[M+HCO2]-",
|
|
520
|
+
"H3PO4": "[M+H3PO4]+",
|
|
521
|
+
"H3O1": "[M+H3O]+",
|
|
522
|
+
"K1": "[M+K]+",
|
|
523
|
+
"H4N1": "[M+NH4]+",
|
|
524
|
+
"H-1": "[M-H]-",
|
|
525
|
+
"Cl1": "[M+Cl]-",
|
|
526
|
+
"Br1": "[M+Br]-",
|
|
527
|
+
"I1": "[M+I]-",
|
|
528
|
+
"H2O2": "[M+H2O2]+",
|
|
529
|
+
"H3O2": "[M+H3O2]+",
|
|
530
|
+
}
|
|
518
531
|
for idx, row in enumerate(self.consensus_df.iter_rows(named=True), 1):
|
|
519
|
-
adduct =
|
|
520
|
-
if
|
|
521
|
-
row_adducts = row[
|
|
532
|
+
adduct = "null"
|
|
533
|
+
if "adducts" in row:
|
|
534
|
+
row_adducts = row["adducts"]
|
|
522
535
|
if isinstance(row_adducts, list) and row_adducts:
|
|
523
536
|
# Each adduct is a dictionary with 'adduct' key
|
|
524
537
|
first_adduct_dict = row_adducts[0]
|
|
525
|
-
if isinstance(first_adduct_dict, dict) and
|
|
526
|
-
adduct_str = first_adduct_dict[
|
|
538
|
+
if isinstance(first_adduct_dict, dict) and "adduct" in first_adduct_dict:
|
|
539
|
+
adduct_str = first_adduct_dict["adduct"]
|
|
527
540
|
if adduct_str in mapping:
|
|
528
541
|
adduct = mapping[adduct_str]
|
|
529
542
|
else:
|
|
@@ -533,46 +546,46 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
533
546
|
|
|
534
547
|
for idx, row in enumerate(self.consensus_df.iter_rows(named=True), 1):
|
|
535
548
|
# Get MGF indexes for this consensus feature
|
|
536
|
-
mgf_indexes = mgf_mapping.get(row[
|
|
537
|
-
|
|
549
|
+
mgf_indexes = mgf_mapping.get(row["consensus_uid"], [])
|
|
550
|
+
|
|
538
551
|
sml_row = [
|
|
539
552
|
"SML",
|
|
540
553
|
str(idx),
|
|
541
554
|
str(idx),
|
|
542
|
-
str(row.get(
|
|
543
|
-
str(row.get(
|
|
544
|
-
str(row.get(
|
|
545
|
-
str(row.get(
|
|
546
|
-
str(row.get(
|
|
547
|
-
str(row.get(
|
|
548
|
-
str(row.get(
|
|
549
|
-
adduct_list[idx-1],
|
|
550
|
-
str(row.get(
|
|
551
|
-
str(row.get(
|
|
552
|
-
str(row.get(
|
|
553
|
-
|
|
555
|
+
str(row.get("database_identifier", "null")),
|
|
556
|
+
str(row.get("chemical_formula", "null")),
|
|
557
|
+
str(row.get("smiles", "null")),
|
|
558
|
+
str(row.get("inchi", "null")),
|
|
559
|
+
str(row.get("chemical_name", "null")),
|
|
560
|
+
str(row.get("uri", "null")),
|
|
561
|
+
str(row.get("theoretical_neutral_mass", "null")),
|
|
562
|
+
adduct_list[idx - 1],
|
|
563
|
+
str(row.get("reliability", "null")),
|
|
564
|
+
str(row.get("best_id_confidence_measure", "null")),
|
|
565
|
+
str(row.get("best_id_confidence_value", "null")),
|
|
566
|
+
",".join(map(str, mgf_indexes)) if mgf_indexes else "null",
|
|
554
567
|
]
|
|
555
568
|
# Add abundance values for each assay
|
|
556
|
-
consensus_uid = row[
|
|
569
|
+
consensus_uid = row["consensus_uid"]
|
|
557
570
|
if consensus_uid in abundance_matrix.index:
|
|
558
571
|
abundance_values = abundance_matrix.loc[consensus_uid].tolist()
|
|
559
|
-
sml_row += [str(val) if pd.notna(val) else
|
|
572
|
+
sml_row += [str(val) if pd.notna(val) else "null" for val in abundance_values]
|
|
560
573
|
else:
|
|
561
|
-
sml_row += [
|
|
562
|
-
sml_row += [
|
|
563
|
-
sml_lines.append(
|
|
564
|
-
with open(filename,
|
|
565
|
-
f.write(
|
|
574
|
+
sml_row += ["null"] * n_assays
|
|
575
|
+
sml_row += ["null", "null"]
|
|
576
|
+
sml_lines.append("\t".join(sml_row))
|
|
577
|
+
with open(filename, "a", encoding="utf-8") as f:
|
|
578
|
+
f.write("\n")
|
|
566
579
|
for line in sml_lines:
|
|
567
|
-
f.write(line +
|
|
580
|
+
f.write(line + "\n")
|
|
568
581
|
|
|
569
582
|
# --- SMF (Small Molecule Feature) table ---
|
|
570
583
|
smf_lines = []
|
|
571
584
|
smf_header = [
|
|
572
585
|
"SFH",
|
|
573
586
|
"SMF_ID",
|
|
574
|
-
"
|
|
575
|
-
"
|
|
587
|
+
"SOME_ID_REFS",
|
|
588
|
+
"SOME_ID_REF_ambiguity_code",
|
|
576
589
|
"adduct_ion",
|
|
577
590
|
"isotopomer",
|
|
578
591
|
"exp_mass_to_charge",
|
|
@@ -581,9 +594,9 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
581
594
|
"retention_time_in_seconds_start",
|
|
582
595
|
"retention_time_in_seconds_end",
|
|
583
596
|
]
|
|
584
|
-
smf_header += [f"abundance_assay[{i}]" for i in range(1, n_assays+1)]
|
|
585
|
-
smf_lines.append(
|
|
586
|
-
|
|
597
|
+
smf_header += [f"abundance_assay[{i}]" for i in range(1, n_assays + 1)]
|
|
598
|
+
smf_lines.append("\t".join(smf_header))
|
|
599
|
+
|
|
587
600
|
# SMF table uses the same consensus features as SML, just different metadata
|
|
588
601
|
for idx, row in enumerate(self.consensus_df.iter_rows(named=True), 1):
|
|
589
602
|
smf_row = [
|
|
@@ -591,26 +604,26 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
591
604
|
str(idx),
|
|
592
605
|
"null",
|
|
593
606
|
"null",
|
|
594
|
-
adduct_list[idx-1], # adduct_ion
|
|
595
|
-
str(row.get(
|
|
596
|
-
str(row.get(
|
|
597
|
-
str(row.get(
|
|
598
|
-
str(row.get(
|
|
599
|
-
str(row.get(
|
|
600
|
-
str(row.get(
|
|
607
|
+
adduct_list[idx - 1], # adduct_ion
|
|
608
|
+
str(row.get("isotopomer", "null")),
|
|
609
|
+
str(row.get("mz", "null")), # exp_mass_to_charge
|
|
610
|
+
str(row.get("charge", "null")),
|
|
611
|
+
str(row.get("rt", "null")), # retention_time_in_seconds
|
|
612
|
+
str(row.get("retention_time_in_seconds_start", "null")),
|
|
613
|
+
str(row.get("retention_time_in_seconds_end", "null")),
|
|
601
614
|
]
|
|
602
615
|
# Add abundance values for each assay - same as SML
|
|
603
|
-
consensus_uid = row[
|
|
616
|
+
consensus_uid = row["consensus_uid"]
|
|
604
617
|
if consensus_uid in abundance_matrix.index:
|
|
605
618
|
abundance_values = abundance_matrix.loc[consensus_uid].tolist()
|
|
606
|
-
smf_row += [str(val) if pd.notna(val) else
|
|
619
|
+
smf_row += [str(val) if pd.notna(val) else "null" for val in abundance_values]
|
|
607
620
|
else:
|
|
608
|
-
smf_row += [
|
|
609
|
-
smf_lines.append(
|
|
610
|
-
with open(filename,
|
|
611
|
-
f.write(
|
|
621
|
+
smf_row += ["null"] * n_assays
|
|
622
|
+
smf_lines.append("\t".join(smf_row))
|
|
623
|
+
with open(filename, "a", encoding="utf-8") as f:
|
|
624
|
+
f.write("\n")
|
|
612
625
|
for line in smf_lines:
|
|
613
|
-
f.write(line +
|
|
626
|
+
f.write(line + "\n")
|
|
614
627
|
|
|
615
628
|
# --- MGF table ---
|
|
616
629
|
if include_mgf and mgf_data is not None and len(mgf_data) > 0:
|
|
@@ -618,9 +631,9 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
618
631
|
# Header
|
|
619
632
|
mgf_header = [
|
|
620
633
|
"COM",
|
|
621
|
-
"MGH",
|
|
634
|
+
"MGH",
|
|
622
635
|
"mgf_id",
|
|
623
|
-
"prec_id",
|
|
636
|
+
"prec_id",
|
|
624
637
|
"prec_rt",
|
|
625
638
|
"prec_mz",
|
|
626
639
|
"prec_int",
|
|
@@ -630,10 +643,10 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
630
643
|
"spec_tic",
|
|
631
644
|
"spec_len",
|
|
632
645
|
"spec_mz",
|
|
633
|
-
"spec_int"
|
|
646
|
+
"spec_int",
|
|
634
647
|
]
|
|
635
|
-
mgf_lines.append(
|
|
636
|
-
|
|
648
|
+
mgf_lines.append("\t".join(mgf_header))
|
|
649
|
+
|
|
637
650
|
# Data rows
|
|
638
651
|
for row in mgf_data.iter_rows(named=True):
|
|
639
652
|
# Calculate spectrum TIC (total ion current) from the spectrum data
|
|
@@ -641,11 +654,11 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
641
654
|
spectrum_inty = row["spec_int"]
|
|
642
655
|
spec_tic = sum(spectrum_inty) if spectrum_inty else 0
|
|
643
656
|
spec_len = row["spec_len"] if row["spec_len"] is not None else 0
|
|
644
|
-
|
|
657
|
+
|
|
645
658
|
# Format spectrum data as pipe-separated strings
|
|
646
|
-
spec_mz_str =
|
|
647
|
-
spec_int_str =
|
|
648
|
-
|
|
659
|
+
spec_mz_str = "|".join([f"{mz:.4f}" for mz in spectrum_mz]) if spectrum_mz else ""
|
|
660
|
+
spec_int_str = "|".join([f"{int(inty)}" for inty in spectrum_inty]) if spectrum_inty else ""
|
|
661
|
+
|
|
649
662
|
mgf_row = [
|
|
650
663
|
"COM",
|
|
651
664
|
"MGF",
|
|
@@ -660,15 +673,15 @@ def export_mztab(self, filename: str = None, include_mgf=True, **kwargs) -> None
|
|
|
660
673
|
f"{int(spec_tic)}" if spec_tic > 0 else "null",
|
|
661
674
|
str(spec_len) if spec_len > 0 else "null",
|
|
662
675
|
spec_mz_str if spec_mz_str else "null",
|
|
663
|
-
spec_int_str if spec_int_str else "null"
|
|
676
|
+
spec_int_str if spec_int_str else "null",
|
|
664
677
|
]
|
|
665
|
-
mgf_lines.append(
|
|
666
|
-
|
|
678
|
+
mgf_lines.append("\t".join(mgf_row))
|
|
679
|
+
|
|
667
680
|
# Write MGF table
|
|
668
|
-
with open(filename,
|
|
669
|
-
f.write(
|
|
681
|
+
with open(filename, "a", encoding="utf-8") as f:
|
|
682
|
+
f.write("\n")
|
|
670
683
|
for line in mgf_lines:
|
|
671
|
-
f.write(line +
|
|
684
|
+
f.write(line + "\n")
|
|
672
685
|
|
|
673
|
-
if include_mgf:
|
|
686
|
+
if include_mgf:
|
|
674
687
|
self.logger.info(f"Exported mzTab-M to {filename}")
|