masster 0.5.0__py3-none-any.whl → 0.5.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of masster might be problematic.
- masster/_version.py +1 -1
- masster/data/libs/aa.csv +22 -0
- masster/lib/lib.py +6 -0
- masster/study/defaults/fill_def.py +1 -1
- masster/study/h5.py +3 -0
- masster/study/id.py +542 -1
- masster/study/load.py +16 -7
- masster/study/merge.py +97 -2
- masster/study/processing.py +2 -10
- masster/study/study.py +82 -0
- masster/study/study5_schema.json +9 -0
- {masster-0.5.0.dist-info → masster-0.5.1.dist-info}/METADATA +1 -1
- {masster-0.5.0.dist-info → masster-0.5.1.dist-info}/RECORD +16 -15
- {masster-0.5.0.dist-info → masster-0.5.1.dist-info}/WHEEL +0 -0
- {masster-0.5.0.dist-info → masster-0.5.1.dist-info}/entry_points.txt +0 -0
- {masster-0.5.0.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/data/libs/aa.csv
ADDED
@@ -0,0 +1,22 @@
+name,smiles,inchikey,formula,db_id,db
+L-Glutamic acid,N[C@@H](CCC(O)=O)C(O)=O,WHUUTDBJXJRKMK-VKHMYHEASA-N,C5H9NO4,CID:33032,pubchem
+L-Tyrosine,N[C@@H](CC1=CC=C(O)C=C1)C(O)=O,OUYCCCASQSFEME-QMMMGPOBSA-N,C9H11NO3,CID:6057,pubchem
+L-Phenylalanine,N[C@@H](CC1=CC=CC=C1)C(O)=O,COLNVLDHVKWLRT-QMMMGPOBSA-N,C9H11NO2,CID:6140,pubchem
+L-Alanine,C[C@H](N)C(O)=O,QNAYBMKLOCPYGJ-REOHCLBHSA-N,C3H7NO2,CID:5950,pubchem
+L-Proline,OC(=O)[C@@H]1CCCN1,ONIBWKKTOPOVIA-BYPYZUCNSA-N,C5H9NO2,CID:145742,pubchem
+L-Threonine,C[C@@H](O)[C@H](N)C(O)=O,AYFVYJQAPQTCCC-GBXIJSLDSA-N,C4H9NO3,CID:6288,pubchem
+L-Asparagine,N[C@@H](CC(N)=O)C(O)=O,DCXYFEDJOCDNAF-REOHCLBHSA-N,C4H8N2O3,CID:6267,pubchem
+L-Isoleucine,CC[C@H](C)[C@H](N)C(O)=O,AGPKZVBTJJNPAG-WHFBIAKZSA-N,C6H13NO2,CID:6306,pubchem
+L-Histidine,N[C@@H](CC1=CN=CN1)C(O)=O,HNDVDQJCIGZPNO-YFKPBYRVSA-N,C6H9N3O2,CID:6274,pubchem
+L-Lysine,NCCCC[C@H](N)C(O)=O,KDXKERNSBIXSRK-YFKPBYRVSA-N,C6H14N2O2,CID:5962,pubchem
+L-Serine,N[C@@H](CO)C(O)=O,MTCFGRXMJLQNBG-REOHCLBHSA-N,C3H7NO3,CID:5951,pubchem
+L-Aspartic acid,N[C@@H](CC(O)=O)C(O)=O,CKLJMWTZIZZHCS-REOHCLBHSA-N,C4H7NO4,CID:5960,pubchem
+L-Cystine,N[C@@H](CSSC[C@H](N)C(O)=O)C(O)=O,LEVWYRKDKASIDU-IMJSIDKUSA-N,C6H12N2O4S2,CID:67678,pubchem
+L-Arginine,N[C@@H](CCCNC(N)=N)C(O)=O,ODKSFYDXXFIFQN-BYPYZUCNSA-N,C6H14N4O2,CID:6322,pubchem
+L-Cysteine,N[C@@H](CS)C(O)=O,XUJNEKJLAYXESH-REOHCLBHSA-N,C3H7NO2S,CID:5862,pubchem
+L-Glutamine,N[C@@H](CCC(N)=O)C(O)=O,ZDXPYRJPNDTMRX-VKHMYHEASA-N,C5H10N2O3,CID:5961,pubchem
+L-Leucine,CC(C)C[C@H](N)C(O)=O,ROHFNLRQFUQHCH-YFKPBYRVSA-N,C6H13NO2,CID:6106,pubchem
+L-Methionine,CSCC[C@H](N)C(O)=O,FFEARJCKVFRZRR-BYPYZUCNSA-N,C5H11NO2S,CID:6137,pubchem
+L-Valine,CC(C)[C@H](N)C(O)=O,KZSNJWFQEVHDMF-BYPYZUCNSA-N,C5H11NO2,CID:6287,pubchem
+L-Tryptophan,N[C@@H](CC1=CNC2=C1C=CC=C2)C(O)=O,QIVBCDIJIAJPQS-VIFPVBQESA-N,C11H12N2O2,CID:6305,pubchem
+Glycine,NCC(O)=O,QNAYBMKLOCPYGJ-UHFFFAOYSA-N,C2H5NO2,CID:750,Glycine
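Note: aa.csv follows the same name,smiles,inchikey,formula,db_id,db layout as the existing ccm.csv and urine.csv libraries, so it can be loaded the same way. A minimal sketch for inspecting the bundled file with Polars; the importlib.resources lookup is an assumption about how the wheel installs its data files, not a documented masster API:

# Sketch only: peek at the bundled amino-acid library.
from importlib import resources

import polars as pl

with resources.as_file(resources.files("masster") / "data" / "libs" / "aa.csv") as path:
    lib = pl.read_csv(path)

print(lib.shape)                          # (21, 6): 21 compounds, 6 columns
print(lib.row(0, named=True)["formula"])  # "C5H9NO4" (L-Glutamic acid)
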
masster/lib/lib.py
CHANGED
@@ -123,11 +123,13 @@ class Lib:
             "inchi": pl.Series([], dtype=pl.Utf8),
             "inchikey": pl.Series([], dtype=pl.Utf8),
             "formula": pl.Series([], dtype=pl.Utf8),
+            "iso": pl.Series([], dtype=pl.Int64),
             "adduct": pl.Series([], dtype=pl.Utf8),
             "m": pl.Series([], dtype=pl.Float64),
             "z": pl.Series([], dtype=pl.Int8),
             "mz": pl.Series([], dtype=pl.Float64),
             "rt": pl.Series([], dtype=pl.Float64),
+            "quant_group": pl.Series([], dtype=pl.Int64),
             "db_id": pl.Series([], dtype=pl.Utf8),
             "db": pl.Series([], dtype=pl.Utf8),
         })
@@ -245,11 +247,13 @@ class Lib:
             "inchi": compound_data.get("inchi", ""),
             "inchikey": compound_data.get("inchikey", ""),
             "formula": compound_data["formula"],
+            "iso": 0,  # Default to zero
             "adduct": adduct,
             "m": adducted_mass,
             "z": charge,
             "mz": mz,
             "rt": compound_data.get("rt", None),
+            "quant_group": counter,  # Use same as lib_uid for default
             "db_id": compound_data.get("db_id", None),
             "db": compound_data.get("db", None),
         }
@@ -526,12 +530,14 @@ class Lib:
             "source_id": match_row.get("source_id"),
             "name": match_row["name"],
             "formula": match_row["formula"],
+            "iso": match_row.get("iso", 0),
             "adduct": match_row["adduct"],
             "smiles": match_row["smiles"],
             "inchi": match_row["inchi"],
             "inchikey": match_row["inchikey"],
             "lib_mz": match_row["mz"],
             "lib_rt": match_row["rt"],
+            "quant_group": match_row.get("quant_group"),
             "delta_mz": abs(feature_mz - match_row["mz"]),
             "delta_rt": abs(feature_rt - match_row["rt"]) if feature_rt is not None and match_row["rt"] is not None else None,
         }
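Note: the three hunks above thread the same two columns through Lib: iso (isotopologue index, Int64, default 0) sits after formula, and quant_group (Int64, defaulting to the entry's counter/lib_uid) sits after rt. A rough sketch of the resulting schema fragment, reconstructed from the diff rather than copied from the package:

# Sketch of the lib_df schema fragment implied by the diff (column order assumed
# from the lib_load reordering: iso after formula, quant_group after rt).
import polars as pl

lib_schema_fragment = {
    "formula": pl.Utf8,
    "iso": pl.Int64,          # new in 0.5.1: number of 13C substitutions
    "adduct": pl.Utf8,
    "m": pl.Float64,
    "z": pl.Int8,
    "mz": pl.Float64,
    "rt": pl.Float64,
    "quant_group": pl.Int64,  # new in 0.5.1: shared by all isotopomers of a compound
}
print(pl.DataFrame(schema=lib_schema_fragment).schema)
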
masster/study/h5.py
CHANGED
@@ -2007,6 +2007,9 @@ def _load_study5(self, filename=None):
             f"Successfully migrated {sample_count} samples to indexed map_id format (0 to {sample_count - 1})",
         )

+        # Sanitize null feature_id and consensus_id values with new UIDs (same method as merge)
+        self._sanitize_null_ids()
+
         self.logger.debug("Study loaded")

masster/study/id.py
CHANGED
@@ -15,6 +15,7 @@ def lib_load(
     lib_source,
     polarity: str | None = None,
     adducts: list | None = None,
+    iso: str | None = None,
 ):
     """Load a compound library into the study.

@@ -23,6 +24,7 @@ def lib_load(
         lib_source: either a CSV file path (str) or a Lib instance
         polarity: ionization polarity ("positive" or "negative") - used when lib_source is a CSV path
         adducts: specific adducts to generate - used when lib_source is a CSV path
+        iso: isotope generation mode ("13C" to generate 13C isotopes, None for no isotopes)

     Side effects:
         sets study.lib_df to a Polars DataFrame and stores the lib object on
@@ -97,6 +99,56 @@ def lib_load(
     # Store pointer and DataFrame on study
     study._lib = lib_obj

+    # Add source_id column with filename (without path) if loading from CSV
+    if isinstance(lib_source, str):
+        import os
+        filename_only = os.path.basename(lib_source)
+        filtered_lf = filtered_lf.with_columns(pl.lit(filename_only).alias("source_id"))
+
+    # Ensure required columns exist and set correct values
+    required_columns = {
+        "quant_group": pl.Int64,
+        "iso": pl.Int64
+    }
+
+    for col_name, col_dtype in required_columns.items():
+        if col_name == "quant_group":
+            # Set quant_group using cmpd_uid (same for isotopomers of same compound)
+            if "cmpd_uid" in filtered_lf.columns:
+                filtered_lf = filtered_lf.with_columns(pl.col("cmpd_uid").cast(col_dtype).alias("quant_group"))
+            else:
+                # Fall back to lib_uid if cmpd_uid doesn't exist
+                filtered_lf = filtered_lf.with_columns(pl.col("lib_uid").cast(col_dtype).alias("quant_group"))
+        elif col_name == "iso":
+            if col_name not in filtered_lf.columns:
+                # Default to zero for iso
+                filtered_lf = filtered_lf.with_columns(pl.lit(0).cast(col_dtype).alias(col_name))
+
+    # Generate 13C isotopes if requested
+    original_count = len(filtered_lf)
+    if iso == '13C':
+        filtered_lf = _generate_13c_isotopes(filtered_lf)
+        # Update the log message to show the correct count after isotope generation
+        if isinstance(lib_source, str):
+            import os
+            filename_only = os.path.basename(lib_source)
+            print(f"Generated 13C isotopes: {len(filtered_lf)} total entries ({original_count} original + {len(filtered_lf) - original_count} isotopes) from {filename_only}")
+
+    # Reorder columns to place quant_group after rt and iso after formula
+    column_order = []
+    columns_list = list(filtered_lf.columns)
+
+    for col in columns_list:
+        if col not in column_order:  # Only add if not already added
+            column_order.append(col)
+        if col == "rt" and "quant_group" in columns_list and "quant_group" not in column_order:
+            column_order.append("quant_group")
+        elif col == "formula" and "iso" in columns_list and "iso" not in column_order:
+            column_order.append("iso")
+
+    # Apply the column ordering
+    filtered_lf = filtered_lf.select(column_order)
+
     # Add to existing lib_df instead of replacing
     if (
         hasattr(study, "lib_df")
@@ -127,7 +179,7 @@ def lib_load(
     if hasattr(study, "update_history"):
         study.update_history(
             ["lib_load"],
-            {"lib_source": str(lib_source), "polarity": polarity, "adducts": adducts},
+            {"lib_source": str(lib_source), "polarity": polarity, "adducts": adducts, "iso": iso},
         )

@@ -349,6 +401,7 @@ def _update_identification_results(study, results, logger):
             "rt_delta": match["rt_delta"],
             "matcher": match["matcher"],
             "score": match["score"],
+            "iso": 0,  # Default to zero
         })

     # Convert to DataFrame and append to existing results
@@ -356,6 +409,13 @@ def _update_identification_results(study, results, logger):

     if not new_results_df.is_empty():
         if hasattr(study, "id_df") and study.id_df is not None and not study.id_df.is_empty():
+            # Check if existing id_df has the iso column
+            if "iso" not in study.id_df.columns:
+                # Add iso column to existing id_df with default value 0
+                study.id_df = study.id_df.with_columns(pl.lit(0).alias("iso"))
+                if logger:
+                    logger.debug("Added 'iso' column to existing id_df for schema compatibility")
+
             study.id_df = pl.concat([study.id_df, new_results_df])
         else:
             study.id_df = new_results_df
@@ -1043,8 +1103,10 @@ def lib_reset(study):
     - study.id_df (identification results DataFrame)
    - study.lib_df (library DataFrame)
    - study._lib (library object reference)
+    - Consensus features created by lib_to_consensus() (number_samples = -1 or 0)
    - 'identify' from study.history
    - 'lib_load' from study.history (if exists)
+    - 'lib_to_consensus' from study.history (if exists)
    - Resets id_top_* columns in consensus_df to null

    Args:
@@ -1053,6 +1115,36 @@ def lib_reset(study):
     # Get logger from study if available
     logger = getattr(study, "logger", None)

+    # Remove consensus features created by lib_to_consensus()
+    # These are identified by number_samples = -1 or 0
+    if hasattr(study, "consensus_df") and not study.consensus_df.is_empty():
+        if logger:
+            logger.debug("Checking for consensus features created by lib_to_consensus()")
+
+        try:
+            # Filter for features with number_samples = -1 or 0
+            # Since consensus_select doesn't support a list of discrete values, use direct filtering
+            lib_consensus_features = study.consensus_df.filter(
+                (pl.col("number_samples") == -1) | (pl.col("number_samples") == 0)
+            )
+
+            if lib_consensus_features is not None and not lib_consensus_features.is_empty():
+                num_lib_features = len(lib_consensus_features)
+                if logger:
+                    logger.info(f"Removing {num_lib_features} consensus features created by lib_to_consensus()")
+
+                # Use consensus_delete to remove these features and all dependent data
+                study.consensus_delete(lib_consensus_features)
+
+                if logger:
+                    logger.debug("Successfully removed library-derived consensus features")
+            else:
+                if logger:
+                    logger.debug("No library-derived consensus features found to remove")
+        except Exception as e:
+            if logger:
+                logger.warning(f"Error removing library-derived consensus features: {e}")
+
     # Remove id_df
     if hasattr(study, "id_df"):
         if logger:
@@ -1099,6 +1191,11 @@ def lib_reset(study):
         if logger:
             logger.debug("Removing 'lib_load' from history")
         del study.history["lib_load"]
+
+    if "lib_to_consensus" in study.history:
+        if logger:
+            logger.debug("Removing 'lib_to_consensus' from history")
+        del study.history["lib_to_consensus"]

     if logger:
         logger.info("Library and identification data reset completed")
@@ -1438,3 +1535,447 @@ def _format_adduct_name(components: list[dict]) -> str:
     )

     return f"[M{formula}]{charge_str}"
+
+
+def _generate_13c_isotopes(lib_df):
+    """
+    Generate 13C isotope variants for library entries.
+
+    For each compound with n carbon atoms, creates n+1 entries:
+    - iso=0: original compound (no 13C)
+    - iso=1: one 13C isotope (+1.00335 Da)
+    - iso=2: two 13C isotopes (+2.00670 Da)
+    - ...
+    - iso=n: n 13C isotopes (+n*1.00335 Da)
+
+    All isotopomers share the same quant_group.
+
+    Args:
+        lib_df: Polars DataFrame with library entries
+
+    Returns:
+        Polars DataFrame with additional 13C isotope entries
+    """
+    if lib_df.is_empty():
+        return lib_df
+
+    # First, ensure all original entries have iso=0
+    original_df = lib_df.with_columns(pl.lit(0).alias("iso"))
+
+    isotope_entries = []
+    next_lib_uid = lib_df["lib_uid"].max() + 1 if len(lib_df) > 0 else 1
+
+    # Mass difference for one 13C isotope
+    c13_mass_shift = 1.00335  # Mass difference between 13C and 12C
+
+    for row in original_df.iter_rows(named=True):
+        formula = row.get("formula", "")
+        if not formula:
+            continue
+
+        # Count carbon atoms in the formula
+        carbon_count = _count_carbon_atoms(formula)
+        if carbon_count == 0:
+            continue
+
+        # Get the original quant_group to keep it consistent across isotopes
+        # All isotopomers of the same compound should have the same quant_group
+        quant_group = row.get("quant_group", row.get("cmpd_uid", row.get("lib_uid", 1)))
+
+        # Generate isotope variants (1 to n 13C atoms)
+        for iso_num in range(1, carbon_count + 1):
+            # Calculate mass shift for this number of 13C isotopes
+            mass_shift = iso_num * c13_mass_shift
+
+            # Create new entry
+            isotope_entry = dict(row)  # Copy all fields
+            isotope_entry["lib_uid"] = next_lib_uid
+            isotope_entry["iso"] = iso_num
+            isotope_entry["m"] = row["m"] + mass_shift
+            isotope_entry["mz"] = (row["m"] + mass_shift) / abs(row["z"]) if row["z"] != 0 else row["m"] + mass_shift
+            isotope_entry["quant_group"] = quant_group  # Keep same quant_group
+
+            isotope_entries.append(isotope_entry)
+            next_lib_uid += 1
+
+    # Combine original entries (now with iso=0) with isotope entries
+    if isotope_entries:
+        isotope_df = pl.DataFrame(isotope_entries)
+        # Ensure schema compatibility by aligning data types
+        try:
+            return pl.concat([original_df, isotope_df])
+        except Exception as e:
+            # If concat fails due to schema mismatch, convert to compatible types
+            # Get common schema
+            original_schema = original_df.schema
+            isotope_schema = isotope_df.schema
+
+            # Cast isotope_df columns to match original_df schema where possible
+            cast_exprs = []
+            for col_name in isotope_df.columns:
+                if col_name in original_schema:
+                    target_dtype = original_schema[col_name]
+                    cast_exprs.append(pl.col(col_name).cast(target_dtype, strict=False))
+                else:
+                    cast_exprs.append(pl.col(col_name))
+
+            isotope_df_cast = isotope_df.select(cast_exprs)
+            return pl.concat([original_df, isotope_df_cast])
+    else:
+        return original_df
+
+
+def _count_carbon_atoms(formula: str) -> int:
+    """
+    Count the number of carbon atoms in a molecular formula.
+
+    Args:
+        formula: Molecular formula string like "C6H12O6"
+
+    Returns:
+        Number of carbon atoms
+    """
+    import re
+
+    if not formula or not isinstance(formula, str):
+        return 0
+
+    # Look for carbon followed by optional number
+    # C followed by digits, or just C (which means 1)
+    carbon_matches = re.findall(r'C(\d*)', formula)
+
+    total_carbons = 0
+    for match in carbon_matches:
+        if match == '':
+            # Just 'C' without number means 1 carbon
+            total_carbons += 1
+        else:
+            # 'C' followed by number
+            total_carbons += int(match)
+
+    return total_carbons
+
+
+def lib_to_consensus(study, chrom_fhwm: float = 5.0, mz_tol: float = 0.01, rt_tol: float = 2.0):
+    """Create consensus features from library entries instead of features_df.
+
+    This method takes all rows from lib_df and creates corresponding entries in
+    consensus_df with the same columns as merge(). Instead of relying on
+    features_df, it populates consensus features directly from library data.
+
+    Before creating new features, it checks for pre-existing consensus features:
+    - If rt in lib_df is null: picks consensus feature with matching mz and largest inty_mean
+    - If rt is not null: picks consensus feature with matching mz and rt within tolerance
+    - If a match is found, skips to the next library entry
+
+    Args:
+        study: Study instance with lib_df populated
+        chrom_fhwm: Chromatographic full width at half maximum in seconds
+            to infer rt_start_mean and rt_end_mean (default: 5.0)
+        mz_tol: m/z tolerance for matching existing consensus features (default: 0.01)
+        rt_tol: RT tolerance for matching existing consensus features (default: 2.0)
+
+    Side effects:
+        Adds rows to study.consensus_df and study.consensus_mapping_df
+        Calls study.find_ms2() at the end
+    """
+    # Get logger from study if available
+    logger = getattr(study, "logger", None)
+
+    # Validate inputs
+    if getattr(study, "lib_df", None) is None or study.lib_df.is_empty():
+        if logger:
+            logger.error("Library (study.lib_df) is empty; call lib_load() first")
+        raise ValueError("Library (study.lib_df) is empty; call lib_load() first")
+
+    if logger:
+        logger.info(f"Creating consensus features from {len(study.lib_df)} library entries")
+
+    # Initialize consensus DataFrames if they don't exist
+    if not hasattr(study, "consensus_df") or study.consensus_df is None:
+        study.consensus_df = pl.DataFrame()
+    if not hasattr(study, "consensus_mapping_df") or study.consensus_mapping_df is None:
+        study.consensus_mapping_df = pl.DataFrame()
+
+    # Get cached adducts for consistent adduct handling
+    cached_adducts_df = None
+    cached_valid_adducts = None
+    try:
+        cached_adducts_df = _get_adducts(study)
+        if not cached_adducts_df.is_empty():
+            cached_valid_adducts = set(cached_adducts_df["name"].to_list())
+        else:
+            cached_valid_adducts = set()
+    except Exception as e:
+        if logger:
+            logger.warning(f"Could not retrieve study adducts: {e}")
+        cached_valid_adducts = set()
+
+    # Always allow '?' adducts
+    cached_valid_adducts.add("?")
+
+    # Get starting consensus_uid counter
+    if not study.consensus_df.is_empty():
+        max_existing_uid = study.consensus_df["consensus_uid"].max()
+        consensus_uid_counter = int(max_existing_uid) + 1 if max_existing_uid is not None else 0
+    else:
+        consensus_uid_counter = 0
+
+    # Track [M+H] iso=0 and [M-H] iso=0 entries for adduct grouping
+    base_adduct_groups = {}  # key: (mz, adduct_base), value: adduct_group
+
+    # Process each library entry
+    consensus_metadata = []
+    consensus_mapping_list = []
+    matched_count = 0
+    skipped_count = 0
+
+    for lib_row in study.lib_df.iter_rows(named=True):
+        # Extract basic library data
+        lib_uid = lib_row.get("lib_uid")
+        mz = lib_row.get("mz")
+        rt = lib_row.get("rt")
+        iso = lib_row.get("iso", 0)
+        adduct = lib_row.get("adduct")
+        z = lib_row.get("z", 1)  # charge
+
+        # Skip entries without essential data
+        if mz is None:
+            if logger:
+                logger.warning(f"Skipping library entry {lib_uid} - no m/z value")
+            continue
+
+        # Check for pre-existing consensus features
+        existing_match = None
+        if not study.consensus_df.is_empty():
+            # Filter by m/z tolerance first
+            mz_matches = study.consensus_df.filter(
+                (pl.col("mz") >= mz - mz_tol) & (pl.col("mz") <= mz + mz_tol)
+            )
+
+            if not mz_matches.is_empty():
+                if rt is None:
+                    # If rt is null, pick the consensus feature with largest inty_mean
+                    existing_match = mz_matches.sort("inty_mean", descending=True).head(1)
+                else:
+                    # If rt is not null, filter by RT tolerance and pick largest inty_mean
+                    rt_tolerance = chrom_fhwm  # Use chrom_fhwm as RT tolerance range
+                    rt_matches = mz_matches.filter(
+                        (pl.col("rt") >= rt - rt_tolerance) & (pl.col("rt") <= rt + rt_tolerance)
+                    )
+                    if not rt_matches.is_empty():
+                        existing_match = rt_matches.sort("inty_mean", descending=True).head(1)
+
+        if existing_match is not None and len(existing_match) > 0:
+            # Found a matching consensus feature, skip this library entry
+            matched_count += 1
+            if logger and matched_count <= 5:  # Log first few matches
+                match_uid = existing_match["consensus_uid"][0]
+                match_mz = existing_match["mz"][0]
+                match_rt = existing_match["rt"][0]
+                logger.debug(f"Library entry {lib_uid} (mz={mz:.4f}, rt={rt}) matched existing consensus {match_uid} (mz={match_mz:.4f}, rt={match_rt})")
+            continue
+
+        # No match found, create new consensus feature
+        # Handle missing RT - use 0 as placeholder
+        if rt is None:
+            rt = 0.0
+            if logger and skipped_count < 5:  # Log first few
+                logger.debug(f"Library entry {lib_uid} has no RT, using 0.0")
+
+        # Calculate RT range based on chrom_fhwm
+        half_width = chrom_fhwm / 2.0
+        rt_start = rt - half_width
+        rt_end = rt + half_width
+
+        # Get adduct information
+        adduct_top = adduct if adduct else "?"
+        adduct_charge_top = None
+        adduct_mass_shift_top = None
+        adduct_mass_neutral_top = None
+
+        # Parse adduct to get charge and mass shift
+        if adduct_top and cached_adducts_df is not None and not cached_adducts_df.is_empty():
+            # Look for exact match in study adducts
+            matching_adduct = cached_adducts_df.filter(pl.col("name") == adduct_top)
+            if not matching_adduct.is_empty():
+                adduct_row = matching_adduct.row(0, named=True)
+                adduct_charge_top = adduct_row["charge"]
+                adduct_mass_shift_top = adduct_row["mass_shift"]
+
+        # Fallback to default values if not found
+        if adduct_charge_top is None:
+            adduct_charge_top = int(z) if z else 1
+            # Default based on study polarity
+            study_polarity = getattr(study, "polarity", "positive")
+            if study_polarity in ["negative", "neg"]:
+                if adduct_charge_top > 0:
+                    adduct_charge_top = -adduct_charge_top
+                adduct_mass_shift_top = -1.007825
+                if adduct_top == "?":
+                    adduct_top = "[M-?]1-"
+            else:
+                if adduct_charge_top < 0:
+                    adduct_charge_top = -adduct_charge_top
+                adduct_mass_shift_top = 1.007825
+                if adduct_top == "?":
+                    adduct_top = "[M+?]1+"
+
+        # Calculate neutral mass
+        if adduct_charge_top and adduct_mass_shift_top is not None:
+            adduct_mass_neutral_top = mz * abs(adduct_charge_top) - adduct_mass_shift_top
+
+        # Determine adduct group for isotopologues and related adducts
+        adduct_group = consensus_uid_counter  # Default: each entry gets its own group
+        adduct_of = 0  # Default: this is the base adduct
+
+        # Track base adducts ([M+H] iso=0 or [M-H] iso=0) for grouping
+        base_adduct_key = None
+        if iso == 0 and adduct_top in ["[M+H]+", "[M+H]1+", "[M-H]-", "[M-H]1-"]:
+            # This is a base adduct with iso=0
+            base_adduct_key = (round(mz, 4), adduct_top)
+            base_adduct_groups[base_adduct_key] = consensus_uid_counter
+        elif iso > 0:
+            # This is an isotopologue, try to find the base adduct
+            # Calculate the base m/z (subtract isotope mass shifts)
+            c13_mass_shift = 1.00335
+            base_mz = mz - (iso * c13_mass_shift / abs(adduct_charge_top))
+
+            # Look for matching base adduct
+            for (stored_mz, stored_adduct), stored_group in base_adduct_groups.items():
+                if abs(stored_mz - base_mz) < mz_tol and stored_adduct == adduct_top:
+                    adduct_group = stored_group
+                    adduct_of = stored_group
+                    break
+
+        # Create adduct values list with proper structure (format: structured data with fields: adduct, count, percentage, mass)
+        adduct_values = [{"adduct": adduct_top, "count": 1, "percentage": 100.0, "mass": 0.0}]
+
+        # Generate unique consensus_id string
+        import uuid
+        consensus_id_str = str(uuid.uuid4()).replace('-', '')[:16]
+
+        # Build consensus metadata with requested modifications for new entries
+        metadata = {
+            "consensus_uid": consensus_uid_counter,
+            "consensus_id": consensus_id_str,
+            "quality": 1.0,
+            "number_samples": 0.0,  # Set to 0.0 for library entries
+            "rt": float(rt),
+            "mz": float(mz),
+            "rt_min": float(rt),  # Set to rt as requested
+            "rt_max": float(rt),  # Set to rt as requested
+            "rt_mean": float(rt),  # Set to rt as requested
+            "rt_start_mean": float(rt_start),
+            "rt_end_mean": float(rt_end),
+            "rt_delta_mean": 0.0,  # Set to 0.0 as requested
+            "mz_min": float(mz),  # Set to mz as requested
+            "mz_max": float(mz),  # Set to mz as requested
+            "mz_mean": float(mz),  # Set to mz as requested
+            "mz_start_mean": float(mz),  # Set to mz as requested
+            "mz_end_mean": float(mz),  # Set to mz as requested
+            "inty_mean": -1.0,  # Set to -1.0 as requested
+            "bl": -1.0,
+            "chrom_coherence_mean": -1.0,  # Set to -1.0 as requested
+            "chrom_prominence_mean": -1.0,  # Set to -1.0 as requested
+            "chrom_prominence_scaled_mean": -1.0,  # Set to -1.0 as requested
+            "chrom_height_scaled_mean": -1.0,  # Set to -1.0 as requested
+            "iso": iso,  # Set to iso from lib_df as requested
+            "iso_mean": float(iso),  # Set to iso from lib_df as requested
+            "charge_mean": float(abs(z)) if z else 1.0,  # Set to z as requested
+            "number_ms2": 0,  # Will be updated by find_ms2
+            "adducts": adduct_values,
+            "adduct_charge_top": adduct_charge_top,
+            "adduct_group": adduct_group,  # Use calculated adduct group
+            "adduct_mass_neutral_top": round(adduct_mass_neutral_top, 6) if adduct_mass_neutral_top is not None else None,
+            "adduct_mass_shift_top": round(adduct_mass_shift_top, 6) if adduct_mass_shift_top is not None else None,
+            "adduct_of": adduct_of,  # Use calculated adduct_of
+            "adduct_top": adduct_top,
+            "id_top_name": None,  # Set to null as requested
+            "id_top_class": None,  # Set to null as requested
+            "id_top_adduct": None,  # Set to null as requested
+            "id_top_score": None,  # Set to null as requested
+        }
+
+        consensus_metadata.append(metadata)
+
+        # Create mapping entry (maps to library entry as "virtual" feature)
+        # Use lib_uid as the feature_uid and a virtual sample_uid of 0
+        # Match existing consensus_mapping_df column order: consensus_uid, feature_uid, sample_uid
+        consensus_mapping_list.append({
+            "consensus_uid": consensus_uid_counter,
+            "feature_uid": lib_uid,  # Use lib_uid as feature reference
+            "sample_uid": 0,  # Virtual sample for library entries
+        })
+
+        consensus_uid_counter += 1
+
+    # Log matching statistics
+    if logger:
+        total_processed = matched_count + len(consensus_metadata)
+        logger.info(f"Processed {total_processed} library entries: {matched_count} matched existing consensus features, {len(consensus_metadata)} created new features")
+
+    # Convert to DataFrames with proper schema alignment
+    if consensus_metadata:
+        new_consensus_df = pl.DataFrame(consensus_metadata, strict=False)
+
+        # Ensure schema compatibility with existing consensus_df
+        if not study.consensus_df.is_empty():
+            # Cast columns to match existing schema
+            existing_schema = study.consensus_df.schema
+            cast_exprs = []
+            for col_name in new_consensus_df.columns:
+                if col_name in existing_schema:
+                    target_dtype = existing_schema[col_name]
+                    if target_dtype == pl.Null:
+                        # For Null columns, use lit(None) to maintain Null type
+                        cast_exprs.append(pl.lit(None).alias(col_name))
+                    else:
+                        cast_exprs.append(pl.col(col_name).cast(target_dtype, strict=False))
+                else:
+                    cast_exprs.append(pl.col(col_name))
+
+            new_consensus_df = new_consensus_df.select(cast_exprs)
+
+        new_consensus_mapping_df = pl.DataFrame(consensus_mapping_list, strict=False)
+
+        # Append to existing DataFrames
+        if not study.consensus_df.is_empty():
+            study.consensus_df = pl.concat([study.consensus_df, new_consensus_df])
+        else:
+            study.consensus_df = new_consensus_df
+
+        if not study.consensus_mapping_df.is_empty():
+            study.consensus_mapping_df = pl.concat([study.consensus_mapping_df, new_consensus_mapping_df])
+        else:
+            study.consensus_mapping_df = new_consensus_mapping_df
+
+        if logger:
+            logger.info(f"Added {len(consensus_metadata)} consensus features from library")
+    else:
+        if logger:
+            logger.warning("No valid consensus features created from library")
+        return
+
+    # Store operation in history
+    if hasattr(study, "update_history"):
+        study.update_history(
+            ["lib_to_consensus"],
+            {"chrom_fhwm": chrom_fhwm, "lib_entries": len(study.lib_df)},
+        )
+
+    # Perform find_ms2 at the end
+    try:
+        if hasattr(study, "find_ms2"):
+            if logger:
+                logger.info("Running find_ms2 to link MS2 spectra to library-derived consensus features")
+            study.find_ms2()
+        else:
+            if logger:
+                logger.warning("find_ms2 method not available on study object")
+    except Exception as e:
+        if logger:
+            logger.warning(f"find_ms2 failed: {e}")
+
+    if logger:
+        logger.info(f"lib_to_consensus completed: {len(consensus_metadata)} features added")
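Note: taken together, the id.py additions form a small workflow: lib_load(..., iso="13C") expands each compound with n carbons into n+1 isotopomer rows (each shifted by iso * 1.00335 Da on m, and by iso * 1.00335 / |z| on mz, all sharing one quant_group), lib_to_consensus() turns library rows into consensus features, and lib_reset() undoes both. A hedged usage sketch; the top-level Study import and the empty-study setup are assumptions, not code from this release:

# Usage sketch based on the hunks above; setup details are hypothetical.
from masster import Study  # assumption: Study is re-exported at package level

study = Study()  # real code would add and process samples first
study.lib_load("aa.csv", polarity="positive", iso="13C")
# e.g. L-Glutamic acid (C5H9NO4, 5 carbons) now occupies 6 rows: iso = 0..5

study.lib_to_consensus(chrom_fhwm=5.0, mz_tol=0.01, rt_tol=2.0)
# library rows without an existing consensus match become consensus features
# with number_samples = 0 and inty_mean = -1.0

study.lib_reset()  # removes those library-derived features (number_samples <= 0)
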
masster/study/load.py
CHANGED
@@ -261,9 +261,14 @@ def _fill_chrom_single_impl(
     min_number_abs = 1
     if isinstance(min_samples_rel, float) and min_samples_rel > 0:
         min_number_rel = int(min_samples_rel * len(self.samples_df))
-    if isinstance(min_samples_abs, int) and min_samples_abs
-        min_number_abs = int(min_samples_abs)
+    if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
+        min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
     min_number = max(min_number_rel, min_number_abs)
+
+    # Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
+    if isinstance(min_samples_abs, int) and min_samples_abs == 0:
+        min_number = 0
+
     self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")

     if min_number > 0:
@@ -277,7 +282,7 @@ def _fill_chrom_single_impl(
     )
     self.logger.debug("Identifying missing features...")
     # Instead of building full chromatogram matrix, identify missing consensus/sample combinations directly
-    missing_combinations =
+    missing_combinations = _get_missing_consensus_sample_combinations(self, uids)
     if not missing_combinations:
         self.logger.info("No missing features found to fill.")
         return
@@ -754,10 +759,14 @@ def _fill_chrom_impl(
     min_number_abs = 1
     if isinstance(min_samples_rel, float) and min_samples_rel > 0:
         min_number_rel = int(min_samples_rel * len(self.samples_df))
-    if isinstance(min_samples_abs, int) and min_samples_abs
-        min_number_abs = int(min_samples_abs)
+    if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
+        min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
     min_number = max(min_number_rel, min_number_abs)

+    # Special case: if min_samples_abs is explicitly 0, allow 0-sample features (like library features)
+    if isinstance(min_samples_abs, int) and min_samples_abs == 0:
+        min_number = 0
+
     self.logger.debug(f"Threshold for gap filling: number_samples>={min_number}")

     if min_number > 0:
@@ -770,7 +779,7 @@ def _fill_chrom_impl(

     # Get missing consensus/sample combinations using the optimized method
     self.logger.debug("Identifying missing features...")
-    missing_combinations =
+    missing_combinations = _get_missing_consensus_sample_combinations(self, uids)

     if not missing_combinations or len(missing_combinations) == 0:
         self.logger.info("No missing features found to fill.")
@@ -846,7 +855,7 @@ def _fill_chrom_impl(
     future_to_sample = {}
     for sample_info in samples_to_process:
         future = executor.submit(
-
+            _process_sample_for_parallel_fill, self,
             sample_info,
             consensus_info,
             uids,
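Note: several removed lines in these hunks are truncated by the diff viewer, but the added threshold logic stands on its own and is easiest to check with numbers. A standalone sketch mirroring the new computation (the wrapper function and sample counts below are illustrative, not package code):

# Mirrors the corrected gap-filling threshold from _fill_chrom_impl (sketch only).
def fill_threshold(n_samples: int, min_samples_rel, min_samples_abs) -> int:
    min_number_rel = 0
    min_number_abs = 1
    if isinstance(min_samples_rel, float) and min_samples_rel > 0:
        min_number_rel = int(min_samples_rel * n_samples)
    if isinstance(min_samples_abs, int) and min_samples_abs >= 0:
        min_number_abs = int(min_samples_abs) if min_samples_abs > 0 else 0
    min_number = max(min_number_rel, min_number_abs)
    # Explicit 0 opts in to 0-sample (library-derived) consensus features
    if isinstance(min_samples_abs, int) and min_samples_abs == 0:
        min_number = 0
    return min_number

assert fill_threshold(10, 0.5, 1) == 5  # relative threshold wins
assert fill_threshold(10, 0.5, 0) == 0  # explicit 0 overrides everything
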
masster/study/merge.py
CHANGED
@@ -505,13 +505,99 @@ def _merge_kd(study, params: merge_defaults) -> oms.ConsensusMap:
     return consensus_map


+def _generate_feature_maps_from_samples(study):
+    """
+    Generate feature maps using Study-level features_df instead of Sample-level loading.
+    This uses the study's existing features_df which is already loaded.
+
+    Args:
+        study: Study object containing features_df
+
+    Returns:
+        list: List of temporary FeatureMap objects built from Study-level data
+    """
+    import pyopenms as oms
+
+    temp_feature_maps = []
+
+    study.logger.info(f"Building feature maps using Study-level features_df from {len(study.samples_df)} samples")
+
+    # Use the features_df from the study that's already loaded
+    if not hasattr(study, 'features_df') or study.features_df is None or study.features_df.is_empty():
+        study.logger.warning("No features_df available - features must be loaded first")
+        return temp_feature_maps
+
+    # Group features by sample
+    study.logger.info(f"Processing {len(study.features_df)} features grouped by sample")
+
+    # Get unique sample names/indices
+    if 'sample_uid' in study.features_df.columns:
+        sample_groups = study.features_df.group_by('sample_uid')
+        study.logger.debug("Grouping features by 'sample_uid' column")
+    elif 'sample_id' in study.features_df.columns:
+        sample_groups = study.features_df.group_by('sample_id')
+        study.logger.debug("Grouping features by 'sample_id' column")
+    elif 'sample' in study.features_df.columns:
+        sample_groups = study.features_df.group_by('sample')
+        study.logger.debug("Grouping features by 'sample' column")
+    else:
+        study.logger.warning("No sample grouping column found in features_df")
+        study.logger.info(f"Available columns: {study.features_df.columns}")
+        return temp_feature_maps
+
+    # Process each sample group
+    processed_samples = 0
+    for sample_key, sample_features in sample_groups:
+        try:
+            feature_map = oms.FeatureMap()
+            feature_count = 0
+
+            # Build features from this sample's features
+            for row in sample_features.iter_rows(named=True):
+                try:
+                    feature = oms.Feature()
+
+                    # Set feature properties
+                    if row.get("feature_id") is not None:
+                        feature.setUniqueId(int(row["feature_id"]))
+                    if row.get("mz") is not None:
+                        feature.setMZ(float(row["mz"]))
+                    if row.get("rt") is not None:
+                        feature.setRT(float(row["rt"]))
+                    if row.get("inty") is not None:
+                        feature.setIntensity(float(row["inty"]))
+                    if row.get("quality") is not None:
+                        feature.setOverallQuality(float(row["quality"]))
+                    if row.get("charge") is not None:
+                        feature.setCharge(int(row["charge"]))
+
+                    feature_map.push_back(feature)
+                    feature_count += 1
+
+                except (ValueError, TypeError) as e:
+                    study.logger.warning(f"Skipping feature in sample {sample_key} due to conversion error: {e}")
+                    continue
+
+            temp_feature_maps.append(feature_map)
+            processed_samples += 1
+            study.logger.debug(f"Built feature map for sample {sample_key} with {feature_count} features")
+
+        except Exception as e:
+            study.logger.warning(f"Failed to process sample group {sample_key}: {e}")
+            # Add empty feature map for failed samples to maintain sample order
+            temp_feature_maps.append(oms.FeatureMap())
+
+    study.logger.info(f"Generated {len(temp_feature_maps)} feature maps from {processed_samples} samples using Study-level features_df")
+    return temp_feature_maps
+
+
 def _generate_feature_maps_on_demand(study):
     """
-    Generate feature maps on-demand
+    Generate feature maps on-demand using Sample-level _load_ms1() for merge operations.
     Returns temporary feature maps that are not cached in the study.

     Args:
-        study: Study object containing
+        study: Study object containing samples

     Returns:
         list: List of temporary FeatureMap objects
@@ -520,6 +606,15 @@ def _generate_feature_maps_on_demand(study):
     import pyopenms as oms
     import numpy as np

+    # Check if we should use Sample-level loading instead of features_df
+    use_sample_loading = True  # Default to Sample-level loading as requested
+
+    # Use Sample-level loading if requested and samples_df is available
+    if use_sample_loading and hasattr(study, 'samples_df') and study.samples_df is not None and len(study.samples_df) > 0:
+        study.logger.debug("Building feature maps using Sample-level _load_ms1() instead of features_df")
+        return _generate_feature_maps_from_samples(study)
+
+    # Fallback to original features_df approach
     if study.features_df is None or len(study.features_df) == 0:
         study.logger.error("No features_df available for generating feature maps")
         return []
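Note: the new builder converts Polars rows into pyopenms features one by one. A minimal self-contained sketch of that conversion (requires pyopenms; the two rows below are invented example data):

# Sketch of the row -> oms.Feature conversion used above (invented rows).
import pyopenms as oms

rows = [
    {"feature_id": 1, "mz": 148.0604, "rt": 62.0, "inty": 1.5e6, "charge": 1},
    {"feature_id": 2, "mz": 182.0812, "rt": 95.5, "inty": 8.2e5, "charge": 1},
]

feature_map = oms.FeatureMap()
for row in rows:
    feature = oms.Feature()
    feature.setUniqueId(int(row["feature_id"]))
    feature.setMZ(float(row["mz"]))
    feature.setRT(float(row["rt"]))
    feature.setIntensity(float(row["inty"]))
    feature.setCharge(int(row["charge"]))
    feature_map.push_back(feature)

print(feature_map.size())  # 2

Design observation: since use_sample_loading is hard-coded to True, _generate_feature_maps_on_demand now always takes the new path whenever samples_df is non-empty, and the features_df fallback below it only runs for studies without samples.
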
masster/study/processing.py
CHANGED
@@ -62,16 +62,8 @@ def _generate_feature_maps_on_demand_for_align(study):
             if feature_row["inty"] is None:
                 study.logger.warning("Skipping feature due to missing inty")
                 continue
-
-
-            if feature_row["feature_id"] is None:
-                # Use a simple incremental ID for alignment purposes
-                feature_id = len(temp_feature_maps) * 100000 + feature_map.size() + 1
-                study.logger.debug(f"Generated new feature_id {feature_id} for feature with missing ID in sample {sample_name}")
-            else:
-                feature_id = int(feature_row["feature_id"])
-
-            feature.setUniqueId(feature_id)
+
+            feature.setUniqueId(int(feature_row["feature_id"]))
             feature.setMZ(float(feature_row["mz"]))
             feature.setRT(float(feature_row["rt"]))
             feature.setIntensity(float(feature_row["inty"]))
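Note: dropping the null-feature_id fallback here is safe only because _load_study5 now calls _sanitize_null_ids() (see the h5.py and study.py hunks), so alignment can assume every feature_id parses as an integer. A one-line illustration with an invented sanitized value:

# Invented value in the sanitized format: a large integer stored as a string.
feature_row = {"feature_id": "1755000000000001"}
assert int(feature_row["feature_id"]) == 1755000000000001
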
masster/study/study.py
CHANGED
@@ -440,6 +440,11 @@ class Study:

     # === Identification and Library Matching ===
     lib_load = lib_load
+
+    def lib_to_consensus(self, **kwargs):
+        """Create consensus features from library entries."""
+        from masster.study.id import lib_to_consensus as _lib_to_consensus
+        return _lib_to_consensus(self, **kwargs)
     identify = identify
     get_id = get_id
     id_reset = id_reset
@@ -562,6 +567,83 @@ class Study:
         except Exception as e:
             self.logger.error(f"Failed to reload current module {current_module}: {e}")

+    def _sanitize_null_ids(self):
+        """
+        Sanitize null feature_id and consensus_id values by replacing them with new integer IDs.
+        For feature_id: generates large sequential integers that can be converted by merge/align functions.
+        For consensus_id: uses 16-character UUID strings (as expected by the merge function).
+        """
+        import uuid
+        import polars as pl
+        import time
+
+        # Sanitize features_df feature_id column
+        if hasattr(self, 'features_df') and self.features_df is not None and not self.features_df.is_empty():
+            # Check for null feature_ids
+            null_feature_ids = self.features_df.filter(pl.col("feature_id").is_null()).shape[0]
+            if null_feature_ids > 0:
+                self.logger.info(f"Sanitizing {null_feature_ids} null feature_id values with new integer IDs")
+
+                # Find the maximum existing feature_id (convert strings to int if possible)
+                max_existing_id = 0
+                existing_ids = self.features_df.filter(pl.col("feature_id").is_not_null())["feature_id"].to_list()
+                for fid in existing_ids:
+                    try:
+                        int_id = int(fid)
+                        max_existing_id = max(max_existing_id, int_id)
+                    except (ValueError, TypeError):
+                        # Skip non-integer IDs
+                        pass
+
+                # Generate new sequential integer IDs starting from max + timestamp offset
+                # Use timestamp to ensure uniqueness across different sanitization runs
+                base_id = max(max_existing_id + 1, int(time.time() * 1000000))  # Microsecond timestamp
+                new_int_ids = [str(base_id + i) for i in range(null_feature_ids)]
+                uid_index = 0
+
+                # Create a list to store all feature_ids
+                feature_ids = []
+                for feature_id in self.features_df["feature_id"].to_list():
+                    if feature_id is None:
+                        feature_ids.append(new_int_ids[uid_index])
+                        uid_index += 1
+                    else:
+                        feature_ids.append(feature_id)
+
+                # Update the DataFrame with sanitized feature_ids
+                self.features_df = self.features_df.with_columns(
+                    pl.Series("feature_id", feature_ids, dtype=pl.Utf8)
+                )
+
+                self.logger.info(f"Successfully sanitized {null_feature_ids} feature_id values")
+
+        # Sanitize consensus_df consensus_id column
+        if hasattr(self, 'consensus_df') and self.consensus_df is not None and not self.consensus_df.is_empty():
+            if "consensus_id" in self.consensus_df.columns:
+                null_consensus_ids = self.consensus_df.filter(pl.col("consensus_id").is_null()).shape[0]
+                if null_consensus_ids > 0:
+                    self.logger.info(f"Sanitizing {null_consensus_ids} null consensus_id values with new UIDs")
+
+                    # Generate new UIDs for null values using the same method as merge()
+                    new_uids = [str(uuid.uuid4()).replace('-', '')[:16] for _ in range(null_consensus_ids)]
+                    uid_index = 0
+
+                    # Create a list to store all consensus_ids
+                    consensus_ids = []
+                    for consensus_id in self.consensus_df["consensus_id"].to_list():
+                        if consensus_id is None:
+                            consensus_ids.append(new_uids[uid_index])
+                            uid_index += 1
+                        else:
+                            consensus_ids.append(consensus_id)
+
+                    # Update the DataFrame with sanitized consensus_ids
+                    self.consensus_df = self.consensus_df.with_columns(
+                        pl.Series("consensus_id", consensus_ids, dtype=pl.Utf8)
+                    )
+
+                    self.logger.info(f"Successfully sanitized {null_consensus_ids} consensus_id values")
+
     def __dir__(self):
         """
         Custom __dir__ implementation to hide internal methods starting with '_'
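Note: _sanitize_null_ids is private and runs automatically during _load_study5, but its contract is simple: after it returns, no feature_id or consensus_id is null. A toy demonstration of the consensus_id branch (invented data; the 16-character hex UID format matches what merge() produces):

# Toy demonstration of the consensus_id sanitization contract (invented data).
import uuid

import polars as pl

df = pl.DataFrame({"consensus_id": ["a3f2c19b8d4e6071", None, None]})
n_null = df.filter(pl.col("consensus_id").is_null()).shape[0]
new_uids = [str(uuid.uuid4()).replace('-', '')[:16] for _ in range(n_null)]

ids, i = [], 0
for cid in df["consensus_id"].to_list():
    if cid is None:
        ids.append(new_uids[i])
        i += 1
    else:
        ids.append(cid)

df = df.with_columns(pl.Series("consensus_id", ids, dtype=pl.Utf8))
assert df["consensus_id"].null_count() == 0  # no nulls remain
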
masster/study/study5_schema.json
CHANGED
@@ -327,6 +327,9 @@
     "formula": {
       "dtype": "pl.String"
     },
+    "iso": {
+      "dtype": "pl.Int64"
+    },
     "adduct": {
       "dtype": "pl.String"
     },
@@ -342,6 +345,9 @@
     "rt": {
       "dtype": "pl.Null"
     },
+    "quant_group": {
+      "dtype": "pl.Int64"
+    },
     "db_id": {
       "dtype": "pl.String"
     },
@@ -369,6 +375,9 @@
     },
     "score": {
       "dtype": "pl.Float64"
+    },
+    "iso": {
+      "dtype": "pl.Int64"
     }
   }
 }

{masster-0.5.0.dist-info → masster-0.5.1.dist-info}/RECORD
CHANGED
@@ -1,5 +1,5 @@
 masster/__init__.py,sha256=ueZ224WPNRRjQEYTaQUol818nwQgJwB93HbEfmtPRmg,1041
-masster/_version.py,sha256=
+masster/_version.py,sha256=dkqPLCQGfsGL65orxLHNgDpbEE9aMOWq4b_vYspojyk,256
 masster/chromatogram.py,sha256=iYpdv8C17zVnlWvOFgAn9ns2uFGiF-GgoYf5QVVAbHs,19319
 masster/logger.py,sha256=tR65N23zfrNpcZNbZm2ot_Aual9XrGB1MWjLrovZkMs,16749
 masster/spectrum.py,sha256=XJSUrqXZSzfpWnD8v5IMClXMRZLKLYIk014qaMOS9_k,49738
@@ -8,6 +8,7 @@ masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_DDA_OT_C-MiLUT_Q
 masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v6_r38_01.sample5,sha256=dSd2cIgYYdRcNSzkhqlZCeWKi3x8Hhhcx8BFMuiVG4c,11382948
 masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C008_v7_r37_01.sample5,sha256=wER8CHSBz54Yx1kwmU7ghPPWVwYvxv_lXGB8-8a1xpQ,9508434
 masster/data/dda/20250530_VH_IQX_KW_RP_HSST3_100mm_12min_pos_v4_MS1_C-MiLUT_C017_v5_r99_01.sample5,sha256=h2OOAWWTwKXzTNewhiYeL-cMYdp_JYLPya8Q9Nv9Lvw,12389587
+masster/data/libs/aa.csv,sha256=Sja1DyMsiaM2NfLcct4kAAcXYwPCukJJW8sDkup9w_c,1924
 masster/data/libs/ccm.csv,sha256=Q6nylV1152uTpX-ydqWeGrc6L9kgv45xN_fBZ4f7Tvo,12754
 masster/data/libs/urine.csv,sha256=iRrR4N8Wzb8KDhHJA4LqoQC35pp93FSaOKvXPrgFHis,653736
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data,sha256=01vC6m__Qqm2rLvlTMZoeKIKowFvovBTUnrNl8Uav3E,24576
@@ -15,7 +16,7 @@ masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecR
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan,sha256=ahi1Y3UhAj9Bj4Q2MlbgPekNdkJvMOoMXVOoR6CeIxc,13881220
 masster/data/wiff/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2,sha256=TFB0HW4Agkig6yht7FtgjUdbXax8jjKaHpSZSvuU5vs,3252224
 masster/lib/__init__.py,sha256=TcePNx3SYZHz6763TL9Sg4gUNXaRWjlrOtyS6vsu-hg,178
-masster/lib/lib.py,sha256=
+masster/lib/lib.py,sha256=SSN06UtiM-hIdjS3eCiIHsJ_8S4YHRGOLGmdPIh-efo,27481
 masster/sample/__init__.py,sha256=HL0m1ept0PMAYUCQtDDnkdOS12IFl6oLAq4TZQz83uY,170
 masster/sample/adducts.py,sha256=S7meba3L1tSdjoDhkSiTI71H2NJLu4i1dtJwfDKWI1M,32591
 masster/sample/h5.py,sha256=B0gAmhrnoFoybotqsqiT8s-PkeZWUdIQfI-4cnM52Zc,115430
@@ -39,22 +40,22 @@ masster/study/defaults/sample_def.py,sha256=keoXyMyrm_iLgbYqfIbqCpJ3XHBVlNwCNmb
 masster/study/__init__.py,sha256=55axdFuqRX4aXtJ8ocnhcLB32fNtmmJpCi58moO0r4g,237
 masster/study/analysis.py,sha256=L-wXBnGZCLB5UUDrjIdOiMG9zdej3Tw_SftcEmmTukM,84264
 masster/study/export.py,sha256=Rp1vc5iDl-XFWo_RBVCJDGBNSKakq9f8aC2FeUCP9GA,59398
-masster/study/h5.py,sha256=
+masster/study/h5.py,sha256=6_nyjMGg_dkKkrx_Mv77wGg5SmWsVOZxu7HZasoXbRU,84916
 masster/study/helpers.py,sha256=dU2YxAGPmu1w55mpcgNoHPpg2fNW-vK944aJy3YwLsU,163555
-masster/study/id.py,sha256=
-masster/study/load.py,sha256=
-masster/study/merge.py,sha256=
+masster/study/id.py,sha256=r_vZQYNxqNXf_pjgk_CLkl1doLnLa956mTuVmlHN52o,80075
+masster/study/load.py,sha256=W4mljmYVR71sas4no7vKWIVfdnQjb-rTcEUhE0ZMr0k,71696
+masster/study/merge.py,sha256=XF4NxNuLSxwf2j1__ReIInXVRGDRoSHFeKdcCSayKU4,164298
 masster/study/parameters.py,sha256=bTvmcwX9INxzcrEAmTiFH8qeWVhwkvMTZjuP394pz5o,3279
 masster/study/plot.py,sha256=pAN5uQKYPUpupQVtKBloWjKOKpM_C9o2e3VWkJ-aZN8,102041
-masster/study/processing.py,sha256=
+masster/study/processing.py,sha256=TKeTzRLmaMxUKCt66pXPfx_7xc-R5__ZwEZdFHOxg6A,55916
 masster/study/save.py,sha256=47AP518epJJ9TjaGGyrLKsMsyjIk8_J4ka7bmsnRtFQ,9268
-masster/study/study.py,sha256=
-masster/study/study5_schema.json,sha256=
+masster/study/study.py,sha256=vbP_bPa62-KYN0OTUN6PpSyCoFcW-TdbLbx67ShkEx0,42930
+masster/study/study5_schema.json,sha256=0IZxM9VVI0TUlx74BPzJDT44kySi6NZZ6iLR0j8bU_s,7736
 masster/study/defaults/__init__.py,sha256=m3Z5KXGqsTdh7GjYzZoENERt39yRg0ceVRV1DeCt1P0,610
 masster/study/defaults/align_def.py,sha256=hHQbGgsOqMRHHr0Wn8Onr8XeaRz3-fFE0qGE-OMst80,20324
 masster/study/defaults/export_def.py,sha256=eXl3h4aoLX88XkHTpqahLd-QZ2gjUqrmjq8IJULXeWo,1203
 masster/study/defaults/fill_chrom_def.py,sha256=hB6-tyC9bhx-IpGj2HC8FinQdW4VLYj_pn5t1rlj-Ew,8887
-masster/study/defaults/fill_def.py,sha256=
+masster/study/defaults/fill_def.py,sha256=H-ZNKyiXxBLWdLoCMqxfvphNyc9wrDVFMC7TyRNYEm0,8869
 masster/study/defaults/find_consensus_def.py,sha256=2KRRMsCDP7pwNrLCC6eI5uQgMXqiNdiI6pSvxNJ8L5M,8598
 masster/study/defaults/find_ms2_def.py,sha256=RL0DFG41wQ05U8UQKUGr3vzSl3mU0m0knQus8DpSoJE,5070
 masster/study/defaults/identify_def.py,sha256=96rxoCAPQj_yX-3mRoD2LTkTLJgG27eJQqwarLv5jL0,10580
@@ -66,8 +67,8 @@ masster/wizard/README.md,sha256=mL1A3YWJZOefpJ6D0-HqGLkVRmUlOpwyVFdvJBeeoZM,1414
 masster/wizard/__init__.py,sha256=a2hcZnHASjfuw1lqZhZnvTR58rc33rRnoGAY_JfvGhI,683
 masster/wizard/example.py,sha256=xEZFTH9UZ8HKOm6s3JL8Js0Uw5ChnISWBHSZCL32vsM,7983
 masster/wizard/wizard.py,sha256=esgaifLRyaGxytif9qOkTy-21VxlUQxrvl47K-l-BpE,37666
-masster-0.5.
-masster-0.5.
-masster-0.5.
-masster-0.5.
-masster-0.5.
+masster-0.5.1.dist-info/METADATA,sha256=01v713yHW9RJPqFXY89wd5e21Ls3crfs6kEBDhDrUlc,45113
+masster-0.5.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+masster-0.5.1.dist-info/entry_points.txt,sha256=ZHguQ_vPmdbpqq2uGtmEOLJfgP-DQ1T0c07Lxh30wc8,58
+masster-0.5.1.dist-info/licenses/LICENSE,sha256=bx5iLIKjgAdYQ7sISn7DsfHRKkoCUm1154sJJKhgqnU,35184
+masster-0.5.1.dist-info/RECORD,,

{masster-0.5.0.dist-info → masster-0.5.1.dist-info}/WHEEL
File without changes

{masster-0.5.0.dist-info → masster-0.5.1.dist-info}/entry_points.txt
File without changes

{masster-0.5.0.dist-info → masster-0.5.1.dist-info}/licenses/LICENSE
File without changes