masster 0.2.0__tar.gz → 0.2.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- {masster-0.2.0 → masster-0.2.1}/PKG-INFO +1 -1
- {masster-0.2.0 → masster-0.2.1}/pyproject.toml +1 -1
- {masster-0.2.0 → masster-0.2.1}/src/masster/_version.py +1 -1
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/h5.py +18 -2
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/sample5_schema.json +76 -58
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/h5.py +0 -1
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/helpers.py +6 -39
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/study.py +26 -8
- {masster-0.2.0 → masster-0.2.1}/uv.lock +1 -1
- {masster-0.2.0 → masster-0.2.1}/.github/workflows/publish.yml +0 -0
- {masster-0.2.0 → masster-0.2.1}/.github/workflows/security.yml +0 -0
- {masster-0.2.0 → masster-0.2.1}/.github/workflows/test.yml +0 -0
- {masster-0.2.0 → masster-0.2.1}/.gitignore +0 -0
- {masster-0.2.0 → masster-0.2.1}/.pre-commit-config.yaml +0 -0
- {masster-0.2.0 → masster-0.2.1}/LICENSE +0 -0
- {masster-0.2.0 → masster-0.2.1}/Makefile +0 -0
- {masster-0.2.0 → masster-0.2.1}/README.md +0 -0
- {masster-0.2.0 → masster-0.2.1}/TESTING.md +0 -0
- {masster-0.2.0 → masster-0.2.1}/demo/example_batch_process.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/demo/example_sample_process.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/__init__.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/chromatogram.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.mzML +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.timeseries.data +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff.scan +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/data/examples/2025_01_14_VW_7600_LpMx_DBS_CID_2min_TOP15_030msecMS1_005msecReac_CE35_DBS-ON_3.wiff2 +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/logger.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/__init__.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/defaults/__init__.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/defaults/find_adducts_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/defaults/find_features_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/defaults/find_ms2_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/defaults/get_spectrum_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/defaults/sample_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/helpers.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/load.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/parameters.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/plot.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/processing.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/sample.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/sample/save.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/spectrum.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/__init__.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/__init__.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/align_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/export_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/fill_chrom_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/find_consensus_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/find_ms2_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/integrate_chrom_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/defaults/study_def.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/export.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/load.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/parameters.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/plot.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/processing.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/save.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/src/masster/study/study5_schema.json +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/conftest.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_chromatogram.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_defaults.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_imports.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_integration.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_logger.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_parameters.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_sample.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_spectrum.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_study.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tests/test_version.py +0 -0
- {masster-0.2.0 → masster-0.2.1}/tox.ini +0 -0
|
@@ -309,7 +309,7 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
309
309
|
missing_columns = []
|
|
310
310
|
for col in schema.get("scans_df", {}).get("columns", []):
|
|
311
311
|
if col not in scans_group:
|
|
312
|
-
self.logger.
|
|
312
|
+
self.logger.debug(f"Column '{col}' not found in sample5/scans.")
|
|
313
313
|
data[col] = None
|
|
314
314
|
missing_columns.append(col)
|
|
315
315
|
continue
|
|
@@ -444,6 +444,14 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
444
444
|
f"Column '{col}' in scans_df not found in schema, keeping original type.",
|
|
445
445
|
)
|
|
446
446
|
|
|
447
|
+
# Ensure column order matches schema order
|
|
448
|
+
if "scans_df" in schema and "columns" in schema["scans_df"]:
|
|
449
|
+
schema_column_order = list(schema["scans_df"]["columns"].keys())
|
|
450
|
+
# Only reorder columns that exist in both schema and DataFrame
|
|
451
|
+
existing_columns = [col for col in schema_column_order if col in self.scans_df.columns]
|
|
452
|
+
if existing_columns:
|
|
453
|
+
self.scans_df = self.scans_df.select(existing_columns)
|
|
454
|
+
|
|
447
455
|
else:
|
|
448
456
|
self.scans_df = None
|
|
449
457
|
else:
|
|
@@ -457,7 +465,7 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
457
465
|
missing_columns = []
|
|
458
466
|
for col in schema.get("features_df", {}).get("columns", []):
|
|
459
467
|
if col not in features_group:
|
|
460
|
-
self.logger.
|
|
468
|
+
self.logger.debug(
|
|
461
469
|
f"Column '{col}' not found in sample5/features.",
|
|
462
470
|
)
|
|
463
471
|
data[col] = None
|
|
@@ -804,6 +812,14 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
804
812
|
pl.col(col).fill_nan(None).alias(col),
|
|
805
813
|
])
|
|
806
814
|
|
|
815
|
+
# Ensure column order matches schema order
|
|
816
|
+
if "features_df" in schema and "columns" in schema["features_df"]:
|
|
817
|
+
schema_column_order = list(schema["features_df"]["columns"].keys())
|
|
818
|
+
# Only reorder columns that exist in both schema and DataFrame
|
|
819
|
+
existing_columns = [col for col in schema_column_order if col in self.features_df.columns]
|
|
820
|
+
if existing_columns:
|
|
821
|
+
self.features_df = self.features_df.select(existing_columns)
|
|
822
|
+
|
|
807
823
|
else:
|
|
808
824
|
self.features_df = None
|
|
809
825
|
else:
|
|
@@ -1,80 +1,80 @@
|
|
|
1
1
|
{
|
|
2
2
|
"features_df": {
|
|
3
3
|
"columns": {
|
|
4
|
-
"
|
|
5
|
-
"dtype": "pl.Utf8"
|
|
6
|
-
},
|
|
7
|
-
"adduct_group": {
|
|
4
|
+
"feature_uid": {
|
|
8
5
|
"dtype": "pl.Int64"
|
|
9
6
|
},
|
|
10
|
-
"
|
|
11
|
-
"dtype": "pl.
|
|
7
|
+
"feature_id": {
|
|
8
|
+
"dtype": "pl.Utf8"
|
|
12
9
|
},
|
|
13
|
-
"
|
|
14
|
-
"dtype": "pl.
|
|
10
|
+
"mz": {
|
|
11
|
+
"dtype": "pl.Float64"
|
|
15
12
|
},
|
|
16
|
-
"
|
|
17
|
-
"dtype": "pl.
|
|
13
|
+
"rt": {
|
|
14
|
+
"dtype": "pl.Float64"
|
|
18
15
|
},
|
|
19
|
-
"
|
|
16
|
+
"rt_original": {
|
|
20
17
|
"dtype": "pl.Float64"
|
|
21
18
|
},
|
|
22
|
-
"
|
|
19
|
+
"rt_start": {
|
|
23
20
|
"dtype": "pl.Float64"
|
|
24
21
|
},
|
|
25
|
-
"
|
|
22
|
+
"rt_end": {
|
|
26
23
|
"dtype": "pl.Float64"
|
|
27
24
|
},
|
|
28
|
-
"
|
|
25
|
+
"rt_delta": {
|
|
29
26
|
"dtype": "pl.Float64"
|
|
30
27
|
},
|
|
31
|
-
"
|
|
32
|
-
"dtype": "pl.
|
|
28
|
+
"mz_start": {
|
|
29
|
+
"dtype": "pl.Float64"
|
|
33
30
|
},
|
|
34
|
-
"
|
|
35
|
-
"dtype": "pl.
|
|
31
|
+
"mz_end": {
|
|
32
|
+
"dtype": "pl.Float64"
|
|
36
33
|
},
|
|
37
34
|
"inty": {
|
|
38
35
|
"dtype": "pl.Float64"
|
|
39
36
|
},
|
|
37
|
+
"quality": {
|
|
38
|
+
"dtype": "pl.Float64"
|
|
39
|
+
},
|
|
40
|
+
"charge": {
|
|
41
|
+
"dtype": "pl.Int32"
|
|
42
|
+
},
|
|
40
43
|
"iso": {
|
|
41
44
|
"dtype": "pl.Int64"
|
|
42
45
|
},
|
|
43
46
|
"iso_of": {
|
|
44
47
|
"dtype": "pl.Int64"
|
|
45
48
|
},
|
|
46
|
-
"
|
|
47
|
-
"dtype": "pl.
|
|
48
|
-
},
|
|
49
|
-
"ms2_specs": {
|
|
50
|
-
"dtype": "pl.Object"
|
|
49
|
+
"adduct_group": {
|
|
50
|
+
"dtype": "pl.Int64"
|
|
51
51
|
},
|
|
52
|
-
"
|
|
53
|
-
"dtype": "pl.
|
|
52
|
+
"adduct": {
|
|
53
|
+
"dtype": "pl.Utf8"
|
|
54
54
|
},
|
|
55
|
-
"
|
|
55
|
+
"adduct_mass": {
|
|
56
56
|
"dtype": "pl.Float64"
|
|
57
57
|
},
|
|
58
|
-
"
|
|
59
|
-
"dtype": "pl.
|
|
58
|
+
"chrom": {
|
|
59
|
+
"dtype": "pl.Object"
|
|
60
60
|
},
|
|
61
|
-
"
|
|
61
|
+
"chrom_coherence": {
|
|
62
62
|
"dtype": "pl.Float64"
|
|
63
63
|
},
|
|
64
|
-
"
|
|
64
|
+
"chrom_prominence": {
|
|
65
65
|
"dtype": "pl.Float64"
|
|
66
66
|
},
|
|
67
|
-
"
|
|
67
|
+
"chrom_prominence_scaled": {
|
|
68
68
|
"dtype": "pl.Float64"
|
|
69
69
|
},
|
|
70
|
-
"
|
|
70
|
+
"chrom_height_scaled": {
|
|
71
71
|
"dtype": "pl.Float64"
|
|
72
72
|
},
|
|
73
|
-
"
|
|
74
|
-
"dtype": "pl.
|
|
73
|
+
"ms2_scans": {
|
|
74
|
+
"dtype": "pl.Object"
|
|
75
75
|
},
|
|
76
|
-
"
|
|
77
|
-
"dtype": "pl.
|
|
76
|
+
"ms2_specs": {
|
|
77
|
+
"dtype": "pl.Object"
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
},
|
|
@@ -100,62 +100,80 @@
|
|
|
100
100
|
},
|
|
101
101
|
"scans_df": {
|
|
102
102
|
"columns": {
|
|
103
|
-
"
|
|
104
|
-
"dtype": "pl.
|
|
105
|
-
},
|
|
106
|
-
"comment": {
|
|
107
|
-
"dtype": "pl.Utf8"
|
|
103
|
+
"scan_uid": {
|
|
104
|
+
"dtype": "pl.Int64"
|
|
108
105
|
},
|
|
109
106
|
"cycle": {
|
|
110
107
|
"dtype": "pl.Int64"
|
|
111
108
|
},
|
|
112
|
-
"
|
|
113
|
-
"dtype": "pl.Float64"
|
|
114
|
-
},
|
|
115
|
-
"feature_uid": {
|
|
109
|
+
"ms_level": {
|
|
116
110
|
"dtype": "pl.Int64"
|
|
117
111
|
},
|
|
118
|
-
"
|
|
119
|
-
"dtype": "pl.
|
|
112
|
+
"rt": {
|
|
113
|
+
"dtype": "pl.Float64"
|
|
120
114
|
},
|
|
121
|
-
"
|
|
115
|
+
"inty_tot": {
|
|
122
116
|
"dtype": "pl.Float64"
|
|
123
117
|
},
|
|
124
118
|
"inty_min": {
|
|
125
119
|
"dtype": "pl.Float64"
|
|
126
120
|
},
|
|
127
|
-
"
|
|
121
|
+
"inty_max": {
|
|
128
122
|
"dtype": "pl.Float64"
|
|
129
123
|
},
|
|
130
|
-
"
|
|
131
|
-
"dtype": "pl.Int64"
|
|
132
|
-
},
|
|
133
|
-
"mz_max": {
|
|
124
|
+
"bl": {
|
|
134
125
|
"dtype": "pl.Float64"
|
|
135
126
|
},
|
|
136
127
|
"mz_min": {
|
|
137
128
|
"dtype": "pl.Float64"
|
|
138
129
|
},
|
|
130
|
+
"mz_max": {
|
|
131
|
+
"dtype": "pl.Float64"
|
|
132
|
+
},
|
|
133
|
+
"comment": {
|
|
134
|
+
"dtype": "pl.Utf8"
|
|
135
|
+
},
|
|
139
136
|
"name": {
|
|
140
137
|
"dtype": "pl.Utf8"
|
|
141
138
|
},
|
|
142
|
-
"
|
|
143
|
-
"dtype": "pl.
|
|
139
|
+
"id": {
|
|
140
|
+
"dtype": "pl.Utf8"
|
|
144
141
|
},
|
|
145
142
|
"prec_mz": {
|
|
146
143
|
"dtype": "pl.Float64"
|
|
147
144
|
},
|
|
145
|
+
"prec_mz_min": {
|
|
146
|
+
"dtype": "pl.Float64"
|
|
147
|
+
},
|
|
148
148
|
"prec_mz_max": {
|
|
149
149
|
"dtype": "pl.Float64"
|
|
150
150
|
},
|
|
151
|
-
"
|
|
151
|
+
"prec_inty": {
|
|
152
152
|
"dtype": "pl.Float64"
|
|
153
153
|
},
|
|
154
|
-
"
|
|
154
|
+
"energy": {
|
|
155
155
|
"dtype": "pl.Float64"
|
|
156
156
|
},
|
|
157
|
-
"
|
|
157
|
+
"feature_uid": {
|
|
158
158
|
"dtype": "pl.Int64"
|
|
159
|
+
},
|
|
160
|
+
"ms2_n": {
|
|
161
|
+
"dtype": "pl.Int64"
|
|
162
|
+
},
|
|
163
|
+
"time_cycle": {
|
|
164
|
+
"dtype": "pl.Float64"
|
|
165
|
+
},
|
|
166
|
+
"time_ms1_to_ms1": {
|
|
167
|
+
"dtype": "pl.Float64"
|
|
168
|
+
},
|
|
169
|
+
"time_ms1_to_ms2": {
|
|
170
|
+
"dtype": "pl.Float64"
|
|
171
|
+
},
|
|
172
|
+
"time_ms2_to_ms2": {
|
|
173
|
+
"dtype": "pl.Float64"
|
|
174
|
+
},
|
|
175
|
+
"time_ms2_to_ms1": {
|
|
176
|
+
"dtype": "pl.Float64"
|
|
159
177
|
}
|
|
160
178
|
}
|
|
161
179
|
},
|
|
@@ -530,7 +530,6 @@ def _save_study5(self, filename=None):
|
|
|
530
530
|
_save_dataframe_column(consensus_ms2_group, col, data, dtype, self.logger)
|
|
531
531
|
|
|
532
532
|
self.logger.info(f"Study saved to {filename}")
|
|
533
|
-
self.logger.info(f"Study saved to {filename}")
|
|
534
533
|
|
|
535
534
|
|
|
536
535
|
def _load_study5(self, filename=None):
|
|
@@ -114,45 +114,6 @@ def get_chrom(self, uids=None, samples=None):
|
|
|
114
114
|
# Return as Polars DataFrame (can handle complex objects like Chromatogram)
|
|
115
115
|
return df2_pivoted
|
|
116
116
|
|
|
117
|
-
'''
|
|
118
|
-
def migrate_adduct_columns(self):
|
|
119
|
-
"""
|
|
120
|
-
Migrate adduct_right and adduct_mass_right columns to adduct and adduct_mass.
|
|
121
|
-
This fixes an issue where join operations created _right suffixed columns.
|
|
122
|
-
"""
|
|
123
|
-
if self.features_df.is_empty():
|
|
124
|
-
return
|
|
125
|
-
|
|
126
|
-
# Check if we have the _right suffixed columns
|
|
127
|
-
has_adduct_right = "adduct_right" in self.features_df.columns
|
|
128
|
-
has_adduct_mass_right = "adduct_mass_right" in self.features_df.columns
|
|
129
|
-
has_adduct = "adduct" in self.features_df.columns
|
|
130
|
-
has_adduct_mass = "adduct_mass" in self.features_df.columns
|
|
131
|
-
|
|
132
|
-
if has_adduct_right or has_adduct_mass_right:
|
|
133
|
-
self.logger.info("Migrating adduct column names...")
|
|
134
|
-
|
|
135
|
-
# Start with all columns except those we're replacing/dropping
|
|
136
|
-
columns_to_keep = [
|
|
137
|
-
col
|
|
138
|
-
for col in self.features_df.columns
|
|
139
|
-
if col not in ["adduct_right", "adduct_mass_right", "adduct", "adduct_mass"]
|
|
140
|
-
]
|
|
141
|
-
|
|
142
|
-
# Add the migrated columns
|
|
143
|
-
if has_adduct_right:
|
|
144
|
-
columns_to_keep.append(pl.col("adduct_right").alias("adduct"))
|
|
145
|
-
if has_adduct_mass_right:
|
|
146
|
-
columns_to_keep.append(pl.col("adduct_mass_right").alias("adduct_mass"))
|
|
147
|
-
|
|
148
|
-
# Apply the migration
|
|
149
|
-
self.features_df = self.features_df.select(columns_to_keep)
|
|
150
|
-
|
|
151
|
-
self.logger.success("Adduct column migration completed.")
|
|
152
|
-
else:
|
|
153
|
-
self.logger.info("No adduct column migration needed.")
|
|
154
|
-
'''
|
|
155
|
-
|
|
156
117
|
def set_default_folder(self, folder):
|
|
157
118
|
"""
|
|
158
119
|
Set the default folder for saving and loading files.
|
|
@@ -448,6 +409,12 @@ def _get_sample_uids(self, samples=None, seed=42):
|
|
|
448
409
|
sample_uids = list(set(sample_uids))
|
|
449
410
|
return sample_uids
|
|
450
411
|
|
|
412
|
+
def get_orphans(self):
|
|
413
|
+
"""
|
|
414
|
+
Get all features that are not in the consensus mapping.
|
|
415
|
+
"""
|
|
416
|
+
not_in_consensus = self.features_df.filter(~self.features_df['feature_uid'].is_in(self.consensus_mapping_df['feature_uid'].to_list()))
|
|
417
|
+
return not_in_consensus
|
|
451
418
|
|
|
452
419
|
def compress(self):
|
|
453
420
|
"""
|
|
@@ -63,6 +63,7 @@ from masster.study.helpers import get_chrom
|
|
|
63
63
|
from masster.study.helpers import get_consensus
|
|
64
64
|
from masster.study.helpers import get_consensus_matches
|
|
65
65
|
from masster.study.helpers import get_consensus_matrix
|
|
66
|
+
from masster.study.helpers import get_orphans
|
|
66
67
|
from masster.study.helpers import get_gaps_matrix
|
|
67
68
|
from masster.study.helpers import get_gaps_stats
|
|
68
69
|
from masster.study.helpers import align_reset
|
|
@@ -273,6 +274,7 @@ class Study:
|
|
|
273
274
|
get_consensus_matrix = get_consensus_matrix
|
|
274
275
|
get_gaps_matrix = get_gaps_matrix
|
|
275
276
|
get_gaps_stats = get_gaps_stats
|
|
277
|
+
get_orphans = get_orphans
|
|
276
278
|
set_default_folder = set_default_folder
|
|
277
279
|
fill_chrom_parallel = fill_chrom_parallel
|
|
278
280
|
_process_sample_for_parallel_fill = _process_sample_for_parallel_fill
|
|
@@ -423,6 +425,12 @@ class Study:
|
|
|
423
425
|
mean_samples = 0
|
|
424
426
|
max_samples = 0
|
|
425
427
|
|
|
428
|
+
# Count only features where 'filled' == False
|
|
429
|
+
if not self.features_df.is_empty() and 'filled' in self.features_df.columns:
|
|
430
|
+
unfilled_features_count = self.features_df.filter(~self.features_df['filled']).height
|
|
431
|
+
else:
|
|
432
|
+
unfilled_features_count = 0
|
|
433
|
+
|
|
426
434
|
# Optimize chrom completeness calculation
|
|
427
435
|
if consensus_df_len > 0 and samples_df_len > 0 and not self.features_df.is_empty():
|
|
428
436
|
|
|
@@ -441,8 +449,17 @@ class Study:
|
|
|
441
449
|
chrom_completeness = (
|
|
442
450
|
non_null_chroms / total_possible if total_possible > 0 else 0
|
|
443
451
|
)
|
|
452
|
+
not_in_consensus = len(self.features_df.filter(~self.features_df['feature_uid'].is_in(self.consensus_mapping_df['feature_uid'].to_list())))
|
|
453
|
+
ratio_not_in_consensus_to_total = not_in_consensus / unfilled_features_count if unfilled_features_count > 0 else 0
|
|
454
|
+
ratio_in_consensus_to_total = (unfilled_features_count- not_in_consensus) / len(self.features_df) if len(self.features_df) > 0 else 0
|
|
455
|
+
|
|
444
456
|
else:
|
|
445
457
|
chrom_completeness = 0
|
|
458
|
+
not_in_consensus = 0
|
|
459
|
+
ratio_not_in_consensus_to_total = 0
|
|
460
|
+
ratio_in_consensus_to_total = 0
|
|
461
|
+
|
|
462
|
+
|
|
446
463
|
|
|
447
464
|
# calculate for how many consensus features there is at least one MS2 spectrum linked
|
|
448
465
|
consensus_with_ms2 = self.consensus_ms2.select(
|
|
@@ -458,17 +475,18 @@ class Study:
|
|
|
458
475
|
self.consensus_mapping_df.estimated_size()
|
|
459
476
|
)
|
|
460
477
|
|
|
461
|
-
# Build summary string efficiently
|
|
462
478
|
summary = (
|
|
463
479
|
f"Default folder: {self.default_folder}\n"
|
|
464
|
-
f"Consensus features: {consensus_df_len}\n"
|
|
465
480
|
f"Samples: {samples_df_len}\n"
|
|
466
|
-
f"
|
|
467
|
-
f"
|
|
468
|
-
f"
|
|
469
|
-
f"
|
|
470
|
-
f"
|
|
471
|
-
f"
|
|
481
|
+
f"Features: {unfilled_features_count}\n"
|
|
482
|
+
f"- in consensus: {ratio_in_consensus_to_total*100:.0f}%\n"
|
|
483
|
+
f"- non in consensus: {ratio_not_in_consensus_to_total*100:.0f}%\n"
|
|
484
|
+
f"Consensus: {consensus_df_len}\n"
|
|
485
|
+
f"- Min samples count: {min_samples:.0f}\n"
|
|
486
|
+
f"- Mean samples count: {mean_samples:.0f}\n"
|
|
487
|
+
f"- Max samples count: {max_samples:.0f}\n"
|
|
488
|
+
f"- with MS2: {consensus_with_ms2}\n"
|
|
489
|
+
f"Chrom completeness: {chrom_completeness*100:.0f}%\n"
|
|
472
490
|
f"Memory usage: {memory_usage / (1024 ** 2):.2f} MB\n"
|
|
473
491
|
)
|
|
474
492
|
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|