masster 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of masster might be problematic. Click here for more details.
- masster/_version.py +1 -1
- masster/sample/h5.py +18 -2
- masster/sample/sample5_schema.json +76 -58
- masster/study/h5.py +317 -138
- masster/study/helpers.py +6 -39
- masster/study/load.py +23 -134
- masster/study/study.py +29 -11
- {masster-0.2.0.dist-info → masster-0.2.2.dist-info}/METADATA +31 -55
- {masster-0.2.0.dist-info → masster-0.2.2.dist-info}/RECORD +12 -12
- {masster-0.2.0.dist-info → masster-0.2.2.dist-info}/WHEEL +0 -0
- {masster-0.2.0.dist-info → masster-0.2.2.dist-info}/entry_points.txt +0 -0
- {masster-0.2.0.dist-info → masster-0.2.2.dist-info}/licenses/LICENSE +0 -0
masster/_version.py
CHANGED
masster/sample/h5.py
CHANGED
|
@@ -309,7 +309,7 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
309
309
|
missing_columns = []
|
|
310
310
|
for col in schema.get("scans_df", {}).get("columns", []):
|
|
311
311
|
if col not in scans_group:
|
|
312
|
-
self.logger.
|
|
312
|
+
self.logger.debug(f"Column '{col}' not found in sample5/scans.")
|
|
313
313
|
data[col] = None
|
|
314
314
|
missing_columns.append(col)
|
|
315
315
|
continue
|
|
@@ -444,6 +444,14 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
444
444
|
f"Column '{col}' in scans_df not found in schema, keeping original type.",
|
|
445
445
|
)
|
|
446
446
|
|
|
447
|
+
# Ensure column order matches schema order
|
|
448
|
+
if "scans_df" in schema and "columns" in schema["scans_df"]:
|
|
449
|
+
schema_column_order = list(schema["scans_df"]["columns"].keys())
|
|
450
|
+
# Only reorder columns that exist in both schema and DataFrame
|
|
451
|
+
existing_columns = [col for col in schema_column_order if col in self.scans_df.columns]
|
|
452
|
+
if existing_columns:
|
|
453
|
+
self.scans_df = self.scans_df.select(existing_columns)
|
|
454
|
+
|
|
447
455
|
else:
|
|
448
456
|
self.scans_df = None
|
|
449
457
|
else:
|
|
@@ -457,7 +465,7 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
457
465
|
missing_columns = []
|
|
458
466
|
for col in schema.get("features_df", {}).get("columns", []):
|
|
459
467
|
if col not in features_group:
|
|
460
|
-
self.logger.
|
|
468
|
+
self.logger.debug(
|
|
461
469
|
f"Column '{col}' not found in sample5/features.",
|
|
462
470
|
)
|
|
463
471
|
data[col] = None
|
|
@@ -804,6 +812,14 @@ def _load_sample5(self, filename: str, map: bool = True):
|
|
|
804
812
|
pl.col(col).fill_nan(None).alias(col),
|
|
805
813
|
])
|
|
806
814
|
|
|
815
|
+
# Ensure column order matches schema order
|
|
816
|
+
if "features_df" in schema and "columns" in schema["features_df"]:
|
|
817
|
+
schema_column_order = list(schema["features_df"]["columns"].keys())
|
|
818
|
+
# Only reorder columns that exist in both schema and DataFrame
|
|
819
|
+
existing_columns = [col for col in schema_column_order if col in self.features_df.columns]
|
|
820
|
+
if existing_columns:
|
|
821
|
+
self.features_df = self.features_df.select(existing_columns)
|
|
822
|
+
|
|
807
823
|
else:
|
|
808
824
|
self.features_df = None
|
|
809
825
|
else:
|
|
@@ -1,80 +1,80 @@
|
|
|
1
1
|
{
|
|
2
2
|
"features_df": {
|
|
3
3
|
"columns": {
|
|
4
|
-
"
|
|
5
|
-
"dtype": "pl.Utf8"
|
|
6
|
-
},
|
|
7
|
-
"adduct_group": {
|
|
4
|
+
"feature_uid": {
|
|
8
5
|
"dtype": "pl.Int64"
|
|
9
6
|
},
|
|
10
|
-
"
|
|
11
|
-
"dtype": "pl.
|
|
7
|
+
"feature_id": {
|
|
8
|
+
"dtype": "pl.Utf8"
|
|
12
9
|
},
|
|
13
|
-
"
|
|
14
|
-
"dtype": "pl.
|
|
10
|
+
"mz": {
|
|
11
|
+
"dtype": "pl.Float64"
|
|
15
12
|
},
|
|
16
|
-
"
|
|
17
|
-
"dtype": "pl.
|
|
13
|
+
"rt": {
|
|
14
|
+
"dtype": "pl.Float64"
|
|
18
15
|
},
|
|
19
|
-
"
|
|
16
|
+
"rt_original": {
|
|
20
17
|
"dtype": "pl.Float64"
|
|
21
18
|
},
|
|
22
|
-
"
|
|
19
|
+
"rt_start": {
|
|
23
20
|
"dtype": "pl.Float64"
|
|
24
21
|
},
|
|
25
|
-
"
|
|
22
|
+
"rt_end": {
|
|
26
23
|
"dtype": "pl.Float64"
|
|
27
24
|
},
|
|
28
|
-
"
|
|
25
|
+
"rt_delta": {
|
|
29
26
|
"dtype": "pl.Float64"
|
|
30
27
|
},
|
|
31
|
-
"
|
|
32
|
-
"dtype": "pl.
|
|
28
|
+
"mz_start": {
|
|
29
|
+
"dtype": "pl.Float64"
|
|
33
30
|
},
|
|
34
|
-
"
|
|
35
|
-
"dtype": "pl.
|
|
31
|
+
"mz_end": {
|
|
32
|
+
"dtype": "pl.Float64"
|
|
36
33
|
},
|
|
37
34
|
"inty": {
|
|
38
35
|
"dtype": "pl.Float64"
|
|
39
36
|
},
|
|
37
|
+
"quality": {
|
|
38
|
+
"dtype": "pl.Float64"
|
|
39
|
+
},
|
|
40
|
+
"charge": {
|
|
41
|
+
"dtype": "pl.Int32"
|
|
42
|
+
},
|
|
40
43
|
"iso": {
|
|
41
44
|
"dtype": "pl.Int64"
|
|
42
45
|
},
|
|
43
46
|
"iso_of": {
|
|
44
47
|
"dtype": "pl.Int64"
|
|
45
48
|
},
|
|
46
|
-
"
|
|
47
|
-
"dtype": "pl.
|
|
48
|
-
},
|
|
49
|
-
"ms2_specs": {
|
|
50
|
-
"dtype": "pl.Object"
|
|
49
|
+
"adduct_group": {
|
|
50
|
+
"dtype": "pl.Int64"
|
|
51
51
|
},
|
|
52
|
-
"
|
|
53
|
-
"dtype": "pl.
|
|
52
|
+
"adduct": {
|
|
53
|
+
"dtype": "pl.Utf8"
|
|
54
54
|
},
|
|
55
|
-
"
|
|
55
|
+
"adduct_mass": {
|
|
56
56
|
"dtype": "pl.Float64"
|
|
57
57
|
},
|
|
58
|
-
"
|
|
59
|
-
"dtype": "pl.
|
|
58
|
+
"chrom": {
|
|
59
|
+
"dtype": "pl.Object"
|
|
60
60
|
},
|
|
61
|
-
"
|
|
61
|
+
"chrom_coherence": {
|
|
62
62
|
"dtype": "pl.Float64"
|
|
63
63
|
},
|
|
64
|
-
"
|
|
64
|
+
"chrom_prominence": {
|
|
65
65
|
"dtype": "pl.Float64"
|
|
66
66
|
},
|
|
67
|
-
"
|
|
67
|
+
"chrom_prominence_scaled": {
|
|
68
68
|
"dtype": "pl.Float64"
|
|
69
69
|
},
|
|
70
|
-
"
|
|
70
|
+
"chrom_height_scaled": {
|
|
71
71
|
"dtype": "pl.Float64"
|
|
72
72
|
},
|
|
73
|
-
"
|
|
74
|
-
"dtype": "pl.
|
|
73
|
+
"ms2_scans": {
|
|
74
|
+
"dtype": "pl.Object"
|
|
75
75
|
},
|
|
76
|
-
"
|
|
77
|
-
"dtype": "pl.
|
|
76
|
+
"ms2_specs": {
|
|
77
|
+
"dtype": "pl.Object"
|
|
78
78
|
}
|
|
79
79
|
}
|
|
80
80
|
},
|
|
@@ -100,62 +100,80 @@
|
|
|
100
100
|
},
|
|
101
101
|
"scans_df": {
|
|
102
102
|
"columns": {
|
|
103
|
-
"
|
|
104
|
-
"dtype": "pl.
|
|
105
|
-
},
|
|
106
|
-
"comment": {
|
|
107
|
-
"dtype": "pl.Utf8"
|
|
103
|
+
"scan_uid": {
|
|
104
|
+
"dtype": "pl.Int64"
|
|
108
105
|
},
|
|
109
106
|
"cycle": {
|
|
110
107
|
"dtype": "pl.Int64"
|
|
111
108
|
},
|
|
112
|
-
"
|
|
113
|
-
"dtype": "pl.Float64"
|
|
114
|
-
},
|
|
115
|
-
"feature_uid": {
|
|
109
|
+
"ms_level": {
|
|
116
110
|
"dtype": "pl.Int64"
|
|
117
111
|
},
|
|
118
|
-
"
|
|
119
|
-
"dtype": "pl.
|
|
112
|
+
"rt": {
|
|
113
|
+
"dtype": "pl.Float64"
|
|
120
114
|
},
|
|
121
|
-
"
|
|
115
|
+
"inty_tot": {
|
|
122
116
|
"dtype": "pl.Float64"
|
|
123
117
|
},
|
|
124
118
|
"inty_min": {
|
|
125
119
|
"dtype": "pl.Float64"
|
|
126
120
|
},
|
|
127
|
-
"
|
|
121
|
+
"inty_max": {
|
|
128
122
|
"dtype": "pl.Float64"
|
|
129
123
|
},
|
|
130
|
-
"
|
|
131
|
-
"dtype": "pl.Int64"
|
|
132
|
-
},
|
|
133
|
-
"mz_max": {
|
|
124
|
+
"bl": {
|
|
134
125
|
"dtype": "pl.Float64"
|
|
135
126
|
},
|
|
136
127
|
"mz_min": {
|
|
137
128
|
"dtype": "pl.Float64"
|
|
138
129
|
},
|
|
130
|
+
"mz_max": {
|
|
131
|
+
"dtype": "pl.Float64"
|
|
132
|
+
},
|
|
133
|
+
"comment": {
|
|
134
|
+
"dtype": "pl.Utf8"
|
|
135
|
+
},
|
|
139
136
|
"name": {
|
|
140
137
|
"dtype": "pl.Utf8"
|
|
141
138
|
},
|
|
142
|
-
"
|
|
143
|
-
"dtype": "pl.
|
|
139
|
+
"id": {
|
|
140
|
+
"dtype": "pl.Utf8"
|
|
144
141
|
},
|
|
145
142
|
"prec_mz": {
|
|
146
143
|
"dtype": "pl.Float64"
|
|
147
144
|
},
|
|
145
|
+
"prec_mz_min": {
|
|
146
|
+
"dtype": "pl.Float64"
|
|
147
|
+
},
|
|
148
148
|
"prec_mz_max": {
|
|
149
149
|
"dtype": "pl.Float64"
|
|
150
150
|
},
|
|
151
|
-
"
|
|
151
|
+
"prec_inty": {
|
|
152
152
|
"dtype": "pl.Float64"
|
|
153
153
|
},
|
|
154
|
-
"
|
|
154
|
+
"energy": {
|
|
155
155
|
"dtype": "pl.Float64"
|
|
156
156
|
},
|
|
157
|
-
"
|
|
157
|
+
"feature_uid": {
|
|
158
158
|
"dtype": "pl.Int64"
|
|
159
|
+
},
|
|
160
|
+
"ms2_n": {
|
|
161
|
+
"dtype": "pl.Int64"
|
|
162
|
+
},
|
|
163
|
+
"time_cycle": {
|
|
164
|
+
"dtype": "pl.Float64"
|
|
165
|
+
},
|
|
166
|
+
"time_ms1_to_ms1": {
|
|
167
|
+
"dtype": "pl.Float64"
|
|
168
|
+
},
|
|
169
|
+
"time_ms1_to_ms2": {
|
|
170
|
+
"dtype": "pl.Float64"
|
|
171
|
+
},
|
|
172
|
+
"time_ms2_to_ms2": {
|
|
173
|
+
"dtype": "pl.Float64"
|
|
174
|
+
},
|
|
175
|
+
"time_ms2_to_ms1": {
|
|
176
|
+
"dtype": "pl.Float64"
|
|
159
177
|
}
|
|
160
178
|
}
|
|
161
179
|
},
|