masster 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of masster might be problematic. Click here for more details.

masster/_version.py CHANGED
@@ -1,7 +1,7 @@
1
1
  from __future__ import annotations
2
2
 
3
3
 
4
- __version__ = "0.2.0"
4
+ __version__ = "0.2.2"
5
5
 
6
6
 
7
7
  def get_version():
masster/sample/h5.py CHANGED
@@ -309,7 +309,7 @@ def _load_sample5(self, filename: str, map: bool = True):
309
309
  missing_columns = []
310
310
  for col in schema.get("scans_df", {}).get("columns", []):
311
311
  if col not in scans_group:
312
- self.logger.warning(f"Column '{col}' not found in sample5/scans.")
312
+ self.logger.debug(f"Column '{col}' not found in sample5/scans.")
313
313
  data[col] = None
314
314
  missing_columns.append(col)
315
315
  continue
@@ -444,6 +444,14 @@ def _load_sample5(self, filename: str, map: bool = True):
444
444
  f"Column '{col}' in scans_df not found in schema, keeping original type.",
445
445
  )
446
446
 
447
+ # Ensure column order matches schema order
448
+ if "scans_df" in schema and "columns" in schema["scans_df"]:
449
+ schema_column_order = list(schema["scans_df"]["columns"].keys())
450
+ # Only reorder columns that exist in both schema and DataFrame
451
+ existing_columns = [col for col in schema_column_order if col in self.scans_df.columns]
452
+ if existing_columns:
453
+ self.scans_df = self.scans_df.select(existing_columns)
454
+
447
455
  else:
448
456
  self.scans_df = None
449
457
  else:
@@ -457,7 +465,7 @@ def _load_sample5(self, filename: str, map: bool = True):
457
465
  missing_columns = []
458
466
  for col in schema.get("features_df", {}).get("columns", []):
459
467
  if col not in features_group:
460
- self.logger.warning(
468
+ self.logger.debug(
461
469
  f"Column '{col}' not found in sample5/features.",
462
470
  )
463
471
  data[col] = None
@@ -804,6 +812,14 @@ def _load_sample5(self, filename: str, map: bool = True):
804
812
  pl.col(col).fill_nan(None).alias(col),
805
813
  ])
806
814
 
815
+ # Ensure column order matches schema order
816
+ if "features_df" in schema and "columns" in schema["features_df"]:
817
+ schema_column_order = list(schema["features_df"]["columns"].keys())
818
+ # Only reorder columns that exist in both schema and DataFrame
819
+ existing_columns = [col for col in schema_column_order if col in self.features_df.columns]
820
+ if existing_columns:
821
+ self.features_df = self.features_df.select(existing_columns)
822
+
807
823
  else:
808
824
  self.features_df = None
809
825
  else:
@@ -1,80 +1,80 @@
1
1
  {
2
2
  "features_df": {
3
3
  "columns": {
4
- "adduct": {
5
- "dtype": "pl.Utf8"
6
- },
7
- "adduct_group": {
4
+ "feature_uid": {
8
5
  "dtype": "pl.Int64"
9
6
  },
10
- "adduct_mass": {
11
- "dtype": "pl.Float64"
7
+ "feature_id": {
8
+ "dtype": "pl.Utf8"
12
9
  },
13
- "charge": {
14
- "dtype": "pl.Int32"
10
+ "mz": {
11
+ "dtype": "pl.Float64"
15
12
  },
16
- "chrom": {
17
- "dtype": "pl.Object"
13
+ "rt": {
14
+ "dtype": "pl.Float64"
18
15
  },
19
- "chrom_coherence": {
16
+ "rt_original": {
20
17
  "dtype": "pl.Float64"
21
18
  },
22
- "chrom_height_scaled": {
19
+ "rt_start": {
23
20
  "dtype": "pl.Float64"
24
21
  },
25
- "chrom_prominence": {
22
+ "rt_end": {
26
23
  "dtype": "pl.Float64"
27
24
  },
28
- "chrom_prominence_scaled": {
25
+ "rt_delta": {
29
26
  "dtype": "pl.Float64"
30
27
  },
31
- "feature_id": {
32
- "dtype": "pl.Utf8"
28
+ "mz_start": {
29
+ "dtype": "pl.Float64"
33
30
  },
34
- "feature_uid": {
35
- "dtype": "pl.Int64"
31
+ "mz_end": {
32
+ "dtype": "pl.Float64"
36
33
  },
37
34
  "inty": {
38
35
  "dtype": "pl.Float64"
39
36
  },
37
+ "quality": {
38
+ "dtype": "pl.Float64"
39
+ },
40
+ "charge": {
41
+ "dtype": "pl.Int32"
42
+ },
40
43
  "iso": {
41
44
  "dtype": "pl.Int64"
42
45
  },
43
46
  "iso_of": {
44
47
  "dtype": "pl.Int64"
45
48
  },
46
- "ms2_scans": {
47
- "dtype": "pl.Object"
48
- },
49
- "ms2_specs": {
50
- "dtype": "pl.Object"
49
+ "adduct_group": {
50
+ "dtype": "pl.Int64"
51
51
  },
52
- "mz": {
53
- "dtype": "pl.Float64"
52
+ "adduct": {
53
+ "dtype": "pl.Utf8"
54
54
  },
55
- "mz_end": {
55
+ "adduct_mass": {
56
56
  "dtype": "pl.Float64"
57
57
  },
58
- "mz_start": {
59
- "dtype": "pl.Float64"
58
+ "chrom": {
59
+ "dtype": "pl.Object"
60
60
  },
61
- "quality": {
61
+ "chrom_coherence": {
62
62
  "dtype": "pl.Float64"
63
63
  },
64
- "rt": {
64
+ "chrom_prominence": {
65
65
  "dtype": "pl.Float64"
66
66
  },
67
- "rt_delta": {
67
+ "chrom_prominence_scaled": {
68
68
  "dtype": "pl.Float64"
69
69
  },
70
- "rt_end": {
70
+ "chrom_height_scaled": {
71
71
  "dtype": "pl.Float64"
72
72
  },
73
- "rt_original": {
74
- "dtype": "pl.Float64"
73
+ "ms2_scans": {
74
+ "dtype": "pl.Object"
75
75
  },
76
- "rt_start": {
77
- "dtype": "pl.Float64"
76
+ "ms2_specs": {
77
+ "dtype": "pl.Object"
78
78
  }
79
79
  }
80
80
  },
@@ -100,62 +100,80 @@
100
100
  },
101
101
  "scans_df": {
102
102
  "columns": {
103
- "bl": {
104
- "dtype": "pl.Float64"
105
- },
106
- "comment": {
107
- "dtype": "pl.Utf8"
103
+ "scan_uid": {
104
+ "dtype": "pl.Int64"
108
105
  },
109
106
  "cycle": {
110
107
  "dtype": "pl.Int64"
111
108
  },
112
- "energy": {
113
- "dtype": "pl.Float64"
114
- },
115
- "feature_uid": {
109
+ "ms_level": {
116
110
  "dtype": "pl.Int64"
117
111
  },
118
- "id": {
119
- "dtype": "pl.Utf8"
112
+ "rt": {
113
+ "dtype": "pl.Float64"
120
114
  },
121
- "inty_max": {
115
+ "inty_tot": {
122
116
  "dtype": "pl.Float64"
123
117
  },
124
118
  "inty_min": {
125
119
  "dtype": "pl.Float64"
126
120
  },
127
- "inty_tot": {
121
+ "inty_max": {
128
122
  "dtype": "pl.Float64"
129
123
  },
130
- "ms_level": {
131
- "dtype": "pl.Int64"
132
- },
133
- "mz_max": {
124
+ "bl": {
134
125
  "dtype": "pl.Float64"
135
126
  },
136
127
  "mz_min": {
137
128
  "dtype": "pl.Float64"
138
129
  },
130
+ "mz_max": {
131
+ "dtype": "pl.Float64"
132
+ },
133
+ "comment": {
134
+ "dtype": "pl.Utf8"
135
+ },
139
136
  "name": {
140
137
  "dtype": "pl.Utf8"
141
138
  },
142
- "prec_inty": {
143
- "dtype": "pl.Float64"
139
+ "id": {
140
+ "dtype": "pl.Utf8"
144
141
  },
145
142
  "prec_mz": {
146
143
  "dtype": "pl.Float64"
147
144
  },
145
+ "prec_mz_min": {
146
+ "dtype": "pl.Float64"
147
+ },
148
148
  "prec_mz_max": {
149
149
  "dtype": "pl.Float64"
150
150
  },
151
- "prec_mz_min": {
151
+ "prec_inty": {
152
152
  "dtype": "pl.Float64"
153
153
  },
154
- "rt": {
154
+ "energy": {
155
155
  "dtype": "pl.Float64"
156
156
  },
157
- "scan_uid": {
157
+ "feature_uid": {
158
158
  "dtype": "pl.Int64"
159
+ },
160
+ "ms2_n": {
161
+ "dtype": "pl.Int64"
162
+ },
163
+ "time_cycle": {
164
+ "dtype": "pl.Float64"
165
+ },
166
+ "time_ms1_to_ms1": {
167
+ "dtype": "pl.Float64"
168
+ },
169
+ "time_ms1_to_ms2": {
170
+ "dtype": "pl.Float64"
171
+ },
172
+ "time_ms2_to_ms2": {
173
+ "dtype": "pl.Float64"
174
+ },
175
+ "time_ms2_to_ms1": {
176
+ "dtype": "pl.Float64"
159
177
  }
160
178
  }
161
179
  },