@sjcrh/proteinpaint-rust 2.171.0 → 2.175.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/topGeneByExpressionVariance.rs +12 -279
package/package.json
CHANGED
package/src/topGeneByExpressionVariance.rs
CHANGED
|
@@ -34,7 +34,7 @@ use std::io;
|
|
|
34
34
|
use std::io::Read;
|
|
35
35
|
use std::str::FromStr;
|
|
36
36
|
// use std::time::Instant;
|
|
37
|
-
use hdf5::types::{VarLenAscii, VarLenUnicode};
|
|
37
|
+
use hdf5::types::VarLenUnicode;
|
|
38
38
|
use hdf5::{File, Result};
|
|
39
39
|
use ndarray::Dim;
|
|
40
40
|
|
|
@@ -57,259 +57,6 @@ use ndarray::Dim;
|
|
|
57
57
|
fn input_data_hdf5(
|
|
58
58
|
filename: &String,
|
|
59
59
|
sample_list: &Vec<&str>,
|
|
60
|
-
) -> Result<(Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>, Vec<String>)> {
|
|
61
|
-
// let now = Instant::now();
|
|
62
|
-
// eprintln!("Reading HDF5 file: {}", filename);
|
|
63
|
-
|
|
64
|
-
// Open the HDF5 file
|
|
65
|
-
let file = match File::open(filename) {
|
|
66
|
-
Ok(f) => f,
|
|
67
|
-
Err(err) => {
|
|
68
|
-
// eprintln!("Failed to open HDF5 file: {}", err);
|
|
69
|
-
// println!(
|
|
70
|
-
// "{}",
|
|
71
|
-
// serde_json::json!({
|
|
72
|
-
// "status": "error",
|
|
73
|
-
// "message": format!("Failed to open HDF5 file: {}", err),
|
|
74
|
-
// "file_path": filename
|
|
75
|
-
// })
|
|
76
|
-
// );
|
|
77
|
-
return Err(hdf5::Error::Internal(format!("Failed to open HDF5 file: {}", err)));
|
|
78
|
-
}
|
|
79
|
-
};
|
|
80
|
-
|
|
81
|
-
// Read gene symbols dataset
|
|
82
|
-
let genes_dataset = match file.dataset("gene_names") {
|
|
83
|
-
Ok(ds) => ds,
|
|
84
|
-
Err(err) => {
|
|
85
|
-
// eprintln!("Failed to open gene_names dataset: {}", err);
|
|
86
|
-
// println!(
|
|
87
|
-
// "{}",
|
|
88
|
-
// serde_json::json!({
|
|
89
|
-
// "status": "error",
|
|
90
|
-
// "message": format!("Failed to open gene_names dataset: {}", err),
|
|
91
|
-
// "file_path": filename
|
|
92
|
-
// })
|
|
93
|
-
// );
|
|
94
|
-
return Err(hdf5::Error::Internal(format!(
|
|
95
|
-
"Failed to open gene_names dataset: {}",
|
|
96
|
-
err
|
|
97
|
-
)));
|
|
98
|
-
}
|
|
99
|
-
};
|
|
100
|
-
|
|
101
|
-
// Read genes as VarLenAscii
|
|
102
|
-
let genes_varlen = match genes_dataset.read_1d::<VarLenAscii>() {
|
|
103
|
-
Ok(g) => g,
|
|
104
|
-
Err(err) => {
|
|
105
|
-
// eprintln!("Failed to read gene symbols: {}", err);
|
|
106
|
-
// println!(
|
|
107
|
-
// "{}",
|
|
108
|
-
// serde_json::json!({
|
|
109
|
-
// "status": "error",
|
|
110
|
-
// "message": format!("Failed to read gene symbols: {}", err),
|
|
111
|
-
// "file_path": filename
|
|
112
|
-
// })
|
|
113
|
-
// );
|
|
114
|
-
return Err(hdf5::Error::Internal(format!("Failed to read gene symbols: {}", err)));
|
|
115
|
-
}
|
|
116
|
-
};
|
|
117
|
-
|
|
118
|
-
// Convert to Vec<String> for easier handling
|
|
119
|
-
let gene_names: Vec<String> = genes_varlen.iter().map(|g| g.to_string()).collect();
|
|
120
|
-
let num_genes = gene_names.len();
|
|
121
|
-
// eprintln!("Found {} gene symbols", num_genes);
|
|
122
|
-
|
|
123
|
-
// Read sample names
|
|
124
|
-
let samples_dataset = match file.dataset("samples") {
|
|
125
|
-
Ok(ds) => ds,
|
|
126
|
-
Err(err) => {
|
|
127
|
-
// eprintln!("Failed to open samples dataset: {}", err);
|
|
128
|
-
println!(
|
|
129
|
-
"{}",
|
|
130
|
-
serde_json::json!({
|
|
131
|
-
"status": "error",
|
|
132
|
-
"message": format!("Failed to open samples dataset: {}", err),
|
|
133
|
-
"file_path": filename
|
|
134
|
-
})
|
|
135
|
-
);
|
|
136
|
-
return Err(hdf5::Error::Internal(format!(
|
|
137
|
-
"Failed to open samples dataset: {}",
|
|
138
|
-
err
|
|
139
|
-
)));
|
|
140
|
-
}
|
|
141
|
-
};
|
|
142
|
-
|
|
143
|
-
// Read samples as VarLenAscii
|
|
144
|
-
let samples_varlen = match samples_dataset.read_1d::<VarLenAscii>() {
|
|
145
|
-
Ok(s) => s,
|
|
146
|
-
Err(err) => {
|
|
147
|
-
// eprintln!("Failed to read sample names: {}", err);
|
|
148
|
-
println!(
|
|
149
|
-
"{}",
|
|
150
|
-
serde_json::json!({
|
|
151
|
-
"status": "error",
|
|
152
|
-
"message": format!("Failed to read sample names: {}", err),
|
|
153
|
-
"file_path": filename
|
|
154
|
-
})
|
|
155
|
-
);
|
|
156
|
-
return Err(hdf5::Error::Internal(format!("Failed to read sample names: {}", err)));
|
|
157
|
-
}
|
|
158
|
-
};
|
|
159
|
-
|
|
160
|
-
// Convert to Vec<String> for easier handling
|
|
161
|
-
let all_samples: Vec<String> = samples_varlen.iter().map(|s| s.to_string()).collect();
|
|
162
|
-
// eprintln!("Found {} total samples", all_samples.len());
|
|
163
|
-
|
|
164
|
-
// Find indices of requested samples
|
|
165
|
-
let mut column_indices: Vec<usize> = Vec::with_capacity(sample_list.len());
|
|
166
|
-
for sample in sample_list {
|
|
167
|
-
if let Some(index) = all_samples.iter().position(|s| s == sample) {
|
|
168
|
-
column_indices.push(index);
|
|
169
|
-
} else {
|
|
170
|
-
// eprintln!("Sample {} not found in the dataset", sample);
|
|
171
|
-
// println!(
|
|
172
|
-
// "{}",
|
|
173
|
-
// serde_json::json!({
|
|
174
|
-
// "status": "error",
|
|
175
|
-
// "message": format!("Sample '{}' not found in the dataset", sample),
|
|
176
|
-
// "file_path": filename,
|
|
177
|
-
// "available_samples": all_samples
|
|
178
|
-
// })
|
|
179
|
-
// );
|
|
180
|
-
return Err(hdf5::Error::Internal(format!(
|
|
181
|
-
"Sample '{}' not found in the dataset",
|
|
182
|
-
sample
|
|
183
|
-
)));
|
|
184
|
-
}
|
|
185
|
-
}
|
|
186
|
-
|
|
187
|
-
// Read the counts dataset
|
|
188
|
-
let counts_dataset = match file.dataset("counts") {
|
|
189
|
-
Ok(ds) => ds,
|
|
190
|
-
Err(err) => {
|
|
191
|
-
// eprintln!("Failed to open counts dataset: {}", err);
|
|
192
|
-
// println!(
|
|
193
|
-
// "{}",
|
|
194
|
-
// serde_json::json!({
|
|
195
|
-
// "status": "error",
|
|
196
|
-
// "message": format!("Failed to open counts dataset: {}", err),
|
|
197
|
-
// "file_path": filename
|
|
198
|
-
// })
|
|
199
|
-
// );
|
|
200
|
-
return Err(hdf5::Error::Internal(format!("Failed to open counts dataset: {}", err)));
|
|
201
|
-
}
|
|
202
|
-
};
|
|
203
|
-
|
|
204
|
-
// Get dataset dimensions for validation
|
|
205
|
-
let dataset_shape = counts_dataset.shape();
|
|
206
|
-
if dataset_shape.len() != 2 {
|
|
207
|
-
// eprintln!("Counts dataset does not have the expected 2D shape");
|
|
208
|
-
// println!(
|
|
209
|
-
// "{}",
|
|
210
|
-
// serde_json::json!({
|
|
211
|
-
// "status": "error",
|
|
212
|
-
// "message": "Expected a 2D dataset for counts",
|
|
213
|
-
// "file_path": filename,
|
|
214
|
-
// "actual_shape": dataset_shape
|
|
215
|
-
// })
|
|
216
|
-
// );
|
|
217
|
-
return Err(hdf5::Error::Internal("Expected a 2D dataset for counts".to_string()));
|
|
218
|
-
}
|
|
219
|
-
|
|
220
|
-
// Check dimensions match expected values
|
|
221
|
-
if dataset_shape[0] != num_genes {
|
|
222
|
-
// eprintln!(
|
|
223
|
-
// "Counts dataset first dimension ({}) doesn't match number of genes ({})",
|
|
224
|
-
// dataset_shape[0], num_genes
|
|
225
|
-
// );
|
|
226
|
-
// println!(
|
|
227
|
-
// "{}",
|
|
228
|
-
// serde_json::json!({
|
|
229
|
-
// "status": "error",
|
|
230
|
-
// "message": format!("Counts dataset first dimension ({}) doesn't match number of genes ({})",
|
|
231
|
-
// dataset_shape[0], num_genes),
|
|
232
|
-
// "file_path": filename
|
|
233
|
-
// })
|
|
234
|
-
// );
|
|
235
|
-
return Err(hdf5::Error::Internal(format!(
|
|
236
|
-
"Counts dataset first dimension ({}) doesn't match number of genes ({})",
|
|
237
|
-
dataset_shape[0], num_genes
|
|
238
|
-
)));
|
|
239
|
-
}
|
|
240
|
-
|
|
241
|
-
if dataset_shape[1] != all_samples.len() {
|
|
242
|
-
// eprintln!(
|
|
243
|
-
// "Counts dataset second dimension ({}) doesn't match number of samples ({})",
|
|
244
|
-
// dataset_shape[1],
|
|
245
|
-
// all_samples.len()
|
|
246
|
-
// );
|
|
247
|
-
// println!(
|
|
248
|
-
// "{}",
|
|
249
|
-
// serde_json::json!({
|
|
250
|
-
// "status": "error",
|
|
251
|
-
// "message": format!("Counts dataset second dimension ({}) doesn't match number of samples ({})",
|
|
252
|
-
// dataset_shape[1], all_samples.len()),
|
|
253
|
-
// "file_path": filename
|
|
254
|
-
// })
|
|
255
|
-
// );
|
|
256
|
-
return Err(hdf5::Error::Internal(format!(
|
|
257
|
-
"Counts dataset second dimension ({}) doesn't match number of samples ({})",
|
|
258
|
-
dataset_shape[1],
|
|
259
|
-
all_samples.len()
|
|
260
|
-
)));
|
|
261
|
-
}
|
|
262
|
-
|
|
263
|
-
// Read the counts dataset
|
|
264
|
-
let all_counts = match counts_dataset.read::<f64, Dim<[usize; 2]>>() {
|
|
265
|
-
Ok(data) => data,
|
|
266
|
-
Err(err) => {
|
|
267
|
-
// eprintln!("Failed to read expression data: {}", err);
|
|
268
|
-
// println!(
|
|
269
|
-
// "{}",
|
|
270
|
-
// serde_json::json!({
|
|
271
|
-
// "status": "error",
|
|
272
|
-
// "message": format!("Failed to read expression data: {}", err),
|
|
273
|
-
// "file_path": filename
|
|
274
|
-
// })
|
|
275
|
-
// );
|
|
276
|
-
return Err(hdf5::Error::Internal(format!(
|
|
277
|
-
"Failed to read expression data: {}",
|
|
278
|
-
err
|
|
279
|
-
)));
|
|
280
|
-
}
|
|
281
|
-
};
|
|
282
|
-
|
|
283
|
-
// Extract only the columns corresponding to the requested samples
|
|
284
|
-
// eprintln!(
|
|
285
|
-
// "Extracting data for {} requested samples",
|
|
286
|
-
// sample_list.len()
|
|
287
|
-
// );
|
|
288
|
-
let mut input_vector: Vec<f64> = Vec::with_capacity(num_genes * sample_list.len());
|
|
289
|
-
|
|
290
|
-
for gene_idx in 0..num_genes {
|
|
291
|
-
for &col_idx in &column_indices {
|
|
292
|
-
input_vector.push(all_counts[[gene_idx, col_idx]]);
|
|
293
|
-
}
|
|
294
|
-
}
|
|
295
|
-
|
|
296
|
-
// Create matrix from the extracted data
|
|
297
|
-
let dm = DMatrix::from_row_slice(num_genes, sample_list.len(), &input_vector);
|
|
298
|
-
|
|
299
|
-
// eprintln!("Time for reading HDF5 data: {:?}", now.elapsed());
|
|
300
|
-
// eprintln!(
|
|
301
|
-
// "Successfully extracted expression data matrix of size {}x{}",
|
|
302
|
-
// dm.nrows(),
|
|
303
|
-
// dm.ncols()
|
|
304
|
-
// );
|
|
305
|
-
|
|
306
|
-
Ok((dm, gene_names))
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
// Similar to input_data_hdf5, but specifically for new H5 format
|
|
310
|
-
fn input_data_hdf5_newformat(
|
|
311
|
-
filename: &String,
|
|
312
|
-
sample_list: &Vec<&str>,
|
|
313
60
|
) -> Result<(Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>, Vec<String>)> {
|
|
314
61
|
// Open the HDF5 file
|
|
315
62
|
let file = match File::open(filename) {
|
|
@@ -724,12 +471,12 @@ fn main() {
|
|
|
724
471
|
}
|
|
725
472
|
|
|
726
473
|
// Determine if the H5 file is new format
|
|
727
|
-
let new_format: bool = match &json_string {
|
|
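728
|
-
json::JsonValue::Object(ref obj) => {
|
|
729
|
-
obj.get("newformat").and_then(|v| v.as_bool()).map_or(false, |b| b)
|
|
730
|
-
}
|
|
731
|
-
_ => false,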
|
|
732
|
-
};
|
|
474
|
+
//let new_format: bool = match &json_string {
|
|
475
|
+
// json::JsonValue::Object(ref obj) => {
|
|
476
|
+
// obj.get("newformat").and_then(|v| v.as_bool()).map_or(false, |b| b)
|
|
477
|
+
// }
|
|
478
|
+
// _ => false,
|
|
479
|
+
//};
|
|
733
480
|
|
|
734
481
|
let rank_type = &json_string["rank_type"] // Value provided must be either "var" or "iqr"
|
|
735
482
|
.to_owned()
|
|
@@ -817,25 +564,11 @@ fn main() {
|
|
|
817
564
|
// eprintln!("Reading data from {} file: {}", file_type, file_name);
|
|
818
565
|
let (input_matrix, gene_names) = if file_type == "hdf5" {
|
|
819
566
|
// eprintln!("Using HDF5 reader function...");
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
823
|
-
|
|
824
|
-
|
|
825
|
-
return;
|
|
826
|
-
}
|
|
827
|
-
}
|
|
828
|
-
} else {
|
|
829
|
-
match input_data_hdf5(&file_name, &samples_list) {
|
|
830
|
-
Ok(result) => {
|
|
831
|
-
// eprintln!("Successfully read HDF5 data");
|
|
832
|
-
result
|
|
833
|
-
}
|
|
834
|
-
Err(err) => {
|
|
835
|
-
eprintln!("ERROR in HDF5 reader: {:?}", err);
|
|
836
|
-
// Error has already been printed to stdout in JSON format by the function
|
|
837
|
-
return;
|
|
838
|
-
}
|
|
567
|
+
match input_data_hdf5(&file_name, &samples_list) {
|
|
568
|
+
Ok(result) => result,
|
|
569
|
+
Err(err) => {
|
|
570
|
+
eprintln!("ERROR in HDF5 reader: {:?}", err);
|
|
571
|
+
return;
|
|
839
572
|
}
|
|
840
573
|
}
|
|
841
574
|
} else {
|