@sjcrh/proteinpaint-rust 2.117.0 → 2.119.0-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/DEanalysis.rs +51 -44
- package/src/topGeneByExpressionVariance.rs +20 -20
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.117.0",
|
|
2
|
+
"version": "2.119.0-0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"description": "Rust-based utilities for proteinpaint",
|
|
5
5
|
"main": "index.js",
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"tape": "^5.2.2"
|
|
40
40
|
},
|
|
41
|
-
"pp_release_tag": "v2.117.0"
|
|
41
|
+
"pp_release_tag": "v2.119.0-0"
|
|
42
42
|
}
|
package/src/DEanalysis.rs
CHANGED
|
@@ -86,6 +86,18 @@ fn input_data_from_HDF5(
|
|
|
86
86
|
//println!("num_samples bulk:{}", num_samples);
|
|
87
87
|
//println!("num_genes bulk:{}", num_genes);
|
|
88
88
|
|
|
89
|
+
//let now_gene_ids = Instant::now();
|
|
90
|
+
let ds_gene_ids = file.dataset("gene_ids").unwrap();
|
|
91
|
+
//println!("ds_gene_ids:{:?}", ds_gene_ids);
|
|
92
|
+
let gene_ids = ds_gene_ids
|
|
93
|
+
.read::<VarLenAscii, Dim<[usize; 1]>>()
|
|
94
|
+
.unwrap();
|
|
95
|
+
//println!("\tgene_ids = {:?}", gene_ids);
|
|
96
|
+
//println!("\tgene_ids.shape() = {:?}", gene_ids.shape());
|
|
97
|
+
//println!("\tgene_ids.strides() = {:?}", gene_ids.strides());
|
|
98
|
+
//println!("\tgene_ids.ndim() = {:?}", gene_ids.ndim());
|
|
99
|
+
//println!("Time for parsing gene names:{:?}", now_gene_ids.elapsed());
|
|
100
|
+
|
|
89
101
|
//let now_gene_names = Instant::now();
|
|
90
102
|
let ds_gene_names = file.dataset("gene_names").unwrap();
|
|
91
103
|
//println!("ds_gene_names:{:?}", ds_gene_names);
|
|
@@ -96,28 +108,16 @@ fn input_data_from_HDF5(
|
|
|
96
108
|
//println!("\tgene_names.shape() = {:?}", gene_names.shape());
|
|
97
109
|
//println!("\tgene_names.strides() = {:?}", gene_names.strides());
|
|
98
110
|
//println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
|
|
99
|
-
//println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
|
|
100
|
-
|
|
101
|
-
//let now_gene_symbols = Instant::now();
|
|
102
|
-
let ds_gene_symbols = file.dataset("gene_symbols").unwrap();
|
|
103
|
-
//println!("ds_gene_symbols:{:?}", ds_gene_symbols);
|
|
104
|
-
let gene_symbols = ds_gene_symbols
|
|
105
|
-
.read::<VarLenAscii, Dim<[usize; 1]>>()
|
|
106
|
-
.unwrap();
|
|
107
|
-
//println!("\tgene_symbols = {:?}", gene_symbols);
|
|
108
|
-
//println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
|
|
109
|
-
//println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
|
|
110
|
-
//println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
|
|
111
111
|
//println!(
|
|
112
112
|
// "Time for parsing gene symbols:{:?}",
|
|
113
|
-
//
|
|
113
|
+
// now_gene_names.elapsed()
|
|
114
114
|
//);
|
|
115
115
|
|
|
116
|
+
let mut gene_ids_string: Vec<String> = Vec::with_capacity(gene_ids.len());
|
|
116
117
|
let mut gene_names_string: Vec<String> = Vec::with_capacity(gene_names.len());
|
|
117
|
-
|
|
118
|
-
|
|
118
|
+
for i in 0..gene_ids.len() {
|
|
119
|
+
gene_ids_string.push(gene_ids[i].to_string());
|
|
119
120
|
gene_names_string.push(gene_names[i].to_string());
|
|
120
|
-
gene_symbols_string.push(gene_symbols[i].to_string());
|
|
121
121
|
}
|
|
122
122
|
|
|
123
123
|
//let now_samples = Instant::now();
|
|
@@ -154,7 +154,7 @@ fn input_data_from_HDF5(
|
|
|
154
154
|
}
|
|
155
155
|
|
|
156
156
|
let sample_array: Array2<f64> = ds_counts
|
|
157
|
-
.read_slice_2d((0..
|
|
157
|
+
.read_slice_2d((0..gene_ids.len(), sample_index..sample_index + 1))
|
|
158
158
|
.unwrap();
|
|
159
159
|
//println!("Length of gene array:{:?}", sample_array.len()); // Please check the result
|
|
160
160
|
input_vector.append(&mut sample_array.as_slice().unwrap().to_vec());
|
|
@@ -183,7 +183,7 @@ fn input_data_from_HDF5(
|
|
|
183
183
|
//let data_counts: Array1<_> = ds_counts.read::<f64, Dim<[usize; 1]>>().unwrap();
|
|
184
184
|
//println!("Data_counts: {:?}", data_counts);
|
|
185
185
|
let sample_array: Array2<f64> = ds_counts
|
|
186
|
-
.read_slice_2d((0..
|
|
186
|
+
.read_slice_2d((0..gene_ids.len(), sample_index..sample_index + 1))
|
|
187
187
|
.unwrap();
|
|
188
188
|
//println!("Length of gene array:{:?}", sample_array.len()); // Please check the result
|
|
189
189
|
input_vector.append(&mut sample_array.as_slice().unwrap().to_vec());
|
|
@@ -196,19 +196,19 @@ fn input_data_from_HDF5(
|
|
|
196
196
|
// "case + control length:{}",
|
|
197
197
|
// case_list.len() + control_list.len()
|
|
198
198
|
//);
|
|
199
|
-
//println!("
|
|
199
|
+
//println!("gene_ids length:{}", gene_ids.len());
|
|
200
200
|
//println!("input_vector length:{}", input_vector.len());
|
|
201
201
|
let dm = DMatrix::from_row_slice(
|
|
202
202
|
case_list.len() + control_list.len(),
|
|
203
|
-
|
|
203
|
+
gene_ids.len(),
|
|
204
204
|
&input_vector,
|
|
205
205
|
);
|
|
206
206
|
(
|
|
207
207
|
dm.transpose(), // Transposing the matrix
|
|
208
208
|
case_indexes,
|
|
209
209
|
control_indexes,
|
|
210
|
+
gene_ids_string,
|
|
210
211
|
gene_names_string,
|
|
211
|
-
gene_symbols_string,
|
|
212
212
|
)
|
|
213
213
|
}
|
|
214
214
|
|
|
@@ -227,8 +227,8 @@ fn input_data_from_text(
|
|
|
227
227
|
let mut file = File::open(filename).unwrap();
|
|
228
228
|
let mut num_lines: usize = 0;
|
|
229
229
|
let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
|
|
230
|
+
let mut gene_ids: Vec<String> = Vec::with_capacity(65000);
|
|
230
231
|
let mut gene_names: Vec<String> = Vec::with_capacity(65000);
|
|
231
|
-
let mut gene_symbols: Vec<String> = Vec::with_capacity(65000);
|
|
232
232
|
let mut num_columns: usize = 0;
|
|
233
233
|
|
|
234
234
|
// Check headers for samples
|
|
@@ -289,9 +289,9 @@ fn input_data_from_text(
|
|
|
289
289
|
let mut index = 0;
|
|
290
290
|
for field in line.split('\t').collect::<Vec<&str>>() {
|
|
291
291
|
if index == gene_name_index.unwrap() {
|
|
292
|
-
|
|
292
|
+
gene_ids.push(field.to_string());
|
|
293
293
|
} else if index == gene_symbol_index.unwrap() {
|
|
294
|
-
|
|
294
|
+
gene_names.push(field.to_string());
|
|
295
295
|
} else if binary_search(&case_indexes_original, index) != -1 {
|
|
296
296
|
let num = FromStr::from_str(field);
|
|
297
297
|
match num {
|
|
@@ -475,8 +475,8 @@ fn input_data_from_text(
|
|
|
475
475
|
input_vector.append(&mut *input_vector_temp.lock().unwrap());
|
|
476
476
|
case_indexes.append(&mut *case_indexes_temp.lock().unwrap());
|
|
477
477
|
control_indexes.append(&mut *control_indexes_temp.lock().unwrap());
|
|
478
|
-
|
|
479
|
-
|
|
478
|
+
gene_ids.append(&mut *genes_names_temp.lock().unwrap());
|
|
479
|
+
gene_names.append(&mut *genes_symbols_temp.lock().unwrap());
|
|
480
480
|
|
|
481
481
|
num_lines += *num_lines_temp.lock().unwrap();
|
|
482
482
|
num_columns += *num_columns_temp.lock().unwrap();
|
|
@@ -490,7 +490,7 @@ fn input_data_from_text(
|
|
|
490
490
|
//println!("Time for inputting data:{:?}", input_time.elapsed());
|
|
491
491
|
let dm = DMatrix::from_row_slice(num_lines, num_columns, &input_vector);
|
|
492
492
|
//println!("dm:{:?}", dm);
|
|
493
|
-
(dm, case_indexes, control_indexes,
|
|
493
|
+
(dm, case_indexes, control_indexes, gene_ids, gene_names)
|
|
494
494
|
}
|
|
495
495
|
|
|
496
496
|
#[allow(dead_code)]
|
|
@@ -625,16 +625,16 @@ fn main() {
|
|
|
625
625
|
input_matrix,
|
|
626
626
|
case_indexes,
|
|
627
627
|
control_indexes,
|
|
628
|
+
gene_ids,
|
|
628
629
|
gene_names,
|
|
629
|
-
gene_symbols,
|
|
630
630
|
);
|
|
631
631
|
if storage_type == "text" {
|
|
632
632
|
(
|
|
633
633
|
input_matrix,
|
|
634
634
|
case_indexes,
|
|
635
635
|
control_indexes,
|
|
636
|
+
gene_ids,
|
|
636
637
|
gene_names,
|
|
637
|
-
gene_symbols,
|
|
638
638
|
) = input_data_from_text(file_name, &case_list, &control_list);
|
|
639
639
|
} else {
|
|
640
640
|
// Parsing data from a HDF5 file
|
|
@@ -642,8 +642,8 @@ fn main() {
|
|
|
642
642
|
input_matrix,
|
|
643
643
|
case_indexes,
|
|
644
644
|
control_indexes,
|
|
645
|
+
gene_ids,
|
|
645
646
|
gene_names,
|
|
646
|
-
gene_symbols,
|
|
647
647
|
) = input_data_from_HDF5(file_name, &case_list, &control_list);
|
|
648
648
|
}
|
|
649
649
|
//let filtering_time = Instant::now();
|
|
@@ -651,19 +651,27 @@ fn main() {
|
|
|
651
651
|
filtered_matrix,
|
|
652
652
|
lib_sizes,
|
|
653
653
|
filtered_genes,
|
|
654
|
-
|
|
654
|
+
filtered_gene_names,
|
|
655
655
|
) = filter_by_expr(
|
|
656
656
|
min_count,
|
|
657
657
|
min_total_count,
|
|
658
658
|
&input_matrix,
|
|
659
659
|
case_indexes.len(),
|
|
660
660
|
control_indexes.len(),
|
|
661
|
+
gene_ids,
|
|
661
662
|
gene_names,
|
|
662
|
-
gene_symbols,
|
|
663
663
|
);
|
|
664
664
|
//println!("filtering time:{:?}", filtering_time.elapsed());
|
|
665
665
|
//println!("filtered_matrix_rows:{:?}", filtered_matrix.nrows());
|
|
666
666
|
//println!("filtered_matrix_cols:{:?}", filtered_matrix.ncols());
|
|
667
|
+
if filtered_matrix.nrows() == 0 {
|
|
668
|
+
// Its possible after filtering there might not be any genes left in the matrix, in such a case the rust code must exit gracefully with an error.
|
|
669
|
+
panic!("Number of genes after filtering = 0, cannot proceed any further")
|
|
670
|
+
}
|
|
671
|
+
if filtered_matrix.ncols() == 0 {
|
|
672
|
+
// Its possible after filtering there might not be any samples left in the matrix, in such a case the rust code must exit gracefully with an error.
|
|
673
|
+
panic!("Number of samples after filtering = 0, cannot proceed any further")
|
|
674
|
+
}
|
|
667
675
|
//let cpm_normalization_time = Instant::now();
|
|
668
676
|
let mut normalized_matrix = cpm(&filtered_matrix);
|
|
669
677
|
//println!(
|
|
@@ -747,7 +755,7 @@ fn main() {
|
|
|
747
755
|
p_values.push(PValueIndexes {
|
|
748
756
|
index: i,
|
|
749
757
|
gene_name: filtered_genes[i].to_owned(),
|
|
750
|
-
gene_symbol:
|
|
758
|
+
gene_symbol: filtered_gene_names[i].to_owned(),
|
|
751
759
|
fold_change: (treated_mean.unwrap()
|
|
752
760
|
/ control_mean.unwrap())
|
|
753
761
|
.log2(),
|
|
@@ -759,8 +767,8 @@ fn main() {
|
|
|
759
767
|
// Multithreaded implementation of calculating wilcoxon p-values
|
|
760
768
|
let normalized_matrix_temp = Arc::new(normalized_matrix);
|
|
761
769
|
let filtered_genes_temp = Arc::new(filtered_genes);
|
|
762
|
-
let
|
|
763
|
-
Arc::new(
|
|
770
|
+
let filtered_gene_names_temp =
|
|
771
|
+
Arc::new(filtered_gene_names);
|
|
764
772
|
let case_indexes_temp = Arc::new(case_indexes);
|
|
765
773
|
let control_indexes_temp = Arc::new(control_indexes);
|
|
766
774
|
let p_values_temp =
|
|
@@ -774,8 +782,8 @@ fn main() {
|
|
|
774
782
|
Arc::clone(&control_indexes_temp);
|
|
775
783
|
let p_values_temp = Arc::clone(&p_values_temp);
|
|
776
784
|
let filtered_genes_temp = Arc::clone(&filtered_genes_temp);
|
|
777
|
-
let
|
|
778
|
-
Arc::clone(&
|
|
785
|
+
let filtered_gene_names_temp =
|
|
786
|
+
Arc::clone(&filtered_gene_names_temp);
|
|
779
787
|
let handle = thread::spawn(move || {
|
|
780
788
|
let mut p_values_thread: Vec<PValueIndexes> =
|
|
781
789
|
Vec::with_capacity(
|
|
@@ -835,7 +843,7 @@ fn main() {
|
|
|
835
843
|
index: i,
|
|
836
844
|
gene_name: filtered_genes_temp[i]
|
|
837
845
|
.to_owned(),
|
|
838
|
-
gene_symbol:
|
|
846
|
+
gene_symbol: filtered_gene_names_temp
|
|
839
847
|
[i]
|
|
840
848
|
.to_owned(),
|
|
841
849
|
fold_change: (treated_mean.unwrap()
|
|
@@ -1214,7 +1222,6 @@ fn calc_factor_quantile(
|
|
|
1214
1222
|
for i in 0..input_matrix.nrows() {
|
|
1215
1223
|
row_vec.push(input_matrix[(i, j)] as f64);
|
|
1216
1224
|
}
|
|
1217
|
-
//println!("row_vec:{:?}", row_vec);
|
|
1218
1225
|
let quan = calc_quantile(row_vec, P);
|
|
1219
1226
|
//println!("quan:{}", quan);
|
|
1220
1227
|
let num = quan / lib_sizes[j];
|
|
@@ -1245,8 +1252,8 @@ fn filter_by_expr(
|
|
|
1245
1252
|
raw_data: &Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
|
|
1246
1253
|
num_diseased: usize,
|
|
1247
1254
|
num_control: usize,
|
|
1255
|
+
gene_ids: Vec<String>,
|
|
1248
1256
|
gene_names: Vec<String>,
|
|
1249
|
-
gene_symbols: Vec<String>,
|
|
1250
1257
|
) -> (
|
|
1251
1258
|
Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
|
|
1252
1259
|
Vec<f64>,
|
|
@@ -1338,14 +1345,14 @@ fn filter_by_expr(
|
|
|
1338
1345
|
}
|
|
1339
1346
|
let mut filtered_matrix = DMatrix::from_vec(positives.len(), num_diseased + num_control, blank);
|
|
1340
1347
|
let mut filtered_genes: Vec<String> = Vec::with_capacity(positives.len());
|
|
1341
|
-
let mut
|
|
1348
|
+
let mut filtered_gene_names: Vec<String> = Vec::with_capacity(positives.len());
|
|
1342
1349
|
let mut i = 0;
|
|
1343
1350
|
//println!("filtered_matrix rows:{}", filtered_matrix.nrows());
|
|
1344
1351
|
//println!("filtered_matrix cols:{}", filtered_matrix.ncols());
|
|
1345
1352
|
for index in positives {
|
|
1346
1353
|
let row = raw_data.row(index);
|
|
1347
|
-
filtered_genes.push(
|
|
1348
|
-
|
|
1354
|
+
filtered_genes.push(gene_ids[index].to_owned());
|
|
1355
|
+
filtered_gene_names.push(gene_names[index].to_owned());
|
|
1349
1356
|
let mut j = 0;
|
|
1350
1357
|
for item in &row {
|
|
1351
1358
|
//println!("index:{}", index);
|
|
@@ -1369,7 +1376,7 @@ fn filter_by_expr(
|
|
|
1369
1376
|
filtered_matrix,
|
|
1370
1377
|
modified_lib_sizes,
|
|
1371
1378
|
filtered_genes,
|
|
1372
|
-
|
|
1379
|
+
filtered_gene_names,
|
|
1373
1380
|
)
|
|
1374
1381
|
}
|
|
1375
1382
|
|
|
package/src/topGeneByExpressionVariance.rs
CHANGED
|
@@ -81,20 +81,20 @@ fn input_data_hdf5(
|
|
|
81
81
|
};
|
|
82
82
|
|
|
83
83
|
// Read gene symbols dataset
|
|
84
|
-
let genes_dataset = match file.dataset("
|
|
84
|
+
let genes_dataset = match file.dataset("gene_names") {
|
|
85
85
|
Ok(ds) => ds,
|
|
86
86
|
Err(err) => {
|
|
87
|
-
// eprintln!("Failed to open
|
|
87
|
+
// eprintln!("Failed to open gene_names dataset: {}", err);
|
|
88
88
|
// println!(
|
|
89
89
|
// "{}",
|
|
90
90
|
// serde_json::json!({
|
|
91
91
|
// "status": "error",
|
|
92
|
-
// "message": format!("Failed to open
|
|
92
|
+
// "message": format!("Failed to open gene_names dataset: {}", err),
|
|
93
93
|
// "file_path": filename
|
|
94
94
|
// })
|
|
95
95
|
// );
|
|
96
96
|
return Err(hdf5::Error::Internal(format!(
|
|
97
|
-
"Failed to open
|
|
97
|
+
"Failed to open gene_names dataset: {}",
|
|
98
98
|
err
|
|
99
99
|
)));
|
|
100
100
|
}
|
|
@@ -121,8 +121,8 @@ fn input_data_hdf5(
|
|
|
121
121
|
};
|
|
122
122
|
|
|
123
123
|
// Convert to Vec<String> for easier handling
|
|
124
|
-
let
|
|
125
|
-
let num_genes =
|
|
124
|
+
let gene_names: Vec<String> = genes_varlen.iter().map(|g| g.to_string()).collect();
|
|
125
|
+
let num_genes = gene_names.len();
|
|
126
126
|
// eprintln!("Found {} gene symbols", num_genes);
|
|
127
127
|
|
|
128
128
|
// Read sample names
|
|
@@ -316,7 +316,7 @@ fn input_data_hdf5(
|
|
|
316
316
|
// dm.ncols()
|
|
317
317
|
// );
|
|
318
318
|
|
|
319
|
-
Ok((dm,
|
|
319
|
+
Ok((dm, gene_names))
|
|
320
320
|
}
|
|
321
321
|
|
|
322
322
|
// The original input_data function for text files is kept as is
|
|
@@ -330,7 +330,7 @@ fn input_data(
|
|
|
330
330
|
// Build the CSV reader and iterate over each record.
|
|
331
331
|
let mut reader = BGZFReader::new(fs::File::open(filename).unwrap()).unwrap();
|
|
332
332
|
let mut num_lines: usize = 0;
|
|
333
|
-
let mut
|
|
333
|
+
let mut gene_names: Vec<String> = Vec::with_capacity(500);
|
|
334
334
|
|
|
335
335
|
let mut buffer = String::new();
|
|
336
336
|
reader.read_to_string(&mut buffer).unwrap();
|
|
@@ -358,7 +358,7 @@ fn input_data(
|
|
|
358
358
|
} else {
|
|
359
359
|
num_lines += 1;
|
|
360
360
|
//println!("line2:{:?}", line2);
|
|
361
|
-
|
|
361
|
+
gene_names.push(line2[3].to_string());
|
|
362
362
|
for i in &column_numbers {
|
|
363
363
|
let field = line2[*i];
|
|
364
364
|
let num = FromStr::from_str(field);
|
|
@@ -386,7 +386,7 @@ fn input_data(
|
|
|
386
386
|
|
|
387
387
|
let dm = DMatrix::from_row_slice(num_lines, sample_list.len(), &input_vector);
|
|
388
388
|
//println!("dm:{:?}", dm);
|
|
389
|
-
(dm,
|
|
389
|
+
(dm, gene_names)
|
|
390
390
|
}
|
|
391
391
|
|
|
392
392
|
#[allow(dead_code)]
|
|
@@ -398,7 +398,7 @@ struct GeneInfo {
|
|
|
398
398
|
|
|
399
399
|
fn calculate_variance(
|
|
400
400
|
input_matrix: Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
|
|
401
|
-
|
|
401
|
+
gene_names: Vec<String>,
|
|
402
402
|
mut min_sample_size: f64,
|
|
403
403
|
filter_extreme_values: bool,
|
|
404
404
|
rank_type: String,
|
|
@@ -478,12 +478,12 @@ fn calculate_variance(
|
|
|
478
478
|
{
|
|
479
479
|
gene_infos.push(GeneInfo {
|
|
480
480
|
rank_type: gene_counts.variance(),
|
|
481
|
-
gene_symbol:
|
|
481
|
+
gene_symbol: gene_names[row].clone(),
|
|
482
482
|
});
|
|
483
483
|
} else if filter_extreme_values == false {
|
|
484
484
|
gene_infos.push(GeneInfo {
|
|
485
485
|
rank_type: gene_counts.variance(),
|
|
486
|
-
gene_symbol:
|
|
486
|
+
gene_symbol: gene_names[row].clone(),
|
|
487
487
|
});
|
|
488
488
|
}
|
|
489
489
|
} else {
|
|
@@ -496,12 +496,12 @@ fn calculate_variance(
|
|
|
496
496
|
{
|
|
497
497
|
gene_infos.push(GeneInfo {
|
|
498
498
|
rank_type: gene_counts_data.interquartile_range(),
|
|
499
|
-
gene_symbol:
|
|
499
|
+
gene_symbol: gene_names[row].clone(),
|
|
500
500
|
});
|
|
501
501
|
} else if filter_extreme_values == false {
|
|
502
502
|
gene_infos.push(GeneInfo {
|
|
503
503
|
rank_type: gene_counts_data.interquartile_range(),
|
|
504
|
-
gene_symbol:
|
|
504
|
+
gene_symbol: gene_names[row].clone(),
|
|
505
505
|
});
|
|
506
506
|
}
|
|
507
507
|
}
|
|
@@ -689,7 +689,7 @@ fn main() {
|
|
|
689
689
|
|
|
690
690
|
// Choose the appropriate input function based on file type
|
|
691
691
|
// eprintln!("Reading data from {} file: {}", file_type, file_name);
|
|
692
|
-
let (input_matrix,
|
|
692
|
+
let (input_matrix, gene_names) = if file_type == "hdf5" {
|
|
693
693
|
// eprintln!("Using HDF5 reader function...");
|
|
694
694
|
match input_data_hdf5(&file_name, &samples_list) {
|
|
695
695
|
Ok(result) => {
|
|
@@ -731,11 +731,11 @@ fn main() {
|
|
|
731
731
|
// input_matrix.nrows(),
|
|
732
732
|
// input_matrix.ncols()
|
|
733
733
|
// );
|
|
734
|
-
// eprintln!("Number of gene symbols: {}",
|
|
735
|
-
if !
|
|
734
|
+
// eprintln!("Number of gene symbols: {}", gene_names.len());
|
|
735
|
+
if !gene_names.is_empty() {
|
|
736
736
|
// eprintln!(
|
|
737
737
|
// "First few gene symbols: {:?}",
|
|
738
|
-
// &
|
|
738
|
+
// &gene_names.iter().take(5).collect::<Vec<_>>()
|
|
739
739
|
// );
|
|
740
740
|
}
|
|
741
741
|
|
|
@@ -749,7 +749,7 @@ fn main() {
|
|
|
749
749
|
let gene_infos = match std::panic::catch_unwind(|| {
|
|
750
750
|
calculate_variance(
|
|
751
751
|
input_matrix,
|
|
752
|
-
|
|
752
|
+
gene_names,
|
|
753
753
|
samples_list.len() as f64,
|
|
754
754
|
filter_extreme_values,
|
|
755
755
|
rank_type.to_string(),
|