@sjcrh/proteinpaint-rust 2.84.0 → 2.108.3-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +26 -7
- package/package.json +2 -2
- package/src/DEanalysis.rs +76 -73
- package/src/gdcmaf.rs +114 -30
package/index.js
CHANGED
|
@@ -43,16 +43,22 @@ exports.run_rust = function (binfile, input_data) {
|
|
|
43
43
|
})
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
-
exports.run_rust_stream = function (binfile, input_data) {
|
|
46
|
+
exports.stream_rust = function (binfile, input_data, emitJson) {
|
|
47
47
|
const binpath = path.join(__dirname, '/target/release/', binfile)
|
|
48
48
|
const ps = spawn(binpath)
|
|
49
|
+
const stderr = []
|
|
49
50
|
try {
|
|
51
|
+
// from GDC API -> ps.stdin -> ps.stdout -> transformed stream
|
|
50
52
|
Readable.from(input_data).pipe(ps.stdin)
|
|
53
|
+
//reader.on('data', ps.stdout.pipe)
|
|
54
|
+
//reader.on('error', ps.stderr.pipe)
|
|
55
|
+
//return reader
|
|
51
56
|
} catch (error) {
|
|
52
57
|
ps.kill()
|
|
53
58
|
let errmsg = error
|
|
54
|
-
if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
|
|
55
|
-
reject(errmsg)
|
|
59
|
+
//if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
|
|
60
|
+
//reject(errmsg)
|
|
61
|
+
console.log(59, error)
|
|
56
62
|
}
|
|
57
63
|
|
|
58
64
|
const childStream = new Transform({
|
|
@@ -62,11 +68,24 @@ exports.run_rust_stream = function (binfile, input_data) {
|
|
|
62
68
|
}
|
|
63
69
|
})
|
|
64
70
|
ps.stdout.pipe(childStream)
|
|
65
|
-
|
|
66
|
-
|
|
71
|
+
ps.stderr.on('data', data => stderr.push(data))
|
|
72
|
+
ps.on('close', code => { //console.log(72, stderr.length)
|
|
73
|
+
if (stderr.length) {
|
|
74
|
+
// handle rust stderr
|
|
75
|
+
const errors = stderr.join('').trim().split('\n').map(JSON.parse)
|
|
76
|
+
//const errmsg = `!!! stream_rust('${binfile}') stderr: !!!`
|
|
77
|
+
//console.log(errmsg, errors)
|
|
78
|
+
emitJson({errors})
|
|
79
|
+
} else {
|
|
80
|
+
emitJson({ ok: true, status: 'ok', message: 'Processing complete' })
|
|
81
|
+
}
|
|
67
82
|
})
|
|
68
|
-
|
|
69
|
-
|
|
83
|
+
ps.on('error', err => {
|
|
84
|
+
//console.log(74, `stream_rust().on('error')`, err)
|
|
85
|
+
const errors = stderr.join('').trim().split('\n').map(JSON.parse)
|
|
86
|
+
emitJson({errors})
|
|
70
87
|
})
|
|
88
|
+
// below will duplicate ps.on('close') event above
|
|
89
|
+
// childStream.on('end', () => console.log(`-- childStream done --`))
|
|
71
90
|
return childStream
|
|
72
91
|
}
|
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.84.0",
|
|
2
|
+
"version": "2.108.3-0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"description": "Rust-based utilities for proteinpaint",
|
|
5
5
|
"main": "index.js",
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"tape": "^5.2.2"
|
|
40
40
|
},
|
|
41
|
-
"pp_release_tag": "v2.84.0"
|
|
41
|
+
"pp_release_tag": "v2.108.3-0"
|
|
42
42
|
}
|
package/src/DEanalysis.rs
CHANGED
|
@@ -11,7 +11,7 @@ use nalgebra::base::Matrix;
|
|
|
11
11
|
use nalgebra::base::VecStorage;
|
|
12
12
|
use nalgebra::DMatrix;
|
|
13
13
|
use nalgebra::ViewStorage;
|
|
14
|
-
use ndarray::Array1;
|
|
14
|
+
//use ndarray::Array1;
|
|
15
15
|
use ndarray::Array2;
|
|
16
16
|
use ndarray::Dim;
|
|
17
17
|
use serde::{Deserialize, Serialize};
|
|
@@ -25,7 +25,7 @@ use std::io::Read;
|
|
|
25
25
|
use std::str::FromStr;
|
|
26
26
|
use std::sync::{Arc, Mutex}; // Multithreading library
|
|
27
27
|
use std::thread;
|
|
28
|
-
use std::time::Instant;
|
|
28
|
+
//use std::time::Instant;
|
|
29
29
|
//use std::cmp::Ordering;
|
|
30
30
|
//use std::env;
|
|
31
31
|
use std::io;
|
|
@@ -73,43 +73,45 @@ fn input_data_from_HDF5(
|
|
|
73
73
|
Vec<String>,
|
|
74
74
|
) {
|
|
75
75
|
let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
|
|
76
|
-
|
|
76
|
+
|
|
77
|
+
//let ds_dim = file.dataset("dims").unwrap(); // open the dataset
|
|
77
78
|
let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
|
|
78
79
|
let mut case_indexes: Vec<usize> = Vec::with_capacity(case_list.len());
|
|
79
80
|
let mut control_indexes: Vec<usize> = Vec::with_capacity(control_list.len());
|
|
80
81
|
// Check the data type and read the dataset accordingly
|
|
81
|
-
let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
|
|
82
|
-
let num_samples = data_dim[0]; // Number of total columns in the dataset
|
|
83
|
-
let num_genes = data_dim[1]; // Number of total rows in the dataset
|
|
84
|
-
|
|
85
|
-
println!("
|
|
82
|
+
//let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
|
|
83
|
+
//let num_samples = data_dim[0]; // Number of total columns in the dataset
|
|
84
|
+
//let num_genes = data_dim[1]; // Number of total rows in the dataset
|
|
85
|
+
|
|
86
|
+
//println!("num_samples bulk:{}", num_samples);
|
|
87
|
+
//println!("num_genes bulk:{}", num_genes);
|
|
86
88
|
|
|
87
|
-
let now_gene_names = Instant::now();
|
|
89
|
+
//let now_gene_names = Instant::now();
|
|
88
90
|
let ds_gene_names = file.dataset("gene_names").unwrap();
|
|
89
|
-
println!("ds_gene_names:{:?}", ds_gene_names);
|
|
91
|
+
//println!("ds_gene_names:{:?}", ds_gene_names);
|
|
90
92
|
let gene_names = ds_gene_names
|
|
91
93
|
.read::<VarLenAscii, Dim<[usize; 1]>>()
|
|
92
94
|
.unwrap();
|
|
93
|
-
println!("\tgene_names = {:?}", gene_names);
|
|
94
|
-
println!("\tgene_names.shape() = {:?}", gene_names.shape());
|
|
95
|
-
println!("\tgene_names.strides() = {:?}", gene_names.strides());
|
|
96
|
-
println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
|
|
97
|
-
println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
|
|
95
|
+
//println!("\tgene_names = {:?}", gene_names);
|
|
96
|
+
//println!("\tgene_names.shape() = {:?}", gene_names.shape());
|
|
97
|
+
//println!("\tgene_names.strides() = {:?}", gene_names.strides());
|
|
98
|
+
//println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
|
|
99
|
+
//println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
|
|
98
100
|
|
|
99
|
-
let now_gene_symbols = Instant::now();
|
|
101
|
+
//let now_gene_symbols = Instant::now();
|
|
100
102
|
let ds_gene_symbols = file.dataset("gene_symbols").unwrap();
|
|
101
|
-
println!("ds_gene_symbols:{:?}", ds_gene_symbols);
|
|
103
|
+
//println!("ds_gene_symbols:{:?}", ds_gene_symbols);
|
|
102
104
|
let gene_symbols = ds_gene_symbols
|
|
103
105
|
.read::<VarLenAscii, Dim<[usize; 1]>>()
|
|
104
106
|
.unwrap();
|
|
105
|
-
println!("\tgene_symbols = {:?}", gene_symbols);
|
|
106
|
-
println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
|
|
107
|
-
println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
|
|
108
|
-
println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
|
|
109
|
-
println!(
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
);
|
|
107
|
+
//println!("\tgene_symbols = {:?}", gene_symbols);
|
|
108
|
+
//println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
|
|
109
|
+
//println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
|
|
110
|
+
//println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
|
|
111
|
+
//println!(
|
|
112
|
+
// "Time for parsing gene symbols:{:?}",
|
|
113
|
+
// now_gene_symbols.elapsed()
|
|
114
|
+
//);
|
|
113
115
|
|
|
114
116
|
let mut gene_names_string: Vec<String> = Vec::with_capacity(gene_names.len());
|
|
115
117
|
let mut gene_symbols_string: Vec<String> = Vec::with_capacity(gene_symbols.len());
|
|
@@ -118,17 +120,17 @@ fn input_data_from_HDF5(
|
|
|
118
120
|
gene_symbols_string.push(gene_symbols[i].to_string());
|
|
119
121
|
}
|
|
120
122
|
|
|
121
|
-
let now_samples = Instant::now();
|
|
123
|
+
//let now_samples = Instant::now();
|
|
122
124
|
let ds_samples = file.dataset("samples").unwrap();
|
|
123
125
|
let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
|
|
124
|
-
println!("\tsamples = {:?}", samples);
|
|
125
|
-
println!("\tsamples.shape() = {:?}", samples.shape());
|
|
126
|
-
println!("\tsamples.strides() = {:?}", samples.strides());
|
|
127
|
-
println!("\tsamples.ndim() = {:?}", samples.ndim());
|
|
128
|
-
println!("Time for parsing samples:{:?}", now_samples.elapsed());
|
|
126
|
+
//println!("\tsamples = {:?}", samples);
|
|
127
|
+
//println!("\tsamples.shape() = {:?}", samples.shape());
|
|
128
|
+
//println!("\tsamples.strides() = {:?}", samples.strides());
|
|
129
|
+
//println!("\tsamples.ndim() = {:?}", samples.ndim());
|
|
130
|
+
//println!("Time for parsing samples:{:?}", now_samples.elapsed());
|
|
129
131
|
|
|
130
132
|
//Find all columns values that are populated for the given gene
|
|
131
|
-
let now_counts = Instant::now();
|
|
133
|
+
//let now_counts = Instant::now();
|
|
132
134
|
let ds_counts = file.dataset("counts").unwrap(); // open the dataset
|
|
133
135
|
|
|
134
136
|
let mut global_sample_index = 0;
|
|
@@ -189,7 +191,7 @@ fn input_data_from_HDF5(
|
|
|
189
191
|
global_sample_index += 1;
|
|
190
192
|
}
|
|
191
193
|
|
|
192
|
-
println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
|
|
194
|
+
//println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
|
|
193
195
|
//println!(
|
|
194
196
|
// "case + control length:{}",
|
|
195
197
|
// case_list.len() + control_list.len()
|
|
@@ -221,7 +223,7 @@ fn input_data_from_text(
|
|
|
221
223
|
Vec<String>,
|
|
222
224
|
Vec<String>,
|
|
223
225
|
) {
|
|
224
|
-
let input_time = Instant::now();
|
|
226
|
+
//let input_time = Instant::now();
|
|
225
227
|
let mut file = File::open(filename).unwrap();
|
|
226
228
|
let mut num_lines: usize = 0;
|
|
227
229
|
let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
|
|
@@ -350,7 +352,7 @@ fn input_data_from_text(
|
|
|
350
352
|
let genes_symbols_temp = Arc::new(Mutex::new(Vec::<String>::new()));
|
|
351
353
|
let input_vector_temp = Arc::new(Mutex::new(Vec::<f64>::new()));
|
|
352
354
|
let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
|
|
353
|
-
|
|
355
|
+
//println!("Number of threads used:{}", max_threads);
|
|
354
356
|
for thread_num in 0..max_threads {
|
|
355
357
|
let case_indexes_original = Arc::clone(&case_indexes_original);
|
|
356
358
|
let control_indexes_original = Arc::clone(&control_indexes_original);
|
|
@@ -485,7 +487,7 @@ fn input_data_from_text(
|
|
|
485
487
|
//println!("num_columns:{}", num_columns);
|
|
486
488
|
//println!("num_lines * num_columns:{}", num_lines * num_columns);
|
|
487
489
|
//println!("input_vector:{:?}", input_vector.len());
|
|
488
|
-
println!("Time for inputting data:{:?}", input_time.elapsed());
|
|
490
|
+
//println!("Time for inputting data:{:?}", input_time.elapsed());
|
|
489
491
|
let dm = DMatrix::from_row_slice(num_lines, num_columns, &input_vector);
|
|
490
492
|
//println!("dm:{:?}", dm);
|
|
491
493
|
(dm, case_indexes, control_indexes, gene_names, gene_symbols)
|
|
@@ -513,14 +515,15 @@ struct PValueIndexes {
|
|
|
513
515
|
// Used to get the sample names from HDF5 file at PP server startup
|
|
514
516
|
fn get_DE_samples(hdf5_filename: &String) {
|
|
515
517
|
let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
|
|
516
|
-
|
|
518
|
+
|
|
519
|
+
//let now_samples = Instant::now();
|
|
517
520
|
let ds_samples = file.dataset("samples").unwrap();
|
|
518
521
|
let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
|
|
519
|
-
println!("\tsamples = {:?}", samples);
|
|
520
|
-
println!("\tsamples.shape() = {:?}", samples.shape());
|
|
521
|
-
println!("\tsamples.strides() = {:?}", samples.strides());
|
|
522
|
-
println!("\tsamples.ndim() = {:?}", samples.ndim());
|
|
523
|
-
println!("Time for parsing samples:{:?}", now_samples.elapsed());
|
|
522
|
+
//println!("\tsamples = {:?}", samples);
|
|
523
|
+
//println!("\tsamples.shape() = {:?}", samples.shape());
|
|
524
|
+
//println!("\tsamples.strides() = {:?}", samples.strides());
|
|
525
|
+
//println!("\tsamples.ndim() = {:?}", samples.ndim());
|
|
526
|
+
//println!("Time for parsing samples:{:?}", now_samples.elapsed());
|
|
524
527
|
|
|
525
528
|
let mut output_string = "".to_string();
|
|
526
529
|
for i in 0..samples.len() {
|
|
@@ -544,7 +547,7 @@ fn get_DE_samples(hdf5_filename: &String) {
|
|
|
544
547
|
output_string += &",";
|
|
545
548
|
}
|
|
546
549
|
}
|
|
547
|
-
println!("
|
|
550
|
+
println!("{}", output_string);
|
|
548
551
|
}
|
|
549
552
|
|
|
550
553
|
fn main() {
|
|
@@ -559,7 +562,7 @@ fn main() {
|
|
|
559
562
|
let input_json = json::parse(&input);
|
|
560
563
|
match input_json {
|
|
561
564
|
Ok(json_string) => {
|
|
562
|
-
let now = Instant::now();
|
|
565
|
+
//let now = Instant::now();
|
|
563
566
|
let file_name = &json_string["input_file"]
|
|
564
567
|
.to_owned()
|
|
565
568
|
.as_str()
|
|
@@ -567,7 +570,7 @@ fn main() {
|
|
|
567
570
|
.to_string()
|
|
568
571
|
.split(",")
|
|
569
572
|
.collect();
|
|
570
|
-
println!("file_name:{}", file_name);
|
|
573
|
+
//println!("file_name:{}", file_name);
|
|
571
574
|
let data_type_option = json_string["data_type"].as_str().to_owned();
|
|
572
575
|
match data_type_option {
|
|
573
576
|
Some(x) => {
|
|
@@ -643,7 +646,7 @@ fn main() {
|
|
|
643
646
|
gene_symbols,
|
|
644
647
|
) = input_data_from_HDF5(file_name, &case_list, &control_list);
|
|
645
648
|
}
|
|
646
|
-
let filtering_time = Instant::now();
|
|
649
|
+
//let filtering_time = Instant::now();
|
|
647
650
|
let (
|
|
648
651
|
filtered_matrix,
|
|
649
652
|
lib_sizes,
|
|
@@ -658,21 +661,21 @@ fn main() {
|
|
|
658
661
|
gene_names,
|
|
659
662
|
gene_symbols,
|
|
660
663
|
);
|
|
661
|
-
println!("filtering time:{:?}", filtering_time.elapsed());
|
|
664
|
+
//println!("filtering time:{:?}", filtering_time.elapsed());
|
|
662
665
|
//println!("filtered_matrix_rows:{:?}", filtered_matrix.nrows());
|
|
663
666
|
//println!("filtered_matrix_cols:{:?}", filtered_matrix.ncols());
|
|
664
|
-
let cpm_normalization_time = Instant::now();
|
|
667
|
+
//let cpm_normalization_time = Instant::now();
|
|
665
668
|
let mut normalized_matrix = cpm(&filtered_matrix);
|
|
666
|
-
println!(
|
|
667
|
-
|
|
668
|
-
|
|
669
|
-
);
|
|
670
|
-
let tmm_normalization_time = Instant::now();
|
|
669
|
+
//println!(
|
|
670
|
+
// "cpm normalization time:{:?}",
|
|
671
|
+
// cpm_normalization_time.elapsed()
|
|
672
|
+
//);
|
|
673
|
+
//let tmm_normalization_time = Instant::now();
|
|
671
674
|
let norm_factors = tmm_normalization(filtered_matrix, &lib_sizes);
|
|
672
|
-
println!(
|
|
673
|
-
|
|
674
|
-
|
|
675
|
-
);
|
|
675
|
+
//println!(
|
|
676
|
+
// "tmm normalization time:{:?}",
|
|
677
|
+
// tmm_normalization_time.elapsed()
|
|
678
|
+
//);
|
|
676
679
|
//println!("norm_factors:{:?}", norm_factors);
|
|
677
680
|
|
|
678
681
|
for col in 0..normalized_matrix.ncols() {
|
|
@@ -683,19 +686,19 @@ fn main() {
|
|
|
683
686
|
}
|
|
684
687
|
}
|
|
685
688
|
//println!("normalized_matrix:{:?}", normalized_matrix);
|
|
686
|
-
println!("Number of cases:{}", case_list.len());
|
|
687
|
-
println!("Number of controls:{}", control_list.len());
|
|
688
|
-
println!("Time for pre-processing:{:?}", now.elapsed());
|
|
689
|
+
//println!("Number of cases:{}", case_list.len());
|
|
690
|
+
//println!("Number of controls:{}", control_list.len());
|
|
691
|
+
//println!("Time for pre-processing:{:?}", now.elapsed());
|
|
689
692
|
// Using Wilcoxon test for differential gene expression
|
|
690
693
|
|
|
691
|
-
let now2 = Instant::now();
|
|
694
|
+
//let now2 = Instant::now();
|
|
692
695
|
let mut p_values: Vec<PValueIndexes> =
|
|
693
696
|
Vec::with_capacity(normalized_matrix.nrows());
|
|
694
697
|
const THRESHOLD: usize = 50; // This determines whether the Wilcoxon exact test or the normal test will be used based on sample size.
|
|
695
698
|
|
|
696
699
|
//println!("case_indexes:{:?}", case_indexes);
|
|
697
700
|
//println!("control_indexes:{:?}", control_indexes);
|
|
698
|
-
let num_normalized_rows = normalized_matrix.nrows();
|
|
701
|
+
//let num_normalized_rows = normalized_matrix.nrows();
|
|
699
702
|
if normalized_matrix.nrows() * normalized_matrix.ncols()
|
|
700
703
|
< PAR_CUTOFF
|
|
701
704
|
{
|
|
@@ -857,13 +860,13 @@ fn main() {
|
|
|
857
860
|
p_values.append(&mut *p_values_temp.lock().unwrap());
|
|
858
861
|
}
|
|
859
862
|
//println!("p_values:{:?}", p_values);
|
|
860
|
-
println!(
|
|
861
|
-
|
|
862
|
-
|
|
863
|
-
|
|
864
|
-
);
|
|
863
|
+
//println!(
|
|
864
|
+
// "Time for running {} wilcoxon tests:{:?}",
|
|
865
|
+
// num_normalized_rows,
|
|
866
|
+
// now2.elapsed()
|
|
867
|
+
//);
|
|
865
868
|
let adjusted_p_values = adjust_p_values(p_values);
|
|
866
|
-
println!("
|
|
869
|
+
println!("{}", adjusted_p_values);
|
|
867
870
|
//let fold_changes =
|
|
868
871
|
// calculate_fold_change(normalized_matrix, case_indexes, control_indexes);
|
|
869
872
|
}
|
|
@@ -873,10 +876,10 @@ fn main() {
|
|
|
873
876
|
}
|
|
874
877
|
}
|
|
875
878
|
}
|
|
876
|
-
Err(error) =>
|
|
879
|
+
Err(error) => panic!("Incorrect json: {}", error),
|
|
877
880
|
}
|
|
878
881
|
}
|
|
879
|
-
Err(error) =>
|
|
882
|
+
Err(error) => panic!("Piping error: {}", error),
|
|
880
883
|
}
|
|
881
884
|
}
|
|
882
885
|
|
|
@@ -1321,7 +1324,7 @@ fn filter_by_expr(
|
|
|
1321
1324
|
positives.push(row);
|
|
1322
1325
|
}
|
|
1323
1326
|
}
|
|
1324
|
-
println!("positives length:{}", positives.len());
|
|
1327
|
+
//println!("positives length:{}", positives.len());
|
|
1325
1328
|
//println!("row_sums:{:?}", row_sums);
|
|
1326
1329
|
//println!("keep_cpm:{:?}", keep_cpm);
|
|
1327
1330
|
//println!("positive_cpm:{}", positive_cpm);
|
|
@@ -1337,8 +1340,8 @@ fn filter_by_expr(
|
|
|
1337
1340
|
let mut filtered_genes: Vec<String> = Vec::with_capacity(positives.len());
|
|
1338
1341
|
let mut filtered_gene_symbols: Vec<String> = Vec::with_capacity(positives.len());
|
|
1339
1342
|
let mut i = 0;
|
|
1340
|
-
println!("filtered_matrix rows:{}", filtered_matrix.nrows());
|
|
1341
|
-
println!("filtered_matrix cols:{}", filtered_matrix.ncols());
|
|
1343
|
+
//println!("filtered_matrix rows:{}", filtered_matrix.nrows());
|
|
1344
|
+
//println!("filtered_matrix cols:{}", filtered_matrix.ncols());
|
|
1342
1345
|
for index in positives {
|
|
1343
1346
|
let row = raw_data.row(index);
|
|
1344
1347
|
filtered_genes.push(gene_names[index].to_owned());
|
package/src/gdcmaf.rs
CHANGED
|
@@ -19,21 +19,32 @@ use futures::StreamExt;
|
|
|
19
19
|
use std::io::{self,Read,Write};
|
|
20
20
|
|
|
21
21
|
|
|
22
|
+
// Struct to hold error information
|
|
23
|
+
#[derive(serde::Serialize)]
|
|
24
|
+
struct ErrorEntry {
|
|
25
|
+
url: String,
|
|
26
|
+
error: String,
|
|
27
|
+
}
|
|
22
28
|
|
|
23
|
-
fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
|
|
29
|
+
fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32), (String, String)> {
|
|
24
30
|
let mut maf_str: String = String::new();
|
|
25
31
|
let mut header_indices: Vec<usize> = Vec::new();
|
|
26
32
|
let lines = d.trim_end().split("\n");
|
|
33
|
+
let mut mafrows = 0;
|
|
27
34
|
for line in lines {
|
|
28
35
|
if line.starts_with("#") {
|
|
29
36
|
continue
|
|
30
37
|
} else if line.contains("Hugo_Symbol") {
|
|
31
38
|
let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
|
|
32
39
|
for col in columns {
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
40
|
+
match header.iter().position(|x| x == col) {
|
|
41
|
+
Some(index) => {
|
|
42
|
+
header_indices.push(index);
|
|
43
|
+
}
|
|
44
|
+
None => {
|
|
45
|
+
let error_msg = format!("Column {} was not found", col);
|
|
46
|
+
return Err((url.to_string(), error_msg));
|
|
47
|
+
}
|
|
37
48
|
}
|
|
38
49
|
}
|
|
39
50
|
} else {
|
|
@@ -44,12 +55,14 @@ fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
|
|
|
44
55
|
};
|
|
45
56
|
maf_str.push_str(maf_out_lst.join("\t").as_str());
|
|
46
57
|
maf_str.push_str("\n");
|
|
58
|
+
mafrows += 1;
|
|
47
59
|
}
|
|
48
60
|
};
|
|
49
|
-
maf_str.as_bytes().to_vec()
|
|
61
|
+
Ok((maf_str.as_bytes().to_vec(),mafrows))
|
|
50
62
|
}
|
|
51
63
|
|
|
52
64
|
|
|
65
|
+
|
|
53
66
|
#[tokio::main]
|
|
54
67
|
async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
55
68
|
// Accepting the piped input json from jodejs and assign to the variable
|
|
@@ -57,6 +70,8 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
57
70
|
// url: urls to download single maf files
|
|
58
71
|
let mut buffer = String::new();
|
|
59
72
|
io::stdin().read_line(&mut buffer)?;
|
|
73
|
+
|
|
74
|
+
// reading the input from PP
|
|
60
75
|
let file_id_lst_js = serde_json::from_str::<Value>(&buffer).expect("Error reading input and serializing to JSON");
|
|
61
76
|
let host = file_id_lst_js.get("host").expect("Host was not provided").as_str().expect("Host is not a string");
|
|
62
77
|
let mut url: Vec<String> = Vec::new();
|
|
@@ -75,49 +90,118 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
75
90
|
.map(|v| v.to_string().replace("\"",""))
|
|
76
91
|
.collect::<Vec<String>>();
|
|
77
92
|
} else {
|
|
78
|
-
|
|
93
|
+
let column_error = ErrorEntry {
|
|
94
|
+
url: String::new(),
|
|
95
|
+
error: "The columns in arg is not an array".to_string(),
|
|
96
|
+
};
|
|
97
|
+
let column_error_js = serde_json::to_string(&column_error).unwrap();
|
|
98
|
+
writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
|
|
99
|
+
return Err(Box::new(std::io::Error::new(
|
|
100
|
+
std::io::ErrorKind::InvalidInput,
|
|
101
|
+
"The columns in arg is not an array",
|
|
102
|
+
)) as Box<dyn std::error::Error>);
|
|
79
103
|
}
|
|
80
104
|
} else {
|
|
81
|
-
|
|
105
|
+
let column_error = ErrorEntry {
|
|
106
|
+
url: String::new(),
|
|
107
|
+
error: "Columns was not selected".to_string(),
|
|
108
|
+
};
|
|
109
|
+
let column_error_js = serde_json::to_string(&column_error).unwrap();
|
|
110
|
+
writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
|
|
111
|
+
return Err(Box::new(std::io::Error::new(
|
|
112
|
+
std::io::ErrorKind::InvalidInput,
|
|
113
|
+
"Columns was not selected",
|
|
114
|
+
)) as Box<dyn std::error::Error>);
|
|
82
115
|
};
|
|
83
116
|
|
|
84
117
|
//downloading maf files parallelly and merge them into single maf file
|
|
85
118
|
let download_futures = futures::stream::iter(
|
|
86
119
|
url.into_iter().map(|url|{
|
|
87
120
|
async move {
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
121
|
+
match reqwest::get(&url).await {
|
|
122
|
+
Ok(resp) if resp.status().is_success() => {
|
|
123
|
+
match resp.bytes().await {
|
|
124
|
+
Ok(content) => {
|
|
125
|
+
let mut decoder = GzDecoder::new(&content[..]);
|
|
126
|
+
let mut decompressed_content = Vec::new();
|
|
127
|
+
match decoder.read_to_end(&mut decompressed_content) {
|
|
128
|
+
Ok(_) => {
|
|
129
|
+
let text = String::from_utf8_lossy(&decompressed_content).to_string();
|
|
130
|
+
return Ok((url.clone(),text))
|
|
131
|
+
}
|
|
132
|
+
Err(e) => {
|
|
133
|
+
let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
|
|
134
|
+
Err((url.clone(), error_msg))
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
}
|
|
138
|
+
Err(e) => {
|
|
139
|
+
let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
|
|
140
|
+
Err((url.clone(), error_msg))
|
|
141
|
+
}
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
Ok(resp) => {
|
|
145
|
+
let error_msg = format!("HTTP error: {}", resp.status());
|
|
146
|
+
Err((url.clone(), error_msg))
|
|
147
|
+
}
|
|
148
|
+
Err(e) => {
|
|
149
|
+
let error_msg = format!("Server request failed: {}", e);
|
|
150
|
+
Err((url.clone(), error_msg))
|
|
100
151
|
}
|
|
101
|
-
} else {
|
|
102
|
-
let error_msg = "Failed to download: ".to_string() + &url;
|
|
103
|
-
error_msg
|
|
104
152
|
}
|
|
105
153
|
}
|
|
106
154
|
})
|
|
107
155
|
);
|
|
108
156
|
|
|
109
|
-
// output
|
|
157
|
+
// binary output
|
|
110
158
|
let mut encoder = GzEncoder::new(io::stdout(), Compression::default());
|
|
111
159
|
let _ = encoder.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
|
|
112
160
|
let _ = encoder.write_all(b"\n").expect("Failed to write newline");
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
161
|
+
|
|
162
|
+
download_futures.buffer_unordered(20).for_each(|result| {
|
|
163
|
+
match result {
|
|
164
|
+
Ok((url, content)) => {
|
|
165
|
+
match select_maf_col(content, &maf_col, &url) {
|
|
166
|
+
Ok((maf_bit,mafrows)) => {
|
|
167
|
+
if mafrows > 0 {
|
|
168
|
+
encoder.write_all(&maf_bit).expect("Failed to write file");
|
|
169
|
+
} else {
|
|
170
|
+
let error = ErrorEntry {
|
|
171
|
+
url: url.clone(),
|
|
172
|
+
error: "Empty maf file".to_string(),
|
|
173
|
+
};
|
|
174
|
+
let error_js = serde_json::to_string(&error).unwrap();
|
|
175
|
+
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
176
|
+
}
|
|
177
|
+
}
|
|
178
|
+
Err((url,error)) => {
|
|
179
|
+
let error = ErrorEntry {
|
|
180
|
+
url,
|
|
181
|
+
error,
|
|
182
|
+
};
|
|
183
|
+
let error_js = serde_json::to_string(&error).unwrap();
|
|
184
|
+
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
}
|
|
188
|
+
Err((url, error)) => {
|
|
189
|
+
let error = ErrorEntry {
|
|
190
|
+
url,
|
|
191
|
+
error,
|
|
192
|
+
};
|
|
193
|
+
let error_js = serde_json::to_string(&error).unwrap();
|
|
194
|
+
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
195
|
+
}
|
|
119
196
|
};
|
|
120
197
|
async {}
|
|
121
198
|
}).await;
|
|
199
|
+
|
|
200
|
+
// Finalize output and printing errors
|
|
201
|
+
encoder.finish().expect("Maf file output error!");
|
|
202
|
+
// Manually flush stdout and stderr
|
|
203
|
+
io::stdout().flush().expect("Failed to flush stdout");
|
|
204
|
+
io::stderr().flush().expect("Failed to flush stderr");
|
|
205
|
+
|
|
122
206
|
Ok(())
|
|
123
207
|
}
|