@sjcrh/proteinpaint-rust 2.84.0 → 2.108.3-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js CHANGED
@@ -43,16 +43,22 @@ exports.run_rust = function (binfile, input_data) {
43
43
  })
44
44
  }
45
45
 
46
- exports.run_rust_stream = function (binfile, input_data) {
46
+ exports.stream_rust = function (binfile, input_data, emitJson) {
47
47
  const binpath = path.join(__dirname, '/target/release/', binfile)
48
48
  const ps = spawn(binpath)
49
+ const stderr = []
49
50
  try {
51
+ // from GDC API -> ps.stdin -> ps.stdout -> transformed stream
50
52
  Readable.from(input_data).pipe(ps.stdin)
53
+ //reader.on('data', ps.stdout.pipe)
54
+ //reader.on('error', ps.stderr.pipe)
55
+ //return reader
51
56
  } catch (error) {
52
57
  ps.kill()
53
58
  let errmsg = error
54
- if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
55
- reject(errmsg)
59
+ //if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
60
+ //reject(errmsg)
61
+ console.log(59, error)
56
62
  }
57
63
 
58
64
  const childStream = new Transform({
@@ -62,11 +68,24 @@ exports.run_rust_stream = function (binfile, input_data) {
62
68
  }
63
69
  })
64
70
  ps.stdout.pipe(childStream)
65
- childStream.on('error', err => {
66
- reject(err)
71
+ ps.stderr.on('data', data => stderr.push(data))
72
+ ps.on('close', code => { //console.log(72, stderr.length)
73
+ if (stderr.length) {
74
+ // handle rust stderr
75
+ const errors = stderr.join('').trim().split('\n').map(JSON.parse)
76
+ //const errmsg = `!!! stream_rust('${binfile}') stderr: !!!`
77
+ //console.log(errmsg, errors)
78
+ emitJson({errors})
79
+ } else {
80
+ emitJson({ ok: true, status: 'ok', message: 'Processing complete' })
81
+ }
67
82
  })
68
- childStream.on('close', code => {
69
- childStream.end()
83
+ ps.on('error', err => {
84
+ //console.log(74, `stream_rust().on('error')`, err)
85
+ const errors = stderr.join('').trim().split('\n').map(JSON.parse)
86
+ emitJson({errors})
70
87
  })
88
+ // below will duplicate ps.on('close') event above
89
+ // childStream.on('end', () => console.log(`-- childStream done --`))
71
90
  return childStream
72
91
  }
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.84.0",
2
+ "version": "2.108.3-0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.84.0"
41
+ "pp_release_tag": "v2.108.3-0"
42
42
  }
package/src/DEanalysis.rs CHANGED
@@ -11,7 +11,7 @@ use nalgebra::base::Matrix;
11
11
  use nalgebra::base::VecStorage;
12
12
  use nalgebra::DMatrix;
13
13
  use nalgebra::ViewStorage;
14
- use ndarray::Array1;
14
+ //use ndarray::Array1;
15
15
  use ndarray::Array2;
16
16
  use ndarray::Dim;
17
17
  use serde::{Deserialize, Serialize};
@@ -25,7 +25,7 @@ use std::io::Read;
25
25
  use std::str::FromStr;
26
26
  use std::sync::{Arc, Mutex}; // Multithreading library
27
27
  use std::thread;
28
- use std::time::Instant;
28
+ //use std::time::Instant;
29
29
  //use std::cmp::Ordering;
30
30
  //use std::env;
31
31
  use std::io;
@@ -73,43 +73,45 @@ fn input_data_from_HDF5(
73
73
  Vec<String>,
74
74
  ) {
75
75
  let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
76
- let ds_dim = file.dataset("dims").unwrap(); // open the dataset
76
+
77
+ //let ds_dim = file.dataset("dims").unwrap(); // open the dataset
77
78
  let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
78
79
  let mut case_indexes: Vec<usize> = Vec::with_capacity(case_list.len());
79
80
  let mut control_indexes: Vec<usize> = Vec::with_capacity(control_list.len());
80
81
  // Check the data type and read the dataset accordingly
81
- let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
82
- let num_samples = data_dim[0]; // Number of total columns in the dataset
83
- let num_genes = data_dim[1]; // Number of total rows in the dataset
84
- println!("num_samples bulk:{}", num_samples);
85
- println!("num_genes bulk:{}", num_genes);
82
+ //let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
83
+ //let num_samples = data_dim[0]; // Number of total columns in the dataset
84
+ //let num_genes = data_dim[1]; // Number of total rows in the dataset
85
+
86
+ //println!("num_samples bulk:{}", num_samples);
87
+ //println!("num_genes bulk:{}", num_genes);
86
88
 
87
- let now_gene_names = Instant::now();
89
+ //let now_gene_names = Instant::now();
88
90
  let ds_gene_names = file.dataset("gene_names").unwrap();
89
- println!("ds_gene_names:{:?}", ds_gene_names);
91
+ //println!("ds_gene_names:{:?}", ds_gene_names);
90
92
  let gene_names = ds_gene_names
91
93
  .read::<VarLenAscii, Dim<[usize; 1]>>()
92
94
  .unwrap();
93
- println!("\tgene_names = {:?}", gene_names);
94
- println!("\tgene_names.shape() = {:?}", gene_names.shape());
95
- println!("\tgene_names.strides() = {:?}", gene_names.strides());
96
- println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
97
- println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
95
+ //println!("\tgene_names = {:?}", gene_names);
96
+ //println!("\tgene_names.shape() = {:?}", gene_names.shape());
97
+ //println!("\tgene_names.strides() = {:?}", gene_names.strides());
98
+ //println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
99
+ //println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
98
100
 
99
- let now_gene_symbols = Instant::now();
101
+ //let now_gene_symbols = Instant::now();
100
102
  let ds_gene_symbols = file.dataset("gene_symbols").unwrap();
101
- println!("ds_gene_symbols:{:?}", ds_gene_symbols);
103
+ //println!("ds_gene_symbols:{:?}", ds_gene_symbols);
102
104
  let gene_symbols = ds_gene_symbols
103
105
  .read::<VarLenAscii, Dim<[usize; 1]>>()
104
106
  .unwrap();
105
- println!("\tgene_symbols = {:?}", gene_symbols);
106
- println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
107
- println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
108
- println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
109
- println!(
110
- "Time for parsing gene symbols:{:?}",
111
- now_gene_symbols.elapsed()
112
- );
107
+ //println!("\tgene_symbols = {:?}", gene_symbols);
108
+ //println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
109
+ //println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
110
+ //println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
111
+ //println!(
112
+ // "Time for parsing gene symbols:{:?}",
113
+ // now_gene_symbols.elapsed()
114
+ //);
113
115
 
114
116
  let mut gene_names_string: Vec<String> = Vec::with_capacity(gene_names.len());
115
117
  let mut gene_symbols_string: Vec<String> = Vec::with_capacity(gene_symbols.len());
@@ -118,17 +120,17 @@ fn input_data_from_HDF5(
118
120
  gene_symbols_string.push(gene_symbols[i].to_string());
119
121
  }
120
122
 
121
- let now_samples = Instant::now();
123
+ //let now_samples = Instant::now();
122
124
  let ds_samples = file.dataset("samples").unwrap();
123
125
  let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
124
- println!("\tsamples = {:?}", samples);
125
- println!("\tsamples.shape() = {:?}", samples.shape());
126
- println!("\tsamples.strides() = {:?}", samples.strides());
127
- println!("\tsamples.ndim() = {:?}", samples.ndim());
128
- println!("Time for parsing samples:{:?}", now_samples.elapsed());
126
+ //println!("\tsamples = {:?}", samples);
127
+ //println!("\tsamples.shape() = {:?}", samples.shape());
128
+ //println!("\tsamples.strides() = {:?}", samples.strides());
129
+ //println!("\tsamples.ndim() = {:?}", samples.ndim());
130
+ //println!("Time for parsing samples:{:?}", now_samples.elapsed());
129
131
 
130
132
  //Find all columns values that are populated for the given gene
131
- let now_counts = Instant::now();
133
+ //let now_counts = Instant::now();
132
134
  let ds_counts = file.dataset("counts").unwrap(); // open the dataset
133
135
 
134
136
  let mut global_sample_index = 0;
@@ -189,7 +191,7 @@ fn input_data_from_HDF5(
189
191
  global_sample_index += 1;
190
192
  }
191
193
 
192
- println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
194
+ //println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
193
195
  //println!(
194
196
  // "case + control length:{}",
195
197
  // case_list.len() + control_list.len()
@@ -221,7 +223,7 @@ fn input_data_from_text(
221
223
  Vec<String>,
222
224
  Vec<String>,
223
225
  ) {
224
- let input_time = Instant::now();
226
+ //let input_time = Instant::now();
225
227
  let mut file = File::open(filename).unwrap();
226
228
  let mut num_lines: usize = 0;
227
229
  let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
@@ -350,7 +352,7 @@ fn input_data_from_text(
350
352
  let genes_symbols_temp = Arc::new(Mutex::new(Vec::<String>::new()));
351
353
  let input_vector_temp = Arc::new(Mutex::new(Vec::<f64>::new()));
352
354
  let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
353
- println!("Number of threads used:{}", max_threads);
355
+ //println!("Number of threads used:{}", max_threads);
354
356
  for thread_num in 0..max_threads {
355
357
  let case_indexes_original = Arc::clone(&case_indexes_original);
356
358
  let control_indexes_original = Arc::clone(&control_indexes_original);
@@ -485,7 +487,7 @@ fn input_data_from_text(
485
487
  //println!("num_columns:{}", num_columns);
486
488
  //println!("num_lines * num_columns:{}", num_lines * num_columns);
487
489
  //println!("input_vector:{:?}", input_vector.len());
488
- println!("Time for inputting data:{:?}", input_time.elapsed());
490
+ //println!("Time for inputting data:{:?}", input_time.elapsed());
489
491
  let dm = DMatrix::from_row_slice(num_lines, num_columns, &input_vector);
490
492
  //println!("dm:{:?}", dm);
491
493
  (dm, case_indexes, control_indexes, gene_names, gene_symbols)
@@ -513,14 +515,15 @@ struct PValueIndexes {
513
515
  // Used to get the sample names from HDF5 file at PP server startup
514
516
  fn get_DE_samples(hdf5_filename: &String) {
515
517
  let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
516
- let now_samples = Instant::now();
518
+
519
+ //let now_samples = Instant::now();
517
520
  let ds_samples = file.dataset("samples").unwrap();
518
521
  let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
519
- println!("\tsamples = {:?}", samples);
520
- println!("\tsamples.shape() = {:?}", samples.shape());
521
- println!("\tsamples.strides() = {:?}", samples.strides());
522
- println!("\tsamples.ndim() = {:?}", samples.ndim());
523
- println!("Time for parsing samples:{:?}", now_samples.elapsed());
522
+ //println!("\tsamples = {:?}", samples);
523
+ //println!("\tsamples.shape() = {:?}", samples.shape());
524
+ //println!("\tsamples.strides() = {:?}", samples.strides());
525
+ //println!("\tsamples.ndim() = {:?}", samples.ndim());
526
+ //println!("Time for parsing samples:{:?}", now_samples.elapsed());
524
527
 
525
528
  let mut output_string = "".to_string();
526
529
  for i in 0..samples.len() {
@@ -544,7 +547,7 @@ fn get_DE_samples(hdf5_filename: &String) {
544
547
  output_string += &",";
545
548
  }
546
549
  }
547
- println!("output_string:{}", output_string);
550
+ println!("{}", output_string);
548
551
  }
549
552
 
550
553
  fn main() {
@@ -559,7 +562,7 @@ fn main() {
559
562
  let input_json = json::parse(&input);
560
563
  match input_json {
561
564
  Ok(json_string) => {
562
- let now = Instant::now();
565
+ //let now = Instant::now();
563
566
  let file_name = &json_string["input_file"]
564
567
  .to_owned()
565
568
  .as_str()
@@ -567,7 +570,7 @@ fn main() {
567
570
  .to_string()
568
571
  .split(",")
569
572
  .collect();
570
- println!("file_name:{}", file_name);
573
+ //println!("file_name:{}", file_name);
571
574
  let data_type_option = json_string["data_type"].as_str().to_owned();
572
575
  match data_type_option {
573
576
  Some(x) => {
@@ -643,7 +646,7 @@ fn main() {
643
646
  gene_symbols,
644
647
  ) = input_data_from_HDF5(file_name, &case_list, &control_list);
645
648
  }
646
- let filtering_time = Instant::now();
649
+ //let filtering_time = Instant::now();
647
650
  let (
648
651
  filtered_matrix,
649
652
  lib_sizes,
@@ -658,21 +661,21 @@ fn main() {
658
661
  gene_names,
659
662
  gene_symbols,
660
663
  );
661
- println!("filtering time:{:?}", filtering_time.elapsed());
664
+ //println!("filtering time:{:?}", filtering_time.elapsed());
662
665
  //println!("filtered_matrix_rows:{:?}", filtered_matrix.nrows());
663
666
  //println!("filtered_matrix_cols:{:?}", filtered_matrix.ncols());
664
- let cpm_normalization_time = Instant::now();
667
+ //let cpm_normalization_time = Instant::now();
665
668
  let mut normalized_matrix = cpm(&filtered_matrix);
666
- println!(
667
- "cpm normalization time:{:?}",
668
- cpm_normalization_time.elapsed()
669
- );
670
- let tmm_normalization_time = Instant::now();
669
+ //println!(
670
+ // "cpm normalization time:{:?}",
671
+ // cpm_normalization_time.elapsed()
672
+ //);
673
+ //let tmm_normalization_time = Instant::now();
671
674
  let norm_factors = tmm_normalization(filtered_matrix, &lib_sizes);
672
- println!(
673
- "tmm normalization time:{:?}",
674
- tmm_normalization_time.elapsed()
675
- );
675
+ //println!(
676
+ // "tmm normalization time:{:?}",
677
+ // tmm_normalization_time.elapsed()
678
+ //);
676
679
  //println!("norm_factors:{:?}", norm_factors);
677
680
 
678
681
  for col in 0..normalized_matrix.ncols() {
@@ -683,19 +686,19 @@ fn main() {
683
686
  }
684
687
  }
685
688
  //println!("normalized_matrix:{:?}", normalized_matrix);
686
- println!("Number of cases:{}", case_list.len());
687
- println!("Number of controls:{}", control_list.len());
688
- println!("Time for pre-processing:{:?}", now.elapsed());
689
+ //println!("Number of cases:{}", case_list.len());
690
+ //println!("Number of controls:{}", control_list.len());
691
+ //println!("Time for pre-processing:{:?}", now.elapsed());
689
692
  // Using Wilcoxon test for differential gene expression
690
693
 
691
- let now2 = Instant::now();
694
+ //let now2 = Instant::now();
692
695
  let mut p_values: Vec<PValueIndexes> =
693
696
  Vec::with_capacity(normalized_matrix.nrows());
694
697
  const THRESHOLD: usize = 50; // This determines whether the Wilcoxon exact test or the normal test will be used based on sample size.
695
698
 
696
699
  //println!("case_indexes:{:?}", case_indexes);
697
700
  //println!("control_indexes:{:?}", control_indexes);
698
- let num_normalized_rows = normalized_matrix.nrows();
701
+ //let num_normalized_rows = normalized_matrix.nrows();
699
702
  if normalized_matrix.nrows() * normalized_matrix.ncols()
700
703
  < PAR_CUTOFF
701
704
  {
@@ -857,13 +860,13 @@ fn main() {
857
860
  p_values.append(&mut *p_values_temp.lock().unwrap());
858
861
  }
859
862
  //println!("p_values:{:?}", p_values);
860
- println!(
861
- "Time for running {} wilcoxon tests:{:?}",
862
- num_normalized_rows,
863
- now2.elapsed()
864
- );
863
+ //println!(
864
+ // "Time for running {} wilcoxon tests:{:?}",
865
+ // num_normalized_rows,
866
+ // now2.elapsed()
867
+ //);
865
868
  let adjusted_p_values = adjust_p_values(p_values);
866
- println!("adjusted_p_values:{}", adjusted_p_values);
869
+ println!("{}", adjusted_p_values);
867
870
  //let fold_changes =
868
871
  // calculate_fold_change(normalized_matrix, case_indexes, control_indexes);
869
872
  }
@@ -873,10 +876,10 @@ fn main() {
873
876
  }
874
877
  }
875
878
  }
876
- Err(error) => println!("Incorrect json: {}", error),
879
+ Err(error) => panic!("Incorrect json: {}", error),
877
880
  }
878
881
  }
879
- Err(error) => println!("Piping error: {}", error),
882
+ Err(error) => panic!("Piping error: {}", error),
880
883
  }
881
884
  }
882
885
 
@@ -1321,7 +1324,7 @@ fn filter_by_expr(
1321
1324
  positives.push(row);
1322
1325
  }
1323
1326
  }
1324
- println!("positives length:{}", positives.len());
1327
+ //println!("positives length:{}", positives.len());
1325
1328
  //println!("row_sums:{:?}", row_sums);
1326
1329
  //println!("keep_cpm:{:?}", keep_cpm);
1327
1330
  //println!("positive_cpm:{}", positive_cpm);
@@ -1337,8 +1340,8 @@ fn filter_by_expr(
1337
1340
  let mut filtered_genes: Vec<String> = Vec::with_capacity(positives.len());
1338
1341
  let mut filtered_gene_symbols: Vec<String> = Vec::with_capacity(positives.len());
1339
1342
  let mut i = 0;
1340
- println!("filtered_matrix rows:{}", filtered_matrix.nrows());
1341
- println!("filtered_matrix cols:{}", filtered_matrix.ncols());
1343
+ //println!("filtered_matrix rows:{}", filtered_matrix.nrows());
1344
+ //println!("filtered_matrix cols:{}", filtered_matrix.ncols());
1342
1345
  for index in positives {
1343
1346
  let row = raw_data.row(index);
1344
1347
  filtered_genes.push(gene_names[index].to_owned());
package/src/gdcmaf.rs CHANGED
@@ -19,21 +19,32 @@ use futures::StreamExt;
19
19
  use std::io::{self,Read,Write};
20
20
 
21
21
 
22
+ // Struct to hold error information
23
+ #[derive(serde::Serialize)]
24
+ struct ErrorEntry {
25
+ url: String,
26
+ error: String,
27
+ }
22
28
 
23
- fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
29
+ fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32), (String, String)> {
24
30
  let mut maf_str: String = String::new();
25
31
  let mut header_indices: Vec<usize> = Vec::new();
26
32
  let lines = d.trim_end().split("\n");
33
+ let mut mafrows = 0;
27
34
  for line in lines {
28
35
  if line.starts_with("#") {
29
36
  continue
30
37
  } else if line.contains("Hugo_Symbol") {
31
38
  let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
32
39
  for col in columns {
33
- if let Some(index) = header.iter().position(|x| x == col) {
34
- header_indices.push(index);
35
- } else {
36
- panic!("{} was not found!",col);
40
+ match header.iter().position(|x| x == col) {
41
+ Some(index) => {
42
+ header_indices.push(index);
43
+ }
44
+ None => {
45
+ let error_msg = format!("Column {} was not found", col);
46
+ return Err((url.to_string(), error_msg));
47
+ }
37
48
  }
38
49
  }
39
50
  } else {
@@ -44,12 +55,14 @@ fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
44
55
  };
45
56
  maf_str.push_str(maf_out_lst.join("\t").as_str());
46
57
  maf_str.push_str("\n");
58
+ mafrows += 1;
47
59
  }
48
60
  };
49
- maf_str.as_bytes().to_vec()
61
+ Ok((maf_str.as_bytes().to_vec(),mafrows))
50
62
  }
51
63
 
52
64
 
65
+
53
66
  #[tokio::main]
54
67
  async fn main() -> Result<(),Box<dyn std::error::Error>> {
55
68
  // Accepting the piped input json from nodejs and assign to the variable
@@ -57,6 +70,8 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
57
70
  // url: urls to download single maf files
58
71
  let mut buffer = String::new();
59
72
  io::stdin().read_line(&mut buffer)?;
73
+
74
+ // reading the input from PP
60
75
  let file_id_lst_js = serde_json::from_str::<Value>(&buffer).expect("Error reading input and serializing to JSON");
61
76
  let host = file_id_lst_js.get("host").expect("Host was not provided").as_str().expect("Host is not a string");
62
77
  let mut url: Vec<String> = Vec::new();
@@ -75,49 +90,118 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
75
90
  .map(|v| v.to_string().replace("\"",""))
76
91
  .collect::<Vec<String>>();
77
92
  } else {
78
- panic!("Columns is not an array");
93
+ let column_error = ErrorEntry {
94
+ url: String::new(),
95
+ error: "The columns in arg is not an array".to_string(),
96
+ };
97
+ let column_error_js = serde_json::to_string(&column_error).unwrap();
98
+ writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
99
+ return Err(Box::new(std::io::Error::new(
100
+ std::io::ErrorKind::InvalidInput,
101
+ "The columns in arg is not an array",
102
+ )) as Box<dyn std::error::Error>);
79
103
  }
80
104
  } else {
81
- panic!("Columns was not selected");
105
+ let column_error = ErrorEntry {
106
+ url: String::new(),
107
+ error: "Columns was not selected".to_string(),
108
+ };
109
+ let column_error_js = serde_json::to_string(&column_error).unwrap();
110
+ writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
111
+ return Err(Box::new(std::io::Error::new(
112
+ std::io::ErrorKind::InvalidInput,
113
+ "Columns was not selected",
114
+ )) as Box<dyn std::error::Error>);
82
115
  };
83
116
 
84
117
  //downloading maf files in parallel and merging them into a single maf file
85
118
  let download_futures = futures::stream::iter(
86
119
  url.into_iter().map(|url|{
87
120
  async move {
88
- let result = reqwest::get(&url).await;
89
- if let Ok(resp) = result {
90
- let content = resp.bytes().await.unwrap();
91
- let mut decoder = GzDecoder::new(&content[..]);
92
- let mut decompressed_content = Vec::new();
93
- let read_content = decoder.read_to_end(&mut decompressed_content);
94
- if let Ok(_) = read_content {
95
- let text = String::from_utf8_lossy(&decompressed_content).to_string();
96
- text
97
- } else {
98
- let error_msg = "Failed to read content downloaded from: ".to_string() + &url;
99
- error_msg
121
+ match reqwest::get(&url).await {
122
+ Ok(resp) if resp.status().is_success() => {
123
+ match resp.bytes().await {
124
+ Ok(content) => {
125
+ let mut decoder = GzDecoder::new(&content[..]);
126
+ let mut decompressed_content = Vec::new();
127
+ match decoder.read_to_end(&mut decompressed_content) {
128
+ Ok(_) => {
129
+ let text = String::from_utf8_lossy(&decompressed_content).to_string();
130
+ return Ok((url.clone(),text))
131
+ }
132
+ Err(e) => {
133
+ let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
134
+ Err((url.clone(), error_msg))
135
+ }
136
+ }
137
+ }
138
+ Err(e) => {
139
+ let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
140
+ Err((url.clone(), error_msg))
141
+ }
142
+ }
143
+ }
144
+ Ok(resp) => {
145
+ let error_msg = format!("HTTP error: {}", resp.status());
146
+ Err((url.clone(), error_msg))
147
+ }
148
+ Err(e) => {
149
+ let error_msg = format!("Server request failed: {}", e);
150
+ Err((url.clone(), error_msg))
100
151
  }
101
- } else {
102
- let error_msg = "Failed to download: ".to_string() + &url;
103
- error_msg
104
152
  }
105
153
  }
106
154
  })
107
155
  );
108
156
 
109
- // output
157
+ // binary output
110
158
  let mut encoder = GzEncoder::new(io::stdout(), Compression::default());
111
159
  let _ = encoder.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
112
160
  let _ = encoder.write_all(b"\n").expect("Failed to write newline");
113
- download_futures.buffer_unordered(20).for_each(|item| {
114
- if item.starts_with("Failed") {
115
- eprintln!("{}",item);
116
- } else {
117
- let maf_bit = select_maf_col(item,&maf_col);
118
- let _ = encoder.write_all(&maf_bit).expect("Failed to write file");
161
+
162
+ download_futures.buffer_unordered(20).for_each(|result| {
163
+ match result {
164
+ Ok((url, content)) => {
165
+ match select_maf_col(content, &maf_col, &url) {
166
+ Ok((maf_bit,mafrows)) => {
167
+ if mafrows > 0 {
168
+ encoder.write_all(&maf_bit).expect("Failed to write file");
169
+ } else {
170
+ let error = ErrorEntry {
171
+ url: url.clone(),
172
+ error: "Empty maf file".to_string(),
173
+ };
174
+ let error_js = serde_json::to_string(&error).unwrap();
175
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
176
+ }
177
+ }
178
+ Err((url,error)) => {
179
+ let error = ErrorEntry {
180
+ url,
181
+ error,
182
+ };
183
+ let error_js = serde_json::to_string(&error).unwrap();
184
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
185
+ }
186
+ }
187
+ }
188
+ Err((url, error)) => {
189
+ let error = ErrorEntry {
190
+ url,
191
+ error,
192
+ };
193
+ let error_js = serde_json::to_string(&error).unwrap();
194
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
195
+ }
119
196
  };
120
197
  async {}
121
198
  }).await;
199
+
200
+ // Finalize output and printing errors
201
+ encoder.finish().expect("Maf file output error!");
202
+ // Manually flush stdout and stderr
203
+ io::stdout().flush().expect("Failed to flush stdout");
204
+ io::stderr().flush().expect("Failed to flush stderr");
205
+
122
206
  Ok(())
123
207
  }