@sjcrh/proteinpaint-rust 2.38.0 → 2.38.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/gdcmaf.rs +29 -58
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.38.0",
2
+ "version": "2.38.1",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
package/src/gdcmaf.rs CHANGED
@@ -1,5 +1,5 @@
1
1
  /*
2
- This script download cohort maf files from GDC, combine them into a single file, and output the sorted file based on chromsome and Start_Position.
2
+ This script download cohort maf files from GDC, concatenate them into a single file that includes user specified columns.
3
3
 
4
4
  Input JSON:
5
5
  host: GDC host
@@ -17,16 +17,12 @@ use serde_json::Value;
17
17
  use std::path::Path;
18
18
  use futures::StreamExt;
19
19
  use std::io::{self,Read,Write};
20
- use std::sync::mpsc;
21
20
 
22
21
 
23
22
 
24
- fn gen_vec(d:String) -> (Vec<String>,Vec<Vec<u8>>) {
25
- let mut maf_bit: Vec<Vec<u8>> = Vec::new();
26
- let mut lst_chrom_pos: Vec<String> = Vec::new();
23
+ fn select_maf_col(d:String) -> Vec<u8> {
24
+ let mut maf_str: String = String::new();
27
25
  let mut header_indices: Vec<usize> = Vec::new();
28
- let mut chrom_index: usize = 9999;
29
- let mut pos_index: usize = 9999;
30
26
  let lines = d.trim_end().split("\n");
31
27
  for line in lines {
32
28
  if line.starts_with("#") {
@@ -36,41 +32,20 @@ fn gen_vec(d:String) -> (Vec<String>,Vec<Vec<u8>>) {
36
32
  for col in MAF_COL {
37
33
  let col_index: usize = header.iter().position(|x| x == col).unwrap();
38
34
  header_indices.push(col_index);
39
- if col == "Chromosome" {
40
- chrom_index = col_index;
41
- } else if col == "Start_Position" {
42
- pos_index = col_index;
43
- }
44
35
  }
45
36
  } else {
46
37
  let maf_cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
47
38
  let mut maf_out_lst: Vec<String> = Vec::new();
48
- let mut chrom = String::new();
49
- let mut pos = String::new();
50
- for (i,x) in header_indices.iter().enumerate() {
39
+ for x in header_indices.iter() {
51
40
  maf_out_lst.push(maf_cont_lst[*x].to_string());
52
- if chrom_index != 9999 && i == chrom_index {
53
- chrom = maf_cont_lst[*x].to_string();
54
- } else if pos_index != 9999 && i == pos_index {
55
- pos = maf_cont_lst[*x].to_string();
56
- }
57
41
  };
58
42
  maf_out_lst.push("\n".to_string());
59
- maf_bit.push(maf_out_lst.join("\t").as_bytes().to_vec());
60
- lst_chrom_pos.push(chrom+"\t"+&pos);
43
+ maf_str.push_str(maf_out_lst.join("\t").as_str());
61
44
  }
62
45
  };
63
- (lst_chrom_pos,maf_bit)
46
+ maf_str.as_bytes().to_vec()
64
47
  }
65
48
 
66
- fn get_sorted_indices(lst: &Vec<String>) -> Vec<usize>{
67
- let mut indices = (0..lst.len()).collect::<Vec<usize>>();
68
- indices.sort_by(|a,b| {
69
- lst[*a].split('\t').collect::<Vec<&str>>()[0].cmp(lst[*b].split('\t').collect::<Vec<&str>>()[0])
70
- .then(lst[*a].split('\t').collect::<Vec<&str>>()[1].parse::<u32>().unwrap().cmp(&lst[*b].split('\t').collect::<Vec<&str>>()[1].parse::<u32>().unwrap()))
71
- });
72
- indices
73
- }
74
49
 
75
50
  // GDC MAF columns (96)
76
51
  const MAF_COL: [&str;96] = ["Hugo_Symbol", "Entrez_Gene_Id", "Center", "NCBI_Build", "Chromosome",
@@ -109,46 +84,42 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
109
84
  };
110
85
 
111
86
  //downloading maf files parallelly and merge them into single maf file
112
- let (tx, rx) = mpsc::channel();
113
- let fetches = futures::stream::iter(
87
+ let download_futures = futures::stream::iter(
114
88
  url.into_iter().map(|url|{
115
- let txt = tx.clone();
116
89
  async move {
117
- if let Ok(resp) = reqwest::get(&url).await {
90
+ let result = reqwest::get(&url).await;
91
+ if let Ok(resp) = result {
118
92
  let content = resp.bytes().await.unwrap();
119
93
  let mut decoder = GzDecoder::new(&content[..]);
120
94
  let mut decompressed_content = Vec::new();
121
- if let Ok(_) = decoder.read_to_end(&mut decompressed_content) {
122
- let text = String::from_utf8_lossy(&decompressed_content);
123
- let (lst_chrom_pos,maf_bit) = gen_vec(text.to_string());
124
- txt.send((lst_chrom_pos,maf_bit)).unwrap();
95
+ let read_content = decoder.read_to_end(&mut decompressed_content);
96
+ if let Ok(_) = read_content {
97
+ let text = String::from_utf8_lossy(&decompressed_content).to_string();
98
+ text
99
+ } else {
100
+ let error_msg = "Failed to read content downloaded from: ".to_string() + &url;
101
+ error_msg
125
102
  }
103
+ } else {
104
+ let error_msg = "Failed to download: ".to_string() + &url;
105
+ error_msg
126
106
  }
127
107
  }
128
108
  })
129
- ).buffer_unordered(20).collect::<Vec<()>>();
130
- fetches.await;
131
- drop(tx);
132
-
133
- // write downloaded maf (GZIP format) into a Vector
134
- // lst_chrom_pos: a vector including chromsome&position info for sorting maf
135
- // idx_sorted: indices after sorting basedon chromsome&position
136
- let mut maf_bit: Vec<Vec<u8>> = Vec::new();
137
- let mut lst_chrom_pos: Vec<String> = Vec::new();
138
- for (chr_pos_lst,maf_bit_lst) in rx {
139
- maf_bit.extend_from_slice(&maf_bit_lst);
140
- lst_chrom_pos.extend_from_slice(&chr_pos_lst);
141
- };
142
- let idx_sorted = get_sorted_indices(&lst_chrom_pos);
109
+ );
143
110
 
144
111
  // output
145
- // maf_out_bit: A vector of GZIPPED maf
146
- // compress_header: output header
147
112
  let mut encoder = GzEncoder::new(io::stdout(), Compression::default());
148
113
  let _ = encoder.write_all(&MAF_COL.join("\t").as_bytes().to_vec()).expect("Failed to write header");
149
114
  let _ = encoder.write_all(b"\n").expect("Failed to write newline");
150
- for i in idx_sorted.iter() {
151
- let _ = encoder.write_all(&maf_bit[*i]).expect("Failed to write file");
152
- };
115
+ download_futures.buffer_unordered(20).for_each(|item| {
116
+ if item.starts_with("Failed") {
117
+ eprintln!("{}",item);
118
+ } else {
119
+ let maf_bit = select_maf_col(item);
120
+ let _ = encoder.write_all(&maf_bit).expect("Failed to write file");
121
+ };
122
+ async {}
123
+ }).await;
153
124
  Ok(())
154
125
  }