@sjcrh/proteinpaint-rust 2.30.2 → 2.33.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/index.js +31 -2
  2. package/package.json +1 -1
  3. package/src/gdcmaf.rs +56 -75
package/index.js CHANGED
@@ -1,8 +1,9 @@
1
1
  const path = require('path'),
2
2
  spawn = require('child_process').spawn,
3
- Readable = require('stream').Readable
3
+ Readable = require('stream').Readable,
4
+ Transform = require('stream').Transform
4
5
 
5
- exports.run_rust = function(binfile, input_data) {
6
+ exports.run_rust = function (binfile, input_data) {
6
7
  return new Promise((resolve, reject) => {
7
8
  const binpath = path.join(__dirname, '/target/release/', binfile)
8
9
  const ps = spawn(binpath)
@@ -41,3 +42,31 @@ exports.run_rust = function(binfile, input_data) {
41
42
  })
42
43
  })
43
44
  }
45
+
46
+ exports.run_rust_stream = function (binfile, input_data) {
47
+ const binpath = path.join(__dirname, '/target/release/', binfile)
48
+ const ps = spawn(binpath)
49
+ try {
50
+ Readable.from(input_data).pipe(ps.stdin)
51
+ } catch (error) {
52
+ ps.kill()
53
+ let errmsg = error
54
+ if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
55
+ reject(errmsg)
56
+ }
57
+
58
+ const childStream = new Transform({
59
+ transform(chunk, encoding, callback) {
60
+ this.push(chunk)
61
+ callback()
62
+ }
63
+ })
64
+ ps.stdout.pipe(childStream)
65
+ childStream.on('error', err => {
66
+ reject(err)
67
+ })
68
+ childStream.on('close', code => {
69
+ childStream.end()
70
+ })
71
+ return childStream
72
+ }
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.30.2",
2
+ "version": "2.33.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
package/src/gdcmaf.rs CHANGED
@@ -1,35 +1,54 @@
1
1
  use flate2::read::GzDecoder;
2
2
  use flate2::write::GzEncoder;
3
+ use flate2::Compression;
3
4
  use serde_json::Value;
4
- use std::fs::File;
5
5
  use std::path::Path;
6
6
  use futures::StreamExt;
7
- use std::io;
8
- use std::io::{Read,Write};
7
+ use std::io::{self,Read,Write};
9
8
  use std::sync::mpsc;
10
- use std::collections::HashMap;
11
9
 
12
10
 
13
- fn gen_map(d:String) -> HashMap<String,Vec<String>> {
14
- let mut map: HashMap<String, Vec<String>> = HashMap::new();
15
- let mut header: Vec<String> = Vec::new();
11
+
12
+ fn gen_vec(d:String) -> (Vec<String>,Vec<Vec<u8>>) {
13
+ let mut maf_bit: Vec<Vec<u8>> = Vec::new();
14
+ let mut lst_chrom_pos: Vec<String> = Vec::new();
15
+ let mut header_indices: Vec<usize> = Vec::new();
16
+ let mut chrom_index: usize = 9999;
17
+ let mut pos_index: usize = 9999;
16
18
  let lines = d.trim_end().split("\n");
17
19
  for line in lines {
18
20
  if line.starts_with("#") {
19
21
  continue
20
22
  } else if line.contains("Hugo_Symbol") {
21
- header = line.split("\t").map(|s| s.to_string()).collect();
22
- for k in &header {
23
- map.insert(k.to_string(),Vec::new());
23
+ let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
24
+ for col in MAF_COL {
25
+ let col_index: usize = header.iter().position(|x| x == col).unwrap();
26
+ header_indices.push(col_index);
27
+ if col == "Chromosome" {
28
+ chrom_index = col_index;
29
+ } else if col == "Start_Position" {
30
+ pos_index = col_index;
31
+ }
24
32
  }
25
33
  } else {
26
34
  let maf_cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
27
- for (i,x) in maf_cont_lst.iter().enumerate() {
28
- map.get_mut(&header[i]).map(|val| val.push(x.to_string()));
29
- }
35
+ let mut maf_out_lst: Vec<String> = Vec::new();
36
+ let mut chrom = String::new();
37
+ let mut pos = String::new();
38
+ for (i,x) in header_indices.iter().enumerate() {
39
+ maf_out_lst.push(maf_cont_lst[*x].to_string());
40
+ if chrom_index != 9999 && i == chrom_index {
41
+ chrom = maf_cont_lst[*x].to_string();
42
+ } else if pos_index != 9999 && i == pos_index {
43
+ pos = maf_cont_lst[*x].to_string();
44
+ }
45
+ };
46
+ maf_out_lst.push("\n".to_string());
47
+ maf_bit.push(maf_out_lst.join("\t").as_bytes().to_vec());
48
+ lst_chrom_pos.push(chrom+"\t"+&pos);
30
49
  }
31
- }
32
- map
50
+ };
51
+ (lst_chrom_pos,maf_bit)
33
52
  }
34
53
 
35
54
  fn get_sorted_indices(lst: &Vec<String>) -> Vec<usize>{
@@ -66,13 +85,12 @@ const MAF_COL: [&str;96] = ["Hugo_Symbol", "Entrez_Gene_Id", "Center", "NCBI_Bui
66
85
  async fn main() -> Result<(),Box<dyn std::error::Error>> {
67
86
  // Accepting the piped input json from nodejs and assigning it to the variable
68
87
  // host: GDC host
69
- // out_file: save maf to out_file under cachedir
88
+ // save output into json string
70
89
  // url: urls to download single maf files
71
90
  let mut buffer = String::new();
72
91
  io::stdin().read_line(&mut buffer)?;
73
92
  let file_id_lst_js = serde_json::from_str::<Value>(&buffer).expect("Error reading input and serializing to JSON");
74
93
  let host = &file_id_lst_js["host"].as_str().unwrap();
75
- let out_file = &file_id_lst_js["outFile"].as_str().unwrap();
76
94
  let mut url: Vec<String> = Vec::new();
77
95
  for v in file_id_lst_js["fileIdLst"].as_array().unwrap() {
78
96
  url.push(Path::new(&host).join(&v.as_str().unwrap()).display().to_string());
@@ -84,16 +102,15 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
84
102
  url.into_iter().map(|url|{
85
103
  let txt = tx.clone();
86
104
  async move {
87
- match reqwest::get(&url).await{
88
- Ok(resp) => {
89
- let content = resp.bytes().await.unwrap();
90
- let mut decoder = GzDecoder::new(&content[..]);
91
- let mut decompressed_content = Vec::new();
92
- decoder.read_to_end(&mut decompressed_content).unwrap();
105
+ if let Ok(resp) = reqwest::get(&url).await {
106
+ let content = resp.bytes().await.unwrap();
107
+ let mut decoder = GzDecoder::new(&content[..]);
108
+ let mut decompressed_content = Vec::new();
109
+ if let Ok(_) = decoder.read_to_end(&mut decompressed_content) {
93
110
  let text = String::from_utf8_lossy(&decompressed_content);
94
- txt.send(text.to_string()).unwrap();
111
+ let (lst_chrom_pos,maf_bit) = gen_vec(text.to_string());
112
+ txt.send((lst_chrom_pos,maf_bit)).unwrap();
95
113
  }
96
- Err(_) => println!("ERROR downloading {}", url),
97
114
  }
98
115
  }
99
116
  })
@@ -101,61 +118,25 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
101
118
  fetches.await;
102
119
  drop(tx);
103
120
 
104
- // write downloaded maf into variable received_values
105
- let mut received_values: Vec<String> = Vec::new();
106
- for value in rx {
107
- received_values.push(value);
108
- }
109
-
110
- // store downloaed mafs into one HashMap data sturcture based on the common column names
111
- let mut maf = HashMap::new();
112
- for maf_data in received_values {
113
- if maf.is_empty() {
114
- maf = gen_map(maf_data);
115
- // remove columns if column name is found from MAF_COL
116
- let mut keys_to_remove_in_maf: Vec<String> = Vec::new();
117
- for key in maf.keys() {
118
- if !(MAF_COL.contains(&key.as_str())) {
119
- keys_to_remove_in_maf.push(key.to_string());
120
- }
121
- };
122
- for key in keys_to_remove_in_maf {
123
- maf.remove(&key);
124
- }
125
- } else {
126
- let maf1 = gen_map(maf_data);
127
- let keys_in_maf1: Vec<String> = maf1.keys().cloned().collect();
128
- for key in keys_in_maf1 {
129
- if maf.contains_key(&key) {
130
- let key_value = maf1[&key].clone();
131
- maf.get_mut(&key).map(|val| val.extend(key_value));
132
- }
133
- }
134
- }
135
- };
136
-
137
-
138
- // generate a Vec with "chrom\tpos" for sorting
139
- // generated indices after sorting
121
+ // write downloaded maf (GZIP format) into a Vector
122
+ // lst_chrom_pos: a vector including chromosome & position info for sorting maf
123
+ // idx_sorted: indices after sorting based on chromosome & position
124
+ let mut maf_bit: Vec<Vec<u8>> = Vec::new();
140
125
  let mut lst_chrom_pos: Vec<String> = Vec::new();
141
- for (i,v) in maf["Chromosome"].iter().enumerate() {
142
- lst_chrom_pos.push(v.to_owned()+"\t"+&maf["Start_Position"][i]);
126
+ for (chr_pos_lst,maf_bit_lst) in rx {
127
+ maf_bit.extend_from_slice(&maf_bit_lst);
128
+ lst_chrom_pos.extend_from_slice(&chr_pos_lst);
143
129
  };
144
130
  let idx_sorted = get_sorted_indices(&lst_chrom_pos);
145
131
 
146
- // write to file
147
- let file = File::create(out_file).expect("could not create file");
148
- let mut encoder = GzEncoder::new(file, Default::default());
149
- encoder.write_all(MAF_COL.join("\t").as_bytes())?;
150
- encoder.write_all("\n".as_bytes())?;
132
+ // output
133
+ // maf_out_bit: A vector of GZIPPED maf
134
+ // compress_header: output header
135
+ let mut encoder = GzEncoder::new(io::stdout(), Compression::default());
136
+ let _ = encoder.write_all(&MAF_COL.join("\t").as_bytes().to_vec()).expect("Failed to write header");
137
+ let _ = encoder.write_all(b"\n").expect("Failed to write newline");
151
138
  for i in idx_sorted.iter() {
152
- let mut val_lst: Vec<String> = Vec::new();
153
- for k in MAF_COL {
154
- val_lst.push(maf[k][*i].to_owned());
155
- };
156
- let val_out = val_lst.join("\t")+"\n";
157
- encoder.write_all(val_out.as_bytes())?;
139
+ let _ = encoder.write_all(&maf_bit[*i]).expect("Failed to write file");
158
140
  };
159
- encoder.finish()?;
160
141
  Ok(())
161
142
  }