@sjcrh/proteinpaint-rust 2.110.0 → 2.111.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3)
  1. package/index.js +7 -6
  2. package/package.json +2 -2
  3. package/src/gdcmaf.rs +66 -58
package/index.js CHANGED
@@ -72,17 +72,18 @@ exports.stream_rust = function (binfile, input_data, emitJson) {
72
72
  ps.on('close', code => { //console.log(72, stderr.length)
73
73
  if (stderr.length) {
74
74
  // handle rust stderr
75
- const err = stderr.join('').trim()
76
- const errmsg = `!!! stream_rust('${binfile}') stderr: !!!\n${err}`
77
- console.log(errmsg)
78
- emitJson(err)
75
+ const errors = stderr.join('').trim().split('\n').map(JSON.parse)
76
+ //const errmsg = `!!! stream_rust('${binfile}') stderr: !!!`
77
+ //console.log(errmsg, errors)
78
+ emitJson({errors})
79
79
  } else {
80
80
  emitJson({ ok: true, status: 'ok', message: 'Processing complete' })
81
81
  }
82
82
  })
83
83
  ps.on('error', err => {
84
- console.log(74, `stream_rust().on('error')`, err)
85
- emitJson(stderr.join('').trim())
84
+ //console.log(74, `stream_rust().on('error')`, err)
85
+ const errors = stderr.join('').trim().split('\n').map(JSON.parse)
86
+ emitJson({errors})
86
87
  })
87
88
  // below will duplicate ps.on('close') event above
88
89
  // childStream.on('end', () => console.log(`-- childStream done --`))
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.110.0",
2
+ "version": "2.111.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.110.0"
41
+ "pp_release_tag": "v2.111.0"
42
42
  }
package/src/gdcmaf.rs CHANGED
@@ -13,12 +13,10 @@
13
13
  use flate2::read::GzDecoder;
14
14
  use flate2::write::GzEncoder;
15
15
  use flate2::Compression;
16
- use serde_json::{Value,json};
16
+ use serde_json::{Value};
17
17
  use std::path::Path;
18
18
  use futures::StreamExt;
19
19
  use std::io::{self,Read,Write};
20
- use std::sync::Mutex;
21
-
22
20
 
23
21
  // Struct to hold error information
24
22
  #[derive(serde::Serialize)]
@@ -27,7 +25,7 @@ struct ErrorEntry {
27
25
  error: String,
28
26
  }
29
27
 
30
- fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<ErrorEntry>>) -> (Vec<u8>,i32) {
28
+ fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32), (String, String)> {
31
29
  let mut maf_str: String = String::new();
32
30
  let mut header_indices: Vec<usize> = Vec::new();
33
31
  let lines = d.trim_end().split("\n");
@@ -38,15 +36,14 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<Erro
38
36
  } else if line.contains("Hugo_Symbol") {
39
37
  let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
40
38
  for col in columns {
41
- if let Some(index) = header.iter().position(|x| x == col) {
42
- header_indices.push(index);
43
- } else {
44
- let error_msg = format!("Column {} was not found", col);
45
- errors.lock().unwrap().push(ErrorEntry {
46
- url: url.to_string().clone(),
47
- error: error_msg.clone(),
48
- });
49
- panic!("{} was not found!",col);
39
+ match header.iter().position(|x| x == col) {
40
+ Some(index) => {
41
+ header_indices.push(index);
42
+ }
43
+ None => {
44
+ let error_msg = format!("Column {} was not found", col);
45
+ return Err((url.to_string(), error_msg));
46
+ }
50
47
  }
51
48
  }
52
49
  } else {
@@ -60,19 +57,20 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<Erro
60
57
  mafrows += 1;
61
58
  }
62
59
  };
63
- (maf_str.as_bytes().to_vec(),mafrows)
60
+ Ok((maf_str.as_bytes().to_vec(),mafrows))
64
61
  }
65
62
 
66
63
 
64
+
67
65
  #[tokio::main]
68
66
  async fn main() -> Result<(),Box<dyn std::error::Error>> {
69
- // Create a thread-container for errors
70
- let errors = Mutex::new(Vec::<ErrorEntry>::new());
71
67
  // Accepting the piped input json from nodejs and assigning it to the variable
72
68
  // host: GDC host
73
69
  // url: urls to download single maf files
74
70
  let mut buffer = String::new();
75
71
  io::stdin().read_line(&mut buffer)?;
72
+
73
+ // reading the input from PP
76
74
  let file_id_lst_js = serde_json::from_str::<Value>(&buffer).expect("Error reading input and serializing to JSON");
77
75
  let host = file_id_lst_js.get("host").expect("Host was not provided").as_str().expect("Host is not a string");
78
76
  let mut url: Vec<String> = Vec::new();
@@ -91,18 +89,28 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
91
89
  .map(|v| v.to_string().replace("\"",""))
92
90
  .collect::<Vec<String>>();
93
91
  } else {
94
- errors.lock().unwrap().push(ErrorEntry {
92
+ let column_error = ErrorEntry {
95
93
  url: String::new(),
96
- error: "The columns of arg is not an array".to_string(),
97
- });
98
- panic!("Columns is not an array");
94
+ error: "The columns in arg is not an array".to_string(),
95
+ };
96
+ let column_error_js = serde_json::to_string(&column_error).unwrap();
97
+ writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
98
+ return Err(Box::new(std::io::Error::new(
99
+ std::io::ErrorKind::InvalidInput,
100
+ "The columns in arg is not an array",
101
+ )) as Box<dyn std::error::Error>);
99
102
  }
100
103
  } else {
101
- errors.lock().unwrap().push(ErrorEntry {
104
+ let column_error = ErrorEntry {
102
105
  url: String::new(),
103
- error: "The key columns is missed from arg".to_string(),
104
- });
105
- panic!("Columns was not selected");
106
+ error: "Columns was not selected".to_string(),
107
+ };
108
+ let column_error_js = serde_json::to_string(&column_error).unwrap();
109
+ writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
110
+ return Err(Box::new(std::io::Error::new(
111
+ std::io::ErrorKind::InvalidInput,
112
+ "Columns was not selected",
113
+ )) as Box<dyn std::error::Error>);
106
114
  };
107
115
 
108
116
  // downloading maf files in parallel and merging them into a single maf file
@@ -121,13 +129,13 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
121
129
  return Ok((url.clone(),text))
122
130
  }
123
131
  Err(e) => {
124
- let error_msg = format!("Decompression failed: {}", e);
132
+ let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
125
133
  Err((url.clone(), error_msg))
126
134
  }
127
135
  }
128
136
  }
129
137
  Err(e) => {
130
- let error_msg = format!("Decompression failed: {}", e);
138
+ let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
131
139
  Err((url.clone(), error_msg))
132
140
  }
133
141
  }
@@ -150,48 +158,48 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
150
158
  let _ = encoder.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
151
159
  let _ = encoder.write_all(b"\n").expect("Failed to write newline");
152
160
 
153
- // Collect all results before processing
154
- let results = download_futures.buffer_unordered(50).collect::<Vec<_>>().await;
155
-
156
- // Process results after all downloads are complete
157
- for result in results {
161
+ download_futures.buffer_unordered(20).for_each(|result| {
158
162
  match result {
159
163
  Ok((url, content)) => {
160
- let (maf_bit,mafrows) = select_maf_col(content, &maf_col, &url, &errors);
161
- if mafrows > 0 {
162
- let _ = encoder.write_all(&maf_bit).expect("Failed to write file");
163
- } else {
164
- errors.lock().unwrap().push(ErrorEntry {
165
- url: url.clone(),
166
- error: "Empty maf file".to_string(),
167
- });
164
+ match select_maf_col(content, &maf_col, &url) {
165
+ Ok((maf_bit,mafrows)) => {
166
+ if mafrows > 0 {
167
+ encoder.write_all(&maf_bit).expect("Failed to write file");
168
+ } else {
169
+ let error = ErrorEntry {
170
+ url: url.clone(),
171
+ error: "Empty maf file".to_string(),
172
+ };
173
+ let error_js = serde_json::to_string(&error).unwrap();
174
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
175
+ }
176
+ }
177
+ Err((url,error)) => {
178
+ let error = ErrorEntry {
179
+ url,
180
+ error,
181
+ };
182
+ let error_js = serde_json::to_string(&error).unwrap();
183
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
184
+ }
168
185
  }
169
186
  }
170
187
  Err((url, error)) => {
171
- errors.lock().unwrap().push(ErrorEntry {
188
+ let error = ErrorEntry {
172
189
  url,
173
190
  error,
174
- })
191
+ };
192
+ let error_js = serde_json::to_string(&error).unwrap();
193
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
175
194
  }
176
- }
177
- };
178
-
195
+ };
196
+ async {}
197
+ }).await;
198
+
179
199
  // Finalize output and printing errors
180
200
  encoder.finish().expect("Maf file output error!");
181
-
182
- // Manually flush stdout
201
+ // Manually flush stdout and stderr
183
202
  io::stdout().flush().expect("Failed to flush stdout");
184
-
185
- // After processing all downloads, output the errors as JSON to stderr
186
- let errors = errors.lock().unwrap();
187
- if !errors.is_empty() {
188
- let error_json = json!({
189
- "errors": errors.iter().collect::<Vec<&ErrorEntry>>()
190
- });
191
- let mut stderr = io::stderr();
192
- writeln!(stderr, "{}", error_json).expect("Failed to output stderr!");
193
- io::stderr().flush().expect("Failed to flush stderr");
194
- };
195
-
203
+ io::stderr().flush().expect("Failed to flush stderr");
196
204
  Ok(())
197
205
  }