@sjcrh/proteinpaint-rust 2.99.0 → 2.110.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. package/index.js +25 -7
  2. package/package.json +2 -2
  3. package/src/gdcmaf.rs +102 -28
package/index.js CHANGED
@@ -43,16 +43,22 @@ exports.run_rust = function (binfile, input_data) {
43
43
  })
44
44
  }
45
45
 
46
- exports.run_rust_stream = function (binfile, input_data) {
46
+ exports.stream_rust = function (binfile, input_data, emitJson) {
47
47
  const binpath = path.join(__dirname, '/target/release/', binfile)
48
48
  const ps = spawn(binpath)
49
+ const stderr = []
49
50
  try {
51
+ // from GDC API -> ps.stdin -> ps.stdout -> transformed stream
50
52
  Readable.from(input_data).pipe(ps.stdin)
53
+ //reader.on('data', ps.stdout.pipe)
54
+ //reader.on('error', ps.stderr.pipe)
55
+ //return reader
51
56
  } catch (error) {
52
57
  ps.kill()
53
58
  let errmsg = error
54
- if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
55
- reject(errmsg)
59
+ //if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
60
+ //reject(errmsg)
61
+ console.log(59, error)
56
62
  }
57
63
 
58
64
  const childStream = new Transform({
@@ -62,11 +68,23 @@ exports.run_rust_stream = function (binfile, input_data) {
62
68
  }
63
69
  })
64
70
  ps.stdout.pipe(childStream)
65
- childStream.on('error', err => {
66
- reject(err)
71
+ ps.stderr.on('data', data => stderr.push(data))
72
+ ps.on('close', code => { //console.log(72, stderr.length)
73
+ if (stderr.length) {
74
+ // handle rust stderr
75
+ const err = stderr.join('').trim()
76
+ const errmsg = `!!! stream_rust('${binfile}') stderr: !!!\n${err}`
77
+ console.log(errmsg)
78
+ emitJson(err)
79
+ } else {
80
+ emitJson({ ok: true, status: 'ok', message: 'Processing complete' })
81
+ }
67
82
  })
68
- childStream.on('close', code => {
69
- childStream.end()
83
+ ps.on('error', err => {
84
+ console.log(74, `stream_rust().on('error')`, err)
85
+ emitJson(stderr.join('').trim())
70
86
  })
87
+ // below will duplicate ps.on('close') event above
88
+ // childStream.on('end', () => console.log(`-- childStream done --`))
71
89
  return childStream
72
90
  }
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.99.0",
2
+ "version": "2.110.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.99.0"
41
+ "pp_release_tag": "v2.110.0"
42
42
  }
package/src/gdcmaf.rs CHANGED
@@ -13,17 +13,25 @@
13
13
  use flate2::read::GzDecoder;
14
14
  use flate2::write::GzEncoder;
15
15
  use flate2::Compression;
16
- use serde_json::Value;
16
+ use serde_json::{Value,json};
17
17
  use std::path::Path;
18
18
  use futures::StreamExt;
19
19
  use std::io::{self,Read,Write};
20
+ use std::sync::Mutex;
20
21
 
21
22
 
23
+ // Struct to hold error information
24
+ #[derive(serde::Serialize)]
25
+ struct ErrorEntry {
26
+ url: String,
27
+ error: String,
28
+ }
22
29
 
23
- fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
30
+ fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<ErrorEntry>>) -> (Vec<u8>,i32) {
24
31
  let mut maf_str: String = String::new();
25
32
  let mut header_indices: Vec<usize> = Vec::new();
26
33
  let lines = d.trim_end().split("\n");
34
+ let mut mafrows = 0;
27
35
  for line in lines {
28
36
  if line.starts_with("#") {
29
37
  continue
@@ -33,6 +41,11 @@ fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
33
41
  if let Some(index) = header.iter().position(|x| x == col) {
34
42
  header_indices.push(index);
35
43
  } else {
44
+ let error_msg = format!("Column {} was not found", col);
45
+ errors.lock().unwrap().push(ErrorEntry {
46
+ url: url.to_string().clone(),
47
+ error: error_msg.clone(),
48
+ });
36
49
  panic!("{} was not found!",col);
37
50
  }
38
51
  }
@@ -44,14 +57,17 @@ fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
44
57
  };
45
58
  maf_str.push_str(maf_out_lst.join("\t").as_str());
46
59
  maf_str.push_str("\n");
60
+ mafrows += 1;
47
61
  }
48
62
  };
49
- maf_str.as_bytes().to_vec()
63
+ (maf_str.as_bytes().to_vec(),mafrows)
50
64
  }
51
65
 
52
66
 
53
67
  #[tokio::main]
54
68
  async fn main() -> Result<(),Box<dyn std::error::Error>> {
69
+ // Create a thread-safe container for errors
70
+ let errors = Mutex::new(Vec::<ErrorEntry>::new());
55
71
  // Accept the piped input JSON from Node.js and assign it to a variable
56
72
  // host: GDC host
57
73
  // url: urls to download single maf files
@@ -75,9 +91,17 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
75
91
  .map(|v| v.to_string().replace("\"",""))
76
92
  .collect::<Vec<String>>();
77
93
  } else {
94
+ errors.lock().unwrap().push(ErrorEntry {
95
+ url: String::new(),
96
+ error: "The columns of arg is not an array".to_string(),
97
+ });
78
98
  panic!("Columns is not an array");
79
99
  }
80
100
  } else {
101
+ errors.lock().unwrap().push(ErrorEntry {
102
+ url: String::new(),
103
+ error: "The key columns is missed from arg".to_string(),
104
+ });
81
105
  panic!("Columns was not selected");
82
106
  };
83
107
 
@@ -85,39 +109,89 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
85
109
  let download_futures = futures::stream::iter(
86
110
  url.into_iter().map(|url|{
87
111
  async move {
88
- let result = reqwest::get(&url).await;
89
- if let Ok(resp) = result {
90
- let content = resp.bytes().await.unwrap();
91
- let mut decoder = GzDecoder::new(&content[..]);
92
- let mut decompressed_content = Vec::new();
93
- let read_content = decoder.read_to_end(&mut decompressed_content);
94
- if let Ok(_) = read_content {
95
- let text = String::from_utf8_lossy(&decompressed_content).to_string();
96
- text
97
- } else {
98
- let error_msg = "Failed to read content downloaded from: ".to_string() + &url;
99
- error_msg
112
+ match reqwest::get(&url).await {
113
+ Ok(resp) if resp.status().is_success() => {
114
+ match resp.bytes().await {
115
+ Ok(content) => {
116
+ let mut decoder = GzDecoder::new(&content[..]);
117
+ let mut decompressed_content = Vec::new();
118
+ match decoder.read_to_end(&mut decompressed_content) {
119
+ Ok(_) => {
120
+ let text = String::from_utf8_lossy(&decompressed_content).to_string();
121
+ return Ok((url.clone(),text))
122
+ }
123
+ Err(e) => {
124
+ let error_msg = format!("Decompression failed: {}", e);
125
+ Err((url.clone(), error_msg))
126
+ }
127
+ }
128
+ }
129
+ Err(e) => {
130
+ let error_msg = format!("Decompression failed: {}", e);
131
+ Err((url.clone(), error_msg))
132
+ }
133
+ }
134
+ }
135
+ Ok(resp) => {
136
+ let error_msg = format!("HTTP error: {}", resp.status());
137
+ Err((url.clone(), error_msg))
138
+ }
139
+ Err(e) => {
140
+ let error_msg = format!("Server request failed: {}", e);
141
+ Err((url.clone(), error_msg))
100
142
  }
101
- } else {
102
- let error_msg = "Failed to download: ".to_string() + &url;
103
- error_msg
104
143
  }
105
144
  }
106
145
  })
107
146
  );
108
147
 
109
- // output
148
+ // binary output
110
149
  let mut encoder = GzEncoder::new(io::stdout(), Compression::default());
111
150
  let _ = encoder.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
112
151
  let _ = encoder.write_all(b"\n").expect("Failed to write newline");
113
- download_futures.buffer_unordered(20).for_each(|item| {
114
- if item.starts_with("Failed") {
115
- eprintln!("{}",item);
116
- } else {
117
- let maf_bit = select_maf_col(item,&maf_col);
118
- let _ = encoder.write_all(&maf_bit).expect("Failed to write file");
119
- };
120
- async {}
121
- }).await;
152
+
153
+ // Collect all results before processing
154
+ let results = download_futures.buffer_unordered(50).collect::<Vec<_>>().await;
155
+
156
+ // Process results after all downloads are complete
157
+ for result in results {
158
+ match result {
159
+ Ok((url, content)) => {
160
+ let (maf_bit,mafrows) = select_maf_col(content, &maf_col, &url, &errors);
161
+ if mafrows > 0 {
162
+ let _ = encoder.write_all(&maf_bit).expect("Failed to write file");
163
+ } else {
164
+ errors.lock().unwrap().push(ErrorEntry {
165
+ url: url.clone(),
166
+ error: "Empty maf file".to_string(),
167
+ });
168
+ }
169
+ }
170
+ Err((url, error)) => {
171
+ errors.lock().unwrap().push(ErrorEntry {
172
+ url,
173
+ error,
174
+ })
175
+ }
176
+ }
177
+ };
178
+
179
+ // Finalize output before printing errors
180
+ encoder.finish().expect("Maf file output error!");
181
+
182
+ // Manually flush stdout
183
+ io::stdout().flush().expect("Failed to flush stdout");
184
+
185
+ // After processing all downloads, output the errors as JSON to stderr
186
+ let errors = errors.lock().unwrap();
187
+ if !errors.is_empty() {
188
+ let error_json = json!({
189
+ "errors": errors.iter().collect::<Vec<&ErrorEntry>>()
190
+ });
191
+ let mut stderr = io::stderr();
192
+ writeln!(stderr, "{}", error_json).expect("Failed to output stderr!");
193
+ io::stderr().flush().expect("Failed to flush stderr");
194
+ };
195
+
122
196
  Ok(())
123
197
  }