@sjcrh/proteinpaint-rust 2.99.0 → 2.110.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +25 -7
- package/package.json +2 -2
- package/src/gdcmaf.rs +102 -28
package/index.js
CHANGED
|
@@ -43,16 +43,22 @@ exports.run_rust = function (binfile, input_data) {
|
|
|
43
43
|
})
|
|
44
44
|
}
|
|
45
45
|
|
|
46
|
-
exports.
|
|
46
|
+
exports.stream_rust = function (binfile, input_data, emitJson) {
|
|
47
47
|
const binpath = path.join(__dirname, '/target/release/', binfile)
|
|
48
48
|
const ps = spawn(binpath)
|
|
49
|
+
const stderr = []
|
|
49
50
|
try {
|
|
51
|
+
// from GDC API -> ps.stdin -> ps.stdout -> transformed stream
|
|
50
52
|
Readable.from(input_data).pipe(ps.stdin)
|
|
53
|
+
//reader.on('data', ps.stdout.pipe)
|
|
54
|
+
//reader.on('error', ps.stderr.pipe)
|
|
55
|
+
//return reader
|
|
51
56
|
} catch (error) {
|
|
52
57
|
ps.kill()
|
|
53
58
|
let errmsg = error
|
|
54
|
-
if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
|
|
55
|
-
reject(errmsg)
|
|
59
|
+
//if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
|
|
60
|
+
//reject(errmsg)
|
|
61
|
+
console.log(59, error)
|
|
56
62
|
}
|
|
57
63
|
|
|
58
64
|
const childStream = new Transform({
|
|
@@ -62,11 +68,23 @@ exports.run_rust_stream = function (binfile, input_data) {
|
|
|
62
68
|
}
|
|
63
69
|
})
|
|
64
70
|
ps.stdout.pipe(childStream)
|
|
65
|
-
|
|
66
|
-
|
|
71
|
+
ps.stderr.on('data', data => stderr.push(data))
|
|
72
|
+
ps.on('close', code => { //console.log(72, stderr.length)
|
|
73
|
+
if (stderr.length) {
|
|
74
|
+
// handle rust stderr
|
|
75
|
+
const err = stderr.join('').trim()
|
|
76
|
+
const errmsg = `!!! stream_rust('${binfile}') stderr: !!!\n${err}`
|
|
77
|
+
console.log(errmsg)
|
|
78
|
+
emitJson(err)
|
|
79
|
+
} else {
|
|
80
|
+
emitJson({ ok: true, status: 'ok', message: 'Processing complete' })
|
|
81
|
+
}
|
|
67
82
|
})
|
|
68
|
-
|
|
69
|
-
|
|
83
|
+
ps.on('error', err => {
|
|
84
|
+
console.log(74, `stream_rust().on('error')`, err)
|
|
85
|
+
emitJson(stderr.join('').trim())
|
|
70
86
|
})
|
|
87
|
+
// below will duplicate ps.on('close') event above
|
|
88
|
+
// childStream.on('end', () => console.log(`-- childStream done --`))
|
|
71
89
|
return childStream
|
|
72
90
|
}
|
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.
|
|
2
|
+
"version": "2.110.0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"description": "Rust-based utilities for proteinpaint",
|
|
5
5
|
"main": "index.js",
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"tape": "^5.2.2"
|
|
40
40
|
},
|
|
41
|
-
"pp_release_tag": "v2.
|
|
41
|
+
"pp_release_tag": "v2.110.0"
|
|
42
42
|
}
|
package/src/gdcmaf.rs
CHANGED
|
@@ -13,17 +13,25 @@
|
|
|
13
13
|
use flate2::read::GzDecoder;
|
|
14
14
|
use flate2::write::GzEncoder;
|
|
15
15
|
use flate2::Compression;
|
|
16
|
-
use serde_json::Value;
|
|
16
|
+
use serde_json::{Value,json};
|
|
17
17
|
use std::path::Path;
|
|
18
18
|
use futures::StreamExt;
|
|
19
19
|
use std::io::{self,Read,Write};
|
|
20
|
+
use std::sync::Mutex;
|
|
20
21
|
|
|
21
22
|
|
|
23
|
+
// One error record collected while processing; derives serde::Serialize so the
// collected entries can be emitted as JSON.
|
|
24
|
+
#[derive(serde::Serialize)]
|
|
25
|
+
struct ErrorEntry {
|
|
26
|
+
url: String, // URL the error relates to; an empty string when no URL applies
|
|
27
|
+
error: String, // human-readable description of what went wrong
|
|
28
|
+
}
|
|
22
29
|
|
|
23
|
-
fn select_maf_col(d:String,columns:&Vec<String
|
|
30
|
+
fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<ErrorEntry>>) -> (Vec<u8>,i32) {
|
|
24
31
|
let mut maf_str: String = String::new();
|
|
25
32
|
let mut header_indices: Vec<usize> = Vec::new();
|
|
26
33
|
let lines = d.trim_end().split("\n");
|
|
34
|
+
let mut mafrows = 0;
|
|
27
35
|
for line in lines {
|
|
28
36
|
if line.starts_with("#") {
|
|
29
37
|
continue
|
|
@@ -33,6 +41,11 @@ fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
|
|
|
33
41
|
if let Some(index) = header.iter().position(|x| x == col) {
|
|
34
42
|
header_indices.push(index);
|
|
35
43
|
} else {
|
|
44
|
+
let error_msg = format!("Column {} was not found", col);
|
|
45
|
+
errors.lock().unwrap().push(ErrorEntry {
|
|
46
|
+
url: url.to_string().clone(),
|
|
47
|
+
error: error_msg.clone(),
|
|
48
|
+
});
|
|
36
49
|
panic!("{} was not found!",col);
|
|
37
50
|
}
|
|
38
51
|
}
|
|
@@ -44,14 +57,17 @@ fn select_maf_col(d:String,columns:&Vec<String>) -> Vec<u8> {
|
|
|
44
57
|
};
|
|
45
58
|
maf_str.push_str(maf_out_lst.join("\t").as_str());
|
|
46
59
|
maf_str.push_str("\n");
|
|
60
|
+
mafrows += 1;
|
|
47
61
|
}
|
|
48
62
|
};
|
|
49
|
-
maf_str.as_bytes().to_vec()
|
|
63
|
+
(maf_str.as_bytes().to_vec(),mafrows)
|
|
50
64
|
}
|
|
51
65
|
|
|
52
66
|
|
|
53
67
|
#[tokio::main]
|
|
54
68
|
async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
69
|
+
// Create a thread-safe container for errors
|
|
70
|
+
let errors = Mutex::new(Vec::<ErrorEntry>::new());
|
|
55
71
|
// Accept the piped input JSON from Node.js and assign it to the variable
|
|
56
72
|
// host: GDC host
|
|
57
73
|
// url: urls to download single maf files
|
|
@@ -75,9 +91,17 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
75
91
|
.map(|v| v.to_string().replace("\"",""))
|
|
76
92
|
.collect::<Vec<String>>();
|
|
77
93
|
} else {
|
|
94
|
+
errors.lock().unwrap().push(ErrorEntry {
|
|
95
|
+
url: String::new(),
|
|
96
|
+
error: "The columns of arg is not an array".to_string(),
|
|
97
|
+
});
|
|
78
98
|
panic!("Columns is not an array");
|
|
79
99
|
}
|
|
80
100
|
} else {
|
|
101
|
+
errors.lock().unwrap().push(ErrorEntry {
|
|
102
|
+
url: String::new(),
|
|
103
|
+
error: "The key columns is missed from arg".to_string(),
|
|
104
|
+
});
|
|
81
105
|
panic!("Columns was not selected");
|
|
82
106
|
};
|
|
83
107
|
|
|
@@ -85,39 +109,89 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
85
109
|
let download_futures = futures::stream::iter(
|
|
86
110
|
url.into_iter().map(|url|{
|
|
87
111
|
async move {
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
112
|
+
match reqwest::get(&url).await {
|
|
113
|
+
Ok(resp) if resp.status().is_success() => {
|
|
114
|
+
match resp.bytes().await {
|
|
115
|
+
Ok(content) => {
|
|
116
|
+
let mut decoder = GzDecoder::new(&content[..]);
|
|
117
|
+
let mut decompressed_content = Vec::new();
|
|
118
|
+
match decoder.read_to_end(&mut decompressed_content) {
|
|
119
|
+
Ok(_) => {
|
|
120
|
+
let text = String::from_utf8_lossy(&decompressed_content).to_string();
|
|
121
|
+
return Ok((url.clone(),text))
|
|
122
|
+
}
|
|
123
|
+
Err(e) => {
|
|
124
|
+
let error_msg = format!("Decompression failed: {}", e);
|
|
125
|
+
Err((url.clone(), error_msg))
|
|
126
|
+
}
|
|
127
|
+
}
|
|
128
|
+
}
|
|
129
|
+
Err(e) => {
|
|
130
|
+
let error_msg = format!("Decompression failed: {}", e);
|
|
131
|
+
Err((url.clone(), error_msg))
|
|
132
|
+
}
|
|
133
|
+
}
|
|
134
|
+
}
|
|
135
|
+
Ok(resp) => {
|
|
136
|
+
let error_msg = format!("HTTP error: {}", resp.status());
|
|
137
|
+
Err((url.clone(), error_msg))
|
|
138
|
+
}
|
|
139
|
+
Err(e) => {
|
|
140
|
+
let error_msg = format!("Server request failed: {}", e);
|
|
141
|
+
Err((url.clone(), error_msg))
|
|
100
142
|
}
|
|
101
|
-
} else {
|
|
102
|
-
let error_msg = "Failed to download: ".to_string() + &url;
|
|
103
|
-
error_msg
|
|
104
143
|
}
|
|
105
144
|
}
|
|
106
145
|
})
|
|
107
146
|
);
|
|
108
147
|
|
|
109
|
-
// output
|
|
148
|
+
// binary output
|
|
110
149
|
let mut encoder = GzEncoder::new(io::stdout(), Compression::default());
|
|
111
150
|
let _ = encoder.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
|
|
112
151
|
let _ = encoder.write_all(b"\n").expect("Failed to write newline");
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
152
|
+
|
|
153
|
+
// Collect all results before processing
|
|
154
|
+
let results = download_futures.buffer_unordered(50).collect::<Vec<_>>().await;
|
|
155
|
+
|
|
156
|
+
// Process results after all downloads are complete
|
|
157
|
+
for result in results {
|
|
158
|
+
match result {
|
|
159
|
+
Ok((url, content)) => {
|
|
160
|
+
let (maf_bit,mafrows) = select_maf_col(content, &maf_col, &url, &errors);
|
|
161
|
+
if mafrows > 0 {
|
|
162
|
+
let _ = encoder.write_all(&maf_bit).expect("Failed to write file");
|
|
163
|
+
} else {
|
|
164
|
+
errors.lock().unwrap().push(ErrorEntry {
|
|
165
|
+
url: url.clone(),
|
|
166
|
+
error: "Empty maf file".to_string(),
|
|
167
|
+
});
|
|
168
|
+
}
|
|
169
|
+
}
|
|
170
|
+
Err((url, error)) => {
|
|
171
|
+
errors.lock().unwrap().push(ErrorEntry {
|
|
172
|
+
url,
|
|
173
|
+
error,
|
|
174
|
+
})
|
|
175
|
+
}
|
|
176
|
+
}
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
// Finalize the output and print errors
|
|
180
|
+
encoder.finish().expect("Maf file output error!");
|
|
181
|
+
|
|
182
|
+
// Manually flush stdout
|
|
183
|
+
io::stdout().flush().expect("Failed to flush stdout");
|
|
184
|
+
|
|
185
|
+
// After processing all downloads, output the errors as JSON to stderr
|
|
186
|
+
let errors = errors.lock().unwrap();
|
|
187
|
+
if !errors.is_empty() {
|
|
188
|
+
let error_json = json!({
|
|
189
|
+
"errors": errors.iter().collect::<Vec<&ErrorEntry>>()
|
|
190
|
+
});
|
|
191
|
+
let mut stderr = io::stderr();
|
|
192
|
+
writeln!(stderr, "{}", error_json).expect("Failed to output stderr!");
|
|
193
|
+
io::stderr().flush().expect("Failed to flush stderr");
|
|
194
|
+
};
|
|
195
|
+
|
|
122
196
|
Ok(())
|
|
123
197
|
}
|