@sjcrh/proteinpaint-rust 2.110.0 → 2.112.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/index.js +142 -31
- package/package.json +2 -2
- package/src/gdcmaf.rs +171 -79
package/index.js
CHANGED

@@ -1,7 +1,10 @@
 const path = require('path'),
-  spawn = require('child_process')
+  { spawn, exec } = require('child_process'),
   Readable = require('stream').Readable,
-  Transform = require('stream').Transform
+  Transform = require('stream').Transform,
+  { promisify } = require('util')
+
+const execPromise = promisify(exec)

 exports.run_rust = function (binfile, input_data) {
   return new Promise((resolve, reject) => {

@@ -45,46 +48,154 @@ exports.run_rust = function (binfile, input_data) {

 exports.stream_rust = function (binfile, input_data, emitJson) {
   const binpath = path.join(__dirname, '/target/release/', binfile)
-  const ps = spawn(binpath)
-  const stderr = []
-  try {
-    // from GDC API -> ps.stdin -> ps.stdout -> transformed stream
-    Readable.from(input_data).pipe(ps.stdin)
-    //reader.on('data', ps.stdout.pipe)
-    //reader.on('error', ps.stderr.pipe)
-    //return reader
-  } catch (error) {
-    ps.kill()
-    let errmsg = error
-    //if (stderr.length) errmsg += `killed run_rust('${binfile}'), stderr: ${stderr.join('').trim()}`
-    //reject(errmsg)
-    console.log(59, error)
-  }

+  const ps = spawn(binpath)
   const childStream = new Transform({
     transform(chunk, encoding, callback) {
       this.push(chunk)
       callback()
     }
   })
-
+  // we only want to run this interval loop inside a container, not in dev/test CI
+  if (binfile == 'gdcmaf') trackByPid(ps.pid, binfile)
+  const stderr = []
+  try {
+    // from route handler -> input_data -> ps.stdin -> ps.stdout -> transformed stream -> express response.pipe()
+    Readable.from(input_data)
+      .pipe(ps.stdin)
+      .on('error', err => {
+        emitErrors({ error: `error piping input data to spawned ${binfile} process` })
+      })
+  } catch (error) {
+    console.log(`Error piping input_data into ${binfile}`, error)
+    return
+  }
+
+  // uncomment to trigger childStream.destroy()
+  // setTimeout(() => { console.log(74, 'childStream.destroy()'); childStream.destroy();}, 1000)
+  // childStream.destroy() does not seem to trigger ps.stdout.pipe('...').on('error') callback,
+  // which is okay as long as the server doesn't crash and ps get's killed eventually
+  ps.stdout.pipe(childStream).on('error', err => console.log('ps.stdout.pipe(childStream) error', err))
+
   ps.stderr.on('data', data => stderr.push(data))
-
-
-
-
-
-    console.log(errmsg)
-    emitJson(err)
+
+  ps.on('close', code => {
+    if (trackedPids.has(ps.pid)) trackedPids.delete(ps.pid)
+    if (stderr.length || killedPids.has(ps.pid) || code !== 0) {
+      emitErrors(null, ps.pid, code)
     } else {
-      emitJson(
+      emitJson()
     }
   })
   ps.on('error', err => {
-
-
+    if (trackedPids.has(ps.pid)) trackedPids.delete(ps.pid)
+    // console.log(74, `stream_rust().on('error')`, err)
+    emitErrors(null, ps.pid)
+  })
+  ps.on('SIGTERM', err => {
+    console.log(err)
+  })
+
+  function emitErrors(error, pid, code = 0) {
+    // concatenate stderr uint8arr into a string
+    let errors = stderr.join('').trim()
+    if (error) errors += `\n` + error
+    if (pid && killedPids.has(ps.pid) && !trackedPids.has(ps.pid)) {
+      errors += '\n' + JSON.stringify({ error: `server error: MAF file processing terminated (expired process)` })
+      killedPids.delete(pid)
+    } else if (pid && code !== 0) {
+      // may result from errors in spawned process code, or external signal (like `kill -9` in terminal)
+      errors += '\n' + JSON.stringify({ error: `server error: MAF file processing terminated (code=${code})` })
+    }
+    emitJson(errors)
+  }
+
+  // on('end') will duplicate ps.on('close') event above
+  // childStream.on('end', () => console.log(`childStream.on(end)`))
+
+  // this may duplicate ps.on('error'), unless the error happened within the transform
+  childStream.on('error', err => {
+    console.log('stream_rust childStream.on(error)', err)
+    try {
+      childStream.destroy(err)
+    } catch (e) {
+      console.log(e)
+    }
   })
-
-
-
+
+  function endStream() {
+    try {
+      if (!childStream.writableEnded) {
+        console.log('trigger childStream.destroy() in endStream()')
+        childStream.destroy()
+      }
+    } catch (e) {
+      console.log('error triggering childStream.destroy()', e)
+    }
+    try {
+      if (!ps.killed) {
+        console.log('trigger ps.kill() in endStream()')
+        ps.kill()
+      }
+      if (trackedPids.has(ps.pid)) trackedPids.delete(ps.pid)
+    } catch (e) {
+      console.log('error triggering ps.kill()', e)
+    }
+  }
+
+  return { rustStream: childStream, endStream }
+}
+
+const trackedPids = new Map() // will be used to monitor expired processes
+const killedPids = new Set() // will be used to detect killed processes, to help with error detection
+const PSKILL_INTERVAL_MS = 30000 // every 30 seconds
+let psKillInterval
+
+// default maxElapsed = 5 * 60 * 1000 millisecond = 300000 or 5 minutes, change to 0 to test
+// may allow configuration of maxElapsed by dataset/argument
+function trackByPid(pid, name, maxElapsed = 300000) {
+  if (!pid) return
+  // only track by value (integer, string), not reference object
+  // NOTE: a reused/reassigned process.pid will be replaced by the most recent process
+  trackedPids.set(pid, { name, expires: Date.now() + maxElapsed })
+  if (!psKillInterval) psKillInterval = setInterval(killExpiredProcesses, PSKILL_INTERVAL_MS)
+  // uncomment below to test
+  // console.log([...trackedPids.entries()])
+  // if (maxElapsed < 10000) setTimeout(killExpiredProcesses, 1000) // uncomment for testing only
+}
+
+//
+// Use one setInterval() to monitor >= 1 process,
+// instead of a separate setTimeout() for each process.
+// This is more reliable as setTimeout would use spawned ps.kill(),
+// which may not exist when the timeout callback is executed and
+// thus would require clearTimeout(closured_variable). Tracking by
+// pid does not rely on a usable 'ps' variable to kill itself.
+//
+function killExpiredProcesses() {
+  //console.log(149, 'killExpiredProcesses()')
+  killedPids.clear()
+  const time = Date.now()
+  for (const [pid, info] of trackedPids.entries()) {
+    if (info.expires > time) continue
+    try {
+      // true if process exists
+      process.kill(pid, 0)
+    } catch (_) {
+      // no need to kill, but remove from tracking
+      trackedPids.delete(pid)
+      // prevent misleading logs of 'unable to kill ...'
+      continue
+    }
+    const label = `rust process ${info.name} (pid=${pid})`
+    try {
+      // detect if process exists before killing it
+      process.kill(pid, 'SIGTERM')
+      trackedPids.delete(pid)
+      killedPids.add(pid)
+      console.log(`killed ${label}`)
+    } catch (err) {
+      console.log(`unable to kill ${label}`, err)
+    }
+  }
 }
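The trackByPid()/killExpiredProcesses() addition above replaces per-process setTimeout cleanup with one shared interval keyed by pid, so reaping an expired child does not depend on a still-usable ps handle. A minimal standalone sketch of that pattern (names here are illustrative, not the package's exported API):

// Sketch: expire spawned processes via one shared interval (hypothetical, standalone).
const { spawn } = require('child_process')

const tracked = new Map() // pid -> { name, expires }
let reaper

function track(pid, name, maxElapsedMs = 5 * 60 * 1000) {
  if (!pid) return
  tracked.set(pid, { name, expires: Date.now() + maxElapsedMs })
  if (!reaper) reaper = setInterval(reap, 30000)
}

function reap() {
  const now = Date.now()
  for (const [pid, info] of tracked) {
    if (info.expires > now) continue
    try {
      process.kill(pid, 0) // signal 0 only tests whether the pid still exists
    } catch (_) {
      tracked.delete(pid) // already gone; just stop tracking it
      continue
    }
    try {
      process.kill(pid, 'SIGTERM')
      console.log(`killed ${info.name} (pid=${pid})`)
    } catch (err) {
      console.log(`unable to kill ${info.name} (pid=${pid})`, err)
    }
    tracked.delete(pid)
  }
}

// usage: const ps = spawn('./target/release/gdcmaf'); track(ps.pid, 'gdcmaf')

Signal 0 is the conventional liveness probe: process.kill(pid, 0) throws if the pid no longer exists, which lets the reaper drop stale entries without logging spurious kill failures.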
package/package.json
CHANGED

@@ -1,5 +1,5 @@
 {
-  "version": "2.110.0",
+  "version": "2.112.0",
   "name": "@sjcrh/proteinpaint-rust",
   "description": "Rust-based utilities for proteinpaint",
   "main": "index.js",

@@ -38,5 +38,5 @@
   "devDependencies": {
     "tape": "^5.2.2"
   },
-  "pp_release_tag": "v2.110.0"
+  "pp_release_tag": "v2.112.0"
 }
package/src/gdcmaf.rs
CHANGED

@@ -1,24 +1,25 @@
 /*
-
+This script download cohort maf files from GDC, concatenate them into a single file that includes user specified columns.

-
-
-
-
+Input JSON:
+host: GDC host
+fileIdLst: An array of uuid
+Output gzip compressed maf file to stdout.

-
-
+Example of usage:
+echo '{"host": "https://api.gdc.cancer.gov/data/","columns": ["Hugo_Symbol", "Entrez_Gene_Id", "Center", "NCBI_Build", "Chromosome", "Start_Position"], "fileIdLst": ["8b31d6d1-56f7-4aa8-b026-c64bafd531e7", "b429fcc1-2b59-4b4c-a472-fb27758f6249"]}'|./target/release/gdcmaf
 */

 use flate2::read::GzDecoder;
 use flate2::write::GzEncoder;
 use flate2::Compression;
-use serde_json::{Value…
-use std::path::Path;
+use serde_json::{Value};
 use futures::StreamExt;
 use std::io::{self,Read,Write};
-use std::…
-
+use std::time::Duration;
+use tokio::io::{AsyncReadExt, BufReader};
+use tokio::time::timeout;
+use std::sync::{Arc, Mutex};

 // Struct to hold error information
 #[derive(serde::Serialize)]

@@ -27,7 +28,7 @@ struct ErrorEntry {
     error: String,
 }

-fn select_maf_col(d:String,columns:&Vec<String>,url:&str…
+fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32), (String, String)> {
     let mut maf_str: String = String::new();
     let mut header_indices: Vec<usize> = Vec::new();
     let lines = d.trim_end().split("\n");

@@ -38,16 +39,18 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<Erro…
         } else if line.contains("Hugo_Symbol") {
             let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
             for col in columns {
-
-
-
-
-
-
-
-                }
-                panic!("{} was not found!",col);
+                match header.iter().position(|x| x == col) {
+                    Some(index) => {
+                        header_indices.push(index);
+                    }
+                    None => {
+                        let error_msg = format!("Column {} was not found", col);
+                        return Err((url.to_string(), error_msg));
+                    }
                 }
+            };
+            if header_indices.is_empty() {
+                return Err((url.to_string(), "No matching columns found".to_string()));
             }
         } else {
             let maf_cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();

@@ -60,25 +63,77 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str,errors: &Mutex<Vec<Erro…
             mafrows += 1;
         }
     };
-    (maf_str.as_bytes().to_vec(),mafrows)
+    Ok((maf_str.as_bytes().to_vec(),mafrows))
 }


+
 #[tokio::main]
 async fn main() -> Result<(),Box<dyn std::error::Error>> {
-    // Create a thread-container for errors
-    let errors = Mutex::new(Vec::<ErrorEntry>::new());
     // Accepting the piped input json from jodejs and assign to the variable
     // host: GDC host
     // url: urls to download single maf files
-    let…
-
-
+    let timeout_duration = Duration::from_secs(5); // Set a 10-second timeout
+
+    // Wrap the read operation in a timeout
+    let result = timeout(timeout_duration, async {
+        let mut buffer = String::new(); // Initialize an empty string to store input
+        let mut reader = BufReader::new(tokio::io::stdin()); // Create a buffered reader for stdin
+        reader.read_to_string(&mut buffer).await?; // Read a line asynchronously
+        Ok::<String, io::Error>(buffer) // Return the input as a Result
+    })
+    .await;
+    // Handle the result of the timeout operation
+    let file_id_lst_js: Value = match result {
+        Ok(Ok(buffer)) => {
+            match serde_json::from_str(&buffer) {
+                Ok(js) => js,
+                Err(e) => {
+                    let stdin_error = ErrorEntry {
+                        url: String::new(),
+                        error: format!("JSON parsing error: {}", e),
+                    };
+                    writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
+                    return Err(Box::new(std::io::Error::new(
+                        std::io::ErrorKind::InvalidInput,
+                        "JSON parsing error!",
+                    )) as Box<dyn std::error::Error>);
+                }
+            }
+        }
+        Ok(Err(_e)) => {
+            let stdin_error = ErrorEntry {
+                url: String::new(),
+                error: "Error reading from stdin.".to_string(),
+            };
+            let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
+            writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
+            return Err(Box::new(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "Failed to output stderr!",
+            )) as Box<dyn std::error::Error>);
+        }
+        Err(_) => {
+            let stdin_error = ErrorEntry {
+                url: String::new(),
+                error: "Timeout while reading from stdin.".to_string(),
+            };
+            let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
+            writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
+            return Err(Box::new(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "The columns in arg is not an array",
+            )) as Box<dyn std::error::Error>);
+        }
+    };
+
+    // reading the input from PP
     let host = file_id_lst_js.get("host").expect("Host was not provided").as_str().expect("Host is not a string");
     let mut url: Vec<String> = Vec::new();
     let file_id_lst = file_id_lst_js.get("fileIdLst").expect("File ID list is missed!").as_array().expect("File ID list is not an array");
     for v in file_id_lst {
-        url.push(Path::new(&host).join(&v.as_str().unwrap()).display().to_string());
+        //url.push(Path::new(&host).join(&v.as_str().unwrap()).display().to_string());
+        url.push(format!("{}/{}",host.trim_end_matches('/'), v.as_str().unwrap()));
     };

     // read columns as array from input json and convert data type from Vec<Value> to Vec<String>

@@ -91,25 +146,47 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
             .map(|v| v.to_string().replace("\"",""))
             .collect::<Vec<String>>();
         } else {
-
+            let column_error = ErrorEntry {
                 url: String::new(),
-                error: "The columns…
-            }
-
+                error: "The columns in arg is not an array".to_string(),
+            };
+            let column_error_js = serde_json::to_string(&column_error).unwrap();
+            writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
+            return Err(Box::new(std::io::Error::new(
+                std::io::ErrorKind::InvalidInput,
+                "The columns in arg is not an array",
+            )) as Box<dyn std::error::Error>);
         }
     } else {
-
+        let column_error = ErrorEntry {
             url: String::new(),
-            error: "…
-        }
-
+            error: "Columns was not selected".to_string(),
+        };
+        let column_error_js = serde_json::to_string(&column_error).unwrap();
+        writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
+        return Err(Box::new(std::io::Error::new(
+            std::io::ErrorKind::InvalidInput,
+            "Columns was not selected",
+        )) as Box<dyn std::error::Error>);
    };

     //downloading maf files parallelly and merge them into single maf file
     let download_futures = futures::stream::iter(
         url.into_iter().map(|url|{
             async move {
-
+                let client = reqwest::Client::builder()
+                    .timeout(Duration::from_secs(60)) // 60-second timeout per request
+                    .connect_timeout(Duration::from_secs(15))
+                    .build()
+                    .map_err(|_e| {
+                        let client_error = ErrorEntry{
+                            url: url.clone(),
+                            error: "Client build error".to_string(),
+                        };
+                        let client_error_js = serde_json::to_string(&client_error).unwrap();
+                        writeln!(io::stderr(), "{}", client_error_js).expect("Failed to build reqwest client!");
+                    });
+                match client.unwrap().get(&url).send().await {
                     Ok(resp) if resp.status().is_success() => {
                         match resp.bytes().await {
                             Ok(content) => {

@@ -121,13 +198,13 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
                                 return Ok((url.clone(),text))
                             }
                             Err(e) => {
-                                let error_msg = format!("…
+                                let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
                                 Err((url.clone(), error_msg))
                             }
                         }
                     }
                     Err(e) => {
-                        let error_msg = format!("…
+                        let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
                         Err((url.clone(), error_msg))
                     }
                 }

@@ -146,52 +223,67 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
     );

     // binary output
-    let…
-    let _ = encoder.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
-    let _ = encoder.write_all(b"\n").expect("Failed to write newline");
-
-    // Collect all results before processing
-    let results = download_futures.buffer_unordered(50).collect::<Vec<_>>().await;
+    let encoder = Arc::new(Mutex::new(GzEncoder::new(io::stdout(), Compression::default())));

-    //…
-
-
-
-
-
-
-
-
-
-
+    // Write the header
+    {
+        let mut encoder_guard = encoder.lock().unwrap(); // Lock the Mutex to get access to the inner GzEncoder
+        encoder_guard.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
+        encoder_guard.write_all(b"\n").expect("Failed to write newline");
+    }
+
+    download_futures.buffer_unordered(20).for_each( |result| {
+        let encoder = Arc::clone(&encoder); // Clone the Arc for each task
+        let maf_col_cp = maf_col.clone();
+        async move {
+            match result {
+                Ok((url, content)) => {
+                    match select_maf_col(content, &maf_col_cp, &url) {
+                        Ok((maf_bit,mafrows)) => {
+                            if mafrows > 0 {
+                                let mut encoder_guard = encoder.lock().unwrap();
+                                encoder_guard.write_all(&maf_bit).expect("Failed to write file");
+                            } else {
+                                let error = ErrorEntry {
+                                    url: url.clone(),
+                                    error: "Empty maf file".to_string(),
+                                };
+                                let error_js = serde_json::to_string(&error).unwrap();
+                                writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
+                            }
+                        }
+                        Err((url,error)) => {
+                            let error = ErrorEntry {
+                                url,
+                                error,
+                            };
+                            let error_js = serde_json::to_string(&error).unwrap();
+                            writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
+                        }
+                    }
                 }
-
-
-
-
-
-
-
+                Err((url, error)) => {
+                    let error = ErrorEntry {
+                        url,
+                        error,
+                    };
+                    let error_js = serde_json::to_string(&error).unwrap();
+                    writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
+                }
+            };
         }
-    };
+    }).await;
+
+    // Finalize output

-    //…
+    // Replace the value inside the Mutex with a dummy value (e.g., None)
+    let mut encoder_guard = encoder.lock().unwrap();
+    let encoder = std::mem::replace(&mut *encoder_guard, GzEncoder::new(io::stdout(), Compression::default()));
+    // Finalize the encoder
     encoder.finish().expect("Maf file output error!");

-    // Manually flush stdout
+    // Manually flush stdout and stderr
     io::stdout().flush().expect("Failed to flush stdout");
-
-    // After processing all downloads, output the errors as JSON to stderr
-    let errors = errors.lock().unwrap();
-    if !errors.is_empty() {
-        let error_json = json!({
-            "errors": errors.iter().collect::<Vec<&ErrorEntry>>()
-        });
-        let mut stderr = io::stderr();
-        writeln!(stderr, "{}", error_json).expect("Failed to output stderr!");
-        io::stderr().flush().expect("Failed to flush stderr");
-    };
-
+    io::stderr().flush().expect("Failed to flush stderr");
     Ok(())
 }
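Two patterns in the gdcmaf.rs rewrite are worth isolating. First, the bounded stdin read: wrapping the async read in tokio::time::timeout means a caller that never pipes JSON can no longer hang the process (note the diff sets the duration to 5 seconds while its inline comment says 10). A minimal standalone sketch of the same pattern, assuming tokio = { version = "1", features = ["full"] } in Cargo.toml, not the package's actual code:

// Sketch: bound an async stdin read with tokio::time::timeout.
use std::time::Duration;
use tokio::io::AsyncReadExt;
use tokio::time::timeout;

#[tokio::main]
async fn main() {
    let mut buffer = String::new();
    // The outer Result is the timer (Err = deadline elapsed); the inner one is the read itself.
    let read = timeout(Duration::from_secs(5), async {
        tokio::io::stdin().read_to_string(&mut buffer).await
    })
    .await;

    match read {
        Ok(Ok(n)) => println!("read {} bytes from stdin", n),
        Ok(Err(e)) => eprintln!("stdin read error: {}", e),
        Err(_) => eprintln!("timed out waiting for stdin"),
    }
}

Second, the shared compressed writer: putting the single GzEncoder behind Arc<Mutex<...>> lets the buffer_unordered tasks append whole chunks without interleaving output, and the encoder has to be moved back out of the Mutex before finish(), which consumes it by value. The diff does that with std::mem::replace and a throwaway encoder; the simplified thread-based sketch below (assuming flate2 = "1") uses Arc::try_unwrap instead, which works once every clone has been dropped:

// Sketch: one gzip stream shared by concurrent writers.
use flate2::write::GzEncoder;
use flate2::Compression;
use std::io::{self, Write};
use std::sync::{Arc, Mutex};

fn main() {
    let encoder = Arc::new(Mutex::new(GzEncoder::new(io::stdout(), Compression::default())));

    let handles: Vec<_> = (0..4)
        .map(|i| {
            let encoder = Arc::clone(&encoder);
            std::thread::spawn(move || {
                // Lock per chunk so rows from different workers never interleave mid-line.
                let mut guard = encoder.lock().unwrap();
                writeln!(guard, "chunk from worker {}", i).unwrap();
            })
        })
        .collect();
    for h in handles {
        h.join().unwrap();
    }

    // finish() consumes the encoder, so take it back out of the Arc<Mutex<...>>
    // first (possible here because all clones were dropped when the threads ended).
    let encoder = Arc::try_unwrap(encoder).ok().expect("encoder still shared").into_inner().unwrap();
    encoder.finish().expect("gzip finish failed");
}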