@sjcrh/proteinpaint-rust 2.129.1-80343740e.0 → 2.129.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +1 -3
- package/package.json +1 -1
- package/src/cerno.rs +73 -170
- package/src/gdcGRIN2.rs +333 -160
package/Cargo.toml
CHANGED
@@ -13,7 +13,7 @@ kodama = "0.3"
 rayon = "1.7.0"
 bgzip = "0.3.1"
 petgraph = "0.6.3"
-rusqlite="0.
+rusqlite="0.31.0"
 ndarray = "0.16.1"
 hdf5 = { package = "hdf5-metno", version = "0.9.0" }
 nalgebra = {version = "0.32.2", features = ["serde-serialize"]}
@@ -35,8 +35,6 @@ flate2 = "1"
 futures = "0.3"
 num_cpus = "1.16.0"
 memchr = "2"
-r2d2_sqlite = "0.28.0"
-r2d2 = "0.8.10"
 
 [profile.release]
 lto = "fat"
package/package.json
CHANGED
package/src/cerno.rs
CHANGED
@@ -2,20 +2,16 @@
 #![allow(non_snake_case)]
 use json::JsonValue;
 use r_mathlib::chi_squared_cdf;
-use r2d2;
-use r2d2_sqlite::SqliteConnectionManager;
 use rusqlite::{Connection, Result};
 use serde::{Deserialize, Serialize};
 use serde_json;
 use std::cmp::Ordering;
 use std::collections::HashSet;
 use std::io;
-use std::sync::{Arc, Mutex}; // Multithreading library
-use std::thread;
 
 #[allow(non_camel_case_types)]
 #[allow(non_snake_case)]
-#[derive(Debug
+#[derive(Debug)]
 struct GO_pathway {
     GO_id: String,
 }
@@ -25,7 +21,7 @@ struct GO_pathway {
 #[derive(Debug, Clone, PartialEq, PartialOrd)]
 struct gene_order {
     gene_name: String,
-    fold_change:
+    fold_change: f64,
     rank: Option<usize>,
 }
 
@@ -35,11 +31,11 @@ struct gene_order {
 //#[allow(dead_code)]
 struct pathway_p_value {
     pathway_name: String,
-    p_value_original:
-    p_value_adjusted: Option<
+    p_value_original: f64,
+    p_value_adjusted: Option<f64>,
     gene_set_hits: String,
-    auc:
-    es:
+    auc: f64,
+    es: f64,
     gene_set_size: usize,
 }
 
@@ -48,16 +44,13 @@ struct pathway_p_value {
 #[derive(Debug, Serialize, Deserialize)]
 //#[allow(dead_code)]
 struct output_struct {
-    pval:
-    fdr:
+    pval: f64,
+    fdr: f64,
     leading_edge: String,
-    auc:
-    es:
+    auc: f64,
+    es: f64,
     geneset_size: usize,
 }
-const PAR_CUTOFF: usize = 1000; // Cutoff for triggering multithreading processing of data
-#[allow(non_upper_case_globals)]
-const max_threads: usize = 3; // Max number of threads in case the parallel processing of reads is invoked
 
 fn main() -> Result<()> {
     let mut input = String::new();
@@ -88,17 +81,17 @@ fn main() -> Result<()> {
     //println!("sample_genes:{:?}", sample_genes);
 
     let fold_change_input: &JsonValue = &json_string["fold_change"];
-    let mut
+    let mut fold_change_f64 = Vec::<f64>::new();
     for iter in 0..fold_change_input.len() {
-        let item = fold_change_input[iter].
-
+        let item = fold_change_input[iter].as_f64().unwrap();
+        fold_change_f64.push(item);
     }
 
     if sample_genes.len() == 0 {
         panic!("No sample genes provided");
     }
 
-    if sample_genes.len() !=
+    if sample_genes.len() != fold_change_f64.len() {
         panic!("Length of genes array and fold change array are not equal");
     }
 
@@ -106,7 +99,7 @@ fn main() -> Result<()> {
     for i in 0..sample_genes.len() {
         let item: gene_order = gene_order {
             gene_name: sample_genes[i].to_string(),
-            fold_change:
+            fold_change: fold_change_f64[i],
             rank: None, // Will be calculated later
         };
         genes_vector.push(item)
@@ -161,7 +154,7 @@ fn main() -> Result<()> {
     //println!("sample_genes:{:?}", sample_genes);
     //println!("background_genes:{:?}", background_genes);
 
-    let msigdbconn = Connection::open(
+    let msigdbconn = Connection::open(msigdb)?;
     let stmt_result = msigdbconn
         .prepare(&("select id from terms where parent_id='".to_owned() + &genesetgroup + "'"));
     match stmt_result {
@@ -169,148 +162,58 @@
         #[allow(non_snake_case)]
         let GO_iter = stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
         #[allow(non_snake_case)]
-        let mut genesets = Vec::<String>::new();
         for GO_term in GO_iter {
             match GO_term {
                 Ok(n) => {
-
-
-
-                    println!("
-
-
-
-
-
-
-
-
-
-
-
-                    let mut rows = gene_stmt.query([])?;
-                    let mut names = HashSet::<String>::new();
-                    while let Some(row) = rows.next()? {
-                        let a: String = row.get(0)?;
-                        let input_gene_json = json::parse(&a);
-                        match input_gene_json {
-                            Ok(json_genes) => {
-                                for json_iter in 0..json_genes.len() {
-                                    names.insert(json_genes[json_iter]["symbol"].to_string());
-                                }
-                            }
-                            Err(_) => {
-                                panic!("Symbol, ensg, enstCanonical structure is missing!")
-                            }
-                        }
-                    }
-                    let gene_set_size = names.len();
-                    let (p_value, auc, es, matches, gene_set_hits) = cerno(&sample_coding_genes, names);
-
-                    if matches >= 1.0
-                        && p_value.is_nan() == false
-                        && es.is_nan() == false
-                        && es != f32::INFINITY
-                        && auc != f32::INFINITY
-                        && auc.is_nan() == false
-                    {
-                        pathway_p_values.push(pathway_p_value {
-                            pathway_name: gs,
-                            p_value_original: p_value,
-                            p_value_adjusted: None,
-                            auc: auc,
-                            es: es,
-                            gene_set_hits: gene_set_hits,
-                            gene_set_size: gene_set_size,
-                        })
-                    }
-                }
-            } else {
-                // Multithreaded implementation
-                let manager = SqliteConnectionManager::file(&msigdb); // This enables sqlite query from multiple threads simultaneously
-                let pool = r2d2::Pool::new(manager).unwrap(); // This enables sqlite query from multiple threads simultaneously
-                let genesets = Arc::new(genesets);
-                let pool_arc = Arc::new(pool);
-                let sample_coding_genes = Arc::new(sample_coding_genes);
-                let pathway_p_values_temp =
-                    Arc::new(Mutex::new(Vec::<pathway_p_value>::with_capacity(genesets.len())));
-                let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
-                for thread_num in 0..max_threads {
-                    let genesets = Arc::clone(&genesets);
-                    let pool_arc = Arc::clone(&pool_arc);
-                    let sample_coding_genes = Arc::clone(&sample_coding_genes);
-                    let pathway_p_values_temp = Arc::clone(&pathway_p_values_temp);
-                    let handle = thread::spawn(move || {
-                        let mut pathway_p_values_thread: Vec<pathway_p_value> =
-                            Vec::with_capacity(10000);
-                        for iter in 0..genesets.len() {
-                            let remainder: usize = iter % max_threads;
-                            if remainder == thread_num {
-                                let sql_statement = "select genes from term2genes where id='"
-                                    .to_owned()
-                                    + &genesets[iter]
-                                    + &"'";
-                                //println!("sql_statement:{}", sql_statement);
-                                let conn = pool_arc.get().unwrap();
-                                let mut gene_stmt = conn.prepare(&sql_statement).unwrap();
-                                //println!("gene_stmt:{:?}", gene_stmt);
-
-                                let mut rows = gene_stmt.query([]).unwrap();
-                                let mut names = HashSet::<String>::new();
-                                while let Some(row) = rows.next().unwrap() {
-                                    let a: String = row.get(0).unwrap();
-                                    let input_gene_json = json::parse(&a);
-                                    match input_gene_json {
-                                        Ok(json_genes) => {
-                                            for json_iter in 0..json_genes.len() {
-                                                names.insert(
-                                                    json_genes[json_iter]["symbol"].to_string(),
-                                                );
-                                            }
-                                        }
-                                        Err(_) => {
-                                            panic!("Symbol, ensg, enstCanonical structure is missing!")
-
+                    //println!("GO term {:?}", n);
+                    let sql_statement =
+                        "select genes from term2genes where id='".to_owned() + &n.GO_id + &"'";
+                    //println!("sql_statement:{}", sql_statement);
+                    let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
+                    //println!("gene_stmt:{:?}", gene_stmt);
+
+                    let mut rows = gene_stmt.query([])?;
+                    let mut names = HashSet::<String>::new();
+                    while let Some(row) = rows.next()? {
+                        let a: String = row.get(0)?;
+                        let input_gene_json = json::parse(&a);
+                        match input_gene_json {
+                            Ok(json_genes) => {
+                                for json_iter in 0..json_genes.len() {
+                                    names.insert(json_genes[json_iter]["symbol"].to_string());
                                 }
                             }
-
-
-                                cerno(&sample_coding_genes, names);
-
-                                if matches >= 1.0
-                                    && p_value.is_nan() == false
-                                    && es.is_nan() == false
-                                    && es != f32::INFINITY
-                                    && auc != f32::INFINITY
-                                    && auc.is_nan() == false
-                                {
-                                    pathway_p_values_thread.push(pathway_p_value {
-                                        pathway_name: genesets[iter].clone(),
-                                        p_value_original: p_value,
-                                        p_value_adjusted: None,
-                                        auc: auc,
-                                        es: es,
-                                        gene_set_hits: gene_set_hits,
-                                        gene_set_size: gene_set_size,
-                                    })
+                            Err(_) => {
+                                panic!("Symbol, ensg, enstCanonical structure is missing!")
                             }
                         }
                     }
-
-
-
-
-
-
-
-
-
-
-
+                    let gene_set_size = names.len();
+                    let (p_value, auc, es, matches, gene_set_hits) =
+                        cerno(&sample_coding_genes, names);
+
+                    if matches >= 1.0
+                        && p_value.is_nan() == false
+                        && es.is_nan() == false
+                        && es != f64::INFINITY
+                        && auc != f64::INFINITY
+                        && auc.is_nan() == false
+                    {
+                        pathway_p_values.push(pathway_p_value {
+                            pathway_name: n.GO_id,
+                            p_value_original: p_value,
+                            p_value_adjusted: None,
+                            auc: auc,
+                            es: es,
+                            gene_set_hits: gene_set_hits,
+                            gene_set_size: gene_set_size,
+                        })
+                    }
+                }
+                Err(_) => {
+                    println!("GO term not found!")
+                }
             }
-            // Combining data from all different threads
-            pathway_p_values.append(&mut *pathway_p_values_temp.lock().unwrap());
         }
     }
     Err(_) => panic!("sqlite database file not found"),
@@ -326,15 +229,15 @@
     Ok(())
 }
 
-fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (
+fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (f64, f64, f64, f64, String) {
     // Filter the sample_coding_genes vector to only include those whose gene_names are in the HashSet genes_in_pathway
     let gene_intersections: Vec<&gene_order> = sample_coding_genes
         .iter()
         .filter(|sample_coding_genes| genes_in_pathway.contains(&sample_coding_genes.gene_name)) // Check if name is in the HashSet genes_in_pathway
         .collect(); // Collect the results into a new vector
 
-    let N1 = gene_intersections.len() as
-    let N = sample_coding_genes.len() as
+    let N1 = gene_intersections.len() as f64;
+    let N = sample_coding_genes.len() as f64;
     let mut gene_set_hits: String = "".to_string();
     for gene in &gene_intersections {
         gene_set_hits += &(gene.gene_name.to_string() + &",");
@@ -349,21 +252,21 @@ fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String
         .map(|x| x.rank.unwrap())
         .collect::<Vec<usize>>();
 
-    let cerno:
+    let cerno: f64 = ranks // -2 * sum( log(ranks/N) )
         .iter()
-        .map(|x| ((*x as
-        .collect::<Vec<
+        .map(|x| ((*x as f64) / N).ln())
+        .collect::<Vec<f64>>()
         .iter()
-        .sum::<
+        .sum::<f64>()
         * (-2.0);
 
-    let cES:
+    let cES: f64 = cerno / (2.0 * (N1 as f64)); // cES <- cerno/(2*N1)
     let N2 = N - N1; // N2 = N - N1
-    let R1 = ranks.iter().sum::<usize>() as
+    let R1 = ranks.iter().sum::<usize>() as f64; // R1 <- sum(ranks)
     let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
     let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
-    let p_value = chi_squared_cdf(cerno
-    (p_value
+    let p_value = chi_squared_cdf(cerno, 2.0 * N1, false, false); // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
+    (p_value, AUC, cES, N1, gene_set_hits)
 }
 
 fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
@@ -375,13 +278,13 @@ fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
     });
 
     let mut adjusted_p_values: Vec<pathway_p_value> = Vec::with_capacity(original_p_values.len());
-    let mut old_p_value:
-    let mut rank:
+    let mut old_p_value: f64 = 0.0;
+    let mut rank: f64 = original_p_values.len() as f64;
     for j in 0..original_p_values.len() {
         let i = original_p_values.len() - j - 1;
 
         //println!("p_val:{}", p_val);
-        let mut adjusted_p_val:
+        let mut adjusted_p_val: f64 = original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
         if adjusted_p_val > 1.0 {
             // p_value should NEVER be greater than 1
             adjusted_p_val = 1.0;
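
Note: the rewritten cerno() now works entirely in f64 and returns (p_value, AUC, cES, N1, gene_set_hits). The statistics visible in this diff can be read in isolation as the sketch below; it assumes 1-based ranks of the gene-set hits within a ranked list of n genes and reuses the same r_mathlib upper-tail chi-squared call as the package:

// Sketch of the CERNO statistics as they appear in the updated cerno():
// cerno = -2 * sum( ln(rank / N) ), cES = cerno / (2 * N1),
// U = N1*N2 + N1*(N1+1)/2 - R1, AUC = U / (N1*N2),
// p = upper tail of chi-squared with 2*N1 degrees of freedom.
use r_mathlib::chi_squared_cdf;

fn cerno_stats(ranks: &[usize], n: usize) -> (f64, f64, f64) {
    let n = n as f64;
    let n1 = ranks.len() as f64; // number of gene-set hits
    let n2 = n - n1;
    let cerno: f64 = ranks.iter().map(|r| (*r as f64 / n).ln()).sum::<f64>() * -2.0;
    let c_es = cerno / (2.0 * n1); // enrichment score
    let r1 = ranks.iter().sum::<usize>() as f64; // rank sum of the hits
    let u = n1 * n2 + n1 * (n1 + 1.0) / 2.0 - r1;
    let auc = u / (n1 * n2);
    let p = chi_squared_cdf(cerno, 2.0 * n1, false, false); // lower.tail = FALSE
    (p, auc, c_es) // NaN/inf results are filtered by the caller, as in the diff
}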
package/src/gdcGRIN2.rs
CHANGED
@@ -1,19 +1,58 @@
+/*
+This script downloads cohort maf files from GDC and gracefully handles timeout and other possible errors related to GDC api processing for use by the client file summary div
+
+Key improvements:
+1. Graceful error handling - individual file failures don't stop the entire process
+2. Better timeout handling with retries
+3. More detailed error reporting
+4. Continues processing even when some files fail
+
+Input JSON:
+    caseFiles
+    mafOptions: For SNVindel filtering
+Output mutations as JSON array.
+
+Example of usage:
+echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}},"mafOptions": {"minTotalDepth": 10,"minAltAlleleCount": 2}}' | ./target/release/gdcGRIN2
+*/
+
 use flate2::read::GzDecoder;
 use futures::StreamExt;
 use memchr::memchr;
 use serde::Deserialize;
 use serde_json;
 use std::collections::HashMap;
-use std::io::{self, Read
+use std::io::{self, Read};
+use std::sync::Arc;
+use std::sync::atomic::{AtomicUsize, Ordering};
 use std::time::Duration;
 use tokio::io::{AsyncReadExt, BufReader};
+use tokio::sync::Mutex;
 use tokio::time::timeout;
 
-// Struct to hold error information
-#[derive(serde::Serialize)]
+// Struct to hold error information for JSON output
+#[derive(serde::Serialize, Clone)]
 struct ErrorEntry {
-
-
+    case_id: String,
+    data_type: String,
+    error_type: String,
+    error_details: String,
+    attempts_made: u32,
+}
+
+// Struct for the final output that includes both successful data and errors
+#[derive(serde::Serialize)]
+struct GdcOutput {
+    successful_data: Vec<Vec<Vec<String>>>, // Array of successful file data arrays
+    failed_files: Vec<ErrorEntry>,
+    summary: OutputSummary,
+}
+
+#[derive(serde::Serialize)]
+struct OutputSummary {
+    total_files: usize,
+    successful_files: usize,
+    failed_files: usize,
 }
 
 // Define the structure for datadd
@@ -23,32 +62,52 @@ struct DataType {
     maf: Option<String>,
 }
 
+// Define the structure for mafOptions
+#[derive(Deserialize, Debug)]
+struct MafOptions {
+    #[serde(rename = "minTotalDepth")]
+    min_total_depth: i32,
+    #[serde(rename = "minAltAlleleCount")]
+    min_alt_allele_count: i32,
+}
+
+// Define the top-level input structure
+#[derive(Deserialize, Debug)]
+struct InputData {
+    #[serde(rename = "caseFiles")]
+    case_files: HashMap<String, DataType>,
+    #[serde(rename = "mafOptions")]
+    maf_options: Option<MafOptions>,
+}
+
 // Function to parse TSV content
-
-
-
-
-
-
+fn parse_content(
+    content: &str,
+    case_id: &str,
+    data_type: &str,
+    min_total_depth: i32,
+    min_alt_allele_count: i32,
+) -> Result<Vec<Vec<String>>, (String, String, String)> {
     let lines = content.lines();
-
-    let mut parsed_data: String = String::new();
+    let mut parsed_data = Vec::new();
     let mut columns_indices: Vec<usize> = Vec::new();
     let mut header_mk: &str = "";
-    let mut columns = Vec::new();
+    let mut columns = Vec::new();
+
     if data_type == "cnv" {
         header_mk = "GDC_Aliquot_ID";
         columns = vec!["Chromosome", "Start", "End", "Segment_Mean"]
     } else if data_type == "maf" {
         header_mk = "Hugo_Symbol";
-        columns = vec!["Chromosome", "Start_Position", "End_Position"]
+        columns = vec!["Chromosome", "Start_Position", "End_Position", "t_depth", "t_alt_count"]
     };
-
+
+    let mut header: Vec<String> = Vec::new();
+
     for line in lines {
         if line.starts_with("#") {
             continue;
         } else if line.contains(&header_mk) {
-            // header line
             header = line.split("\t").map(|s| s.to_string()).collect();
             for col in &columns {
                 match header.iter().position(|x| x == col) {
@@ -65,12 +124,12 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
             let mut keep_ck: bool = true;
             let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
             let mut out_lst: Vec<String> = Vec::new();
-            // add sample ID first
             out_lst.push(case_id.to_string());
+
             for x in columns_indices.iter() {
                 let mut element = cont_lst[*x].to_string();
+
                 if data_type == "cnv" && &header[*x] == "Segment_Mean" {
-                    // convert to f32 (segment_mean)
                     let seg_mean = match element.parse::<f32>() {
                         Ok(val) => val,
                         Err(_e) => {
@@ -78,9 +137,9 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
                             return Err((case_id.to_string(), data_type.to_string(), error_msg));
                         }
                     };
-                    if seg_mean >= 0.
+                    if seg_mean >= 0.3 {
                         element = "gain".to_string();
-                    } else if seg_mean <= -0.
+                    } else if seg_mean <= -0.4 {
                        element = "loss".to_string();
                     } else {
                         keep_ck = false;
@@ -88,16 +147,37 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
                 }
                 out_lst.push(element);
             }
-
+
             if data_type == "maf" {
-                out_lst.
+                let alle_depth = match out_lst[4].parse::<i32>() {
+                    Ok(value) => value,
+                    Err(_) => {
+                        let error_msg = "Failed to convert t_depth to i32.".to_string();
+                        return Err((case_id.to_string(), data_type.to_string(), error_msg));
+                    }
+                };
+                let alt_count = match out_lst[5].parse::<i32>() {
+                    Ok(value) => value,
+                    Err(_) => {
+                        let error_msg = "Failed to convert t_alt_count to i32.".to_string();
+                        return Err((case_id.to_string(), data_type.to_string(), error_msg));
+                    }
+                };
+
+                if alle_depth >= min_total_depth && alt_count >= min_alt_allele_count {
+                    out_lst = out_lst[0..4].to_vec();
+                    out_lst.push("mutation".to_string());
+                } else {
+                    keep_ck = false;
+                }
             }
+
             if keep_ck {
-                parsed_data.
-                parsed_data.push_str("\n");
+                parsed_data.push(out_lst);
             }
         }
     }
+
     if columns_indices.is_empty() {
         return Err((
             case_id.to_string(),
@@ -105,14 +185,121 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
             "No matching columns found. Problematic file!".to_string(),
         ));
     };
+
     Ok(parsed_data)
 }
 
-
-
-
+/// Downloads a single file with minimal retry logic for transient failures
+async fn download_single_file(
+    case_id: String,
+    data_type: String,
+    url: String,
+    max_attempts: u32,
+) -> Result<(String, String, String), (String, String, String, u32)> {
+    let mut last_error = String::new();
+    let mut error_type = String::new();
+
+    for attempt in 0..max_attempts {
+        // Build HTTP client with aggressive timeouts for real-time processing
+        let client = match reqwest::Client::builder()
+            .timeout(Duration::from_secs(10)) // 10 second timeout per request
+            .connect_timeout(Duration::from_secs(3)) // 3 second connect timeout
+            .build()
+        {
+            Ok(client) => client,
+            Err(e) => {
+                last_error = format!("Client build error: {}", e);
+                error_type = "client_build_error".to_string();
+                continue;
+            }
+        };
+
+        // Attempt download with tight timeout - fail fast if server is slow
+        match timeout(Duration::from_secs(12), client.get(&url).send()).await {
+            Ok(Ok(resp)) if resp.status().is_success() => {
+                match resp.bytes().await {
+                    Ok(content) => {
+                        // Handle both compressed and uncompressed content
+                        let text = if memchr(0x00, &content).is_some() {
+                            // Likely compressed (gzipped) content
+                            let mut decoder = GzDecoder::new(&content[..]);
+                            let mut decompressed_content = Vec::new();
+                            match decoder.read_to_end(&mut decompressed_content) {
+                                Ok(_) => String::from_utf8_lossy(&decompressed_content).to_string(),
+                                Err(e) => {
+                                    last_error = format!("Decompression failed: {}", e);
+                                    error_type = "decompression_error".to_string();
+                                    continue; // Retry on decompression failure
+                                }
+                            }
+                        } else {
+                            // Plain text content
+                            String::from_utf8_lossy(&content).to_string()
+                        };
+
+                        // Success! Return immediately
+                        return Ok((case_id, data_type, text));
+                    }
+                    Err(e) => {
+                        last_error = format!("Failed to read response bytes: {}", e);
+                        error_type = "connection_error".to_string();
+                        // This could be "connection closed before message completed"
+                        // Worth retrying for transient network issues
+                    }
+                }
+            }
+            Ok(Ok(resp)) => {
+                last_error = format!(
+                    "HTTP error {}: {}",
+                    resp.status(),
+                    resp.status().canonical_reason().unwrap_or("Unknown")
+                );
+                error_type = if resp.status().is_client_error() {
+                    "client_error".to_string()
+                } else {
+                    "server_error".to_string()
+                };
+                // Don't retry 4xx errors (client errors), but retry 5xx (server errors)
+                if resp.status().is_client_error() {
+                    break; // No point retrying client errors
+                }
+            }
+            Ok(Err(e)) => {
+                last_error = format!("Request error: {}", e);
+                error_type = "network_error".to_string();
+                // Network errors are worth retrying
+            }
+            Err(_) => {
+                last_error = "Request timeout (12s) - server too slow".to_string();
+                error_type = "timeout_error".to_string();
+                // Timeouts might be transient, worth a quick retry
+            }
+        }
+
+        // If this isn't the last attempt, wait briefly before retrying
+        if attempt < max_attempts - 1 {
+            // Silent retry - no stderr noise
+            tokio::time::sleep(Duration::from_secs(1)).await; // 1 second between retries
+        }
+    }
+
+    Err((
+        case_id,
+        data_type,
+        format!("{}: {}", error_type, last_error),
+        max_attempts,
+    ))
+}
+
+/// Main download function with structured JSON output including errors
+async fn download_data(
+    data4dl: HashMap<String, DataType>,
+    host: &str,
+    min_total_depth: i32,
+    min_alt_allele_count: i32,
+) {
     // Generate URLs from data4dl, handling optional cnv and maf
-    let data_urls = data4dl
+    let data_urls: Vec<(String, String, String)> = data4dl
         .into_iter()
         .flat_map(|(case_id, data_types)| {
             let mut urls = Vec::new();
@@ -124,172 +311,158 @@ async fn download_data(data4dl: HashMap<String, DataType>, host: &str) -> () {
             }
             urls
         })
-        .collect
+        .collect();
+
+    let total_files = data_urls.len();
+
+    // Use atomic counters that can be safely shared across async closures
+    let successful_downloads = Arc::new(AtomicUsize::new(0));
+    let failed_downloads = Arc::new(AtomicUsize::new(0));
+
+    // Create shared vectors to collect successful data and errors
+    let successful_data = Arc::new(Mutex::new(Vec::<Vec<Vec<String>>>::new()));
+    let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
+
+    // Create download futures with smart retry logic
     let download_futures = futures::stream::iter(data_urls.into_iter().map(|(case_id, data_type, url)| {
         async move {
-            //
-
-            let client = reqwest::Client::builder()
-                .timeout(Duration::from_secs(60)) // 60-second timeout per request
-                .connect_timeout(Duration::from_secs(30))
-                .build()
-                .map_err(|_e| "Client build error".to_string());
-            // Handle client creation result
-            match client {
-                Ok(client) => {
-                    match client.get(&url).send().await {
-                        Ok(resp) if resp.status().is_success() => {
-                            match resp.bytes().await {
-                                Ok(content) => {
-                                    // if data_type == "cnv" {
-                                    if !memchr(0x00, &content).is_some() {
-                                        // CNV files are plain text
-                                        let text = String::from_utf8_lossy(&content).to_string();
-                                        Ok((case_id.clone(), data_type.clone(), text))
-                                    } else {
-                                        let mut decoder = GzDecoder::new(&content[..]);
-                                        let mut decompressed_content = Vec::new();
-                                        match decoder.read_to_end(&mut decompressed_content) {
-                                            Ok(_) => {
-                                                let text = String::from_utf8_lossy(&decompressed_content).to_string();
-                                                Ok((case_id.clone(), data_type.clone(), text))
-                                            }
-                                            Err(e) => {
-                                                let error_msg = format!(
-                                                    "Failed to decompress {} file for {}: {}",
-                                                    data_type, case_id, e
-                                                );
-                                                Err((case_id.clone(), data_type.clone(), error_msg))
-                                            }
-                                        }
-                                    }
-                                }
-                                Err(e) => {
-                                    let error_msg =
-                                        format!("Failed to read bytes for {} file for {}: {}", data_type, case_id, e);
-                                    Err((case_id.clone(), data_type.clone(), error_msg))
-                                }
-                            }
-                        }
-                        Ok(resp) => {
-                            let error_msg =
-                                format!("HTTP error for {} file for {}: {}", data_type, case_id, resp.status());
-                            Err((case_id.clone(), data_type.clone(), error_msg))
-                        }
-                        Err(e) => {
-                            let error_msg =
-                                format!("Server request failed for {} file for {}: {}", data_type, case_id, e);
-                            Err((case_id.clone(), data_type.clone(), error_msg))
-                        }
-                    }
-                }
-                Err(_e) => {
-                    let error_msg = "Client build error".to_string();
-                    Err((case_id, data_type, error_msg))
-                }
-            }
+            // Try each file up to 2 times for transient failures
+            download_single_file(case_id, data_type, url, 2).await
         }
     }));
 
-    // Execute downloads concurrently
+    // Execute downloads concurrently with high concurrency for speed
     download_futures
-        .buffer_unordered(
-        .for_each(|
-
-
-
-
-
-
-
-
-
-
-
+        .buffer_unordered(15) // Increased to 15 concurrent downloads for speed
+        .for_each(|download_result| {
+            let successful_downloads = Arc::clone(&successful_downloads);
+            let failed_downloads = Arc::clone(&failed_downloads);
+            let successful_data = Arc::clone(&successful_data);
+            let errors = Arc::clone(&errors);
+
+            async move {
+                match download_result {
+                    Ok((case_id, data_type, content)) => {
+                        // Successfully downloaded, now try to parse
+                        match parse_content(&content, &case_id, &data_type, min_total_depth, min_alt_allele_count) {
+                            Ok(parsed_data) => {
+                                // Store successful data
+                                successful_data.lock().await.push(parsed_data);
+                                successful_downloads.fetch_add(1, Ordering::Relaxed);
+                            }
+                            Err((cid, dtp, error)) => {
+                                failed_downloads.fetch_add(1, Ordering::Relaxed);
+                                let error = ErrorEntry {
+                                    case_id: cid,
+                                    data_type: dtp,
+                                    error_type: "parsing_error".to_string(),
+                                    error_details: error,
+                                    attempts_made: 1,
+                                };
+                                errors.lock().await.push(error);
+                            }
                         }
-                    }
-                    Err((
+                    }
+                    Err((case_id, data_type, error_details, attempts)) => {
+                        failed_downloads.fetch_add(1, Ordering::Relaxed);
+
+                        // Parse error type from error details
+                        let (error_type, clean_details) = if error_details.contains(":") {
+                            let parts: Vec<&str> = error_details.splitn(2, ": ").collect();
+                            (parts[0].to_string(), parts[1].to_string())
+                        } else {
+                            ("unknown_error".to_string(), error_details)
+                        };
+
                         let error = ErrorEntry {
-
-
+                            case_id,
+                            data_type,
+                            error_type,
+                            error_details: clean_details,
+                            attempts_made: attempts,
                         };
-
-                        eprintln!("{}", error_js);
+                        errors.lock().await.push(error);
                     }
-                },
-                Err((case_id, data_type, error)) => {
-                    let error = ErrorEntry {
-                        case: format!("{}: {}", case_id, data_type),
-                        error,
-                    };
-                    let error_js = serde_json::to_string(&error).unwrap();
-                    eprintln!("{}", error_js);
                 }
             }
        })
        .await;
+
+    // Create final output structure
+    let success_count = successful_downloads.load(Ordering::Relaxed);
+    let failed_count = failed_downloads.load(Ordering::Relaxed);
+
+    let output = GdcOutput {
+        successful_data: successful_data.lock().await.clone(),
+        failed_files: errors.lock().await.clone(),
+        summary: OutputSummary {
+            total_files,
+            successful_files: success_count,
+            failed_files: failed_count,
+        },
+    };
+
+    // Output the complete structure as JSON
+    match serde_json::to_string(&output) {
+        Ok(json) => println!("{}", json),
+        Err(_) => {
+            // Silent failure - exit without stderr
+            std::process::exit(1);
+        }
+    }
 }
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
     const HOST: &str = "https://api.gdc.cancer.gov/data/";
 
-    //
-    let timeout_duration = Duration::from_secs(
+    // Read input with timeout
+    let timeout_duration = Duration::from_secs(10); // Increased timeout for input
 
-    // Wrap the read operation in a timeout
     let result = timeout(timeout_duration, async {
-        let mut buffer = String::new();
-        let mut reader = BufReader::new(tokio::io::stdin());
-        reader.read_to_string(&mut buffer).await?;
-        Ok::<String, io::Error>(buffer)
+        let mut buffer = String::new();
+        let mut reader = BufReader::new(tokio::io::stdin());
+        reader.read_to_string(&mut buffer).await?;
+        Ok::<String, io::Error>(buffer)
     })
     .await;
 
-    // Handle
-    let input_js:
+    // Handle input parsing (silently)
+    let input_js: InputData = match result {
        Ok(Ok(buffer)) => match serde_json::from_str(&buffer) {
            Ok(js) => js,
-            Err(
-
-
-                error: format!("Input JSON parsing error: {}", e),
-            };
-            writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
-            return Err(Box::new(std::io::Error::new(
-                std::io::ErrorKind::InvalidInput,
-                "Input JSON parsing Error!",
-            )) as Box<dyn std::error::Error>);
+            Err(_e) => {
+                // Silent failure - exit without stderr
+                std::process::exit(1);
            }
        },
        Ok(Err(_e)) => {
-
-
-            error: "Error reading from stdin.".to_string(),
-            };
-            let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
-            writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
-            return Err(Box::new(std::io::Error::new(
-                std::io::ErrorKind::InvalidInput,
-                "Error reading from stdin!",
-            )) as Box<dyn std::error::Error>);
+            // Silent failure - exit without stderr
+            std::process::exit(1);
        }
        Err(_) => {
-
-
-            error: "Timeout while reading from stdin.".to_string(),
-            };
-            let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
-            writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
-            return Err(Box::new(std::io::Error::new(
-                std::io::ErrorKind::InvalidInput,
-                "Timeout while reading from stdin.",
-            )) as Box<dyn std::error::Error>);
+            // Silent failure - exit without stderr
+            std::process::exit(1);
        }
    };

-    //
-
+    // Validate input (silently)
+    if input_js.case_files.is_empty() {
+        // Silent failure - exit without stderr
+        std::process::exit(1);
+    }
+
+    let case_files = input_js.case_files;
+
+    // Set default maf_options
+    let (min_total_depth, min_alt_allele_count) = match input_js.maf_options {
+        Some(options) => (options.min_total_depth, options.min_alt_allele_count),
+        None => (10, 2), // Default values
+    };
+
+    // Download data - this will now handle errors gracefully
+    download_data(case_files, HOST, min_total_depth, min_alt_allele_count).await;
 
+    // Always exit successfully - individual file failures are logged but don't stop the process
     Ok(())
 }