@sjcrh/proteinpaint-rust 2.129.1-80343740e.0 → 2.129.2

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the packages as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -13,7 +13,7 @@ kodama = "0.3"
  rayon = "1.7.0"
  bgzip = "0.3.1"
  petgraph = "0.6.3"
- rusqlite="0.35"
+ rusqlite="0.31.0"
  ndarray = "0.16.1"
  hdf5 = { package = "hdf5-metno", version = "0.9.0" }
  nalgebra = {version = "0.32.2", features = ["serde-serialize"]}
@@ -35,8 +35,6 @@ flate2 = "1"
  futures = "0.3"
  num_cpus = "1.16.0"
  memchr = "2"
- r2d2_sqlite = "0.28.0"
- r2d2 = "0.8.10"

  [profile.release]
  lto = "fat"
package/package.json CHANGED
@@ -1,5 +1,5 @@
  {
- "version": "2.129.1-80343740e.0",
+ "version": "2.129.2",
  "name": "@sjcrh/proteinpaint-rust",
  "type": "module",
  "description": "Rust-based utilities for proteinpaint",
package/src/cerno.rs CHANGED
@@ -2,20 +2,16 @@
  #![allow(non_snake_case)]
  use json::JsonValue;
  use r_mathlib::chi_squared_cdf;
- use r2d2;
- use r2d2_sqlite::SqliteConnectionManager;
  use rusqlite::{Connection, Result};
  use serde::{Deserialize, Serialize};
  use serde_json;
  use std::cmp::Ordering;
  use std::collections::HashSet;
  use std::io;
- use std::sync::{Arc, Mutex}; // Multithreading library
- use std::thread;

  #[allow(non_camel_case_types)]
  #[allow(non_snake_case)]
- #[derive(Debug, Clone)]
+ #[derive(Debug)]
  struct GO_pathway {
  GO_id: String,
  }
@@ -25,7 +21,7 @@ struct GO_pathway {
  #[derive(Debug, Clone, PartialEq, PartialOrd)]
  struct gene_order {
  gene_name: String,
- fold_change: f32,
+ fold_change: f64,
  rank: Option<usize>,
  }

@@ -35,11 +31,11 @@ struct gene_order {
  //#[allow(dead_code)]
  struct pathway_p_value {
  pathway_name: String,
- p_value_original: f32,
- p_value_adjusted: Option<f32>,
+ p_value_original: f64,
+ p_value_adjusted: Option<f64>,
  gene_set_hits: String,
- auc: f32,
- es: f32,
+ auc: f64,
+ es: f64,
  gene_set_size: usize,
  }

@@ -48,16 +44,13 @@ struct pathway_p_value {
  #[derive(Debug, Serialize, Deserialize)]
  //#[allow(dead_code)]
  struct output_struct {
- pval: f32,
- fdr: f32,
+ pval: f64,
+ fdr: f64,
  leading_edge: String,
- auc: f32,
- es: f32,
+ auc: f64,
+ es: f64,
  geneset_size: usize,
  }
- const PAR_CUTOFF: usize = 1000; // Cutoff for triggering multithreading processing of data
- #[allow(non_upper_case_globals)]
- const max_threads: usize = 3; // Max number of threads in case the parallel processing of reads is invoked

  fn main() -> Result<()> {
  let mut input = String::new();
@@ -88,17 +81,17 @@ fn main() -> Result<()> {
  //println!("sample_genes:{:?}", sample_genes);

  let fold_change_input: &JsonValue = &json_string["fold_change"];
- let mut fold_change_f32 = Vec::<f32>::new();
+ let mut fold_change_f64 = Vec::<f64>::new();
  for iter in 0..fold_change_input.len() {
- let item = fold_change_input[iter].as_f32().unwrap();
- fold_change_f32.push(item);
+ let item = fold_change_input[iter].as_f64().unwrap();
+ fold_change_f64.push(item);
  }

  if sample_genes.len() == 0 {
  panic!("No sample genes provided");
  }

- if sample_genes.len() != fold_change_f32.len() {
+ if sample_genes.len() != fold_change_f64.len() {
  panic!("Length of genes array and fold change array are not equal");
  }

@@ -106,7 +99,7 @@ fn main() -> Result<()> {
  for i in 0..sample_genes.len() {
  let item: gene_order = gene_order {
  gene_name: sample_genes[i].to_string(),
- fold_change: fold_change_f32[i],
+ fold_change: fold_change_f64[i],
  rank: None, // Will be calculated later
  };
  genes_vector.push(item)
@@ -161,7 +154,7 @@ fn main() -> Result<()> {
  //println!("sample_genes:{:?}", sample_genes);
  //println!("background_genes:{:?}", background_genes);

- let msigdbconn = Connection::open(&msigdb)?;
+ let msigdbconn = Connection::open(msigdb)?;
  let stmt_result = msigdbconn
  .prepare(&("select id from terms where parent_id='".to_owned() + &genesetgroup + "'"));
  match stmt_result {
@@ -169,148 +162,58 @@ fn main() -> Result<()> {
  #[allow(non_snake_case)]
  let GO_iter = stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
  #[allow(non_snake_case)]
- let mut genesets = Vec::<String>::new();
  for GO_term in GO_iter {
  match GO_term {
  Ok(n) => {
- genesets.push(n.GO_id);
- }
- Err(_) => {
- println!("GO term not found!")
- }
- }
- }
-
- if genesets.len() < PAR_CUTOFF {
- for gs in genesets {
- let sql_statement =
- "select genes from term2genes where id='".to_owned() + &gs + &"'";
- //println!("sql_statement:{}", sql_statement);
- let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
- //println!("gene_stmt:{:?}", gene_stmt);
-
- let mut rows = gene_stmt.query([])?;
- let mut names = HashSet::<String>::new();
- while let Some(row) = rows.next()? {
- let a: String = row.get(0)?;
- let input_gene_json = json::parse(&a);
- match input_gene_json {
- Ok(json_genes) => {
- for json_iter in 0..json_genes.len() {
- names.insert(json_genes[json_iter]["symbol"].to_string());
- }
- }
- Err(_) => {
- panic!("Symbol, ensg, enstCanonical structure is missing!")
- }
- }
- }
- let gene_set_size = names.len();
- let (p_value, auc, es, matches, gene_set_hits) = cerno(&sample_coding_genes, names);
-
- if matches >= 1.0
- && p_value.is_nan() == false
- && es.is_nan() == false
- && es != f32::INFINITY
- && auc != f32::INFINITY
- && auc.is_nan() == false
- {
- pathway_p_values.push(pathway_p_value {
- pathway_name: gs,
- p_value_original: p_value,
- p_value_adjusted: None,
- auc: auc,
- es: es,
- gene_set_hits: gene_set_hits,
- gene_set_size: gene_set_size,
- })
- }
- }
- } else {
- // Multithreaded implementation
- let manager = SqliteConnectionManager::file(&msigdb); // This enables sqlite query from multiple threads simultaneously
- let pool = r2d2::Pool::new(manager).unwrap(); // This enables sqlite query from multiple threads simultaneously
- let genesets = Arc::new(genesets);
- let pool_arc = Arc::new(pool);
- let sample_coding_genes = Arc::new(sample_coding_genes);
- let pathway_p_values_temp =
- Arc::new(Mutex::new(Vec::<pathway_p_value>::with_capacity(genesets.len())));
- let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
- for thread_num in 0..max_threads {
- let genesets = Arc::clone(&genesets);
- let pool_arc = Arc::clone(&pool_arc);
- let sample_coding_genes = Arc::clone(&sample_coding_genes);
- let pathway_p_values_temp = Arc::clone(&pathway_p_values_temp);
- let handle = thread::spawn(move || {
- let mut pathway_p_values_thread: Vec<pathway_p_value> =
- Vec::with_capacity(10000);
- for iter in 0..genesets.len() {
- let remainder: usize = iter % max_threads;
- if remainder == thread_num {
- let sql_statement = "select genes from term2genes where id='"
- .to_owned()
- + &genesets[iter]
- + &"'";
- //println!("sql_statement:{}", sql_statement);
- let conn = pool_arc.get().unwrap();
- let mut gene_stmt = conn.prepare(&sql_statement).unwrap();
- //println!("gene_stmt:{:?}", gene_stmt);
-
- let mut rows = gene_stmt.query([]).unwrap();
- let mut names = HashSet::<String>::new();
- while let Some(row) = rows.next().unwrap() {
- let a: String = row.get(0).unwrap();
- let input_gene_json = json::parse(&a);
- match input_gene_json {
- Ok(json_genes) => {
- for json_iter in 0..json_genes.len() {
- names.insert(
- json_genes[json_iter]["symbol"].to_string(),
- );
- }
- }
- Err(_) => {
- panic!("Symbol, ensg, enstCanonical structure is missing!")
- }
+ //println!("GO term {:?}", n);
+ let sql_statement =
+ "select genes from term2genes where id='".to_owned() + &n.GO_id + &"'";
+ //println!("sql_statement:{}", sql_statement);
+ let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
+ //println!("gene_stmt:{:?}", gene_stmt);
+
+ let mut rows = gene_stmt.query([])?;
+ let mut names = HashSet::<String>::new();
+ while let Some(row) = rows.next()? {
+ let a: String = row.get(0)?;
+ let input_gene_json = json::parse(&a);
+ match input_gene_json {
+ Ok(json_genes) => {
+ for json_iter in 0..json_genes.len() {
+ names.insert(json_genes[json_iter]["symbol"].to_string());
  }
  }
- let gene_set_size = names.len();
- let (p_value, auc, es, matches, gene_set_hits) =
- cerno(&sample_coding_genes, names);
-
- if matches >= 1.0
- && p_value.is_nan() == false
- && es.is_nan() == false
- && es != f32::INFINITY
- && auc != f32::INFINITY
- && auc.is_nan() == false
- {
- pathway_p_values_thread.push(pathway_p_value {
- pathway_name: genesets[iter].clone(),
- p_value_original: p_value,
- p_value_adjusted: None,
- auc: auc,
- es: es,
- gene_set_hits: gene_set_hits,
- gene_set_size: gene_set_size,
- })
+ Err(_) => {
+ panic!("Symbol, ensg, enstCanonical structure is missing!")
  }
  }
  }
- pathway_p_values_temp
- .lock()
- .unwrap()
- .append(&mut pathway_p_values_thread);
- drop(pathway_p_values_temp);
- });
- handles.push(handle); // The handle (which contains the thread) is stored in the handles vector
- }
- for handle in handles {
- // Wait for all threads to finish before proceeding further
- handle.join().unwrap();
+ let gene_set_size = names.len();
+ let (p_value, auc, es, matches, gene_set_hits) =
+ cerno(&sample_coding_genes, names);
+
+ if matches >= 1.0
+ && p_value.is_nan() == false
+ && es.is_nan() == false
+ && es != f64::INFINITY
+ && auc != f64::INFINITY
+ && auc.is_nan() == false
+ {
+ pathway_p_values.push(pathway_p_value {
+ pathway_name: n.GO_id,
+ p_value_original: p_value,
+ p_value_adjusted: None,
+ auc: auc,
+ es: es,
+ gene_set_hits: gene_set_hits,
+ gene_set_size: gene_set_size,
+ })
+ }
+ }
+ Err(_) => {
+ println!("GO term not found!")
+ }
  }
- // Combining data from all different threads
- pathway_p_values.append(&mut *pathway_p_values_temp.lock().unwrap());
  }
  }
  Err(_) => panic!("sqlite database file not found"),
@@ -326,15 +229,15 @@ fn main() -> Result<()> {
  Ok(())
  }

- fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (f32, f32, f32, f32, String) {
+ fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (f64, f64, f64, f64, String) {
  // Filter the sample_coding_genes vector to only include those whose gene_names are in the HashSet genes_in_pathway
  let gene_intersections: Vec<&gene_order> = sample_coding_genes
  .iter()
  .filter(|sample_coding_genes| genes_in_pathway.contains(&sample_coding_genes.gene_name)) // Check if name is in the HashSet genes_in_pathway
  .collect(); // Collect the results into a new vector

- let N1 = gene_intersections.len() as f32;
- let N = sample_coding_genes.len() as f32;
+ let N1 = gene_intersections.len() as f64;
+ let N = sample_coding_genes.len() as f64;
  let mut gene_set_hits: String = "".to_string();
  for gene in &gene_intersections {
  gene_set_hits += &(gene.gene_name.to_string() + &",");
@@ -349,21 +252,21 @@ fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String
  .map(|x| x.rank.unwrap())
  .collect::<Vec<usize>>();

- let cerno: f32 = ranks // -2 * sum( log(ranks/N) )
+ let cerno: f64 = ranks // -2 * sum( log(ranks/N) )
  .iter()
- .map(|x| ((*x as f32) / N).ln())
- .collect::<Vec<f32>>()
+ .map(|x| ((*x as f64) / N).ln())
+ .collect::<Vec<f64>>()
  .iter()
- .sum::<f32>()
+ .sum::<f64>()
  * (-2.0);

- let cES: f32 = cerno / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
+ let cES: f64 = cerno / (2.0 * (N1 as f64)); // cES <- cerno/(2*N1)
  let N2 = N - N1; // N2 = N - N1
- let R1 = ranks.iter().sum::<usize>() as f32; // R1 <- sum(ranks)
+ let R1 = ranks.iter().sum::<usize>() as f64; // R1 <- sum(ranks)
  let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
  let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
- let p_value = chi_squared_cdf(cerno as f64, (2.0 * N1) as f64, false, false); // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
- (p_value as f32, AUC, cES, N1, gene_set_hits)
+ let p_value = chi_squared_cdf(cerno, 2.0 * N1, false, false); // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
+ (p_value, AUC, cES, N1, gene_set_hits)
  }

  fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
@@ -375,13 +278,13 @@ fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
  });

  let mut adjusted_p_values: Vec<pathway_p_value> = Vec::with_capacity(original_p_values.len());
- let mut old_p_value: f32 = 0.0;
- let mut rank: f32 = original_p_values.len() as f32;
+ let mut old_p_value: f64 = 0.0;
+ let mut rank: f64 = original_p_values.len() as f64;
  for j in 0..original_p_values.len() {
  let i = original_p_values.len() - j - 1;

  //println!("p_val:{}", p_val);
- let mut adjusted_p_val: f32 = original_p_values[i].p_value_original * (original_p_values.len() as f32 / rank); // adjusted p-value = original_p_value * (N/rank)
+ let mut adjusted_p_val: f64 = original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
  if adjusted_p_val > 1.0 {
  // p_value should NEVER be greater than 1
  adjusted_p_val = 1.0;
package/src/gdcGRIN2.rs CHANGED
@@ -1,19 +1,58 @@
+ /*
+ This script downloads cohort maf files from GDC and gracefully handles timeout and other possible errors related to GDC api processing for use by the client file summary div
+
+ Key improvements:
+ 1. Graceful error handling - individual file failures don't stop the entire process
+ 2. Better timeout handling with retries
+ 3. More detailed error reporting
+ 4. Continues processing even when some files fail
+
+ Input JSON:
+ caseFiles
+ mafOptions: For SNVindel filtering
+ Output mutations as JSON array.
+
+ Example of usage:
+ echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}},"mafOptions": {"minTotalDepth": 10,"minAltAlleleCount": 2}}' | ./target/release/gdcGRIN2
+ */
+
  use flate2::read::GzDecoder;
  use futures::StreamExt;
  use memchr::memchr;
  use serde::Deserialize;
  use serde_json;
  use std::collections::HashMap;
- use std::io::{self, Read, Write};
+ use std::io::{self, Read};
+ use std::sync::Arc;
+ use std::sync::atomic::{AtomicUsize, Ordering};
  use std::time::Duration;
  use tokio::io::{AsyncReadExt, BufReader};
+ use tokio::sync::Mutex;
  use tokio::time::timeout;

- // Struct to hold error information
- #[derive(serde::Serialize)]
+ // Struct to hold error information for JSON output
+ #[derive(serde::Serialize, Clone)]
  struct ErrorEntry {
- case: String,
- error: String,
+ case_id: String,
+ data_type: String,
+ error_type: String,
+ error_details: String,
+ attempts_made: u32,
+ }
+
+ // Struct for the final output that includes both successful data and errors
+ #[derive(serde::Serialize)]
+ struct GdcOutput {
+ successful_data: Vec<Vec<Vec<String>>>, // Array of successful file data arrays
+ failed_files: Vec<ErrorEntry>,
+ summary: OutputSummary,
+ }
+
+ #[derive(serde::Serialize)]
+ struct OutputSummary {
+ total_files: usize,
+ successful_files: usize,
+ failed_files: usize,
  }

  // Define the structure for datadd
@@ -23,32 +62,52 @@ struct DataType {
  maf: Option<String>,
  }

+ // Define the structure for mafOptions
+ #[derive(Deserialize, Debug)]
+ struct MafOptions {
+ #[serde(rename = "minTotalDepth")]
+ min_total_depth: i32,
+ #[serde(rename = "minAltAlleleCount")]
+ min_alt_allele_count: i32,
+ }
+
+ // Define the top-level input structure
+ #[derive(Deserialize, Debug)]
+ struct InputData {
+ #[serde(rename = "caseFiles")]
+ case_files: HashMap<String, DataType>,
+ #[serde(rename = "mafOptions")]
+ maf_options: Option<MafOptions>,
+ }
+
  // Function to parse TSV content
- // CNV:
- // Select cnv columns ["Chromosome","Start","End","Segment_Mean"]
- // Segment_Mean >= 0.2 => gain; Segment_Mean <= -0.2 => loss
- // MAF:
- // Select MAF columns ["Chromosome","Start_Position","End_Position"]
- fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String, (String, String, String)> {
+ fn parse_content(
+ content: &str,
+ case_id: &str,
+ data_type: &str,
+ min_total_depth: i32,
+ min_alt_allele_count: i32,
+ ) -> Result<Vec<Vec<String>>, (String, String, String)> {
  let lines = content.lines();
- //let mut parsed_data = Vec::new();
- let mut parsed_data: String = String::new();
+ let mut parsed_data = Vec::new();
  let mut columns_indices: Vec<usize> = Vec::new();
  let mut header_mk: &str = "";
- let mut columns = Vec::new(); // columns selected from GDC file
+ let mut columns = Vec::new();
+
  if data_type == "cnv" {
  header_mk = "GDC_Aliquot_ID";
  columns = vec!["Chromosome", "Start", "End", "Segment_Mean"]
  } else if data_type == "maf" {
  header_mk = "Hugo_Symbol";
- columns = vec!["Chromosome", "Start_Position", "End_Position"]
+ columns = vec!["Chromosome", "Start_Position", "End_Position", "t_depth", "t_alt_count"]
  };
- let mut header: Vec<String> = Vec::new(); // GDC file header
+
+ let mut header: Vec<String> = Vec::new();
+
  for line in lines {
  if line.starts_with("#") {
  continue;
  } else if line.contains(&header_mk) {
- // header line
  header = line.split("\t").map(|s| s.to_string()).collect();
  for col in &columns {
  match header.iter().position(|x| x == col) {
@@ -65,12 +124,12 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
  let mut keep_ck: bool = true;
  let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
  let mut out_lst: Vec<String> = Vec::new();
- // add sample ID first
  out_lst.push(case_id.to_string());
+
  for x in columns_indices.iter() {
  let mut element = cont_lst[*x].to_string();
+
  if data_type == "cnv" && &header[*x] == "Segment_Mean" {
- // convert to f32 (segment_mean)
  let seg_mean = match element.parse::<f32>() {
  Ok(val) => val,
  Err(_e) => {
@@ -78,9 +137,9 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
  return Err((case_id.to_string(), data_type.to_string(), error_msg));
  }
  };
- if seg_mean >= 0.2 {
+ if seg_mean >= 0.3 {
  element = "gain".to_string();
- } else if seg_mean <= -0.2 {
+ } else if seg_mean <= -0.4 {
  element = "loss".to_string();
  } else {
  keep_ck = false;
@@ -88,16 +147,37 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
  }
  out_lst.push(element);
  }
- // add lsn.type to snv
+
  if data_type == "maf" {
- out_lst.push("mutation".to_string());
+ let alle_depth = match out_lst[4].parse::<i32>() {
+ Ok(value) => value,
+ Err(_) => {
+ let error_msg = "Failed to convert t_depth to i32.".to_string();
+ return Err((case_id.to_string(), data_type.to_string(), error_msg));
+ }
+ };
+ let alt_count = match out_lst[5].parse::<i32>() {
+ Ok(value) => value,
+ Err(_) => {
+ let error_msg = "Failed to convert t_alt_count to i32.".to_string();
+ return Err((case_id.to_string(), data_type.to_string(), error_msg));
+ }
+ };
+
+ if alle_depth >= min_total_depth && alt_count >= min_alt_allele_count {
+ out_lst = out_lst[0..4].to_vec();
+ out_lst.push("mutation".to_string());
+ } else {
+ keep_ck = false;
+ }
  }
+
  if keep_ck {
- parsed_data.push_str(out_lst.join("\t").as_str());
- parsed_data.push_str("\n");
+ parsed_data.push(out_lst);
  }
  }
  }
+
  if columns_indices.is_empty() {
  return Err((
  case_id.to_string(),
@@ -105,14 +185,121 @@ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String
  "No matching columns found. Problematic file!".to_string(),
  ));
  };
+
  Ok(parsed_data)
  }

- // Function to download data
- //async fn download_data(data4dl: HashMap<String,DataType>, host: &str) -> Vec<Result<(String, String), (String, String)>> {
- async fn download_data(data4dl: HashMap<String, DataType>, host: &str) -> () {
+ /// Downloads a single file with minimal retry logic for transient failures
+ async fn download_single_file(
+ case_id: String,
+ data_type: String,
+ url: String,
+ max_attempts: u32,
+ ) -> Result<(String, String, String), (String, String, String, u32)> {
+ let mut last_error = String::new();
+ let mut error_type = String::new();
+
+ for attempt in 0..max_attempts {
+ // Build HTTP client with aggressive timeouts for real-time processing
+ let client = match reqwest::Client::builder()
+ .timeout(Duration::from_secs(10)) // 10 second timeout per request
+ .connect_timeout(Duration::from_secs(3)) // 3 second connect timeout
+ .build()
+ {
+ Ok(client) => client,
+ Err(e) => {
+ last_error = format!("Client build error: {}", e);
+ error_type = "client_build_error".to_string();
+ continue;
+ }
+ };
+
+ // Attempt download with tight timeout - fail fast if server is slow
+ match timeout(Duration::from_secs(12), client.get(&url).send()).await {
+ Ok(Ok(resp)) if resp.status().is_success() => {
+ match resp.bytes().await {
+ Ok(content) => {
+ // Handle both compressed and uncompressed content
+ let text = if memchr(0x00, &content).is_some() {
+ // Likely compressed (gzipped) content
+ let mut decoder = GzDecoder::new(&content[..]);
+ let mut decompressed_content = Vec::new();
+ match decoder.read_to_end(&mut decompressed_content) {
+ Ok(_) => String::from_utf8_lossy(&decompressed_content).to_string(),
+ Err(e) => {
+ last_error = format!("Decompression failed: {}", e);
+ error_type = "decompression_error".to_string();
+ continue; // Retry on decompression failure
+ }
+ }
+ } else {
+ // Plain text content
+ String::from_utf8_lossy(&content).to_string()
+ };
+
+ // Success! Return immediately
+ return Ok((case_id, data_type, text));
+ }
+ Err(e) => {
+ last_error = format!("Failed to read response bytes: {}", e);
+ error_type = "connection_error".to_string();
+ // This could be "connection closed before message completed"
+ // Worth retrying for transient network issues
+ }
+ }
+ }
+ Ok(Ok(resp)) => {
+ last_error = format!(
+ "HTTP error {}: {}",
+ resp.status(),
+ resp.status().canonical_reason().unwrap_or("Unknown")
+ );
+ error_type = if resp.status().is_client_error() {
+ "client_error".to_string()
+ } else {
+ "server_error".to_string()
+ };
+ // Don't retry 4xx errors (client errors), but retry 5xx (server errors)
+ if resp.status().is_client_error() {
+ break; // No point retrying client errors
+ }
+ }
+ Ok(Err(e)) => {
+ last_error = format!("Request error: {}", e);
+ error_type = "network_error".to_string();
+ // Network errors are worth retrying
+ }
+ Err(_) => {
+ last_error = "Request timeout (12s) - server too slow".to_string();
+ error_type = "timeout_error".to_string();
+ // Timeouts might be transient, worth a quick retry
+ }
+ }
+
+ // If this isn't the last attempt, wait briefly before retrying
+ if attempt < max_attempts - 1 {
+ // Silent retry - no stderr noise
+ tokio::time::sleep(Duration::from_secs(1)).await; // 1 second between retries
+ }
+ }
+
+ Err((
+ case_id,
+ data_type,
+ format!("{}: {}", error_type, last_error),
+ max_attempts,
+ ))
+ }
+
+ /// Main download function with structured JSON output including errors
+ async fn download_data(
+ data4dl: HashMap<String, DataType>,
+ host: &str,
+ min_total_depth: i32,
+ min_alt_allele_count: i32,
+ ) {
  // Generate URLs from data4dl, handling optional cnv and maf
- let data_urls = data4dl
+ let data_urls: Vec<(String, String, String)> = data4dl
  .into_iter()
  .flat_map(|(case_id, data_types)| {
  let mut urls = Vec::new();
@@ -124,172 +311,158 @@ async fn download_data(data4dl: HashMap<String, DataType>, host: &str) -> () {
  }
  urls
  })
- .collect::<Vec<_>>();
+ .collect();
+
+ let total_files = data_urls.len();
+
+ // Use atomic counters that can be safely shared across async closures
+ let successful_downloads = Arc::new(AtomicUsize::new(0));
+ let failed_downloads = Arc::new(AtomicUsize::new(0));
+
+ // Create shared vectors to collect successful data and errors
+ let successful_data = Arc::new(Mutex::new(Vec::<Vec<Vec<String>>>::new()));
+ let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
+
+ // Create download futures with smart retry logic
  let download_futures = futures::stream::iter(data_urls.into_iter().map(|(case_id, data_type, url)| {
  async move {
- //let case_dt = format!("{}/{}",case_id,data_type).to_string();
- // Build HTTP client with timeouts
- let client = reqwest::Client::builder()
- .timeout(Duration::from_secs(60)) // 60-second timeout per request
- .connect_timeout(Duration::from_secs(30))
- .build()
- .map_err(|_e| "Client build error".to_string());
- // Handle client creation result
- match client {
- Ok(client) => {
- match client.get(&url).send().await {
- Ok(resp) if resp.status().is_success() => {
- match resp.bytes().await {
- Ok(content) => {
- // if data_type == "cnv" {
- if !memchr(0x00, &content).is_some() {
- // CNV files are plain text
- let text = String::from_utf8_lossy(&content).to_string();
- Ok((case_id.clone(), data_type.clone(), text))
- } else {
- let mut decoder = GzDecoder::new(&content[..]);
- let mut decompressed_content = Vec::new();
- match decoder.read_to_end(&mut decompressed_content) {
- Ok(_) => {
- let text = String::from_utf8_lossy(&decompressed_content).to_string();
- Ok((case_id.clone(), data_type.clone(), text))
- }
- Err(e) => {
- let error_msg = format!(
- "Failed to decompress {} file for {}: {}",
- data_type, case_id, e
- );
- Err((case_id.clone(), data_type.clone(), error_msg))
- }
- }
- }
- }
- Err(e) => {
- let error_msg =
- format!("Failed to read bytes for {} file for {}: {}", data_type, case_id, e);
- Err((case_id.clone(), data_type.clone(), error_msg))
- }
- }
- }
- Ok(resp) => {
- let error_msg =
- format!("HTTP error for {} file for {}: {}", data_type, case_id, resp.status());
- Err((case_id.clone(), data_type.clone(), error_msg))
- }
- Err(e) => {
- let error_msg =
- format!("Server request failed for {} file for {}: {}", data_type, case_id, e);
- Err((case_id.clone(), data_type.clone(), error_msg))
- }
- }
- }
- Err(_e) => {
- let error_msg = "Client build error".to_string();
- Err((case_id, data_type, error_msg))
- }
- }
+ // Try each file up to 2 times for transient failures
+ download_single_file(case_id, data_type, url, 2).await
  }
  }));

- // Execute downloads concurrently and collect results
+ // Execute downloads concurrently with high concurrency for speed
  download_futures
- .buffer_unordered(10)
- .for_each(|result| async {
- match result {
- Ok((case_id, data_type, content)) => match parse_content(&content, &case_id, &data_type) {
- Ok(parsed_data) => match serde_json::to_string(&parsed_data) {
- Ok(json) => println!("{}", json),
- Err(e) => {
- let error = ErrorEntry {
- case: format!("{}: {}", case_id, data_type),
- error: format!("Failed to convert data to JSON {}", e),
- };
- let error_js = serde_json::to_string(&error).unwrap();
- eprintln!("{}", error_js);
+ .buffer_unordered(15) // Increased to 15 concurrent downloads for speed
+ .for_each(|download_result| {
+ let successful_downloads = Arc::clone(&successful_downloads);
+ let failed_downloads = Arc::clone(&failed_downloads);
+ let successful_data = Arc::clone(&successful_data);
+ let errors = Arc::clone(&errors);
+
+ async move {
+ match download_result {
+ Ok((case_id, data_type, content)) => {
+ // Successfully downloaded, now try to parse
+ match parse_content(&content, &case_id, &data_type, min_total_depth, min_alt_allele_count) {
+ Ok(parsed_data) => {
+ // Store successful data
+ successful_data.lock().await.push(parsed_data);
+ successful_downloads.fetch_add(1, Ordering::Relaxed);
+ }
+ Err((cid, dtp, error)) => {
+ failed_downloads.fetch_add(1, Ordering::Relaxed);
+ let error = ErrorEntry {
+ case_id: cid,
+ data_type: dtp,
+ error_type: "parsing_error".to_string(),
+ error_details: error,
+ attempts_made: 1,
+ };
+ errors.lock().await.push(error);
+ }
  }
- },
- Err((cid, dtp, error)) => {
+ }
+ Err((case_id, data_type, error_details, attempts)) => {
+ failed_downloads.fetch_add(1, Ordering::Relaxed);
+
+ // Parse error type from error details
+ let (error_type, clean_details) = if error_details.contains(":") {
+ let parts: Vec<&str> = error_details.splitn(2, ": ").collect();
+ (parts[0].to_string(), parts[1].to_string())
+ } else {
+ ("unknown_error".to_string(), error_details)
+ };
+
  let error = ErrorEntry {
- case: format!("{}: {}", cid, dtp),
- error,
+ case_id,
+ data_type,
+ error_type,
+ error_details: clean_details,
+ attempts_made: attempts,
  };
- let error_js = serde_json::to_string(&error).unwrap();
- eprintln!("{}", error_js);
+ errors.lock().await.push(error);
  }
- },
- Err((case_id, data_type, error)) => {
- let error = ErrorEntry {
- case: format!("{}: {}", case_id, data_type),
- error,
- };
- let error_js = serde_json::to_string(&error).unwrap();
- eprintln!("{}", error_js);
  }
  }
  })
  .await;
+
+ // Create final output structure
+ let success_count = successful_downloads.load(Ordering::Relaxed);
+ let failed_count = failed_downloads.load(Ordering::Relaxed);
+
+ let output = GdcOutput {
+ successful_data: successful_data.lock().await.clone(),
+ failed_files: errors.lock().await.clone(),
+ summary: OutputSummary {
+ total_files,
+ successful_files: success_count,
+ failed_files: failed_count,
+ },
+ };
+
+ // Output the complete structure as JSON
+ match serde_json::to_string(&output) {
+ Ok(json) => println!("{}", json),
+ Err(_) => {
+ // Silent failure - exit without stderr
+ std::process::exit(1);
+ }
+ }
  }

  #[tokio::main]
  async fn main() -> Result<(), Box<dyn std::error::Error>> {
  const HOST: &str = "https://api.gdc.cancer.gov/data/";

- // Accepting the piped input json from nodejs
- let timeout_duration = Duration::from_secs(5); // Set a 5-second timeout
+ // Read input with timeout
+ let timeout_duration = Duration::from_secs(10); // Increased timeout for input

- // Wrap the read operation in a timeout
  let result = timeout(timeout_duration, async {
- let mut buffer = String::new(); // Initialize an empty string to store input
- let mut reader = BufReader::new(tokio::io::stdin()); // Create a buffered reader for stdin
- reader.read_to_string(&mut buffer).await?; // Read a line asynchronously
- Ok::<String, io::Error>(buffer) // Return the input as a Result
+ let mut buffer = String::new();
+ let mut reader = BufReader::new(tokio::io::stdin());
+ reader.read_to_string(&mut buffer).await?;
+ Ok::<String, io::Error>(buffer)
  })
  .await;

- // Handle the result of the input timeout operation
- let input_js: HashMap<String, DataType> = match result {
+ // Handle input parsing (silently)
+ let input_js: InputData = match result {
  Ok(Ok(buffer)) => match serde_json::from_str(&buffer) {
  Ok(js) => js,
- Err(e) => {
- let stdin_error = ErrorEntry {
- case: String::new(),
- error: format!("Input JSON parsing error: {}", e),
- };
- writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
- return Err(Box::new(std::io::Error::new(
- std::io::ErrorKind::InvalidInput,
- "Input JSON parsing Error!",
- )) as Box<dyn std::error::Error>);
+ Err(_e) => {
+ // Silent failure - exit without stderr
+ std::process::exit(1);
  }
  },
  Ok(Err(_e)) => {
- let stdin_error = ErrorEntry {
- case: String::new(),
- error: "Error reading from stdin.".to_string(),
- };
- let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
- writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
- return Err(Box::new(std::io::Error::new(
- std::io::ErrorKind::InvalidInput,
- "Error reading from stdin!",
- )) as Box<dyn std::error::Error>);
+ // Silent failure - exit without stderr
+ std::process::exit(1);
  }
  Err(_) => {
- let stdin_error = ErrorEntry {
- case: String::new(),
- error: "Timeout while reading from stdin.".to_string(),
- };
- let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
- writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
- return Err(Box::new(std::io::Error::new(
- std::io::ErrorKind::InvalidInput,
- "Timeout while reading from stdin.",
- )) as Box<dyn std::error::Error>);
+ // Silent failure - exit without stderr
+ std::process::exit(1);
  }
  };

- // Download data
- download_data(input_js, HOST).await;
+ // Validate input (silently)
+ if input_js.case_files.is_empty() {
+ // Silent failure - exit without stderr
+ std::process::exit(1);
+ }
+
+ let case_files = input_js.case_files;
+
+ // Set default maf_options
+ let (min_total_depth, min_alt_allele_count) = match input_js.maf_options {
+ Some(options) => (options.min_total_depth, options.min_alt_allele_count),
+ None => (10, 2), // Default values
+ };
+
+ // Download data - this will now handle errors gracefully
+ download_data(case_files, HOST, min_total_depth, min_alt_allele_count).await;

+ // Always exit successfully - individual file failures are logged but don't stop the process
  Ok(())
  }