npm - @sjcrh/proteinpaint-rust - Versions diffs - 2.40.6 → 2.49.0 - Mend

@sjcrh/proteinpaint-rust 2.40.6 → 2.49.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/Cargo.toml +5 -6
package/package.json +35 -35
package/src/DEanalysis.rs +460 -387
package/src/indel.rs +1 -1
package/src/stats_functions.rs +270 -270

package/src/DEanalysis.rs CHANGED Viewed

@@ -1,5 +1,5 @@
 // cd .. && cargo build --release && json='{"case":"SJMB030827,SJMB030838,SJMB032893,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | target/release/DEanalysis
-// cd .. && cargo build --release && cat ~/sjpp/test.txt | target/release/DEanalysis
+// cd .. && cargo build --release && time cat ~/sjpp/test.txt | target/release/DEanalysis
 #![allow(non_snake_case)]
 use json;
 use nalgebra::base::dimension::Const;
@@ -8,20 +8,52 @@ use nalgebra::base::Matrix;
 use nalgebra::base::VecStorage;
 use nalgebra::DMatrix;
 use nalgebra::ViewStorage;
-use r_mathlib;
 use serde::{Deserialize, Serialize};
 use serde_json;
 use statrs::statistics::Data;
 use statrs::statistics::Distribution;
 use statrs::statistics::Median;
 use std::cmp::Ordering;
-use std::path::Path;
+use std::fs::File;
+use std::io::Read;
 use std::str::FromStr;
+use std::sync::{Arc, Mutex}; // Multithreading library
+use std::thread;
 use std::time::Instant;
 //use std::cmp::Ordering;
 //use std::env;
 use std::io;
-//mod stats_functions; // Importing Wilcoxon function from stats_functions.rs
+mod stats_functions; // Importing Wilcoxon function from stats_functions.rs
+const PAR_CUTOFF: usize = 100000; // Cutoff for triggering multithreading processing of data
+//const PAR_CUTOFF: usize = 1000000000000000;
+#[allow(non_upper_case_globals)]
+const max_threads: usize = 6; // Max number of threads in case the parallel processing of reads is invoked
+fn binary_search(input: &Vec<usize>, y: usize) -> i64 { // Binary search for `y` in `input`; returns the index of a match as i64, or -1 if there is none
+    let input_dup = &input[..]; // View the vector as a slice
+    let mut index: i64 = -1; // -1 is the "not found" sentinel returned to the caller
+    let mut l: usize = 0; // Inclusive lower bound of the search window
+    let mut r: usize = input_dup.len() - 1; // Inclusive upper bound; NOTE(review): `len() - 1` underflows usize when `input` is empty
+    let mut m: usize;
+    while l <= r {
+        m = l + ((r - l) / 2); // Midpoint of the current window
+        if y == input_dup[m] {
+            index = m as i64;
+            break;
+        } else if y > input_dup[m] {
+            l = m + 1; // y is larger, ignore left half (expects `input` sorted in ascending order)
+        }
+        // If y is smaller, ignore right half
+        else {
+            if m == 0 as usize { // `m - 1` below would underflow at index 0, so stop: y is not present
+                break;
+            }
+            r = m - 1;
+        }
+    }
+    index
+}
 fn input_data(
     filename: &String,
@@ -34,9 +66,9 @@ fn input_data(
     Vec<String>,
     Vec<String>,
 ) {
-    // Build the CSV reader and iterate over each record.
-    let path = Path::new(filename);
-    let mut rdr = csv::Reader::from_path(path).unwrap();
+    let input_time = Instant::now();
+    //let mut rdr = csv::Reader::from_path(path).unwrap();
+    let mut file = File::open(filename).unwrap();
     let mut num_lines: usize = 0;
     let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
     let mut gene_names: Vec<String> = Vec::with_capacity(65000);
@@ -44,11 +76,12 @@ fn input_data(
     let mut num_columns: usize = 0;
     // Check headers for samples
-    let header_line = rdr.headers().unwrap();
-    let mut headers: Vec<&str> = Vec::with_capacity(1500);
-    for field in header_line.iter() {
-        headers = field.split('\t').collect::<Vec<&str>>();
-    }
+    let mut buffer = String::new();
+    file.read_to_string(&mut buffer).unwrap();
+    // Check headers for samples
+    let lines: Vec<&str> = buffer.split('\n').collect::<Vec<&str>>();
+    let total_lines = lines.len();
+    let headers: Vec<&str> = lines[0].split('\t').collect::<Vec<&str>>();
     //println!("headers:{:?}", headers);
     let mut case_indexes_original: Vec<usize> = Vec::with_capacity(case_list.len());
     let mut control_indexes_original: Vec<usize> = Vec::with_capacity(control_list.len());
@@ -68,6 +101,7 @@ fn input_data(
             }
         }
     }
+    let num_cases = case_list.len();
     for item in control_list {
         //println!("item:{}", item);
@@ -80,70 +114,223 @@ fn input_data(
             }
         }
     }
+    let num_controls = control_list.len();
     //println!("case_indexes_original:{:?}", case_indexes_original);
     //println!("control_indexes_original:{:?}", control_indexes_original);
+    case_indexes_original.sort();
+    case_indexes_original.dedup();
+    control_indexes_original.sort();
+    control_indexes_original.dedup();
     let mut case_indexes: Vec<usize> = Vec::with_capacity(case_list.len());
     let mut control_indexes: Vec<usize> = Vec::with_capacity(control_list.len());
-    for result in rdr.records() {
-        // The iterator yields Result<StringRecord, Error>, so we check the
-        // error here.
-        let record = result.unwrap();
-        //println!("record:{:?}", record);
-        let mut index = 0;
-        for field in record[0].split('\t').collect::<Vec<&str>>() {
-            if index == gene_name_index.unwrap() {
-                gene_names.push(field.to_string());
-            } else if index == gene_symbol_index.unwrap() {
-                gene_symbols.push(field.to_string());
-            } else if case_indexes_original.contains(&index) {
-                let num = FromStr::from_str(field);
-                match num {
-                    Ok(n) => {
-                        //println!("n:{}", n);
-                        input_vector.push(n);
-                        if num_lines == 0 {
-                            case_indexes.push(num_columns);
-                            num_columns += 1;
+    if lines.len() * (case_indexes_original.len() + control_indexes_original.len()) < PAR_CUTOFF {
+        // Single-threaded path: taken when (number of lines * number of selected case/control columns) is below PAR_CUTOFF
+        let lines_slice = &lines[..];
+        for line_iter in 1..lines_slice.len() - 1 {
+            // Subtracting 1 from total length of lines_slice because the last one will be empty
+            let line = lines_slice[line_iter];
+            let mut index = 0;
+            for field in line.split('\t').collect::<Vec<&str>>() {
+                if index == gene_name_index.unwrap() {
+                    gene_names.push(field.to_string());
+                } else if index == gene_symbol_index.unwrap() {
+                    gene_symbols.push(field.to_string());
+                } else if binary_search(&case_indexes_original, index) != -1 {
+                    let num = FromStr::from_str(field);
+                    match num {
+                        Ok(n) => {
+                            //println!("n:{}", n);
+                            input_vector.push(n);
+                            if num_lines == 0 {
+                                case_indexes.push(num_columns);
+                                num_columns += 1;
+                            }
+                        }
+                        Err(_n) => {
+                            panic!(
+                                "Number {} in line {} and column {} is not a decimal number",
+                                field,
+                                num_lines + 1,
+                                index + 1
+                            );
                         }
                     }
-                    Err(_n) => {
-                        panic!(
-                            "Number {} in line {} and column {} is not a decimal number",
-                            field,
-                            num_lines + 1,
-                            index + 1
-                        );
+                } else if binary_search(&control_indexes_original, index) != -1 {
+                    let num = FromStr::from_str(field);
+                    match num {
+                        Ok(n) => {
+                            //println!("n:{}", n);
+                            input_vector.push(n);
+                            if num_lines == 0 {
+                                control_indexes.push(num_columns);
+                                num_columns += 1;
+                            }
+                        }
+                        Err(_n) => {
+                            panic!(
+                                "Number {} in line {} and column {} is not a decimal number",
+                                field,
+                                num_lines + 1,
+                                index + 1
+                            );
+                        }
                     }
                 }
-            } else if control_indexes_original.contains(&index) {
-                let num = FromStr::from_str(field);
-                match num {
-                    Ok(n) => {
-                        //println!("n:{}", n);
-                        input_vector.push(n);
-                        if num_lines == 0 {
-                            control_indexes.push(num_columns);
-                            num_columns += 1;
+                index += 1;
+            }
+            num_lines += 1;
+        }
+    } else {
+        // Multithreaded implementation for parsing data in parallel starts from here
+        // Generally in Rust a value has only one owner at a time, but the `Arc` type (an atomically reference-counted pointer) allows multiple threads to share ownership of the same data.
+        let case_indexes_original = Arc::new(case_indexes_original);
+        let control_indexes_original = Arc::new(control_indexes_original);
+        let buffer = Arc::new(buffer);
+        let case_indexes_temp = Arc::new(Mutex::new(Vec::<usize>::with_capacity(case_list.len())));
+        let control_indexes_temp =
+            Arc::new(Mutex::new(Vec::<usize>::with_capacity(control_list.len())));
+        let num_lines_temp = Arc::new(Mutex::<usize>::new(0));
+        let num_columns_temp = Arc::new(Mutex::<usize>::new(0));
+        let genes_names_temp = Arc::new(Mutex::new(Vec::<String>::new()));
+        let genes_symbols_temp = Arc::new(Mutex::new(Vec::<String>::new()));
+        let input_vector_temp = Arc::new(Mutex::new(Vec::<f64>::new()));
+        let mut handles = vec![]; // Vector of thread join handles; they are joined below so that all threads finish before their results are combined
+        println!("Number of threads used:{}", max_threads);
+        for thread_num in 0..max_threads {
+            let case_indexes_original = Arc::clone(&case_indexes_original);
+            let control_indexes_original = Arc::clone(&control_indexes_original);
+            let case_indexes_temp = Arc::clone(&case_indexes_temp);
+            let control_indexes_temp = Arc::clone(&control_indexes_temp);
+            let input_vector_temp = Arc::clone(&input_vector_temp);
+            let genes_names_temp = Arc::clone(&genes_names_temp);
+            let genes_symbols_temp = Arc::clone(&genes_symbols_temp);
+            let num_lines_temp = Arc::clone(&num_lines_temp);
+            let num_columns_temp = Arc::clone(&num_columns_temp);
+            let buffer = Arc::clone(&buffer);
+            let handle = thread::spawn(move || {
+                let mut case_indexes_thread: Vec<usize> = Vec::with_capacity(num_cases);
+                let mut control_indexes_thread: Vec<usize> = Vec::with_capacity(num_controls);
+                let mut genes_names_thread: Vec<String> = Vec::with_capacity(65000);
+                let mut genes_symbols_thread: Vec<String> = Vec::with_capacity(65000);
+                let mut input_vector_thread: Vec<f64> = Vec::with_capacity(65000);
+                let mut num_columns_thread: usize = 0;
+                let mut num_lines_thread: usize = 0;
+                let lines: Vec<&str> = buffer.split('\n').collect();
+                //println!("case_indexes_original:{:?}", case_indexes_original);
+                //println!("control_indexes:{:?}", control_indexes);
+                for line_iter in 1..total_lines - 1 {
+                    let remainder: usize = line_iter % max_threads; // Calculate remainder of line number divided by max_threads to decide which thread parses this line
+                    if remainder == thread_num {
+                        //println!("buffer:{}", buffer);
+                        // A line is parsed only by the thread whose thread_num equals this remainder; this prevents multiple threads from parsing the same line
+                        let line = lines[line_iter];
+                        let mut index = 0;
+                        for field in line.split('\t').collect::<Vec<&str>>() {
+                            if index == gene_name_index.unwrap() {
+                                genes_names_thread.push(field.to_string());
+                            } else if index == gene_symbol_index.unwrap() {
+                                genes_symbols_thread.push(field.to_string());
+                            } else if binary_search(&case_indexes_original, index) != -1 {
+                                let num = FromStr::from_str(field);
+                                match num {
+                                    Ok(n) => {
+                                        //println!("n:{}", n);
+                                        input_vector_thread.push(n);
+                                        if line_iter == 1 {
+                                            case_indexes_thread.push(num_columns_thread);
+                                            num_columns_thread += 1;
+                                        }
+                                    }
+                                    Err(_n) => {
+                                        panic!(
+                                        "Number {} in line {} and column {} is not a decimal number",
+                                        field,
+                                        num_lines_thread + 1,
+                                        index + 1
+                                    );
+                                    }
+                                }
+                            } else if binary_search(&control_indexes_original, index) != -1 {
+                                let num = FromStr::from_str(field);
+                                match num {
+                                    Ok(n) => {
+                                        //println!("n:{}", n);
+                                        input_vector_thread.push(n);
+                                        if line_iter == 1 {
+                                            control_indexes_thread.push(num_columns_thread);
+                                            num_columns_thread += 1;
+                                        }
+                                    }
+                                    Err(_n) => {
+                                        panic!(
+                                        "Number {} in line {} and column {} is not a decimal number",
+                                        field,
+                                        num_lines_thread + 1,
+                                        index + 1
+                                    );
+                                    }
+                                }
+                            }
+                            index += 1;
                         }
-                    }
-                    Err(_n) => {
-                        panic!(
-                            "Number {} in line {} and column {} is not a decimal number",
-                            field,
-                            num_lines + 1,
-                            index + 1
-                        );
+                        num_lines_thread += 1;
                     }
                 }
-            }
-            index += 1;
+                input_vector_temp
+                    .lock()
+                    .unwrap()
+                    .append(&mut input_vector_thread);
+                case_indexes_temp
+                    .lock()
+                    .unwrap()
+                    .append(&mut case_indexes_thread);
+                control_indexes_temp
+                    .lock()
+                    .unwrap()
+                    .append(&mut control_indexes_thread);
+                genes_names_temp
+                    .lock()
+                    .unwrap()
+                    .append(&mut genes_names_thread);
+                genes_symbols_temp
+                    .lock()
+                    .unwrap()
+                    .append(&mut genes_symbols_thread);
+                *num_lines_temp.lock().unwrap() += num_lines_thread;
+                if num_columns_thread > 0 {
+                    *num_columns_temp.lock().unwrap() += num_columns_thread;
+                }
+                drop(input_vector_temp);
+                drop(case_indexes_temp);
+                drop(control_indexes_temp);
+                drop(genes_names_temp);
+                drop(genes_symbols_temp);
+                drop(num_lines_temp);
+                drop(num_columns_temp);
+            });
+            handles.push(handle); // The handle (which contains the thread) is stored in the handles vector
+        }
+        for handle in handles {
+            // Wait for all threads to finish before proceeding further
+            handle.join().unwrap();
         }
-        num_lines += 1;
+        // Combining data from all different threads
+        input_vector.append(&mut *input_vector_temp.lock().unwrap());
+        case_indexes.append(&mut *case_indexes_temp.lock().unwrap());
+        control_indexes.append(&mut *control_indexes_temp.lock().unwrap());
+        gene_names.append(&mut *genes_names_temp.lock().unwrap());
+        gene_symbols.append(&mut *genes_symbols_temp.lock().unwrap());
+        num_lines += *num_lines_temp.lock().unwrap();
+        num_columns += *num_columns_temp.lock().unwrap();
     }
     //println!("case_indexes:{:?}", case_indexes);
     //println!("control_indexes:{:?}", control_indexes);
+    //println!("num_lines:{}", num_lines);
+    //println!("num_columns:{}", num_columns);
+    //println!("num_lines * num_columns:{}", num_lines * num_columns);
+    //println!("input_vector:{:?}", input_vector.len());
+    println!("Time for inputting data:{:?}", input_time.elapsed());
     let dm = DMatrix::from_row_slice(num_lines, num_columns, &input_vector);
     //println!("dm:{:?}", dm);
     (dm, case_indexes, control_indexes, gene_names, gene_symbols)
@@ -198,6 +385,7 @@ fn main() {
                     let control_list: Vec<&str> = control_string.split(",").collect();
                     let (input_matrix, case_indexes, control_indexes, gene_names, gene_symbols) =
                         input_data(file_name, &case_list, &control_list);
+                    let filtering_time = Instant::now();
                     let (filtered_matrix, lib_sizes, filtered_genes, filtered_gene_symbols) =
                         filter_by_expr(
                             &input_matrix,
@@ -206,10 +394,21 @@ fn main() {
                             gene_names,
                             gene_symbols,
                         );
+                    println!("filtering time:{:?}", filtering_time.elapsed());
                     //println!("filtered_matrix_rows:{:?}", filtered_matrix.nrows());
                     //println!("filtered_matrix_cols:{:?}", filtered_matrix.ncols());
+                    let cpm_normalization_time = Instant::now();
                     let mut normalized_matrix = cpm(&filtered_matrix);
+                    println!(
+                        "cpm normalization time:{:?}",
+                        cpm_normalization_time.elapsed()
+                    );
+                    let tmm_normalization_time = Instant::now();
                     let norm_factors = tmm_normalization(filtered_matrix, &lib_sizes);
+                    println!(
+                        "tmm normalization time:{:?}",
+                        tmm_normalization_time.elapsed()
+                    );
                     //println!("norm_factors:{:?}", norm_factors);
                     for col in 0..normalized_matrix.ncols() {
@@ -232,58 +431,154 @@ fn main() {
                     //println!("case_indexes:{:?}", case_indexes);
                     //println!("control_indexes:{:?}", control_indexes);
-                    for i in 0..normalized_matrix.nrows() {
-                        let row = normalized_matrix.row(i);
-                        //println!("row:{:?}", row);
-                        let mut treated = Vec::<f64>::new();
-                        let mut control = Vec::<f64>::new();
-                        //println!("conditions:{:?}", conditions);
-                        for j in 0..(case_indexes.len() + control_indexes.len()) {
-                            //println!("row[(0, j)]:{}", row[(0, j)]);
-                            if case_indexes.contains(&j) {
-                                treated.push(row[(0, j)]);
-                                //println!("{},{}", input_data_vec.0[i][j], "Diseased");
-                            } else if control_indexes.contains(&j) {
-                                // + 1 was added because in the input file the first column of the first row is blank as the first column consists of gene names
-                                control.push(row[(0, j)]);
-                                //println!("{},{}", input_data_vec.0[i][j], "Control");
-                            } else {
-                                panic!("Column {} could not be classified into case/control", j);
+                    let num_normalized_rows = normalized_matrix.nrows();
+                    if normalized_matrix.nrows() * normalized_matrix.ncols() < PAR_CUTOFF {
+                        for i in 0..normalized_matrix.nrows() {
+                            let row = normalized_matrix.row(i);
+                            //println!("row:{:?}", row);
+                            let mut treated = Vec::<f64>::new();
+                            let mut control = Vec::<f64>::new();
+                            //println!("conditions:{:?}", conditions);
+                            for j in 0..(case_indexes.len() + control_indexes.len()) {
+                                //println!("row[(0, j)]:{}", row[(0, j)]);
+                                if case_indexes.contains(&j) {
+                                    treated.push(row[(0, j)]);
+                                    //println!("{},{}", input_data_vec.0[i][j], "Diseased");
+                                } else if control_indexes.contains(&j) {
+                                    // + 1 was added because in the input file the first column of the first row is blank as the first column consists of gene names
+                                    control.push(row[(0, j)]);
+                                    //println!("{},{}", input_data_vec.0[i][j], "Control");
+                                } else {
+                                    panic!(
+                                        "Column {} could not be classified into case/control",
+                                        j
+                                    );
+                                }
                             }
-                        }
-                        //println!("treated{:?}", treated);
-                        //println!("control{:?}", control);
-                        let p_value = wilcoxon_rank_sum_test(
-                            treated.clone(),
-                            control.clone(),
-                            THRESHOLD,
-                            't',
-                            true,
-                        ); // Setting continuity correction to true in case of normal approximation
-                        let treated_mean = Data::new(treated).mean();
-                        let control_mean = Data::new(control).mean();
-                        if (treated_mean.unwrap() / control_mean.unwrap())
-                            .log2()
-                            .is_nan()
-                            == false
-                            && (treated_mean.unwrap() / control_mean.unwrap())
+                            //println!("treated{:?}", treated);
+                            //println!("control{:?}", control);
+                            let p_value = stats_functions::wilcoxon_rank_sum_test(
+                                treated.clone(),
+                                control.clone(),
+                                THRESHOLD,
+                                't',
+                                true,
+                            ); // Setting continuity correction to true in case of normal approximation
+                            let treated_mean = Data::new(treated).mean();
+                            let control_mean = Data::new(control).mean();
+                            if (treated_mean.unwrap() / control_mean.unwrap())
                                 .log2()
-                                .is_infinite()
+                                .is_nan()
                                 == false
-                        {
-                            p_values.push(PValueIndexes {
-                                index: i,
-                                gene_name: filtered_genes[i].to_owned(),
-                                gene_symbol: filtered_gene_symbols[i].to_owned(),
-                                fold_change: (treated_mean.unwrap() / control_mean.unwrap()).log2(),
-                                p_value: p_value,
+                                && (treated_mean.unwrap() / control_mean.unwrap())
+                                    .log2()
+                                    .is_infinite()
+                                    == false
+                            {
+                                p_values.push(PValueIndexes {
+                                    index: i,
+                                    gene_name: filtered_genes[i].to_owned(),
+                                    gene_symbol: filtered_gene_symbols[i].to_owned(),
+                                    fold_change: (treated_mean.unwrap() / control_mean.unwrap())
+                                        .log2(),
+                                    p_value: p_value,
+                                });
+                            }
+                        }
+                    } else {
+                        // Multithreaded implementation of calculating wilcoxon p-values
+                        let normalized_matrix_temp = Arc::new(normalized_matrix);
+                        let filtered_genes_temp = Arc::new(filtered_genes);
+                        let filtered_gene_symbols_temp = Arc::new(filtered_gene_symbols);
+                        let case_indexes_temp = Arc::new(case_indexes);
+                        let control_indexes_temp = Arc::new(control_indexes);
+                        let p_values_temp = Arc::new(Mutex::new(Vec::<PValueIndexes>::new()));
+                        let mut handles = vec![]; // Vector of thread join handles; they are joined below so that all threads finish before their results are combined
+                        for thread_num in 0..max_threads {
+                            let normalized_matrix_temp = Arc::clone(&normalized_matrix_temp);
+                            let case_indexes_temp = Arc::clone(&case_indexes_temp);
+                            let control_indexes_temp = Arc::clone(&control_indexes_temp);
+                            let p_values_temp = Arc::clone(&p_values_temp);
+                            let filtered_genes_temp = Arc::clone(&filtered_genes_temp);
+                            let filtered_gene_symbols_temp =
+                                Arc::clone(&filtered_gene_symbols_temp);
+                            let handle = thread::spawn(move || {
+                                let mut p_values_thread: Vec<PValueIndexes> = Vec::with_capacity(
+                                    normalized_matrix_temp.nrows() / max_threads,
+                                );
+                                for i in 0..normalized_matrix_temp.nrows() {
+                                    let remainder: usize = i % max_threads; // Calculate remainder of iteration number divided by max_threads to decide which thread parses the row
+                                    if remainder == thread_num {
+                                        let row = normalized_matrix_temp.row(i);
+                                        //println!("row:{:?}", row);
+                                        let mut treated = Vec::<f64>::new();
+                                        let mut control = Vec::<f64>::new();
+                                        //println!("conditions:{:?}", conditions);
+                                        for j in 0..(case_indexes_temp.len()
+                                            + control_indexes_temp.len())
+                                        {
+                                            //println!("row[(0, j)]:{}", row[(0, j)]);
+                                            if case_indexes_temp.contains(&j) {
+                                                treated.push(row[(0, j)]);
+                                                //println!("{},{}", input_data_vec.0[i][j], "Diseased");
+                                            } else if control_indexes_temp.contains(&j) {
+                                                // + 1 was added because in the input file the first column of the first row is blank as the first column consists of gene names
+                                                control.push(row[(0, j)]);
+                                                //println!("{},{}", input_data_vec.0[i][j], "Control");
+                                            } else {
+                                                panic!(
+                                        "Column {} could not be classified into case/control",
+                                        j
+                                    );
+                                            }
+                                        }
+                                        //println!("treated{:?}", treated);
+                                        //println!("control{:?}", control);
+                                        let p_value = stats_functions::wilcoxon_rank_sum_test(
+                                            treated.clone(),
+                                            control.clone(),
+                                            THRESHOLD,
+                                            't',
+                                            true,
+                                        ); // Setting continuity correction to true in case of normal approximation
+                                        let treated_mean = Data::new(treated).mean();
+                                        let control_mean = Data::new(control).mean();
+                                        if (treated_mean.unwrap() / control_mean.unwrap())
+                                            .log2()
+                                            .is_nan()
+                                            == false
+                                            && (treated_mean.unwrap() / control_mean.unwrap())
+                                                .log2()
+                                                .is_infinite()
+                                                == false
+                                        {
+                                            p_values_thread.push(PValueIndexes {
+                                                index: i,
+                                                gene_name: filtered_genes_temp[i].to_owned(),
+                                                gene_symbol: filtered_gene_symbols_temp[i]
+                                                    .to_owned(),
+                                                fold_change: (treated_mean.unwrap()
+                                                    / control_mean.unwrap())
+                                                .log2(),
+                                                p_value: p_value,
+                                            });
+                                        }
+                                    }
+                                }
+                                p_values_temp.lock().unwrap().append(&mut p_values_thread);
                             });
+                            handles.push(handle);
+                        }
+                        for handle in handles {
+                            // Wait for all threads to finish before proceeding further
+                            handle.join().unwrap();
                         }
+                        p_values.append(&mut *p_values_temp.lock().unwrap());
                     }
                     //println!("p_values:{:?}", p_values);
                     println!(
                         "Time for running {} wilcoxon tests:{:?}",
-                        normalized_matrix.nrows(),
+                        num_normalized_rows,
                         now2.elapsed()
                     );
                     let adjusted_p_values = adjust_p_values(p_values);
@@ -408,18 +703,62 @@ fn tmm_normalization(
         }
     }
     //println!("ref_column:{}", ref_column);
-    let ref_data = input_matrix.column(ref_column);
-    let ref_lib_size = lib_sizes[ref_column];
+    let num_cols = input_matrix.ncols();
     let mut f: Vec<f64> = Vec::with_capacity(input_matrix.ncols());
-    for col in 0..input_matrix.ncols() {
-        let obs_data = input_matrix.column(col);
-        let obs_lib_size = lib_sizes[col];
-        f.push(calc_factor_tmm(
-            obs_data,
-            &ref_data,
-            ref_lib_size,
-            obs_lib_size,
-        ));
+    if input_matrix.nrows() * input_matrix.ncols() < PAR_CUTOFF {
+        let ref_data = input_matrix.column(ref_column);
+        let ref_lib_size = lib_sizes[ref_column];
+        for col in 0..input_matrix.ncols() {
+            let obs_data = input_matrix.column(col);
+            let obs_lib_size = lib_sizes[col];
+            f.push(calc_factor_tmm(
+                obs_data,
+                &ref_data,
+                ref_lib_size,
+                obs_lib_size,
+            ));
+        }
+    } else {
+        // Multithreaded implementation of TMM normalization
+        let f_temp = Arc::new(Mutex::new(Vec::<f_index>::new()));
+        let lib_sizes_temp = Arc::new(lib_sizes.to_owned());
+        let input_matrix_temp = Arc::new(input_matrix);
+        let mut handles = vec![]; // Join handles of the spawned threads; they are joined below so that every thread has finished before the results are collected
+        for thread_num in 0..max_threads {
+            let f_temp = Arc::clone(&f_temp);
+            let lib_sizes_temp = Arc::clone(&lib_sizes_temp);
+            let input_matrix_temp = Arc::clone(&input_matrix_temp);
+            let handle = thread::spawn(move || {
+                let mut f_thread: Vec<f_index> =
+                    Vec::with_capacity(input_matrix_temp.ncols() / max_threads);
+                let ref_data = input_matrix_temp.column(ref_column);
+                let ref_lib_size = lib_sizes_temp[ref_column];
+                for col in 0..input_matrix_temp.ncols() {
+                    let remainder: usize = col % max_threads; // Calculate remainder of column number divided by max_threads to decide which thread parses this column
+                    if remainder == thread_num {
+                        let obs_data = input_matrix_temp.column(col);
+                        let obs_lib_size = lib_sizes_temp[col];
+                        f_thread.push(f_index {
+                            f: calc_factor_tmm(obs_data, &ref_data, ref_lib_size, obs_lib_size),
+                            ind: col,
+                        })
+                    }
+                }
+                f_temp.lock().unwrap().append(&mut f_thread);
+            });
+            handles.push(handle);
+        }
+        for handle in handles {
+            // Wait for all threads to finish before proceeding further
+            handle.join().unwrap();
+        }
+        let mut f_orig: Vec<f_index> = Vec::with_capacity(num_cols);
+        f_orig.append(&mut *f_temp.lock().unwrap());
+        // Need to sort the vector by ind because threads append their results in an order that does not follow the column order
+        f_orig
+            .as_mut_slice()
+            .sort_by(|a, b| (a.ind).partial_cmp(&b.ind).unwrap_or(Ordering::Equal));
+        f = f_orig.into_iter().map(|x| x.f).collect::<Vec<f64>>();
     }
     const NATURAL_E: f64 = 2.718281828459;
     let log_f: Vec<f64> = f.clone().into_iter().map(|x| x.log(NATURAL_E)).collect();
@@ -427,6 +766,11 @@ fn tmm_normalization(
     let final_f: Vec<f64> = f.into_iter().map(|x| x / exp_mean_log_f).collect();
     final_f
 }
+#[allow(non_camel_case_types)] // The lowercase type name is intentional; this silences the naming lint
+struct f_index { // One TMM factor tagged with the column it was computed for (filled by the threads in tmm_normalization)
+    f: f64, // Value returned by calc_factor_tmm for column `ind`
+    ind: usize, // Column index in the input matrix; used as the sort key to restore column order
+}
 fn calc_factor_tmm(
     obs_data: Matrix<f64, Dyn, Const<1>, ViewStorage<'_, f64, Dyn, Const<1>, Const<1>, Dyn>>,
@@ -537,7 +881,7 @@ fn rank_vector(input_vector: &Vec<f64>) -> Vec<f64> {
                     rank: i as f64 + 1.0,
                 });
             } else {
-                frac_rank = calculate_frac_rank(i as f64 + 1.0, num_repeats);
+                frac_rank = stats_functions::calculate_frac_rank(i as f64 + 1.0, num_repeats);
                 ranks.push(RankOutput {
                     orig_index: input_vector_sorted[i].orig_index,
                     rank: frac_rank,
@@ -750,274 +1094,3 @@ fn cpm(
     //println!("output_matrix:{:?}", output_matrix);
     output_matrix
 }
-pub fn wilcoxon_rank_sum_test( // Wilcoxon rank-sum test between two samples; returns the p-value
-    mut group1: Vec<f64>, // First sample, tagged 'X' in the bookkeeping below
-    mut group2: Vec<f64>, // Second sample, tagged 'Y' in the bookkeeping below
-    threshold: usize, // Exact p-values are computed only if both group sizes are below this and there are no ties
-    alternative: char, // 'g' = greater, 'l' = lesser; anything else is two-sided in the normal path, but the exact path panics unless it is 't'
-    correct: bool, // Whether to apply the 0.5 continuity correction in the normal approximation
-) -> f64 {
-    // Pool both groups and sort them; ties are detected later by comparing neighbours in this sorted vector
-    let mut combined = group1.clone();
-    combined.extend(group2.iter().cloned());
-    combined.sort_by(|a, b| a.partial_cmp(b).unwrap()); // partial_cmp returns None for NaN, so the unwrap panics on NaN input
-    //println!("combined:{:?}", combined);
-    group1.sort_by(|a, b| a.partial_cmp(b).unwrap());
-    group2.sort_by(|a, b| a.partial_cmp(b).unwrap());
-    //println!("group1:{:?}", group1);
-    //println!("group2:{:?}", group2);
-    let mut group1_iter = 0; // Cursor into the sorted group1
-    let mut group2_iter = 0; // Cursor into the sorted group2
-    let mut xy: Vec<char> = Vec::with_capacity(combined.len()); // Stores X-Y classification
-    let mut ranks: Vec<f64> = Vec::with_capacity(combined.len()); // Stores the rank of each element
-    let mut is_repeat = false; // True while walking through a run of tied values
-    let mut repeat_present = false; // Set once any tie is seen; a tie disables the exact test
-    let mut frac_rank: f64 = 0.0; // Average rank shared by the current run of ties
-    let mut num_repeats: f64 = 1.0; // Length of the current run of ties
-    let mut repeat_iter: f64 = 1.0; // Position within the current run of ties
-    #[allow(unused_variables)]
-    let mut weight_x: f64 = 0.0; // Rank sum of group1 (X)
-    let mut weight_y: f64 = 0.0; // Rank sum of group2 (Y)
-    let mut group_char: char = 'X'; // Group of the element currently being ranked
-    let mut rank_frequencies: Vec<f64> = Vec::with_capacity(combined.len()); // Tie-run sizes (1.0 for untied values); feeds the tie correction of sigma
-    for i in 0..combined.len() {
-        //println!("group1_iter:{}", group1_iter);
-        //println!("group2_iter:{}", group2_iter);
-        //println!("item1:{}", combined[i]);
-        //println!("is_repeat:{}", is_repeat);
-        if group1_iter < group1.len() && combined[i] == group1[group1_iter] { // A value present in both groups is attributed to group1 first
-            xy.push('X');
-            group1_iter += 1;
-            group_char = 'X';
-        } else if group2_iter < group2.len() && combined[i] == group2[group2_iter] {
-            xy.push('Y');
-            group2_iter += 1;
-            group_char = 'Y';
-        }
-        // Computing ranks
-        if is_repeat == false {
-            // Check if current element has other occurrences
-            num_repeats = 1.0;
-            for j in i + 1..combined.len() { // combined is sorted, so equal values are adjacent
-                if combined[i] == combined[j] {
-                    is_repeat = true;
-                    repeat_present = true;
-                    repeat_iter = 1.0;
-                    num_repeats += 1.0;
-                } else {
-                    break;
-                }
-            }
-            //println!("num_repeats:{}", num_repeats);
-            if is_repeat == false {
-                ranks.push(i as f64 + 1.0);
-                if group_char == 'X' {
-                    weight_x += i as f64 + 1.0;
-                } else if group_char == 'Y' {
-                    weight_y += i as f64 + 1.0;
-                }
-                //rank_frequencies.push(RankFreq {
-                //    rank: i as f64 + 1.0,
-                //    freq: 1,
-                //});
-                rank_frequencies.push(1.0);
-            } else {
-                frac_rank = calculate_frac_rank(i as f64 + 1.0, num_repeats); // First member of a tie run: every member of the run gets this average rank
-                ranks.push(frac_rank);
-                if group_char == 'X' {
-                    weight_x += frac_rank;
-                } else if group_char == 'Y' {
-                    weight_y += frac_rank
-                }
-                //rank_frequencies.push(RankFreq {
-                //    rank: frac_rank,
-                //    freq: num_repeats as usize,
-                //});
-                rank_frequencies.push(num_repeats);
-            }
-        } else if repeat_iter < num_repeats {
-            // Repeat case: a later member of the current tie run reuses frac_rank
-            ranks.push(frac_rank);
-            repeat_iter += 1.0;
-            if group_char == 'X' {
-                weight_x += frac_rank;
-            } else if group_char == 'Y' {
-                weight_y += frac_rank
-            }
-            if repeat_iter == num_repeats { // Last member of the run: the next element starts a fresh tie check
-                is_repeat = false;
-            }
-        } else { // NOTE(review): looks unreachable, is_repeat is cleared as soon as repeat_iter reaches num_repeats - confirm
-            //println!("i:{}", i);
-            ranks.push(i as f64 + 1.0);
-            repeat_iter = 1.0;
-            num_repeats = 1.0;
-            if group_char == 'X' {
-                weight_x += i as f64 + 1.0;
-            } else if group_char == 'Y' {
-                weight_y += i as f64 + 1.0;
-            }
-        }
-    }
-    //println!("rank_frequencies:{:?}", rank_frequencies);
-    //println!("xy:{:?}", xy);
-    //println!("ranks:{:?}", ranks);
-    //println!("weight_x:{}", weight_x);
-    //println!("weight_y:{}", weight_y);
-    //u_dash (calculated below) calculates the "W Statistic" in wilcox.test function in R
-    let u_y = weight_y - (group2.len() as f64 * (group2.len() as f64 + 1.0) / 2.0) as f64;
-    let u_dash_y = (u_y - (group1.len() * group2.len()) as f64).abs(); // The only statistic used for the p-value below
-    //println!("u_dash_y:{}", u_dash_y);
-    let u_x = weight_x - (group1.len() as f64 * (group1.len() as f64 + 1.0) / 2.0) as f64;
-    let _u_dash_x = (u_x - (group1.len() * group2.len()) as f64).abs(); // Computed but never used
-    //println!("u_dash_x:{}", u_dash_x);
-    // Calculate test_statistic
-    //let t1 = weight_x - ((group1.len() as f64) * (group1.len() as f64 + 1.0)) / 2.0;
-    //let t2 = weight_y - ((group2.len() as f64) * (group2.len() as f64 + 1.0)) / 2.0;
-    //
-    //let mut test_statistic = t1;
-    //if t2 < t1 {
-    //    test_statistic = t2;
-    //}
-    //println!("test_statistic:{}", test_statistic);
-    if group1.len() < threshold && group2.len() < threshold && repeat_present == false {
-        // Compute exact p-values
-        // Calculate conditional probability for weight_y
-        if alternative == 'g' {
-            // Alternative "greater"
-            //if group1.len() <= low_cutoff && group2.len() <= low_cutoff {
-            //    iterate_exact_p_values(ranks, weight_y, group2.len())
-            //} else {
-            calculate_exact_probability(u_dash_y, group1.len(), group2.len(), alternative)
-            //}
-        } else if alternative == 'l' {
-            // Alternative "lesser"
-            //if group1.len() <= low_cutoff && group2.len() <= low_cutoff {
-            //    iterate_exact_p_values(ranks, weight_x, group1.len())
-            //} else {
-            calculate_exact_probability(u_dash_y, group1.len(), group2.len(), alternative)
-            //}
-        } else {
-            // Two-sided distribution (calculate_exact_probability panics here unless alternative is 't')
-            calculate_exact_probability(u_dash_y, group1.len(), group2.len(), alternative)
-        }
-    } else {
-        // Compute p-values from a normal distribution
-        //println!("group1 length:{}", group1.len());
-        //println!("group2 length:{}", group2.len());
-        let mut z = u_dash_y - ((group1.len() * group2.len()) as f64) / 2.0; // Centre the statistic by subtracting n1*n2/2
-        //println!("z_original:{}", z);
-        let mut nties_sum: f64 = 0.0; // Sum of t^3 - t over the recorded tie-run sizes t
-        for i in 0..rank_frequencies.len() {
-            nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i]
-                - rank_frequencies[i];
-        }
-        let sigma = (((group1.len() * group2.len()) as f64) / 12.0 // Scale for z, reduced by the tie term nties_sum
-            * ((group1.len() + group2.len() + 1) as f64
-                - nties_sum
-                    / (((group1.len() + group2.len()) as f64)
-                        * ((group1.len() + group2.len() - 1) as f64))))
-            .sqrt();
-        //println!("sigma:{}", sigma);
-        let mut correction: f64 = 0.0;
-        if correct == true {
-            if alternative == 'g' {
-                // Alternative "greater"
-                correction = 0.5;
-            } else if alternative == 'l' {
-                // Alternative "lesser"
-                correction = -0.5;
-            } else {
-                // Alternative "two-sided"
-                if z > 0.0 {
-                    correction = 0.5;
-                } else if z < 0.0 {
-                    correction = -0.5;
-                } else {
-                    // z=0
-                    correction = 0.0;
-                }
-            }
-        }
-        z = (z - correction) / sigma; // NOTE(review): sigma is 0 when every value is tied, which makes z NaN or infinite - confirm callers tolerate this
-        //println!("z:{}", z);
-        if alternative == 'g' {
-            // Alternative "greater"
-            //println!("greater:{}", n.cdf(weight_y));
-            //1.0 - n.cdf(z) // Applying continuity correction
-            r_mathlib::normal_cdf(z, 0.0, 1.0, false, false) // presumably (x, mean, sd, lower_tail, log_p) as in R's pnorm - confirm against r_mathlib
-        } else if alternative == 'l' {
-            // Alternative "lesser"
-            //println!("lesser:{}", n.cdf(weight_x));
-            //n.cdf(z) // Applying continuity correction
-            r_mathlib::normal_cdf(z, 0.0, 1.0, true, false)
-        } else {
-            // Alternative "two-sided"
-            let p_g = r_mathlib::normal_cdf(z, 0.0, 1.0, false, false); // Applying continuity correction
-            let p_l = r_mathlib::normal_cdf(z, 0.0, 1.0, true, false); // Applying continuity correction
-            let mut p_value; // Twice the smaller of the two values above, capped at 1.0
-            if p_g < p_l {
-                p_value = 2.0 * p_g;
-            } else {
-                p_value = 2.0 * p_l;
-            }
-            //println!("p_value:{}", p_value);
-            if p_value > 1.0 {
-                p_value = 1.0;
-            }
-            p_value
-        }
-    }
-}
-// Exact p-value obtained from r_mathlib::wilcox_cdf. To be used only when there are no ties in the input data
-#[allow(dead_code)]
-fn calculate_exact_probability(weight: f64, x: usize, y: usize, alternative: char) -> f64 { // weight: test statistic; x, y: the two group sizes; alternative: 't', 'g' or 'l'
-    //println!("Using Wilcoxon CDF");
-    let mut p_value;
-    if alternative == 't' { // Two-sided: doubles one wilcox_cdf value, chosen by comparing weight with x*y/2, and caps it at 1.0
-        if weight > ((x * y) as f64) / 2.0 {
-            p_value = 2.0 * r_mathlib::wilcox_cdf(weight - 1.0, x as f64, y as f64, false, false); // presumably the last two flags are lower_tail and log_p as in R's pwilcox - confirm against r_mathlib
-        } else {
-            p_value = 2.0 * r_mathlib::wilcox_cdf(weight, x as f64, y as f64, true, false);
-        }
-        if p_value > 1.0 {
-            p_value = 1.0;
-        }
-    } else if alternative == 'g' {
-        p_value = r_mathlib::wilcox_cdf(weight - 1.0, x as f64, y as f64, false, false);
-    } else if alternative == 'l' {
-        p_value = r_mathlib::wilcox_cdf(weight, x as f64, y as f64, true, false);
-    } else {
-        // Should not happen: any alternative other than 't', 'g' or 'l' aborts the program
-        panic!("Unknown alternative option given, please check!");
-    }
-    //println!("p_value:{}", p_value);
-    p_value
-}
-#[allow(dead_code)]
-pub fn calculate_frac_rank(current_rank: f64, num_repeats: f64) -> f64 { // Average of the consecutive ranks current_rank, current_rank + 1, ... taken over num_repeats positions
-    let mut sum = 0.0;
-    for i in 0..num_repeats as usize { // num_repeats is truncated to an integer for the loop count
-        let rank = current_rank + i as f64;
-        sum += rank;
-    }
-    sum / num_repeats // Evaluates to NaN (0.0 / 0.0) when num_repeats is 0.0
-}