npm - @sjcrh/proteinpaint-rust - Versions diffs - 2.61.1 → 2.73.0 - Mend

@sjcrh/proteinpaint-rust 2.61.1 → 2.73.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (2) hide show

package/package.json +2 -2
package/src/topGeneByExpressionVariance.rs +50 -20

package/package.json CHANGED Viewed

@@ -1,5 +1,5 @@
 {
-	"version": "2.61.1",
+	"version": "2.73.0",
 	"name": "@sjcrh/proteinpaint-rust",
 	"description": "Rust-based utilities for proteinpaint",
 	"main": "index.js",
@@ -38,5 +38,5 @@
 	"devDependencies": {
 		"tape": "^5.2.2"
 	},
-	"pp_release_tag": "v2.61.1"
+	"pp_release_tag": "v2.73.0"
 }

package/src/topGeneByExpressionVariance.rs CHANGED Viewed

@@ -6,9 +6,9 @@ Various JSON parameters:
    input_file: Path to input file
    filter_extreme_values: boolean (true/false). When true, genes are filtered according to the filterByExpr logic in edgeR, which removes genes that have very low counts.
    num_genes: The number of top-ranked genes (e.g. 10) to report in the output.
-   param: var/iqr . This parameter decides whether to sort genes using variance or interquartile region. There is an article which states that its better to use interquartile region than variance for selecting genes for clustering https://www.frontiersin.org/articles/10.3389/fgene.2021.632620/full
+   rank_type: var/iqr. This parameter decides whether to sort genes using variance or interquartile range. There is an article which states that it's better to use interquartile range than variance for selecting genes for clustering: https://www.frontiersin.org/articles/10.3389/fgene.2021.632620/full
- Example syntax: cd .. && cargo build --release && json='{"samples":"sample1,sample2,sample3","input_file":"/path/to/input/file","filter_extreme_values":true,"num_genes":100, "param":"var"}' && time echo $json | target/release/gene_variance
+ Example syntax: cd .. && cargo build --release && json='{"samples":"sample1,sample2,sample3","min_count":30,"min_total_count":20,"input_file":"/path/to/input/file","filter_extreme_values":true,"num_genes":100, "rank_type":"var"}' && time echo $json | target/release/gene_variance
 */
 #![allow(non_snake_case)]
 use bgzip::BGZFReader;
@@ -103,7 +103,7 @@ fn input_data(
 #[derive(Debug, Serialize, Deserialize)]
 struct GeneInfo {
     gene_symbol: String,
-    param: f64,
+    rank_type: f64,
 }
 fn calculate_variance(
@@ -111,10 +111,22 @@ fn calculate_variance(
     gene_symbols: Vec<String>,
     mut min_sample_size: f64,
     filter_extreme_values: bool,
-    param: String,
+    rank_type: String,
+    min_count_option: Option<f64>,
+    min_total_count_option: Option<f64>,
 ) -> Vec<GeneInfo> {
-    const MIN_COUNT: f64 = 10.0; // Value of constant from R implementation
-    const MIN_TOTAL_COUNT: f64 = 15.0; // Value of constant from R implementation
+    let mut min_count: f64 = 10.0;
+    match min_count_option {
+        Some(x) => min_count = x,
+        None => {}
+    }
+    let mut min_total_count: f64 = 15.0;
+    match min_total_count_option {
+        Some(x) => min_total_count = x,
+        None => {}
+    }
+    //const MIN_COUNT: f64 = 10.0; // Value of constant from R implementation
+    //const MIN_TOTAL_COUNT: f64 = 15.0; // Value of constant from R implementation
     const LARGE_N: f64 = 10.0; // Value of constant from R implementation
     const MIN_PROP: f64 = 0.7; // Value of constant from R implementation
@@ -135,7 +147,7 @@ fn calculate_variance(
     //println!("lib_sizes:{:?}", lib_sizes);
     //println!("min_sample_size:{}", min_sample_size);
     let median_lib_size = Data::new(lib_sizes.clone()).median();
-    let cpm_cutoff = (MIN_COUNT / median_lib_size) * 1000000.0;
+    let cpm_cutoff = (min_count / median_lib_size) * 1000000.0;
     //println!("cpm_cutoff:{}", cpm_cutoff);
     let cpm_matrix = cpm(&input_matrix);
     const TOL: f64 = 1e-14; // Value of constant from R implementation
@@ -157,7 +169,7 @@ fn calculate_variance(
         }
         let mut keep_total_bool = false;
-        if row_sums[(row, 0)] as f64 >= MIN_TOTAL_COUNT - TOL {
+        if row_sums[(row, 0)] as f64 >= min_total_count - TOL {
             keep_total_bool = true;
             //keep_total.push(keep_total_bool);
             //positive_total += 1;
@@ -167,7 +179,7 @@ fn calculate_variance(
         for col in 0..input_matrix.ncols() {
             gene_counts.push(input_matrix[(row, col)]);
         }
-        if param == "var" {
+        if rank_type == "var" {
             // Calculating variance
             if gene_counts.clone().variance().is_nan() == true {
             } else if filter_extreme_values == true
@@ -175,12 +187,12 @@ fn calculate_variance(
                 && keep_total_bool == true
             {
                 gene_infos.push(GeneInfo {
-                    param: gene_counts.variance(),
+                    rank_type: gene_counts.variance(),
                     gene_symbol: gene_symbols[row].clone(),
                 });
             } else if filter_extreme_values == false {
                 gene_infos.push(GeneInfo {
-                    param: gene_counts.variance(),
+                    rank_type: gene_counts.variance(),
                     gene_symbol: gene_symbols[row].clone(),
                 });
             }
@@ -193,20 +205,22 @@ fn calculate_variance(
                 && keep_total_bool == true
             {
                 gene_infos.push(GeneInfo {
-                    param: gene_counts_data.interquartile_range(),
+                    rank_type: gene_counts_data.interquartile_range(),
                     gene_symbol: gene_symbols[row].clone(),
                 });
             } else if filter_extreme_values == false {
                 gene_infos.push(GeneInfo {
-                    param: gene_counts_data.interquartile_range(),
+                    rank_type: gene_counts_data.interquartile_range(),
                     gene_symbol: gene_symbols[row].clone(),
                 });
             }
         }
     }
-    gene_infos
-        .as_mut_slice()
-        .sort_by(|a, b| (a.param).partial_cmp(&b.param).unwrap_or(Ordering::Equal));
+    gene_infos.as_mut_slice().sort_by(|a, b| {
+        (a.rank_type)
+            .partial_cmp(&b.rank_type)
+            .unwrap_or(Ordering::Equal)
+    });
     gene_infos
 }
@@ -264,14 +278,14 @@ fn main() {
                         }
                     }
-                    let param = &json_string["param"] // Value provide must be either "var" or "iqr"
+                    let rank_type = &json_string["rank_type"] // Value provide must be either "var" or "iqr"
                         .to_owned()
                         .as_str()
                         .unwrap()
                         .to_string();
-                    if param != "var" && param != "iqr" {
+                    if rank_type != "var" && rank_type != "iqr" {
                         // Check if any unknown method has been provided
-                        panic!("Unknown method:{}", param);
+                        panic!("Unknown method:{}", rank_type);
                     }
                     let filter_extreme_values_result = &json_string["filter_extreme_values"];
@@ -296,6 +310,20 @@ fn main() {
                         }
                     }
+                    let min_count_result = &json_string["min_count"];
+                    let mut min_count: Option<f64> = None;
+                    match min_count_result.as_f64() {
+                        Some(x) => min_count = Some(x),
+                        None => {}
+                    }
+                    let min_total_count_result = &json_string["min_total_count"];
+                    let mut min_total_count: Option<f64> = None;
+                    match min_total_count_result.as_f64() {
+                        Some(x) => min_total_count = Some(x),
+                        None => {}
+                    }
                     let samples_list: Vec<&str> = samples_string.split(",").collect();
                     let (input_matrix, gene_symbols) = input_data(&file_name, &samples_list);
                     let gene_infos = calculate_variance(
@@ -303,7 +331,9 @@ fn main() {
                         gene_symbols,
                         samples_list.len() as f64,
                         filter_extreme_values,
-                        param.to_string(),
+                        rank_type.to_string(),
+                        min_count,
+                        min_total_count,
                     );
                     //println!("gene_infos:{:?}", gene_infos);