@sjcrh/proteinpaint-rust 2.59.0 → 2.60.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.59.0",
2
+ "version": "2.60.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.59.0"
41
+ "pp_release_tag": "v2.60.0"
42
42
  }
package/src/DEanalysis.rs CHANGED
@@ -1,4 +1,4 @@
1
- // cd .. && cargo build --release && json='{"case":"SJMB030827,SJMB030838,SJMB032893,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | target/release/DEanalysis
1
+ // cd .. && cargo build --release && json='{"min_count":10,"min_total_count":15,"case":"SJMB030827,SJMB030838,SJMB032893,SJMB031131,SJMB031227","control":"SJMB030488,SJMB030825,SJMB031110","input_file":"/Users/rpaul1/pp_data/files/hg38/sjmb12/rnaseq/geneCounts.txt"}' && time echo $json | target/release/DEanalysis
2
2
  // cd .. && cargo build --release && time cat ~/sjpp/test.txt | target/release/DEanalysis
3
3
  #![allow(non_snake_case)]
4
4
  use json;
@@ -368,6 +368,22 @@ fn main() {
368
368
  match input_json {
369
369
  Ok(json_string) => {
370
370
  let now = Instant::now();
371
+ let min_count_option = json_string["min_count"].as_f64().to_owned();
372
+ let min_total_count_option = json_string["min_total_count"].as_f64().to_owned();
373
+ let min_count;
374
+ match min_count_option {
375
+ Some(x) => min_count = x,
376
+ None => {
377
+ panic!("min_count is missing a value")
378
+ }
379
+ }
380
+ let min_total_count;
381
+ match min_total_count_option {
382
+ Some(x) => min_total_count = x,
383
+ None => {
384
+ panic!("min_total_count is missing a value")
385
+ }
386
+ }
371
387
  let case_string = &json_string["case"].to_owned().as_str().unwrap().to_string();
372
388
  let control_string = &json_string["control"]
373
389
  .to_owned()
@@ -388,6 +404,8 @@ fn main() {
388
404
  let filtering_time = Instant::now();
389
405
  let (filtered_matrix, lib_sizes, filtered_genes, filtered_gene_symbols) =
390
406
  filter_by_expr(
407
+ min_count,
408
+ min_total_count,
391
409
  &input_matrix,
392
410
  case_indexes.len(),
393
411
  control_indexes.len(),
@@ -671,6 +689,7 @@ fn adjust_p_values_bonferroni(original_p_values: Vec<PValueIndexes>) -> Vec<Adju
671
689
  adjusted_p_values
672
690
  }
673
691
 
692
+ // Original TMM normalization source code in edgeR: https://rdrr.io/bioc/edgeR/src/R/calcNormFactors.R
674
693
  fn tmm_normalization(
675
694
  input_matrix: Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
676
695
  lib_sizes: &Vec<f64>,
@@ -947,7 +966,10 @@ fn calc_quantile(mut input: Vec<f64>, p: f64) -> f64 {
947
966
  qs_final
948
967
  }
949
968
 
969
+ // Original filterByExpr source code in edgeR: https://rdrr.io/bioc/edgeR/src/R/filterByExpr.R
950
970
  fn filter_by_expr(
971
+ min_count: f64,
972
+ min_total_count: f64,
951
973
  raw_data: &Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
952
974
  num_diseased: usize,
953
975
  num_control: usize,
@@ -960,9 +982,8 @@ fn filter_by_expr(
960
982
  Vec<String>,
961
983
  ) {
962
984
  // Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>
963
- #[allow(non_upper_case_globals)]
964
- const MIN_COUNT: f64 = 10.0; // Value of constant from R implementation
965
- const MIN_TOTAL_COUNT: f64 = 15.0; // Value of constant from R implementation
985
+ //const min_count: f64 = 10.0; // Value of constant from R implementation
986
+ //const min_total_count: f64 = 15.0; // Value of constant from R implementation
966
987
  const LARGE_N: f64 = 10.0; // Value of constant from R implementation
967
988
  const MIN_PROP: f64 = 0.7; // Value of constant from R implementation
968
989
 
@@ -989,7 +1010,7 @@ fn filter_by_expr(
989
1010
  //println!("lib_sizes:{:?}", lib_sizes);
990
1011
  //println!("min_sample_size:{}", min_sample_size);
991
1012
  let median_lib_size = Data::new(lib_sizes.clone()).median();
992
- let cpm_cutoff = (MIN_COUNT / median_lib_size) * 1000000.0;
1013
+ let cpm_cutoff = (min_count / median_lib_size) * 1000000.0;
993
1014
  //println!("cpm_cutoff:{}", cpm_cutoff);
994
1015
  let cpm_matrix = cpm(&raw_data);
995
1016
  const TOL: f64 = 1e-14; // Value of constant from R implementation
@@ -1018,7 +1039,7 @@ fn filter_by_expr(
1018
1039
  //}
1019
1040
 
1020
1041
  let mut keep_total_bool = false;
1021
- if row_sums[(row, 0)] as f64 >= MIN_TOTAL_COUNT - TOL {
1042
+ if row_sums[(row, 0)] as f64 >= min_total_count - TOL {
1022
1043
  keep_total_bool = true;
1023
1044
  //keep_total.push(keep_total_bool);
1024
1045
  //positive_total += 1;
package/src/genesetORA.rs CHANGED
@@ -112,12 +112,12 @@ fn main() -> Result<()> {
112
112
  + &genesetgroup
113
113
  + "'"),
114
114
  );
115
+ let mut iter = 0;
115
116
  match stmt_result {
116
117
  Ok(mut stmt) => {
117
118
  #[allow(non_snake_case)]
118
119
  let GO_iter =
119
120
  stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
120
- let mut iter = 0;
121
121
  #[allow(non_snake_case)]
122
122
  for GO_term in GO_iter {
123
123
  iter += 1;
@@ -178,14 +178,15 @@ fn main() -> Result<()> {
178
178
  }
179
179
  }
180
180
  }
181
- println!("Number of pathway entries:{}", iter);
182
181
  }
183
182
  Err(_) => panic!("sqlite database file not found"),
184
183
  }
185
- println!(
186
- "pathway_p_values:{}",
187
- adjust_p_values(pathway_p_values, num_items_output)
188
- );
184
+ let output_string = "{\"num_pathways\":".to_string()
185
+ + &iter.to_string()
186
+ + &",\"pathways\":"
187
+ + &adjust_p_values(pathway_p_values, num_items_output)
188
+ + &"}";
189
+ println!("pathway_p_values:{}", output_string);
189
190
  println!(
190
191
  "Time for calculating gene overrepresentation:{:?}",
191
192
  run_time.elapsed()
@@ -199,7 +200,10 @@ fn main() -> Result<()> {
199
200
  Ok(())
200
201
  }
201
202
 
202
- fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>, num_items_output: usize) -> String {
203
+ fn adjust_p_values(
204
+ mut original_p_values: Vec<pathway_p_value>,
205
+ mut num_items_output: usize,
206
+ ) -> String {
203
207
  // Sorting p-values in ascending order
204
208
  original_p_values.as_mut_slice().sort_by(|a, b| {
205
209
  (a.p_value_original)
@@ -243,6 +247,10 @@ fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>, num_items_output
243
247
  .unwrap_or(Ordering::Equal)
244
248
  });
245
249
 
250
+ if num_items_output > adjusted_p_values.len() {
251
+ num_items_output = adjusted_p_values.len()
252
+ }
253
+
246
254
  let mut output_string = "[".to_string();
247
255
  for i in 0..num_items_output {
248
256
  let j = adjusted_p_values.len() - i - 1;