@sjcrh/proteinpaint-rust 2.129.1-80343740e.0 → 2.129.6-2b2fdc7ee.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,6 +1,9 @@
1
+ #![allow(non_snake_case)]
1
2
  use fishers_exact::fishers_exact;
2
3
  //use r_mathlib;
4
+ use r_mathlib::chi_squared_cdf;
3
5
  use statrs::distribution::{ChiSquared, ContinuousCDF};
6
+ use std::collections::HashSet;
4
7
  use std::panic;
5
8
 
6
9
  #[allow(dead_code)]
@@ -99,24 +102,19 @@ fn chi_square_test(
99
102
  {
100
103
  0.05 // Arbitarily put a very high number when there are only forward or reverse reads for alternate/reference
101
104
  } else {
102
- let total: f64 = (alternate_forward_count
103
- + alternate_reverse_count
104
- + reference_forward_count
105
- + reference_reverse_count) as f64;
106
- let expected_alternate_forward_count: f64 = (alternate_forward_count
107
- + alternate_reverse_count) as f64
105
+ let total: f64 =
106
+ (alternate_forward_count + alternate_reverse_count + reference_forward_count + reference_reverse_count)
107
+ as f64;
108
+ let expected_alternate_forward_count: f64 = (alternate_forward_count + alternate_reverse_count) as f64
108
109
  * (alternate_forward_count + reference_forward_count) as f64
109
110
  / total;
110
- let expected_alternate_reverse_count: f64 = (alternate_forward_count
111
- + alternate_reverse_count) as f64
111
+ let expected_alternate_reverse_count: f64 = (alternate_forward_count + alternate_reverse_count) as f64
112
112
  * (alternate_reverse_count + reference_reverse_count) as f64
113
113
  / total;
114
- let expected_reference_forward_count: f64 = (alternate_forward_count
115
- + reference_forward_count) as f64
114
+ let expected_reference_forward_count: f64 = (alternate_forward_count + reference_forward_count) as f64
116
115
  * (reference_forward_count + reference_reverse_count) as f64
117
116
  / total;
118
- let expected_reference_reverse_count: f64 = (reference_forward_count
119
- + reference_reverse_count) as f64
117
+ let expected_reference_reverse_count: f64 = (reference_forward_count + reference_reverse_count) as f64
120
118
  * (alternate_reverse_count + reference_reverse_count) as f64
121
119
  / total;
122
120
 
@@ -315,15 +313,12 @@ pub fn wilcoxon_rank_sum_test(
315
313
  //println!("z_original:{}", z);
316
314
  let mut nties_sum: f64 = 0.0;
317
315
  for i in 0..rank_frequencies.len() {
318
- nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i]
319
- - rank_frequencies[i];
316
+ nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i] - rank_frequencies[i];
320
317
  }
321
318
 
322
319
  let sigma = (((group1.len() * group2.len()) as f64) / 12.0
323
320
  * ((group1.len() + group2.len() + 1) as f64
324
- - nties_sum
325
- / (((group1.len() + group2.len()) as f64)
326
- * ((group1.len() + group2.len() - 1) as f64))))
321
+ - nties_sum / (((group1.len() + group2.len()) as f64) * ((group1.len() + group2.len() - 1) as f64))))
327
322
  .sqrt();
328
323
  //println!("sigma:{}", sigma);
329
324
  let mut correction: f64 = 0.0;
@@ -412,3 +407,82 @@ pub fn calculate_frac_rank(current_rank: f64, num_repeats: f64) -> f64 {
412
407
  }
413
408
  sum / num_repeats
414
409
  }
410
+
411
/// One entry of a ranked gene list, as consumed by `cerno`.
///
/// The derived `PartialOrd` compares fields in declaration order
/// (`gene_name`, then `fold_change`, then `rank`).
#[allow(non_camel_case_types, non_snake_case)]
#[derive(Clone, Debug, PartialEq, PartialOrd)]
pub struct gene_order {
    /// Gene identifier; `cerno` tests it for membership in the pathway gene set.
    pub gene_name: String,
    /// Fold change of the gene (presumably the key the ranked lists are sorted by;
    /// verify against the caller).
    pub fold_change: f32,
    /// Rank of the gene within its list, or `None` when no rank has been assigned.
    /// `cerno` requires `Some(_)` for every gene that belongs to the pathway.
    pub rank: Option<usize>,
}
419
+
420
+ #[allow(dead_code)]
421
+ pub fn cerno(
422
+ genes_descending: &Vec<gene_order>,
423
+ genes_ascending: &Vec<gene_order>,
424
+ genes_in_pathway: HashSet<String>,
425
+ ) -> (f32, f32, f32, f32, String, f32) {
426
+ // Ensure sample_coding_genes is sorted in decreasing order of fold_change
427
+ // Filter the genes_descending vector to only include those whose gene_names are in the HashSet genes_in_pathway
428
+ let gene_intersections_descending: Vec<&gene_order> = genes_descending
429
+ .iter()
430
+ .filter(|genes_descending| genes_in_pathway.contains(&genes_descending.gene_name)) // Check if name is in the HashSet genes_in_pathway
431
+ .collect(); // Collect the results into a new vector
432
+
433
+ let N1 = gene_intersections_descending.len() as f32;
434
+ let N = genes_descending.len() as f32;
435
+ let mut gene_set_hits: String = "".to_string();
436
+ for gene in &gene_intersections_descending {
437
+ gene_set_hits += &(gene.gene_name.to_string() + &",");
438
+ }
439
+ if gene_intersections_descending.len() > 0 {
440
+ // Remove the last "," in string
441
+ gene_set_hits.pop();
442
+ }
443
+
444
+ let ranks_descending: Vec<usize> = gene_intersections_descending // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
445
+ .iter()
446
+ .map(|x| x.rank.unwrap())
447
+ .collect::<Vec<usize>>();
448
+
449
+ let cerno: f32 = ranks_descending // -2 * sum( log(ranks/N) )
450
+ .iter()
451
+ .map(|x| ((*x as f32) / N).ln())
452
+ .collect::<Vec<f32>>()
453
+ .iter()
454
+ .sum::<f32>()
455
+ * (-2.0);
456
+
457
+ let cES;
458
+ let N2 = N - N1; // N2 = N - N1
459
+ let R1 = ranks_descending.iter().sum::<usize>() as f32; // R1 <- sum(ranks)
460
+ let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
461
+ let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
462
+ let p_value;
463
+ if AUC >= 0.5 {
464
+ // Upregulated geneset
465
+ cES = cerno / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
466
+ p_value = chi_squared_cdf(cerno as f64, (2.0 * N1) as f64, false, false);
467
+ // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
468
+ } else {
469
+ let gene_intersections_ascending: Vec<&gene_order> = genes_ascending
470
+ .iter()
471
+ .filter(|genes_ascending| genes_in_pathway.contains(&genes_ascending.gene_name)) // Check if name is in the HashSet genes_in_pathway
472
+ .collect(); // Collect the results into a new vector
473
+ let ranks_ascending: Vec<usize> = gene_intersections_ascending // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
474
+ .iter()
475
+ .map(|x| x.rank.unwrap())
476
+ .collect::<Vec<usize>>();
477
+ let cerno_ascending: f32 = ranks_ascending // -2 * sum( log(ranks/N) )
478
+ .iter()
479
+ .map(|x| ((*x as f32) / N).ln())
480
+ .collect::<Vec<f32>>()
481
+ .iter()
482
+ .sum::<f32>()
483
+ * (-2.0);
484
+ cES = cerno_ascending / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
485
+ p_value = chi_squared_cdf(cerno_ascending as f64, (2.0 * N1) as f64, false, false);
486
+ }
487
+ (p_value as f32, AUC, cES, N1, gene_set_hits, cerno)
488
+ }