@sjcrh/proteinpaint-rust 2.129.1-80343740e.0 → 2.129.6-2b2fdc7ee.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +2 -2
- package/package.json +1 -1
- package/src/cerno.rs +29 -60
- package/src/gdcGRIN2.rs +333 -160
- package/src/stats_functions.rs +91 -17
- package/src/test_cerno.rs +21214 -0
package/src/stats_functions.rs
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
#![allow(non_snake_case)]
|
|
1
2
|
use fishers_exact::fishers_exact;
|
|
2
3
|
//use r_mathlib;
|
|
4
|
+
use r_mathlib::chi_squared_cdf;
|
|
3
5
|
use statrs::distribution::{ChiSquared, ContinuousCDF};
|
|
6
|
+
use std::collections::HashSet;
|
|
4
7
|
use std::panic;
|
|
5
8
|
|
|
6
9
|
#[allow(dead_code)]
|
|
@@ -99,24 +102,19 @@ fn chi_square_test(
|
|
|
99
102
|
{
|
|
100
103
|
0.05 // Arbitrarily put a very high number when there are only forward or reverse reads for alternate/reference
|
|
101
104
|
} else {
|
|
102
|
-
let total: f64 =
|
|
103
|
-
+ alternate_reverse_count
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
let expected_alternate_forward_count: f64 = (alternate_forward_count
|
|
107
|
-
+ alternate_reverse_count) as f64
|
|
105
|
+
let total: f64 =
|
|
106
|
+
(alternate_forward_count + alternate_reverse_count + reference_forward_count + reference_reverse_count)
|
|
107
|
+
as f64;
|
|
108
|
+
let expected_alternate_forward_count: f64 = (alternate_forward_count + alternate_reverse_count) as f64
|
|
108
109
|
* (alternate_forward_count + reference_forward_count) as f64
|
|
109
110
|
/ total;
|
|
110
|
-
let expected_alternate_reverse_count: f64 = (alternate_forward_count
|
|
111
|
-
+ alternate_reverse_count) as f64
|
|
111
|
+
let expected_alternate_reverse_count: f64 = (alternate_forward_count + alternate_reverse_count) as f64
|
|
112
112
|
* (alternate_reverse_count + reference_reverse_count) as f64
|
|
113
113
|
/ total;
|
|
114
|
-
let expected_reference_forward_count: f64 = (alternate_forward_count
|
|
115
|
-
+ reference_forward_count) as f64
|
|
114
|
+
let expected_reference_forward_count: f64 = (alternate_forward_count + reference_forward_count) as f64
|
|
116
115
|
* (reference_forward_count + reference_reverse_count) as f64
|
|
117
116
|
/ total;
|
|
118
|
-
let expected_reference_reverse_count: f64 = (reference_forward_count
|
|
119
|
-
+ reference_reverse_count) as f64
|
|
117
|
+
let expected_reference_reverse_count: f64 = (reference_forward_count + reference_reverse_count) as f64
|
|
120
118
|
* (alternate_reverse_count + reference_reverse_count) as f64
|
|
121
119
|
/ total;
|
|
122
120
|
|
|
@@ -315,15 +313,12 @@ pub fn wilcoxon_rank_sum_test(
|
|
|
315
313
|
//println!("z_original:{}", z);
|
|
316
314
|
let mut nties_sum: f64 = 0.0;
|
|
317
315
|
for i in 0..rank_frequencies.len() {
|
|
318
|
-
nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i]
|
|
319
|
-
- rank_frequencies[i];
|
|
316
|
+
nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i] - rank_frequencies[i];
|
|
320
317
|
}
|
|
321
318
|
|
|
322
319
|
let sigma = (((group1.len() * group2.len()) as f64) / 12.0
|
|
323
320
|
* ((group1.len() + group2.len() + 1) as f64
|
|
324
|
-
- nties_sum
|
|
325
|
-
/ (((group1.len() + group2.len()) as f64)
|
|
326
|
-
* ((group1.len() + group2.len() - 1) as f64))))
|
|
321
|
+
- nties_sum / (((group1.len() + group2.len()) as f64) * ((group1.len() + group2.len() - 1) as f64))))
|
|
327
322
|
.sqrt();
|
|
328
323
|
//println!("sigma:{}", sigma);
|
|
329
324
|
let mut correction: f64 = 0.0;
|
|
@@ -412,3 +407,82 @@ pub fn calculate_frac_rank(current_rank: f64, num_repeats: f64) -> f64 {
|
|
|
412
407
|
}
|
|
413
408
|
sum / num_repeats
|
|
414
409
|
}
|
|
410
|
+
|
|
411
|
+
/// One gene in a ranked gene list, as consumed by [`cerno`].
///
/// `PartialOrd` is derived, so values compare field by field in declaration order:
/// `gene_name`, then `fold_change`, then `rank`.
// The lower-case type name is part of the existing public interface, so the
// naming lint is silenced here instead of renaming the type.
#[allow(non_camel_case_types)]
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct gene_order {
    /// Name of the gene; matched against the pathway gene set by exact string equality.
    pub gene_name: String,
    /// Fold change associated with the gene.
    pub fold_change: f32,
    /// Rank of the gene in its list, or `None` if no rank has been assigned yet.
    pub rank: Option<usize>,
}
|
|
419
|
+
|
|
420
|
+
#[allow(dead_code)]
|
|
421
|
+
pub fn cerno(
|
|
422
|
+
genes_descending: &Vec<gene_order>,
|
|
423
|
+
genes_ascending: &Vec<gene_order>,
|
|
424
|
+
genes_in_pathway: HashSet<String>,
|
|
425
|
+
) -> (f32, f32, f32, f32, String, f32) {
|
|
426
|
+
// Ensure sample_coding_genes is sorted in decreasing order of fold_change
|
|
427
|
+
// Filter the genes_descending vector to only include those whose gene_names are in the HashSet genes_in_pathway
|
|
428
|
+
let gene_intersections_descending: Vec<&gene_order> = genes_descending
|
|
429
|
+
.iter()
|
|
430
|
+
.filter(|genes_descending| genes_in_pathway.contains(&genes_descending.gene_name)) // Check if name is in the HashSet genes_in_pathway
|
|
431
|
+
.collect(); // Collect the results into a new vector
|
|
432
|
+
|
|
433
|
+
let N1 = gene_intersections_descending.len() as f32;
|
|
434
|
+
let N = genes_descending.len() as f32;
|
|
435
|
+
let mut gene_set_hits: String = "".to_string();
|
|
436
|
+
for gene in &gene_intersections_descending {
|
|
437
|
+
gene_set_hits += &(gene.gene_name.to_string() + &",");
|
|
438
|
+
}
|
|
439
|
+
if gene_intersections_descending.len() > 0 {
|
|
440
|
+
// Remove the last "," in string
|
|
441
|
+
gene_set_hits.pop();
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
let ranks_descending: Vec<usize> = gene_intersections_descending // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
|
|
445
|
+
.iter()
|
|
446
|
+
.map(|x| x.rank.unwrap())
|
|
447
|
+
.collect::<Vec<usize>>();
|
|
448
|
+
|
|
449
|
+
let cerno: f32 = ranks_descending // -2 * sum( log(ranks/N) )
|
|
450
|
+
.iter()
|
|
451
|
+
.map(|x| ((*x as f32) / N).ln())
|
|
452
|
+
.collect::<Vec<f32>>()
|
|
453
|
+
.iter()
|
|
454
|
+
.sum::<f32>()
|
|
455
|
+
* (-2.0);
|
|
456
|
+
|
|
457
|
+
let cES;
|
|
458
|
+
let N2 = N - N1; // N2 = N - N1
|
|
459
|
+
let R1 = ranks_descending.iter().sum::<usize>() as f32; // R1 <- sum(ranks)
|
|
460
|
+
let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
|
|
461
|
+
let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
|
|
462
|
+
let p_value;
|
|
463
|
+
if AUC >= 0.5 {
|
|
464
|
+
// Upregulated geneset
|
|
465
|
+
cES = cerno / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
|
|
466
|
+
p_value = chi_squared_cdf(cerno as f64, (2.0 * N1) as f64, false, false);
|
|
467
|
+
// pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
|
|
468
|
+
} else {
|
|
469
|
+
let gene_intersections_ascending: Vec<&gene_order> = genes_ascending
|
|
470
|
+
.iter()
|
|
471
|
+
.filter(|genes_ascending| genes_in_pathway.contains(&genes_ascending.gene_name)) // Check if name is in the HashSet genes_in_pathway
|
|
472
|
+
.collect(); // Collect the results into a new vector
|
|
473
|
+
let ranks_ascending: Vec<usize> = gene_intersections_ascending // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
|
|
474
|
+
.iter()
|
|
475
|
+
.map(|x| x.rank.unwrap())
|
|
476
|
+
.collect::<Vec<usize>>();
|
|
477
|
+
let cerno_ascending: f32 = ranks_ascending // -2 * sum( log(ranks/N) )
|
|
478
|
+
.iter()
|
|
479
|
+
.map(|x| ((*x as f32) / N).ln())
|
|
480
|
+
.collect::<Vec<f32>>()
|
|
481
|
+
.iter()
|
|
482
|
+
.sum::<f32>()
|
|
483
|
+
* (-2.0);
|
|
484
|
+
cES = cerno_ascending / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
|
|
485
|
+
p_value = chi_squared_cdf(cerno_ascending as f64, (2.0 * N1) as f64, false, false);
|
|
486
|
+
}
|
|
487
|
+
(p_value as f32, AUC, cES, N1, gene_set_hits, cerno)
|
|
488
|
+
}
|