@sjcrh/proteinpaint-rust 2.129.2 → 2.129.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +3 -1
- package/package.json +1 -1
- package/src/cerno.rs +181 -115
- package/src/gdcGRIN2.rs +402 -133
- package/src/stats_functions.rs +91 -17
- package/src/test_cerno.rs +131 -0
- package/src/wilcoxon.rs +5 -16
package/src/stats_functions.rs
CHANGED
|
@@ -1,6 +1,9 @@
|
|
|
1
|
+
#![allow(non_snake_case)]
|
|
1
2
|
use fishers_exact::fishers_exact;
|
|
2
3
|
//use r_mathlib;
|
|
4
|
+
use r_mathlib::chi_squared_cdf;
|
|
3
5
|
use statrs::distribution::{ChiSquared, ContinuousCDF};
|
|
6
|
+
use std::collections::HashSet;
|
|
4
7
|
use std::panic;
|
|
5
8
|
|
|
6
9
|
#[allow(dead_code)]
|
|
@@ -99,24 +102,19 @@ fn chi_square_test(
|
|
|
99
102
|
{
|
|
100
103
|
0.05 // Arbitrarily put a very high number when there are only forward or reverse reads for alternate/reference
|
|
101
104
|
} else {
|
|
102
|
-
let total: f64 =
|
|
103
|
-
+ alternate_reverse_count
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
let expected_alternate_forward_count: f64 = (alternate_forward_count
|
|
107
|
-
+ alternate_reverse_count) as f64
|
|
105
|
+
let total: f64 =
|
|
106
|
+
(alternate_forward_count + alternate_reverse_count + reference_forward_count + reference_reverse_count)
|
|
107
|
+
as f64;
|
|
108
|
+
let expected_alternate_forward_count: f64 = (alternate_forward_count + alternate_reverse_count) as f64
|
|
108
109
|
* (alternate_forward_count + reference_forward_count) as f64
|
|
109
110
|
/ total;
|
|
110
|
-
let expected_alternate_reverse_count: f64 = (alternate_forward_count
|
|
111
|
-
+ alternate_reverse_count) as f64
|
|
111
|
+
let expected_alternate_reverse_count: f64 = (alternate_forward_count + alternate_reverse_count) as f64
|
|
112
112
|
* (alternate_reverse_count + reference_reverse_count) as f64
|
|
113
113
|
/ total;
|
|
114
|
-
let expected_reference_forward_count: f64 = (alternate_forward_count
|
|
115
|
-
+ reference_forward_count) as f64
|
|
114
|
+
let expected_reference_forward_count: f64 = (alternate_forward_count + reference_forward_count) as f64
|
|
116
115
|
* (reference_forward_count + reference_reverse_count) as f64
|
|
117
116
|
/ total;
|
|
118
|
-
let expected_reference_reverse_count: f64 = (reference_forward_count
|
|
119
|
-
+ reference_reverse_count) as f64
|
|
117
|
+
let expected_reference_reverse_count: f64 = (reference_forward_count + reference_reverse_count) as f64
|
|
120
118
|
* (alternate_reverse_count + reference_reverse_count) as f64
|
|
121
119
|
/ total;
|
|
122
120
|
|
|
@@ -315,15 +313,12 @@ pub fn wilcoxon_rank_sum_test(
|
|
|
315
313
|
//println!("z_original:{}", z);
|
|
316
314
|
let mut nties_sum: f64 = 0.0;
|
|
317
315
|
for i in 0..rank_frequencies.len() {
|
|
318
|
-
nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i]
|
|
319
|
-
- rank_frequencies[i];
|
|
316
|
+
nties_sum += rank_frequencies[i] * rank_frequencies[i] * rank_frequencies[i] - rank_frequencies[i];
|
|
320
317
|
}
|
|
321
318
|
|
|
322
319
|
let sigma = (((group1.len() * group2.len()) as f64) / 12.0
|
|
323
320
|
* ((group1.len() + group2.len() + 1) as f64
|
|
324
|
-
- nties_sum
|
|
325
|
-
/ (((group1.len() + group2.len()) as f64)
|
|
326
|
-
* ((group1.len() + group2.len() - 1) as f64))))
|
|
321
|
+
- nties_sum / (((group1.len() + group2.len()) as f64) * ((group1.len() + group2.len() - 1) as f64))))
|
|
327
322
|
.sqrt();
|
|
328
323
|
//println!("sigma:{}", sigma);
|
|
329
324
|
let mut correction: f64 = 0.0;
|
|
@@ -412,3 +407,82 @@ pub fn calculate_frac_rank(current_rank: f64, num_repeats: f64) -> f64 {
|
|
|
412
407
|
}
|
|
413
408
|
sum / num_repeats
|
|
414
409
|
}
|
|
410
|
+
|
|
411
|
+
/// A gene together with its fold change and, once assigned, its position in a ranked list.
///
/// The derived `PartialOrd` compares fields in declaration order
/// (`gene_name`, then `fold_change`, then `rank`), so the field order must not be changed casually.
#[allow(non_camel_case_types, non_snake_case)]
#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct gene_order {
    /// Name of the gene; `cerno` looks this up in the pathway's gene set.
    pub gene_name: String,
    /// Fold change associated with the gene.
    pub fold_change: f32,
    /// Position of the gene in a sorted list; `None` until a rank has been assigned.
    pub rank: Option<usize>,
}
|
|
419
|
+
|
|
420
|
+
#[allow(dead_code)]
|
|
421
|
+
pub fn cerno(
|
|
422
|
+
genes_descending: &Vec<gene_order>,
|
|
423
|
+
genes_ascending: &Vec<gene_order>,
|
|
424
|
+
genes_in_pathway: HashSet<String>,
|
|
425
|
+
) -> (f32, f32, f32, f32, String, f32) {
|
|
426
|
+
// Ensure sample_coding_genes is sorted in decreasing order of fold_change
|
|
427
|
+
// Filter the genes_descending vector to only include those whose gene_names are in the HashSet genes_in_pathway
|
|
428
|
+
let gene_intersections_descending: Vec<&gene_order> = genes_descending
|
|
429
|
+
.iter()
|
|
430
|
+
.filter(|genes_descending| genes_in_pathway.contains(&genes_descending.gene_name)) // Check if name is in the HashSet genes_in_pathway
|
|
431
|
+
.collect(); // Collect the results into a new vector
|
|
432
|
+
|
|
433
|
+
let N1 = gene_intersections_descending.len() as f32;
|
|
434
|
+
let N = genes_descending.len() as f32;
|
|
435
|
+
let mut gene_set_hits: String = "".to_string();
|
|
436
|
+
for gene in &gene_intersections_descending {
|
|
437
|
+
gene_set_hits += &(gene.gene_name.to_string() + &",");
|
|
438
|
+
}
|
|
439
|
+
if gene_intersections_descending.len() > 0 {
|
|
440
|
+
// Remove the last "," in string
|
|
441
|
+
gene_set_hits.pop();
|
|
442
|
+
}
|
|
443
|
+
|
|
444
|
+
let ranks_descending: Vec<usize> = gene_intersections_descending // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
|
|
445
|
+
.iter()
|
|
446
|
+
.map(|x| x.rank.unwrap())
|
|
447
|
+
.collect::<Vec<usize>>();
|
|
448
|
+
|
|
449
|
+
let cerno: f32 = ranks_descending // -2 * sum( log(ranks/N) )
|
|
450
|
+
.iter()
|
|
451
|
+
.map(|x| ((*x as f32) / N).ln())
|
|
452
|
+
.collect::<Vec<f32>>()
|
|
453
|
+
.iter()
|
|
454
|
+
.sum::<f32>()
|
|
455
|
+
* (-2.0);
|
|
456
|
+
|
|
457
|
+
let cES;
|
|
458
|
+
let N2 = N - N1; // N2 = N - N1
|
|
459
|
+
let R1 = ranks_descending.iter().sum::<usize>() as f32; // R1 <- sum(ranks)
|
|
460
|
+
let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
|
|
461
|
+
let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
|
|
462
|
+
let p_value;
|
|
463
|
+
if AUC >= 0.5 {
|
|
464
|
+
// Upregulated geneset
|
|
465
|
+
cES = cerno / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
|
|
466
|
+
p_value = chi_squared_cdf(cerno as f64, (2.0 * N1) as f64, false, false);
|
|
467
|
+
// pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
|
|
468
|
+
} else {
|
|
469
|
+
let gene_intersections_ascending: Vec<&gene_order> = genes_ascending
|
|
470
|
+
.iter()
|
|
471
|
+
.filter(|genes_ascending| genes_in_pathway.contains(&genes_ascending.gene_name)) // Check if name is in the HashSet genes_in_pathway
|
|
472
|
+
.collect(); // Collect the results into a new vector
|
|
473
|
+
let ranks_ascending: Vec<usize> = gene_intersections_ascending // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
|
|
474
|
+
.iter()
|
|
475
|
+
.map(|x| x.rank.unwrap())
|
|
476
|
+
.collect::<Vec<usize>>();
|
|
477
|
+
let cerno_ascending: f32 = ranks_ascending // -2 * sum( log(ranks/N) )
|
|
478
|
+
.iter()
|
|
479
|
+
.map(|x| ((*x as f32) / N).ln())
|
|
480
|
+
.collect::<Vec<f32>>()
|
|
481
|
+
.iter()
|
|
482
|
+
.sum::<f32>()
|
|
483
|
+
* (-2.0);
|
|
484
|
+
cES = cerno_ascending / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
|
|
485
|
+
p_value = chi_squared_cdf(cerno_ascending as f64, (2.0 * N1) as f64, false, false);
|
|
486
|
+
}
|
|
487
|
+
(p_value as f32, AUC, cES, N1, gene_set_hits, cerno)
|
|
488
|
+
}
|
|
@@ -0,0 +1,131 @@
|
|
|
1
|
+
// For capturing output from a test, run: cd .. && cargo test -- --nocapture
|
|
2
|
+
// Intentionally empty entry point; the only other content of this file is the test module below.
// NOTE(review): presumably kept so the file can also be built as a standalone target — confirm.
#[allow(dead_code)]
|
|
3
|
+
fn main() {}
|
|
4
|
+
|
|
5
|
+
#[cfg(test)]
mod tests {
    use crate::stats_functions::{cerno, gene_order};
    use flate2::read::GzDecoder;
    use json::JsonValue;
    use std::cmp::Ordering;
    use std::collections::HashSet;
    use std::fs::File;
    use std::io::{BufReader, Read};

    const P_VALUE_CUTOFF: f32 = 0.01; // Threshold difference between calculated and expected p-value for the test to pass
    const AUC_CUTOFF: f32 = 0.001; // Threshold difference between calculated and expected AUC for the test to pass
    const ES_CUTOFF: f32 = 0.01; // Threshold difference between calculated and expected ES for the test to pass
    const CERNO_CUTOFF: f32 = 1.0; // Threshold difference between calculated and expected CERNO value for the test to pass

    /// Runs `cerno` on every gene set in the gzipped JSON fixture and compares the p-value,
    /// AUC, ES and CERNO statistic against the expected values stored in the same fixture
    /// (which come from the original tmod package in R).
    #[test]
    fn cerno_test() {
        // Decompress the whole fixture into memory.
        let file_path = "test/cerno_test.json.gz";
        let file = File::open(file_path).expect("could not open test/cerno_test.json.gz");
        let mut gz_decoder = GzDecoder::new(BufReader::new(file));
        let mut contents = String::new();
        gz_decoder
            .read_to_string(&mut contents)
            .expect("could not decompress test/cerno_test.json.gz");

        // A fixture that does not parse must fail the test instead of letting it pass silently.
        let fixture: JsonValue =
            json::parse(&contents).unwrap_or_else(|error| panic!("Incorrect json:{}", error));

        // Build one gene_order per input gene; ranks are assigned after sorting.
        let sample_genes_input: &JsonValue = &fixture["input_genes"];
        let fold_change_input: &JsonValue = &fixture["input_fold_change"];
        let mut genes_descending: Vec<gene_order> = (0..sample_genes_input.len())
            .map(|i| gene_order {
                gene_name: sample_genes_input[i].as_str().unwrap().to_string(),
                fold_change: fold_change_input[i].as_f32().unwrap(),
                rank: None, // Will be calculated later
            })
            .collect();

        // Sort in descending order of fold change.
        genes_descending.sort_by(|a, b| b.fold_change.partial_cmp(&a.fold_change).unwrap_or(Ordering::Equal));
        // The ascending list is derived from the descending one (stable sort), so ties keep
        // the relative order they have in the descending list.
        let mut genes_ascending = genes_descending.clone();
        genes_ascending.sort_by(|a, b| a.fold_change.partial_cmp(&b.fold_change).unwrap_or(Ordering::Equal));

        // Assign ranks to each gene.
        // NOTE(review): ranks start at 0 here, while the R reference quoted in `cerno` uses
        // c(1:N); a gene set containing the first gene contributes ln(0) to the statistic.
        // Confirm whether 0-based ranks are intended.
        for (position, gene) in genes_descending.iter_mut().enumerate() {
            gene.rank = Some(position);
        }
        for (position, gene) in genes_ascending.iter_mut().enumerate() {
            gene.rank = Some(position);
        }

        // Expected values come from the original tmod package in R. Every column is read for
        // as many entries as there are expected p-values.
        let expected_count = fixture["expected_p_values"].len();
        let expected_column = |key: &str| -> Vec<f32> {
            (0..expected_count)
                .map(|j| fixture[key][j].as_f32().unwrap())
                .collect()
        };
        let expected_p_values = expected_column("expected_p_values");
        let expected_auc = expected_column("expected_auc");
        let expected_es = expected_column("expected_es");
        let expected_cerno = expected_column("expected_cerno");

        // The n-th gene set in MODULES2GENES is compared with the n-th expected value.
        let modules_2_genes: &JsonValue = &fixture["MODULES2GENES"];
        for (index, (key, value)) in modules_2_genes.entries().enumerate() {
            let geneset: HashSet<String> = value.members().map(|member| member.to_string()).collect();
            let (p_value, auc, es, _matches, _gene_set_hits, cerno_output) =
                cerno(&genes_descending, &genes_ascending, geneset);
            println!("Geneset name:{}", key);
            println!("p_value:{}", p_value);
            println!("auc:{}", auc);
            println!("es:{}", es);
            println!("cerno:{}", cerno_output);

            assert!(
                (p_value - expected_p_values[index]).abs() < P_VALUE_CUTOFF,
                "p-value mismatch for gene set {}",
                key
            );
            assert!(
                (auc - expected_auc[index]).abs() < AUC_CUTOFF,
                "AUC mismatch for gene set {}",
                key
            );
            assert!(
                (es - expected_es[index]).abs() < ES_CUTOFF,
                "ES mismatch for gene set {}",
                key
            );
            assert!(
                (cerno_output - expected_cerno[index]).abs() < CERNO_CUTOFF,
                "CERNO mismatch for gene set {}",
                key
            );
        }
    }
}
|
package/src/wilcoxon.rs
CHANGED
|
@@ -48,6 +48,7 @@ use serde::{Deserialize, Serialize};
|
|
|
48
48
|
use std::io;
|
|
49
49
|
|
|
50
50
|
mod stats_functions; // Import Wilcoxon function
|
|
51
|
+
#[cfg(test)]
|
|
51
52
|
mod test_examples; // Contains examples to test the wilcoxon rank sum test
|
|
52
53
|
|
|
53
54
|
#[derive(Debug, Serialize, Deserialize)]
|
|
@@ -101,14 +102,8 @@ fn main() {
|
|
|
101
102
|
if vec1.len() == 0 || vec2.len() == 0 {
|
|
102
103
|
// If one of the vectors has a length of zero, wilcoxon test is not performed and a pvalue of NULL is given.
|
|
103
104
|
output_string += &serde_json::to_string(&OutputJson {
|
|
104
|
-
group1_id: json_string[i]["group1_id"]
|
|
105
|
-
|
|
106
|
-
.unwrap()
|
|
107
|
-
.to_string(),
|
|
108
|
-
group2_id: json_string[i]["group2_id"]
|
|
109
|
-
.as_str()
|
|
110
|
-
.unwrap()
|
|
111
|
-
.to_string(),
|
|
105
|
+
group1_id: json_string[i]["group1_id"].as_str().unwrap().to_string(),
|
|
106
|
+
group2_id: json_string[i]["group2_id"].as_str().unwrap().to_string(),
|
|
112
107
|
group1_values: vec1,
|
|
113
108
|
group2_values: vec2,
|
|
114
109
|
pvalue: None,
|
|
@@ -129,14 +124,8 @@ fn main() {
|
|
|
129
124
|
//}
|
|
130
125
|
//println!("pvalue:{}", pvalue);
|
|
131
126
|
output_string += &serde_json::to_string(&OutputJson {
|
|
132
|
-
group1_id: json_string[i]["group1_id"]
|
|
133
|
-
|
|
134
|
-
.unwrap()
|
|
135
|
-
.to_string(),
|
|
136
|
-
group2_id: json_string[i]["group2_id"]
|
|
137
|
-
.as_str()
|
|
138
|
-
.unwrap()
|
|
139
|
-
.to_string(),
|
|
127
|
+
group1_id: json_string[i]["group1_id"].as_str().unwrap().to_string(),
|
|
128
|
+
group2_id: json_string[i]["group2_id"].as_str().unwrap().to_string(),
|
|
140
129
|
group1_values: vec1,
|
|
141
130
|
group2_values: vec2,
|
|
142
131
|
pvalue: Some(pvalue),
|