@sjcrh/proteinpaint-rust 2.129.1-80343740e.0 → 2.129.6-2b2fdc7ee.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +2 -2
- package/package.json +1 -1
- package/src/cerno.rs +29 -60
- package/src/gdcGRIN2.rs +333 -160
- package/src/stats_functions.rs +91 -17
- package/src/test_cerno.rs +21214 -0
package/Cargo.toml
CHANGED
|
@@ -13,7 +13,7 @@ kodama = "0.3"
|
|
|
13
13
|
rayon = "1.7.0"
|
|
14
14
|
bgzip = "0.3.1"
|
|
15
15
|
petgraph = "0.6.3"
|
|
16
|
-
rusqlite="0.
|
|
16
|
+
rusqlite="0.36.0"
|
|
17
17
|
ndarray = "0.16.1"
|
|
18
18
|
hdf5 = { package = "hdf5-metno", version = "0.9.0" }
|
|
19
19
|
nalgebra = {version = "0.32.2", features = ["serde-serialize"]}
|
|
@@ -35,7 +35,7 @@ flate2 = "1"
|
|
|
35
35
|
futures = "0.3"
|
|
36
36
|
num_cpus = "1.16.0"
|
|
37
37
|
memchr = "2"
|
|
38
|
-
r2d2_sqlite = "0.
|
|
38
|
+
r2d2_sqlite = "0.29.0"
|
|
39
39
|
r2d2 = "0.8.10"
|
|
40
40
|
|
|
41
41
|
[profile.release]
|
package/package.json
CHANGED
package/src/cerno.rs
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
// Syntax: cd .. && cargo build --release && time cat ~/sjpp/test.txt | target/release/cerno
|
|
2
2
|
#![allow(non_snake_case)]
|
|
3
3
|
use json::JsonValue;
|
|
4
|
-
use r_mathlib::chi_squared_cdf;
|
|
5
4
|
use r2d2;
|
|
6
5
|
use r2d2_sqlite::SqliteConnectionManager;
|
|
7
6
|
use rusqlite::{Connection, Result};
|
|
@@ -13,6 +12,10 @@ use std::io;
|
|
|
13
12
|
use std::sync::{Arc, Mutex}; // Multithreading library
|
|
14
13
|
use std::thread;
|
|
15
14
|
|
|
15
|
+
mod stats_functions;
|
|
16
|
+
#[cfg(test)]
|
|
17
|
+
mod test_cerno; // Contains test examples to test cerno
|
|
18
|
+
|
|
16
19
|
#[allow(non_camel_case_types)]
|
|
17
20
|
#[allow(non_snake_case)]
|
|
18
21
|
#[derive(Debug, Clone)]
|
|
@@ -20,15 +23,6 @@ struct GO_pathway {
|
|
|
20
23
|
GO_id: String,
|
|
21
24
|
}
|
|
22
25
|
|
|
23
|
-
#[allow(non_camel_case_types)]
|
|
24
|
-
#[allow(non_snake_case)]
|
|
25
|
-
#[derive(Debug, Clone, PartialEq, PartialOrd)]
|
|
26
|
-
struct gene_order {
|
|
27
|
-
gene_name: String,
|
|
28
|
-
fold_change: f32,
|
|
29
|
-
rank: Option<usize>,
|
|
30
|
-
}
|
|
31
|
-
|
|
32
26
|
#[allow(non_camel_case_types)]
|
|
33
27
|
#[allow(non_snake_case)]
|
|
34
28
|
#[derive(Debug, Serialize, Deserialize)]
|
|
@@ -102,9 +96,9 @@ fn main() -> Result<()> {
|
|
|
102
96
|
panic!("Length of genes array and fold change array are not equal");
|
|
103
97
|
}
|
|
104
98
|
|
|
105
|
-
let mut genes_vector: Vec<gene_order> = Vec::with_capacity(sample_genes.len());
|
|
99
|
+
let mut genes_vector: Vec<stats_functions::gene_order> = Vec::with_capacity(sample_genes.len());
|
|
106
100
|
for i in 0..sample_genes.len() {
|
|
107
|
-
let item: gene_order = gene_order {
|
|
101
|
+
let item: stats_functions::gene_order = stats_functions::gene_order {
|
|
108
102
|
gene_name: sample_genes[i].to_string(),
|
|
109
103
|
fold_change: fold_change_f32[i],
|
|
110
104
|
rank: None, // Will be calculated later
|
|
@@ -125,7 +119,7 @@ fn main() -> Result<()> {
|
|
|
125
119
|
|
|
126
120
|
let genedbconn = Connection::open(genedb)?;
|
|
127
121
|
let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
|
|
128
|
-
let mut sample_coding_genes: Vec<gene_order> = Vec::with_capacity(24000);
|
|
122
|
+
let mut sample_coding_genes: Vec<stats_functions::gene_order> = Vec::with_capacity(24000);
|
|
129
123
|
match genedb_result {
|
|
130
124
|
Ok(mut x) => {
|
|
131
125
|
let mut genes = x.query([])?;
|
|
@@ -152,10 +146,22 @@ fn main() -> Result<()> {
|
|
|
152
146
|
sample_coding_genes
|
|
153
147
|
.as_mut_slice()
|
|
154
148
|
.sort_by(|a, b| (b.fold_change).partial_cmp(&a.fold_change).unwrap_or(Ordering::Equal));
|
|
149
|
+
let mut genes_descending = sample_coding_genes.clone();
|
|
150
|
+
//println!("genes_descending:{:?}", genes_descending);
|
|
151
|
+
|
|
152
|
+
// Sort sample_coding_genes in ascending order of fold change
|
|
153
|
+
sample_coding_genes
|
|
154
|
+
.as_mut_slice()
|
|
155
|
+
.sort_by(|a, b| (a.fold_change).partial_cmp(&b.fold_change).unwrap_or(Ordering::Equal));
|
|
156
|
+
let mut genes_ascending = sample_coding_genes.clone();
|
|
157
|
+
//println!("genes_ascending:{:?}", genes_ascending);
|
|
158
|
+
|
|
159
|
+
drop(sample_coding_genes); // sample_coding_genes is no longer needed, so the variable is dropped
|
|
155
160
|
|
|
156
161
|
// Assign ranks to each gene
|
|
157
|
-
for i in 0..
|
|
158
|
-
|
|
162
|
+
for i in 0..genes_descending.len() {
|
|
163
|
+
genes_descending[i].rank = Some(i);
|
|
164
|
+
genes_ascending[i].rank = Some(i)
|
|
159
165
|
}
|
|
160
166
|
|
|
161
167
|
//println!("sample_genes:{:?}", sample_genes);
|
|
@@ -206,7 +212,8 @@ fn main() -> Result<()> {
|
|
|
206
212
|
}
|
|
207
213
|
}
|
|
208
214
|
let gene_set_size = names.len();
|
|
209
|
-
let (p_value, auc, es, matches, gene_set_hits) =
|
|
215
|
+
let (p_value, auc, es, matches, gene_set_hits, _cerno_output) =
|
|
216
|
+
stats_functions::cerno(&genes_descending, &genes_ascending, names);
|
|
210
217
|
|
|
211
218
|
if matches >= 1.0
|
|
212
219
|
&& p_value.is_nan() == false
|
|
@@ -232,14 +239,16 @@ fn main() -> Result<()> {
|
|
|
232
239
|
let pool = r2d2::Pool::new(manager).unwrap(); // This enables sqlite query from multiple threads simultaneously
|
|
233
240
|
let genesets = Arc::new(genesets);
|
|
234
241
|
let pool_arc = Arc::new(pool);
|
|
235
|
-
let
|
|
242
|
+
let genes_descending = Arc::new(genes_descending);
|
|
243
|
+
let genes_ascending = Arc::new(genes_ascending);
|
|
236
244
|
let pathway_p_values_temp =
|
|
237
245
|
Arc::new(Mutex::new(Vec::<pathway_p_value>::with_capacity(genesets.len())));
|
|
238
246
|
let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
|
|
239
247
|
for thread_num in 0..max_threads {
|
|
240
248
|
let genesets = Arc::clone(&genesets);
|
|
241
249
|
let pool_arc = Arc::clone(&pool_arc);
|
|
242
|
-
let
|
|
250
|
+
let genes_descending = Arc::clone(&genes_descending);
|
|
251
|
+
let genes_ascending = Arc::clone(&genes_ascending);
|
|
243
252
|
let pathway_p_values_temp = Arc::clone(&pathway_p_values_temp);
|
|
244
253
|
let handle = thread::spawn(move || {
|
|
245
254
|
let mut pathway_p_values_thread: Vec<pathway_p_value> =
|
|
@@ -275,8 +284,8 @@ fn main() -> Result<()> {
|
|
|
275
284
|
}
|
|
276
285
|
}
|
|
277
286
|
let gene_set_size = names.len();
|
|
278
|
-
let (p_value, auc, es, matches, gene_set_hits) =
|
|
279
|
-
cerno(&
|
|
287
|
+
let (p_value, auc, es, matches, gene_set_hits, _cerno_output) =
|
|
288
|
+
stats_functions::cerno(&genes_descending, &genes_ascending, names);
|
|
280
289
|
|
|
281
290
|
if matches >= 1.0
|
|
282
291
|
&& p_value.is_nan() == false
|
|
@@ -326,46 +335,6 @@ fn main() -> Result<()> {
|
|
|
326
335
|
Ok(())
|
|
327
336
|
}
|
|
328
337
|
|
|
329
|
-
fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (f32, f32, f32, f32, String) {
|
|
330
|
-
// Filter the sample_coding_genes vector to only include those whose gene_names are in the HashSet genes_in_pathway
|
|
331
|
-
let gene_intersections: Vec<&gene_order> = sample_coding_genes
|
|
332
|
-
.iter()
|
|
333
|
-
.filter(|sample_coding_genes| genes_in_pathway.contains(&sample_coding_genes.gene_name)) // Check if name is in the HashSet genes_in_pathway
|
|
334
|
-
.collect(); // Collect the results into a new vector
|
|
335
|
-
|
|
336
|
-
let N1 = gene_intersections.len() as f32;
|
|
337
|
-
let N = sample_coding_genes.len() as f32;
|
|
338
|
-
let mut gene_set_hits: String = "".to_string();
|
|
339
|
-
for gene in &gene_intersections {
|
|
340
|
-
gene_set_hits += &(gene.gene_name.to_string() + &",");
|
|
341
|
-
}
|
|
342
|
-
if gene_intersections.len() > 0 {
|
|
343
|
-
// Remove the last "," in string
|
|
344
|
-
gene_set_hits.pop();
|
|
345
|
-
}
|
|
346
|
-
|
|
347
|
-
let ranks: Vec<usize> = gene_intersections // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
|
|
348
|
-
.iter()
|
|
349
|
-
.map(|x| x.rank.unwrap())
|
|
350
|
-
.collect::<Vec<usize>>();
|
|
351
|
-
|
|
352
|
-
let cerno: f32 = ranks // -2 * sum( log(ranks/N) )
|
|
353
|
-
.iter()
|
|
354
|
-
.map(|x| ((*x as f32) / N).ln())
|
|
355
|
-
.collect::<Vec<f32>>()
|
|
356
|
-
.iter()
|
|
357
|
-
.sum::<f32>()
|
|
358
|
-
* (-2.0);
|
|
359
|
-
|
|
360
|
-
let cES: f32 = cerno / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
|
|
361
|
-
let N2 = N - N1; // N2 = N - N1
|
|
362
|
-
let R1 = ranks.iter().sum::<usize>() as f32; // R1 <- sum(ranks)
|
|
363
|
-
let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
|
|
364
|
-
let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
|
|
365
|
-
let p_value = chi_squared_cdf(cerno as f64, (2.0 * N1) as f64, false, false); // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
|
|
366
|
-
(p_value as f32, AUC, cES, N1, gene_set_hits)
|
|
367
|
-
}
|
|
368
|
-
|
|
369
338
|
fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
|
|
370
339
|
// Sorting p-values in ascending order
|
|
371
340
|
original_p_values.as_mut_slice().sort_by(|a, b| {
|