@sjcrh/proteinpaint-rust 2.129.1-80343740e.0 → 2.129.6-2b2fdc7ee.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -13,7 +13,7 @@ kodama = "0.3"
13
13
  rayon = "1.7.0"
14
14
  bgzip = "0.3.1"
15
15
  petgraph = "0.6.3"
16
- rusqlite="0.35"
16
+ rusqlite="0.36.0"
17
17
  ndarray = "0.16.1"
18
18
  hdf5 = { package = "hdf5-metno", version = "0.9.0" }
19
19
  nalgebra = {version = "0.32.2", features = ["serde-serialize"]}
@@ -35,7 +35,7 @@ flate2 = "1"
35
35
  futures = "0.3"
36
36
  num_cpus = "1.16.0"
37
37
  memchr = "2"
38
- r2d2_sqlite = "0.28.0"
38
+ r2d2_sqlite = "0.29.0"
39
39
  r2d2 = "0.8.10"
40
40
 
41
41
  [profile.release]
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.129.1-80343740e.0",
2
+ "version": "2.129.6-2b2fdc7ee.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/cerno.rs CHANGED
@@ -1,7 +1,6 @@
1
1
  // Syntax: cd .. && cargo build --release && time cat ~/sjpp/test.txt | target/release/cerno
2
2
  #![allow(non_snake_case)]
3
3
  use json::JsonValue;
4
- use r_mathlib::chi_squared_cdf;
5
4
  use r2d2;
6
5
  use r2d2_sqlite::SqliteConnectionManager;
7
6
  use rusqlite::{Connection, Result};
@@ -13,6 +12,10 @@ use std::io;
13
12
  use std::sync::{Arc, Mutex}; // Multithreading library
14
13
  use std::thread;
15
14
 
15
+ mod stats_functions;
16
+ #[cfg(test)]
17
+ mod test_cerno; // Contains test examples to test cerno
18
+
16
19
  #[allow(non_camel_case_types)]
17
20
  #[allow(non_snake_case)]
18
21
  #[derive(Debug, Clone)]
@@ -20,15 +23,6 @@ struct GO_pathway {
20
23
  GO_id: String,
21
24
  }
22
25
 
23
- #[allow(non_camel_case_types)]
24
- #[allow(non_snake_case)]
25
- #[derive(Debug, Clone, PartialEq, PartialOrd)]
26
- struct gene_order {
27
- gene_name: String,
28
- fold_change: f32,
29
- rank: Option<usize>,
30
- }
31
-
32
26
  #[allow(non_camel_case_types)]
33
27
  #[allow(non_snake_case)]
34
28
  #[derive(Debug, Serialize, Deserialize)]
@@ -102,9 +96,9 @@ fn main() -> Result<()> {
102
96
  panic!("Length of genes array and fold change array are not equal");
103
97
  }
104
98
 
105
- let mut genes_vector: Vec<gene_order> = Vec::with_capacity(sample_genes.len());
99
+ let mut genes_vector: Vec<stats_functions::gene_order> = Vec::with_capacity(sample_genes.len());
106
100
  for i in 0..sample_genes.len() {
107
- let item: gene_order = gene_order {
101
+ let item: stats_functions::gene_order = stats_functions::gene_order {
108
102
  gene_name: sample_genes[i].to_string(),
109
103
  fold_change: fold_change_f32[i],
110
104
  rank: None, // Will be calculated later
@@ -125,7 +119,7 @@ fn main() -> Result<()> {
125
119
 
126
120
  let genedbconn = Connection::open(genedb)?;
127
121
  let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
128
- let mut sample_coding_genes: Vec<gene_order> = Vec::with_capacity(24000);
122
+ let mut sample_coding_genes: Vec<stats_functions::gene_order> = Vec::with_capacity(24000);
129
123
  match genedb_result {
130
124
  Ok(mut x) => {
131
125
  let mut genes = x.query([])?;
@@ -152,10 +146,22 @@ fn main() -> Result<()> {
152
146
  sample_coding_genes
153
147
  .as_mut_slice()
154
148
  .sort_by(|a, b| (b.fold_change).partial_cmp(&a.fold_change).unwrap_or(Ordering::Equal));
149
+ let mut genes_descending = sample_coding_genes.clone();
150
+ //println!("genes_descending:{:?}", genes_descending);
151
+
152
+ // Sort sample_coding_genes in ascending order of fold change
153
+ sample_coding_genes
154
+ .as_mut_slice()
155
+ .sort_by(|a, b| (a.fold_change).partial_cmp(&b.fold_change).unwrap_or(Ordering::Equal));
156
+ let mut genes_ascending = sample_coding_genes.clone();
157
+ //println!("genes_ascending:{:?}", genes_ascending);
158
+
159
+ drop(sample_coding_genes); // sample_coding_genes is no longer needed (both sorted copies exist), so the variable is dropped
155
160
 
156
161
  // Assign ranks to each gene
157
- for i in 0..sample_coding_genes.len() {
158
- sample_coding_genes[i].rank = Some(i)
162
+ for i in 0..genes_descending.len() {
163
+ genes_descending[i].rank = Some(i);
164
+ genes_ascending[i].rank = Some(i)
159
165
  }
160
166
 
161
167
  //println!("sample_genes:{:?}", sample_genes);
@@ -206,7 +212,8 @@ fn main() -> Result<()> {
206
212
  }
207
213
  }
208
214
  let gene_set_size = names.len();
209
- let (p_value, auc, es, matches, gene_set_hits) = cerno(&sample_coding_genes, names);
215
+ let (p_value, auc, es, matches, gene_set_hits, _cerno_output) =
216
+ stats_functions::cerno(&genes_descending, &genes_ascending, names);
210
217
 
211
218
  if matches >= 1.0
212
219
  && p_value.is_nan() == false
@@ -232,14 +239,16 @@ fn main() -> Result<()> {
232
239
  let pool = r2d2::Pool::new(manager).unwrap(); // This enables sqlite query from multiple threads simultaneously
233
240
  let genesets = Arc::new(genesets);
234
241
  let pool_arc = Arc::new(pool);
235
- let sample_coding_genes = Arc::new(sample_coding_genes);
242
+ let genes_descending = Arc::new(genes_descending);
243
+ let genes_ascending = Arc::new(genes_ascending);
236
244
  let pathway_p_values_temp =
237
245
  Arc::new(Mutex::new(Vec::<pathway_p_value>::with_capacity(genesets.len())));
238
246
  let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
239
247
  for thread_num in 0..max_threads {
240
248
  let genesets = Arc::clone(&genesets);
241
249
  let pool_arc = Arc::clone(&pool_arc);
242
- let sample_coding_genes = Arc::clone(&sample_coding_genes);
250
+ let genes_descending = Arc::clone(&genes_descending);
251
+ let genes_ascending = Arc::clone(&genes_ascending);
243
252
  let pathway_p_values_temp = Arc::clone(&pathway_p_values_temp);
244
253
  let handle = thread::spawn(move || {
245
254
  let mut pathway_p_values_thread: Vec<pathway_p_value> =
@@ -275,8 +284,8 @@ fn main() -> Result<()> {
275
284
  }
276
285
  }
277
286
  let gene_set_size = names.len();
278
- let (p_value, auc, es, matches, gene_set_hits) =
279
- cerno(&sample_coding_genes, names);
287
+ let (p_value, auc, es, matches, gene_set_hits, _cerno_output) =
288
+ stats_functions::cerno(&genes_descending, &genes_ascending, names);
280
289
 
281
290
  if matches >= 1.0
282
291
  && p_value.is_nan() == false
@@ -326,46 +335,6 @@ fn main() -> Result<()> {
326
335
  Ok(())
327
336
  }
328
337
 
329
- fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (f32, f32, f32, f32, String) {
330
- // Filter the sample_coding_genes vector to only include those whose gene_names are in the HashSet genes_in_pathway
331
- let gene_intersections: Vec<&gene_order> = sample_coding_genes
332
- .iter()
333
- .filter(|sample_coding_genes| genes_in_pathway.contains(&sample_coding_genes.gene_name)) // Check if name is in the HashSet genes_in_pathway
334
- .collect(); // Collect the results into a new vector
335
-
336
- let N1 = gene_intersections.len() as f32;
337
- let N = sample_coding_genes.len() as f32;
338
- let mut gene_set_hits: String = "".to_string();
339
- for gene in &gene_intersections {
340
- gene_set_hits += &(gene.gene_name.to_string() + &",");
341
- }
342
- if gene_intersections.len() > 0 {
343
- // Remove the last "," in string
344
- gene_set_hits.pop();
345
- }
346
-
347
- let ranks: Vec<usize> = gene_intersections // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
348
- .iter()
349
- .map(|x| x.rank.unwrap())
350
- .collect::<Vec<usize>>();
351
-
352
- let cerno: f32 = ranks // -2 * sum( log(ranks/N) )
353
- .iter()
354
- .map(|x| ((*x as f32) / N).ln())
355
- .collect::<Vec<f32>>()
356
- .iter()
357
- .sum::<f32>()
358
- * (-2.0);
359
-
360
- let cES: f32 = cerno / (2.0 * (N1 as f32)); // cES <- cerno/(2*N1)
361
- let N2 = N - N1; // N2 = N - N1
362
- let R1 = ranks.iter().sum::<usize>() as f32; // R1 <- sum(ranks)
363
- let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
364
- let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
365
- let p_value = chi_squared_cdf(cerno as f64, (2.0 * N1) as f64, false, false); // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
366
- (p_value as f32, AUC, cES, N1, gene_set_hits)
367
- }
368
-
369
338
  fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
370
339
  // Sorting p-values in ascending order
371
340
  original_p_values.as_mut_slice().sort_by(|a, b| {