@sjcrh/proteinpaint-rust 2.74.0 → 2.75.0

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -2
  2. package/src/genesetORA.rs +36 -19
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.74.0",
2
+ "version": "2.75.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.74.0"
41
+ "pp_release_tag": "v2.75.0"
42
42
  }
package/src/genesetORA.rs CHANGED
@@ -34,23 +34,37 @@ struct pathway_p_value {
34
34
  pathway_name: String,
35
35
  p_value_original: f64,
36
36
  p_value_adjusted: Option<f64>,
37
+ gene_set_hits: String,
38
+ gene_set_size: usize,
37
39
  }
38
40
 
39
41
  fn calculate_hypergeometric_p_value(
40
42
  sample_genes: &Vec<&str>,
41
43
  num_background_genes: usize,
42
44
  genes_in_pathway: Vec<pathway_genes>,
43
- ) -> f64 {
44
- let matching_sample_genes_counts: f64 = sample_genes
45
- .iter()
46
- .zip(&genes_in_pathway)
47
- .filter(|&(a, b)| *a.to_string() == b.symbol)
48
- .count() as f64;
45
+ ) -> (f64, f64, String) {
46
+ let mut matching_sample_genes_counts = 0.0;
47
+ let mut gene_set_hits: String = "".to_string();
48
+ for gene in sample_genes {
49
+ for pathway in &genes_in_pathway {
50
+ if pathway.symbol == gene.to_string() {
51
+ matching_sample_genes_counts += 1.0;
52
+ gene_set_hits += &(gene.to_string() + &",");
53
+ }
54
+ }
55
+ }
56
+
57
+ if matching_sample_genes_counts > 0.0 {
58
+ gene_set_hits.pop();
59
+ }
60
+
61
+ //println!("sample_genes:{:?}", sample_genes);
62
+ //println!("genes_in_pathway:{:?}", genes_in_pathway);
49
63
  //println!("k-1:{}", matching_sample_genes_counts - 1.0);
50
64
  //println!("M:{}", genes_in_pathway.len() as f64);
51
65
  //println!(
52
66
  // "N-M:{}",
53
- // background_genes.len() as f64 - genes_in_pathway.len() as f64
67
+ // num_background_genes as f64 - genes_in_pathway.len() as f64
54
68
  //);
55
69
  //println!("n:{}", sample_genes.len() as f64);
56
70
  let p_value = r_mathlib::hypergeometric_cdf(
@@ -62,7 +76,7 @@ fn calculate_hypergeometric_p_value(
62
76
  false,
63
77
  );
64
78
  //println!("p_value:{}", p_value);
65
- p_value
79
+ (p_value, matching_sample_genes_counts, gene_set_hits)
66
80
  }
67
81
 
68
82
  fn main() -> Result<()> {
@@ -136,7 +150,6 @@ fn main() -> Result<()> {
136
150
  + &genesetgroup
137
151
  + "'"),
138
152
  );
139
- let mut iter = 0;
140
153
  match stmt_result {
141
154
  Ok(mut stmt) => {
142
155
  #[allow(non_snake_case)]
@@ -144,7 +157,6 @@ fn main() -> Result<()> {
144
157
  stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
145
158
  #[allow(non_snake_case)]
146
159
  for GO_term in GO_iter {
147
- iter += 1;
148
160
  match GO_term {
149
161
  Ok(n) => {
150
162
  //println!("GO term {:?}", n);
@@ -184,16 +196,20 @@ fn main() -> Result<()> {
184
196
  }
185
197
  }
186
198
  }
187
- let p_value = calculate_hypergeometric_p_value(
188
- &sample_genes,
189
- num_background_genes,
190
- names,
191
- );
192
- if p_value.is_nan() == false {
199
+ let gene_set_size = names.len();
200
+ let (p_value, matches, gene_set_hits) =
201
+ calculate_hypergeometric_p_value(
202
+ &sample_genes,
203
+ num_background_genes,
204
+ names,
205
+ );
206
+ if matches >= 1.0 && p_value.is_nan() == false {
193
207
  pathway_p_values.push(pathway_p_value {
194
208
  pathway_name: n.GO_id,
195
209
  p_value_original: p_value,
196
210
  p_value_adjusted: None,
211
+ gene_set_hits: gene_set_hits,
212
+ gene_set_size: gene_set_size,
197
213
  })
198
214
  }
199
215
  }
@@ -206,7 +222,7 @@ fn main() -> Result<()> {
206
222
  Err(_) => panic!("sqlite database file not found"),
207
223
  }
208
224
  let output_string = "{\"num_pathways\":".to_string()
209
- + &iter.to_string()
225
+ + &pathway_p_values.len().to_string()
210
226
  + &",\"pathways\":"
211
227
  + &adjust_p_values(pathway_p_values, num_items_output)
212
228
  + &"}";
@@ -263,6 +279,8 @@ fn adjust_p_values(
263
279
  pathway_name: original_p_values[i].pathway_name.clone(),
264
280
  p_value_original: original_p_values[i].p_value_original,
265
281
  p_value_adjusted: Some(adjusted_p_val),
282
+ gene_set_hits: original_p_values[i].gene_set_hits.clone(),
283
+ gene_set_size: original_p_values[i].gene_set_size,
266
284
  });
267
285
  }
268
286
  adjusted_p_values.as_mut_slice().sort_by(|a, b| {
@@ -277,8 +295,7 @@ fn adjust_p_values(
277
295
 
278
296
  let mut output_string = "[".to_string();
279
297
  for i in 0..num_items_output {
280
- let j = adjusted_p_values.len() - i - 1;
281
- output_string += &serde_json::to_string(&adjusted_p_values[j]).unwrap();
298
+ output_string += &serde_json::to_string(&adjusted_p_values[i]).unwrap();
282
299
  if i < num_items_output - 1 {
283
300
  output_string += &",".to_string();
284
301
  }