@sjcrh/proteinpaint-rust 2.73.0 → 2.74.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +2 -2
  2. package/src/genesetORA.rs +41 -17
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.73.0",
2
+ "version": "2.74.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.73.0"
41
+ "pp_release_tag": "v2.74.0"
42
42
  }
package/src/genesetORA.rs CHANGED
@@ -38,7 +38,7 @@ struct pathway_p_value {
38
38
 
39
39
  fn calculate_hypergeometric_p_value(
40
40
  sample_genes: &Vec<&str>,
41
- background_genes: &Vec<&str>,
41
+ num_background_genes: usize,
42
42
  genes_in_pathway: Vec<pathway_genes>,
43
43
  ) -> f64 {
44
44
  let matching_sample_genes_counts: f64 = sample_genes
@@ -56,7 +56,7 @@ fn calculate_hypergeometric_p_value(
56
56
  let p_value = r_mathlib::hypergeometric_cdf(
57
57
  matching_sample_genes_counts - 1.0,
58
58
  genes_in_pathway.len() as f64,
59
- background_genes.len() as f64 - genes_in_pathway.len() as f64,
59
+ num_background_genes as f64 - genes_in_pathway.len() as f64,
60
60
  sample_genes.len() as f64,
61
61
  false,
62
62
  false,
@@ -74,11 +74,11 @@ fn main() -> Result<()> {
74
74
  match input_json {
75
75
  Ok(json_string) => {
76
76
  let run_time = Instant::now();
77
- let db_input: &JsonValue = &json_string["db"];
78
- let db;
79
- match db_input.as_str() {
80
- Some(db_string) => db = db_string.to_string(),
81
- None => panic!("db file path is missing"),
77
+ let msigdb_input: &JsonValue = &json_string["msigdb"];
78
+ let msigdb;
79
+ match msigdb_input.as_str() {
80
+ Some(db_string) => msigdb = db_string.to_string(),
81
+ None => panic!("msigdb file path is missing"),
82
82
  }
83
83
  let genesetgroup;
84
84
  let genesetgroup_input: &JsonValue = &json_string["gene_set_group"];
@@ -91,23 +91,47 @@ fn main() -> Result<()> {
91
91
  sample_genes_input.as_str().unwrap().split(",").collect();
92
92
  let mut pathway_p_values: Vec<pathway_p_value> = Vec::with_capacity(10000);
93
93
  let background_genes_input: &JsonValue = &json_string["background_genes"];
94
- let background_genes: Vec<&str> = background_genes_input
95
- .as_str()
96
- .unwrap()
97
- .split(",")
98
- .collect();
94
+ let mut num_background_genes: usize = 0;
95
+ match background_genes_input.as_str() {
96
+ Some(x) => {
97
+ let background_genes_str: Vec<&str> = x.split(",").collect(); // Background genes is defined for e.g in case of DE analysis
98
+ num_background_genes = background_genes_str.len();
99
+ }
100
+ None => {
101
+ // Background genes not present, e.g. in hierarchical clustering
102
+ // Get background genes from the gene database
103
+ let genedb_input: &JsonValue = &json_string["genedb"];
104
+ let genedb;
105
+ match genedb_input.as_str() {
106
+ Some(gene_db_string) => genedb = gene_db_string.to_string(),
107
+ None => panic!("genedb file path is missing"),
108
+ }
109
+
110
+ let genedbconn = Connection::open(genedb)?;
111
+ let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
112
+ match genedb_result {
113
+ Ok(mut x) => {
114
+ let mut genes = x.query([])?;
115
+ while let Some(_gene) = genes.next()? {
116
+ num_background_genes += 1;
117
+ }
118
+ }
119
+ Err(_) => {}
120
+ }
121
+ }
122
+ }
99
123
  //println!("sample_genes:{:?}", sample_genes);
100
124
  //println!("background_genes:{:?}", background_genes);
101
125
 
102
126
  if sample_genes.len() == 0 {
103
127
  panic!("No sample genes provided");
104
- } else if background_genes.len() == 0 {
128
+ } else if num_background_genes == 0 {
105
129
  panic!("No background genes provided");
106
130
  }
107
131
  let num_items_output = 100; // Number of top pathways to be specified in the output
108
132
 
109
- let conn = Connection::open(db)?;
110
- let stmt_result = conn.prepare(
133
+ let msigdbconn = Connection::open(msigdb)?;
134
+ let stmt_result = msigdbconn.prepare(
111
135
  &("select id from terms where parent_id='".to_owned()
112
136
  + &genesetgroup
113
137
  + "'"),
@@ -129,7 +153,7 @@ fn main() -> Result<()> {
129
153
  + &n.GO_id
130
154
  + &"'";
131
155
  //println!("sql_statement:{}", sql_statement);
132
- let mut gene_stmt = conn.prepare(&(sql_statement))?;
156
+ let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
133
157
  //println!("gene_stmt:{:?}", gene_stmt);
134
158
 
135
159
  let mut rows = gene_stmt.query([])?;
@@ -162,7 +186,7 @@ fn main() -> Result<()> {
162
186
  }
163
187
  let p_value = calculate_hypergeometric_p_value(
164
188
  &sample_genes,
165
- &background_genes,
189
+ num_background_genes,
166
190
  names,
167
191
  );
168
192
  if p_value.is_nan() == false {