@sjcrh/proteinpaint-rust 2.73.0 → 2.74.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/genesetORA.rs +41 -17
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.73.0",
|
|
2
|
+
"version": "2.74.0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"description": "Rust-based utilities for proteinpaint",
|
|
5
5
|
"main": "index.js",
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"tape": "^5.2.2"
|
|
40
40
|
},
|
|
41
|
-
"pp_release_tag": "v2.73.0"
|
|
41
|
+
"pp_release_tag": "v2.74.0"
|
|
42
42
|
}
|
package/src/genesetORA.rs
CHANGED
|
@@ -38,7 +38,7 @@ struct pathway_p_value {
|
|
|
38
38
|
|
|
39
39
|
fn calculate_hypergeometric_p_value(
|
|
40
40
|
sample_genes: &Vec<&str>,
|
|
41
|
-
|
|
41
|
+
num_background_genes: usize,
|
|
42
42
|
genes_in_pathway: Vec<pathway_genes>,
|
|
43
43
|
) -> f64 {
|
|
44
44
|
let matching_sample_genes_counts: f64 = sample_genes
|
|
@@ -56,7 +56,7 @@ fn calculate_hypergeometric_p_value(
|
|
|
56
56
|
let p_value = r_mathlib::hypergeometric_cdf(
|
|
57
57
|
matching_sample_genes_counts - 1.0,
|
|
58
58
|
genes_in_pathway.len() as f64,
|
|
59
|
-
|
|
59
|
+
num_background_genes as f64 - genes_in_pathway.len() as f64,
|
|
60
60
|
sample_genes.len() as f64,
|
|
61
61
|
false,
|
|
62
62
|
false,
|
|
@@ -74,11 +74,11 @@ fn main() -> Result<()> {
|
|
|
74
74
|
match input_json {
|
|
75
75
|
Ok(json_string) => {
|
|
76
76
|
let run_time = Instant::now();
|
|
77
|
-
let
|
|
78
|
-
let
|
|
79
|
-
match
|
|
80
|
-
Some(db_string) =>
|
|
81
|
-
None => panic!("
|
|
77
|
+
let msigdb_input: &JsonValue = &json_string["msigdb"];
|
|
78
|
+
let msigdb;
|
|
79
|
+
match msigdb_input.as_str() {
|
|
80
|
+
Some(db_string) => msigdb = db_string.to_string(),
|
|
81
|
+
None => panic!("msigdb file path is missing"),
|
|
82
82
|
}
|
|
83
83
|
let genesetgroup;
|
|
84
84
|
let genesetgroup_input: &JsonValue = &json_string["gene_set_group"];
|
|
@@ -91,23 +91,47 @@ fn main() -> Result<()> {
|
|
|
91
91
|
sample_genes_input.as_str().unwrap().split(",").collect();
|
|
92
92
|
let mut pathway_p_values: Vec<pathway_p_value> = Vec::with_capacity(10000);
|
|
93
93
|
let background_genes_input: &JsonValue = &json_string["background_genes"];
|
|
94
|
-
let
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
94
|
+
let mut num_background_genes: usize = 0;
|
|
95
|
+
match background_genes_input.as_str() {
|
|
96
|
+
Some(x) => {
|
|
97
|
+
let background_genes_str: Vec<&str> = x.split(",").collect(); // Background genes is defined for e.g in case of DE analysis
|
|
98
|
+
num_background_genes = background_genes_str.len();
|
|
99
|
+
}
|
|
100
|
+
None => {
|
|
101
|
+
// Background genes not present for e.g. in hierarchial clustering
|
|
102
|
+
// Get background genes from the gene database
|
|
103
|
+
let genedb_input: &JsonValue = &json_string["genedb"];
|
|
104
|
+
let genedb;
|
|
105
|
+
match genedb_input.as_str() {
|
|
106
|
+
Some(gene_db_string) => genedb = gene_db_string.to_string(),
|
|
107
|
+
None => panic!("genedb file path is missing"),
|
|
108
|
+
}
|
|
109
|
+
|
|
110
|
+
let genedbconn = Connection::open(genedb)?;
|
|
111
|
+
let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
|
|
112
|
+
match genedb_result {
|
|
113
|
+
Ok(mut x) => {
|
|
114
|
+
let mut genes = x.query([])?;
|
|
115
|
+
while let Some(_gene) = genes.next()? {
|
|
116
|
+
num_background_genes += 1;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
Err(_) => {}
|
|
120
|
+
}
|
|
121
|
+
}
|
|
122
|
+
}
|
|
99
123
|
//println!("sample_genes:{:?}", sample_genes);
|
|
100
124
|
//println!("background_genes:{:?}", background_genes);
|
|
101
125
|
|
|
102
126
|
if sample_genes.len() == 0 {
|
|
103
127
|
panic!("No sample genes provided");
|
|
104
|
-
} else if
|
|
128
|
+
} else if num_background_genes == 0 {
|
|
105
129
|
panic!("No background genes provided");
|
|
106
130
|
}
|
|
107
131
|
let num_items_output = 100; // Number of top pathways to be specified in the output
|
|
108
132
|
|
|
109
|
-
let
|
|
110
|
-
let stmt_result =
|
|
133
|
+
let msigdbconn = Connection::open(msigdb)?;
|
|
134
|
+
let stmt_result = msigdbconn.prepare(
|
|
111
135
|
&("select id from terms where parent_id='".to_owned()
|
|
112
136
|
+ &genesetgroup
|
|
113
137
|
+ "'"),
|
|
@@ -129,7 +153,7 @@ fn main() -> Result<()> {
|
|
|
129
153
|
+ &n.GO_id
|
|
130
154
|
+ &"'";
|
|
131
155
|
//println!("sql_statement:{}", sql_statement);
|
|
132
|
-
let mut gene_stmt =
|
|
156
|
+
let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
|
|
133
157
|
//println!("gene_stmt:{:?}", gene_stmt);
|
|
134
158
|
|
|
135
159
|
let mut rows = gene_stmt.query([])?;
|
|
@@ -162,7 +186,7 @@ fn main() -> Result<()> {
|
|
|
162
186
|
}
|
|
163
187
|
let p_value = calculate_hypergeometric_p_value(
|
|
164
188
|
&sample_genes,
|
|
165
|
-
|
|
189
|
+
num_background_genes,
|
|
166
190
|
names,
|
|
167
191
|
);
|
|
168
192
|
if p_value.is_nan() == false {
|