@sjcrh/proteinpaint-rust 2.74.0 → 2.75.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/genesetORA.rs +36 -19
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.74.0",
|
|
2
|
+
"version": "2.75.0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"description": "Rust-based utilities for proteinpaint",
|
|
5
5
|
"main": "index.js",
|
|
@@ -38,5 +38,5 @@
|
|
|
38
38
|
"devDependencies": {
|
|
39
39
|
"tape": "^5.2.2"
|
|
40
40
|
},
|
|
41
|
-
"pp_release_tag": "v2.74.0"
|
|
41
|
+
"pp_release_tag": "v2.75.0"
|
|
42
42
|
}
|
package/src/genesetORA.rs
CHANGED
|
@@ -34,23 +34,37 @@ struct pathway_p_value {
|
|
|
34
34
|
pathway_name: String,
|
|
35
35
|
p_value_original: f64,
|
|
36
36
|
p_value_adjusted: Option<f64>,
|
|
37
|
+
gene_set_hits: String,
|
|
38
|
+
gene_set_size: usize,
|
|
37
39
|
}
|
|
38
40
|
|
|
39
41
|
fn calculate_hypergeometric_p_value(
|
|
40
42
|
sample_genes: &Vec<&str>,
|
|
41
43
|
num_background_genes: usize,
|
|
42
44
|
genes_in_pathway: Vec<pathway_genes>,
|
|
43
|
-
) -> f64 {
|
|
44
|
-
let matching_sample_genes_counts
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
45
|
+
) -> (f64, f64, String) {
|
|
46
|
+
let mut matching_sample_genes_counts = 0.0;
|
|
47
|
+
let mut gene_set_hits: String = "".to_string();
|
|
48
|
+
for gene in sample_genes {
|
|
49
|
+
for pathway in &genes_in_pathway {
|
|
50
|
+
if pathway.symbol == gene.to_string() {
|
|
51
|
+
matching_sample_genes_counts += 1.0;
|
|
52
|
+
gene_set_hits += &(gene.to_string() + &",");
|
|
53
|
+
}
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
|
|
57
|
+
if matching_sample_genes_counts > 0.0 {
|
|
58
|
+
gene_set_hits.pop();
|
|
59
|
+
}
|
|
60
|
+
|
|
61
|
+
//println!("sample_genes:{:?}", sample_genes);
|
|
62
|
+
//println!("genes_in_pathway:{:?}", genes_in_pathway);
|
|
49
63
|
//println!("k-1:{}", matching_sample_genes_counts - 1.0);
|
|
50
64
|
//println!("M:{}", genes_in_pathway.len() as f64);
|
|
51
65
|
//println!(
|
|
52
66
|
// "N-M:{}",
|
|
53
|
-
//
|
|
67
|
+
// num_background_genes as f64 - genes_in_pathway.len() as f64
|
|
54
68
|
//);
|
|
55
69
|
//println!("n:{}", sample_genes.len() as f64);
|
|
56
70
|
let p_value = r_mathlib::hypergeometric_cdf(
|
|
@@ -62,7 +76,7 @@ fn calculate_hypergeometric_p_value(
|
|
|
62
76
|
false,
|
|
63
77
|
);
|
|
64
78
|
//println!("p_value:{}", p_value);
|
|
65
|
-
p_value
|
|
79
|
+
(p_value, matching_sample_genes_counts, gene_set_hits)
|
|
66
80
|
}
|
|
67
81
|
|
|
68
82
|
fn main() -> Result<()> {
|
|
@@ -136,7 +150,6 @@ fn main() -> Result<()> {
|
|
|
136
150
|
+ &genesetgroup
|
|
137
151
|
+ "'"),
|
|
138
152
|
);
|
|
139
|
-
let mut iter = 0;
|
|
140
153
|
match stmt_result {
|
|
141
154
|
Ok(mut stmt) => {
|
|
142
155
|
#[allow(non_snake_case)]
|
|
@@ -144,7 +157,6 @@ fn main() -> Result<()> {
|
|
|
144
157
|
stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
|
|
145
158
|
#[allow(non_snake_case)]
|
|
146
159
|
for GO_term in GO_iter {
|
|
147
|
-
iter += 1;
|
|
148
160
|
match GO_term {
|
|
149
161
|
Ok(n) => {
|
|
150
162
|
//println!("GO term {:?}", n);
|
|
@@ -184,16 +196,20 @@ fn main() -> Result<()> {
|
|
|
184
196
|
}
|
|
185
197
|
}
|
|
186
198
|
}
|
|
187
|
-
let
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
199
|
+
let gene_set_size = names.len();
|
|
200
|
+
let (p_value, matches, gene_set_hits) =
|
|
201
|
+
calculate_hypergeometric_p_value(
|
|
202
|
+
&sample_genes,
|
|
203
|
+
num_background_genes,
|
|
204
|
+
names,
|
|
205
|
+
);
|
|
206
|
+
if matches >= 1.0 && p_value.is_nan() == false {
|
|
193
207
|
pathway_p_values.push(pathway_p_value {
|
|
194
208
|
pathway_name: n.GO_id,
|
|
195
209
|
p_value_original: p_value,
|
|
196
210
|
p_value_adjusted: None,
|
|
211
|
+
gene_set_hits: gene_set_hits,
|
|
212
|
+
gene_set_size: gene_set_size,
|
|
197
213
|
})
|
|
198
214
|
}
|
|
199
215
|
}
|
|
@@ -206,7 +222,7 @@ fn main() -> Result<()> {
|
|
|
206
222
|
Err(_) => panic!("sqlite database file not found"),
|
|
207
223
|
}
|
|
208
224
|
let output_string = "{\"num_pathways\":".to_string()
|
|
209
|
-
+ &
|
|
225
|
+
+ &pathway_p_values.len().to_string()
|
|
210
226
|
+ &",\"pathways\":"
|
|
211
227
|
+ &adjust_p_values(pathway_p_values, num_items_output)
|
|
212
228
|
+ &"}";
|
|
@@ -263,6 +279,8 @@ fn adjust_p_values(
|
|
|
263
279
|
pathway_name: original_p_values[i].pathway_name.clone(),
|
|
264
280
|
p_value_original: original_p_values[i].p_value_original,
|
|
265
281
|
p_value_adjusted: Some(adjusted_p_val),
|
|
282
|
+
gene_set_hits: original_p_values[i].gene_set_hits.clone(),
|
|
283
|
+
gene_set_size: original_p_values[i].gene_set_size,
|
|
266
284
|
});
|
|
267
285
|
}
|
|
268
286
|
adjusted_p_values.as_mut_slice().sort_by(|a, b| {
|
|
@@ -277,8 +295,7 @@ fn adjust_p_values(
|
|
|
277
295
|
|
|
278
296
|
let mut output_string = "[".to_string();
|
|
279
297
|
for i in 0..num_items_output {
|
|
280
|
-
|
|
281
|
-
output_string += &serde_json::to_string(&adjusted_p_values[j]).unwrap();
|
|
298
|
+
output_string += &serde_json::to_string(&adjusted_p_values[i]).unwrap();
|
|
282
299
|
if i < num_items_output - 1 {
|
|
283
300
|
output_string += &",".to_string();
|
|
284
301
|
}
|