@sjcrh/proteinpaint-rust 2.122.0 → 2.124.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +2 -2
- package/src/gdcmaf.rs +107 -107
- package/src/genesetORA.rs +22 -48
- package/src/readHDF5.rs +31 -70
- package/src/test.rs +3 -0
- package/src/test_examples.rs +380 -473
- package/src/validateHDF5.rs +0 -1
package/package.json
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
{
|
|
2
|
-
"version": "2.122.0",
|
|
2
|
+
"version": "2.124.0",
|
|
3
3
|
"name": "@sjcrh/proteinpaint-rust",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "Rust-based utilities for proteinpaint",
|
|
@@ -39,5 +39,5 @@
|
|
|
39
39
|
"devDependencies": {
|
|
40
40
|
"tape": "^5.2.2"
|
|
41
41
|
},
|
|
42
|
-
"pp_release_tag": "v2.122.0"
|
|
42
|
+
"pp_release_tag": "v2.124.0"
|
|
43
43
|
}
|
package/src/gdcmaf.rs
CHANGED
|
@@ -10,16 +10,16 @@
|
|
|
10
10
|
echo '{"host": "https://api.gdc.cancer.gov/data/","columns": ["Hugo_Symbol", "Entrez_Gene_Id", "Center", "NCBI_Build", "Chromosome", "Start_Position"], "fileIdLst": ["8b31d6d1-56f7-4aa8-b026-c64bafd531e7", "b429fcc1-2b59-4b4c-a472-fb27758f6249"]}'|./target/release/gdcmaf
|
|
11
11
|
*/
|
|
12
12
|
|
|
13
|
+
use flate2::Compression;
|
|
13
14
|
use flate2::read::GzDecoder;
|
|
14
15
|
use flate2::write::GzEncoder;
|
|
15
|
-
use flate2::Compression;
|
|
16
|
-
use serde_json::{Value};
|
|
17
16
|
use futures::StreamExt;
|
|
18
|
-
use
|
|
17
|
+
use serde_json::Value;
|
|
18
|
+
use std::io::{self, Read, Write};
|
|
19
|
+
use std::sync::{Arc, Mutex};
|
|
19
20
|
use std::time::Duration;
|
|
20
21
|
use tokio::io::{AsyncReadExt, BufReader};
|
|
21
22
|
use tokio::time::timeout;
|
|
22
|
-
use std::sync::{Arc, Mutex};
|
|
23
23
|
|
|
24
24
|
// Struct to hold error information
|
|
25
25
|
#[derive(serde::Serialize)]
|
|
@@ -28,14 +28,14 @@ struct ErrorEntry {
|
|
|
28
28
|
error: String,
|
|
29
29
|
}
|
|
30
30
|
|
|
31
|
-
fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32),(String,String)> {
|
|
31
|
+
fn select_maf_col(d: String, columns: &Vec<String>, url: &str) -> Result<(Vec<u8>, i32), (String, String)> {
|
|
32
32
|
let mut maf_str: String = String::new();
|
|
33
33
|
let mut header_indices: Vec<usize> = Vec::new();
|
|
34
34
|
let lines = d.trim_end().split("\n");
|
|
35
35
|
let mut mafrows = 0;
|
|
36
36
|
for line in lines {
|
|
37
37
|
if line.starts_with("#") {
|
|
38
|
-
continue
|
|
38
|
+
continue;
|
|
39
39
|
} else if line.contains("Hugo_Symbol") {
|
|
40
40
|
let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
|
|
41
41
|
for col in columns {
|
|
@@ -48,7 +48,7 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32
|
|
|
48
48
|
return Err((url.to_string(), error_msg));
|
|
49
49
|
}
|
|
50
50
|
}
|
|
51
|
-
}
|
|
51
|
+
}
|
|
52
52
|
if header_indices.is_empty() {
|
|
53
53
|
return Err((url.to_string(), "No matching columns found".to_string()));
|
|
54
54
|
}
|
|
@@ -57,19 +57,17 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32
|
|
|
57
57
|
let mut maf_out_lst: Vec<String> = Vec::new();
|
|
58
58
|
for x in header_indices.iter() {
|
|
59
59
|
maf_out_lst.push(maf_cont_lst[*x].to_string());
|
|
60
|
-
}
|
|
60
|
+
}
|
|
61
61
|
maf_str.push_str(maf_out_lst.join("\t").as_str());
|
|
62
62
|
maf_str.push_str("\n");
|
|
63
63
|
mafrows += 1;
|
|
64
64
|
}
|
|
65
|
-
}
|
|
66
|
-
Ok((maf_str.as_bytes().to_vec(),mafrows))
|
|
65
|
+
}
|
|
66
|
+
Ok((maf_str.as_bytes().to_vec(), mafrows))
|
|
67
67
|
}
|
|
68
68
|
|
|
69
|
-
|
|
70
|
-
|
|
71
69
|
#[tokio::main]
|
|
72
|
-
async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
70
|
+
async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
73
71
|
// Accept the piped input JSON from Node.js and assign it to the variable
|
|
74
72
|
// host: GDC host
|
|
75
73
|
// url: urls to download single maf files
|
|
@@ -84,23 +82,21 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
84
82
|
})
|
|
85
83
|
.await;
|
|
86
84
|
// Handle the result of the timeout operation
|
|
87
|
-
let
|
|
88
|
-
Ok(Ok(buffer)) => {
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
)) as Box<dyn std::error::Error>);
|
|
101
|
-
}
|
|
85
|
+
let file_id_lst_js: Value = match result {
|
|
86
|
+
Ok(Ok(buffer)) => match serde_json::from_str(&buffer) {
|
|
87
|
+
Ok(js) => js,
|
|
88
|
+
Err(e) => {
|
|
89
|
+
let stdin_error = ErrorEntry {
|
|
90
|
+
url: String::new(),
|
|
91
|
+
error: format!("JSON parsing error: {}", e),
|
|
92
|
+
};
|
|
93
|
+
writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
|
|
94
|
+
return Err(Box::new(std::io::Error::new(
|
|
95
|
+
std::io::ErrorKind::InvalidInput,
|
|
96
|
+
"JSON parsing error!",
|
|
97
|
+
)) as Box<dyn std::error::Error>);
|
|
102
98
|
}
|
|
103
|
-
}
|
|
99
|
+
},
|
|
104
100
|
Ok(Err(_e)) => {
|
|
105
101
|
let stdin_error = ErrorEntry {
|
|
106
102
|
url: String::new(),
|
|
@@ -128,22 +124,30 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
128
124
|
};
|
|
129
125
|
|
|
130
126
|
// reading the input from PP
|
|
131
|
-
let host = file_id_lst_js
|
|
127
|
+
let host = file_id_lst_js
|
|
128
|
+
.get("host")
|
|
129
|
+
.expect("Host was not provided")
|
|
130
|
+
.as_str()
|
|
131
|
+
.expect("Host is not a string");
|
|
132
132
|
let mut url: Vec<String> = Vec::new();
|
|
133
|
-
let file_id_lst = file_id_lst_js
|
|
133
|
+
let file_id_lst = file_id_lst_js
|
|
134
|
+
.get("fileIdLst")
|
|
135
|
+
.expect("File ID list is missed!")
|
|
136
|
+
.as_array()
|
|
137
|
+
.expect("File ID list is not an array");
|
|
134
138
|
for v in file_id_lst {
|
|
135
139
|
//url.push(Path::new(&host).join(&v.as_str().unwrap()).display().to_string());
|
|
136
|
-
url.push(format!("{}/{}",host.trim_end_matches('/'), v.as_str().unwrap()));
|
|
137
|
-
}
|
|
140
|
+
url.push(format!("{}/{}", host.trim_end_matches('/'), v.as_str().unwrap()));
|
|
141
|
+
}
|
|
138
142
|
|
|
139
143
|
// read columns as array from input json and convert data type from Vec<Value> to Vec<String>
|
|
140
|
-
let maf_col:Vec<String>;
|
|
144
|
+
let maf_col: Vec<String>;
|
|
141
145
|
if let Some(maf_col_value) = file_id_lst_js.get("columns") {
|
|
142
146
|
//convert Vec<Value> to Vec<String>
|
|
143
147
|
if let Some(maf_col_array) = maf_col_value.as_array() {
|
|
144
148
|
maf_col = maf_col_array
|
|
145
149
|
.iter()
|
|
146
|
-
.map(|v| v.to_string().replace("\"",""))
|
|
150
|
+
.map(|v| v.to_string().replace("\"", ""))
|
|
147
151
|
.collect::<Vec<String>>();
|
|
148
152
|
} else {
|
|
149
153
|
let column_error = ErrorEntry {
|
|
@@ -165,62 +169,58 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
165
169
|
let column_error_js = serde_json::to_string(&column_error).unwrap();
|
|
166
170
|
writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
|
|
167
171
|
return Err(Box::new(std::io::Error::new(
|
|
168
|
-
|
|
169
|
-
|
|
172
|
+
std::io::ErrorKind::InvalidInput,
|
|
173
|
+
"Columns was not selected",
|
|
170
174
|
)) as Box<dyn std::error::Error>);
|
|
171
175
|
};
|
|
172
|
-
|
|
176
|
+
|
|
173
177
|
//downloading maf files parallelly and merge them into single maf file
|
|
174
|
-
let download_futures = futures::stream::iter(
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
Ok(
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
let text = String::from_utf8_lossy(&decompressed_content).to_string();
|
|
198
|
-
return Ok((url.clone(),text))
|
|
199
|
-
}
|
|
200
|
-
Err(e) => {
|
|
201
|
-
let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
|
|
202
|
-
Err((url.clone(), error_msg))
|
|
203
|
-
}
|
|
204
|
-
}
|
|
178
|
+
let download_futures = futures::stream::iter(url.into_iter().map(|url| {
|
|
179
|
+
async move {
|
|
180
|
+
let client = reqwest::Client::builder()
|
|
181
|
+
.timeout(Duration::from_secs(60)) // 60-second timeout per request
|
|
182
|
+
.connect_timeout(Duration::from_secs(15))
|
|
183
|
+
.build()
|
|
184
|
+
.map_err(|_e| {
|
|
185
|
+
let client_error = ErrorEntry {
|
|
186
|
+
url: url.clone(),
|
|
187
|
+
error: "Client build error".to_string(),
|
|
188
|
+
};
|
|
189
|
+
let client_error_js = serde_json::to_string(&client_error).unwrap();
|
|
190
|
+
writeln!(io::stderr(), "{}", client_error_js).expect("Failed to build reqwest client!");
|
|
191
|
+
});
|
|
192
|
+
match client.unwrap().get(&url).send().await {
|
|
193
|
+
Ok(resp) if resp.status().is_success() => match resp.bytes().await {
|
|
194
|
+
Ok(content) => {
|
|
195
|
+
let mut decoder = GzDecoder::new(&content[..]);
|
|
196
|
+
let mut decompressed_content = Vec::new();
|
|
197
|
+
match decoder.read_to_end(&mut decompressed_content) {
|
|
198
|
+
Ok(_) => {
|
|
199
|
+
let text = String::from_utf8_lossy(&decompressed_content).to_string();
|
|
200
|
+
return Ok((url.clone(), text));
|
|
205
201
|
}
|
|
206
202
|
Err(e) => {
|
|
207
|
-
let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
|
|
203
|
+
let error_msg = format!("Failed to decompress downloaded MAF file: {}", e);
|
|
208
204
|
Err((url.clone(), error_msg))
|
|
209
205
|
}
|
|
210
206
|
}
|
|
211
207
|
}
|
|
212
|
-
Ok(resp) => {
|
|
213
|
-
let error_msg = format!("HTTP error: {}", resp.status());
|
|
214
|
-
Err((url.clone(), error_msg))
|
|
215
|
-
}
|
|
216
208
|
Err(e) => {
|
|
217
|
-
let error_msg = format!("
|
|
209
|
+
let error_msg = format!("Failed to decompress downloaded MAF file: {}", e);
|
|
218
210
|
Err((url.clone(), error_msg))
|
|
219
211
|
}
|
|
212
|
+
},
|
|
213
|
+
Ok(resp) => {
|
|
214
|
+
let error_msg = format!("HTTP error: {}", resp.status());
|
|
215
|
+
Err((url.clone(), error_msg))
|
|
216
|
+
}
|
|
217
|
+
Err(e) => {
|
|
218
|
+
let error_msg = format!("Server request failed: {}", e);
|
|
219
|
+
Err((url.clone(), error_msg))
|
|
220
220
|
}
|
|
221
221
|
}
|
|
222
|
-
}
|
|
223
|
-
);
|
|
222
|
+
}
|
|
223
|
+
}));
|
|
224
224
|
|
|
225
225
|
// binary output
|
|
226
226
|
let encoder = Arc::new(Mutex::new(GzEncoder::new(io::stdout(), Compression::default())));
|
|
@@ -228,57 +228,57 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
|
|
|
228
228
|
// Write the header
|
|
229
229
|
{
|
|
230
230
|
let mut encoder_guard = encoder.lock().unwrap(); // Lock the Mutex to get access to the inner GzEncoder
|
|
231
|
-
encoder_guard
|
|
231
|
+
encoder_guard
|
|
232
|
+
.write_all(&maf_col.join("\t").as_bytes().to_vec())
|
|
233
|
+
.expect("Failed to write header");
|
|
232
234
|
encoder_guard.write_all(b"\n").expect("Failed to write newline");
|
|
233
235
|
}
|
|
234
|
-
|
|
235
|
-
download_futures
|
|
236
|
-
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
236
|
+
|
|
237
|
+
download_futures
|
|
238
|
+
.buffer_unordered(20)
|
|
239
|
+
.for_each(|result| {
|
|
240
|
+
let encoder = Arc::clone(&encoder); // Clone the Arc for each task
|
|
241
|
+
let maf_col_cp = maf_col.clone();
|
|
242
|
+
async move {
|
|
243
|
+
match result {
|
|
244
|
+
Ok((url, content)) => match select_maf_col(content, &maf_col_cp, &url) {
|
|
245
|
+
Ok((maf_bit, mafrows)) => {
|
|
243
246
|
if mafrows > 0 {
|
|
244
247
|
let mut encoder_guard = encoder.lock().unwrap();
|
|
245
248
|
encoder_guard.write_all(&maf_bit).expect("Failed to write file");
|
|
246
249
|
} else {
|
|
247
250
|
let error = ErrorEntry {
|
|
248
251
|
url: url.clone(),
|
|
249
|
-
error: "Empty maf file".to_string(),
|
|
252
|
+
error: "Empty MAF file".to_string(),
|
|
250
253
|
};
|
|
251
254
|
let error_js = serde_json::to_string(&error).unwrap();
|
|
252
255
|
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
253
256
|
}
|
|
254
257
|
}
|
|
255
|
-
Err((url,error)) => {
|
|
256
|
-
let error = ErrorEntry {
|
|
257
|
-
url,
|
|
258
|
-
error,
|
|
259
|
-
};
|
|
258
|
+
Err((url, error)) => {
|
|
259
|
+
let error = ErrorEntry { url, error };
|
|
260
260
|
let error_js = serde_json::to_string(&error).unwrap();
|
|
261
261
|
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
262
262
|
}
|
|
263
|
+
},
|
|
264
|
+
Err((url, error)) => {
|
|
265
|
+
let error = ErrorEntry { url, error };
|
|
266
|
+
let error_js = serde_json::to_string(&error).unwrap();
|
|
267
|
+
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
263
268
|
}
|
|
264
|
-
}
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
};
|
|
270
|
-
let error_js = serde_json::to_string(&error).unwrap();
|
|
271
|
-
writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
|
|
272
|
-
}
|
|
273
|
-
};
|
|
274
|
-
}
|
|
275
|
-
}).await;
|
|
276
|
-
|
|
269
|
+
};
|
|
270
|
+
}
|
|
271
|
+
})
|
|
272
|
+
.await;
|
|
273
|
+
|
|
277
274
|
// Finalize output
|
|
278
275
|
|
|
279
276
|
// Replace the value inside the Mutex with a dummy value (e.g., None)
|
|
280
277
|
let mut encoder_guard = encoder.lock().unwrap();
|
|
281
|
-
let encoder = std::mem::replace(
|
|
278
|
+
let encoder = std::mem::replace(
|
|
279
|
+
&mut *encoder_guard,
|
|
280
|
+
GzEncoder::new(io::stdout(), Compression::default()),
|
|
281
|
+
);
|
|
282
282
|
// Finalize the encoder
|
|
283
283
|
encoder.finish().expect("Maf file output error!");
|
|
284
284
|
|
package/src/genesetORA.rs
CHANGED
|
@@ -9,7 +9,7 @@ use serde_json;
|
|
|
9
9
|
use std::cmp::Ordering;
|
|
10
10
|
use std::collections::HashSet;
|
|
11
11
|
use std::io;
|
|
12
|
-
use std::time::Instant;
|
|
12
|
+
//use std::time::Instant;
|
|
13
13
|
|
|
14
14
|
#[allow(non_camel_case_types)]
|
|
15
15
|
#[allow(non_snake_case)]
|
|
@@ -37,10 +37,7 @@ fn calculate_hypergeometric_p_value(
|
|
|
37
37
|
) -> (f64, f64, String) {
|
|
38
38
|
let mut gene_set_hits: String = "".to_string();
|
|
39
39
|
|
|
40
|
-
let gene_intersections: HashSet<String> = genes_in_pathway
|
|
41
|
-
.intersection(sample_genes)
|
|
42
|
-
.cloned()
|
|
43
|
-
.collect();
|
|
40
|
+
let gene_intersections: HashSet<String> = genes_in_pathway.intersection(sample_genes).cloned().collect();
|
|
44
41
|
for gene in &gene_intersections {
|
|
45
42
|
gene_set_hits += &(gene.to_string() + &",");
|
|
46
43
|
}
|
|
@@ -78,7 +75,7 @@ fn main() -> Result<()> {
|
|
|
78
75
|
let input_json = json::parse(&input);
|
|
79
76
|
match input_json {
|
|
80
77
|
Ok(json_string) => {
|
|
81
|
-
let run_time = Instant::now();
|
|
78
|
+
//let run_time = Instant::now();
|
|
82
79
|
let msigdb_input: &JsonValue = &json_string["msigdb"];
|
|
83
80
|
let msigdb;
|
|
84
81
|
match msigdb_input.as_str() {
|
|
@@ -92,8 +89,7 @@ fn main() -> Result<()> {
|
|
|
92
89
|
None => panic!("genesetgroup is missing"),
|
|
93
90
|
}
|
|
94
91
|
let sample_genes_input: &JsonValue = &json_string["sample_genes"];
|
|
95
|
-
let sample_genes: Vec<&str> =
|
|
96
|
-
sample_genes_input.as_str().unwrap().split(",").collect();
|
|
92
|
+
let sample_genes: Vec<&str> = sample_genes_input.as_str().unwrap().split(",").collect();
|
|
97
93
|
let mut pathway_p_values: Vec<pathway_p_value> = Vec::with_capacity(10000);
|
|
98
94
|
|
|
99
95
|
let genedb_input: &JsonValue = &json_string["genedb"];
|
|
@@ -103,10 +99,8 @@ fn main() -> Result<()> {
|
|
|
103
99
|
None => panic!("genedb file path is missing"),
|
|
104
100
|
}
|
|
105
101
|
|
|
106
|
-
let filter_non_coding_genes_input: &JsonValue =
|
|
107
|
-
|
|
108
|
-
let filter_non_coding_genes: bool =
|
|
109
|
-
filter_non_coding_genes_input.as_bool().unwrap();
|
|
102
|
+
let filter_non_coding_genes_input: &JsonValue = &json_string["filter_non_coding_genes"];
|
|
103
|
+
let filter_non_coding_genes: bool = filter_non_coding_genes_input.as_bool().unwrap();
|
|
110
104
|
|
|
111
105
|
let genedbconn = Connection::open(genedb)?;
|
|
112
106
|
let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
|
|
@@ -120,8 +114,7 @@ fn main() -> Result<()> {
|
|
|
120
114
|
//println!("coding_gene:{:?}", coding_gene);
|
|
121
115
|
for sample_gene in &sample_genes {
|
|
122
116
|
let code_gene: String = coding_gene.get(0).unwrap();
|
|
123
|
-
if filter_non_coding_genes == true && code_gene == *sample_gene
|
|
124
|
-
{
|
|
117
|
+
if filter_non_coding_genes == true && code_gene == *sample_gene {
|
|
125
118
|
sample_coding_genes.insert(code_gene);
|
|
126
119
|
} else if filter_non_coding_genes == false {
|
|
127
120
|
sample_coding_genes.insert(code_gene);
|
|
@@ -160,25 +153,19 @@ fn main() -> Result<()> {
|
|
|
160
153
|
let num_items_output = 100; // Number of top pathways to be specified in the output
|
|
161
154
|
|
|
162
155
|
let msigdbconn = Connection::open(msigdb)?;
|
|
163
|
-
let stmt_result = msigdbconn
|
|
164
|
-
&("select id from terms where parent_id='".to_owned()
|
|
165
|
-
+ &genesetgroup
|
|
166
|
-
+ "'"),
|
|
167
|
-
);
|
|
156
|
+
let stmt_result = msigdbconn
|
|
157
|
+
.prepare(&("select id from terms where parent_id='".to_owned() + &genesetgroup + "'"));
|
|
168
158
|
match stmt_result {
|
|
169
159
|
Ok(mut stmt) => {
|
|
170
160
|
#[allow(non_snake_case)]
|
|
171
|
-
let GO_iter =
|
|
172
|
-
stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
|
|
161
|
+
let GO_iter = stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
|
|
173
162
|
#[allow(non_snake_case)]
|
|
174
163
|
for GO_term in GO_iter {
|
|
175
164
|
match GO_term {
|
|
176
165
|
Ok(n) => {
|
|
177
166
|
//println!("GO term {:?}", n);
|
|
178
167
|
let sql_statement =
|
|
179
|
-
"select genes from term2genes where id='".to_owned()
|
|
180
|
-
+ &n.GO_id
|
|
181
|
-
+ &"'";
|
|
168
|
+
"select genes from term2genes where id='".to_owned() + &n.GO_id + &"'";
|
|
182
169
|
//println!("sql_statement:{}", sql_statement);
|
|
183
170
|
let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
|
|
184
171
|
//println!("gene_stmt:{:?}", gene_stmt);
|
|
@@ -191,26 +178,20 @@ fn main() -> Result<()> {
|
|
|
191
178
|
match input_gene_json {
|
|
192
179
|
Ok(json_genes) => {
|
|
193
180
|
for json_iter in 0..json_genes.len() {
|
|
194
|
-
names.insert(
|
|
195
|
-
json_genes[json_iter]["symbol"]
|
|
196
|
-
.to_string(),
|
|
197
|
-
);
|
|
181
|
+
names.insert(json_genes[json_iter]["symbol"].to_string());
|
|
198
182
|
}
|
|
199
183
|
}
|
|
200
184
|
Err(_) => {
|
|
201
|
-
panic!(
|
|
202
|
-
"Symbol, ensg, enstCanonical structure is missing!"
|
|
203
|
-
)
|
|
185
|
+
panic!("Symbol, ensg, enstCanonical structure is missing!")
|
|
204
186
|
}
|
|
205
187
|
}
|
|
206
188
|
}
|
|
207
189
|
let gene_set_size = names.len();
|
|
208
|
-
let (p_value, matches, gene_set_hits) =
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
);
|
|
190
|
+
let (p_value, matches, gene_set_hits) = calculate_hypergeometric_p_value(
|
|
191
|
+
&sample_coding_genes,
|
|
192
|
+
num_background_genes,
|
|
193
|
+
names,
|
|
194
|
+
);
|
|
214
195
|
if matches >= 1.0 && p_value.is_nan() == false {
|
|
215
196
|
pathway_p_values.push(pathway_p_value {
|
|
216
197
|
pathway_name: n.GO_id,
|
|
@@ -234,11 +215,8 @@ fn main() -> Result<()> {
|
|
|
234
215
|
+ &",\"pathways\":"
|
|
235
216
|
+ &adjust_p_values(pathway_p_values, num_items_output)
|
|
236
217
|
+ &"}";
|
|
237
|
-
println!("
|
|
238
|
-
println!(
|
|
239
|
-
"Time for calculating gene overrepresentation:{:?}",
|
|
240
|
-
run_time.elapsed()
|
|
241
|
-
);
|
|
218
|
+
println!("{}", output_string);
|
|
219
|
+
//println!("Time for calculating gene overrepresentation:{:?}", run_time.elapsed());
|
|
242
220
|
}
|
|
243
221
|
Err(error) => println!("Incorrect json:{}", error),
|
|
244
222
|
}
|
|
@@ -248,10 +226,7 @@ fn main() -> Result<()> {
|
|
|
248
226
|
Ok(())
|
|
249
227
|
}
|
|
250
228
|
|
|
251
|
-
fn adjust_p_values(
|
|
252
|
-
mut original_p_values: Vec<pathway_p_value>,
|
|
253
|
-
mut num_items_output: usize,
|
|
254
|
-
) -> String {
|
|
229
|
+
fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>, mut num_items_output: usize) -> String {
|
|
255
230
|
// Sorting p-values in ascending order
|
|
256
231
|
original_p_values.as_mut_slice().sort_by(|a, b| {
|
|
257
232
|
(a.p_value_original)
|
|
@@ -266,8 +241,7 @@ fn adjust_p_values(
|
|
|
266
241
|
let i = original_p_values.len() - j - 1;
|
|
267
242
|
|
|
268
243
|
//println!("p_val:{}", p_val);
|
|
269
|
-
let mut adjusted_p_val: f64 =
|
|
270
|
-
original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
|
|
244
|
+
let mut adjusted_p_val: f64 = original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
|
|
271
245
|
if adjusted_p_val > 1.0 {
|
|
272
246
|
// p_value should NEVER be greater than 1
|
|
273
247
|
adjusted_p_val = 1.0;
|