@sjcrh/proteinpaint-rust 2.122.0 → 2.124.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.122.0",
2
+ "version": "2.124.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
@@ -39,5 +39,5 @@
39
39
  "devDependencies": {
40
40
  "tape": "^5.2.2"
41
41
  },
42
- "pp_release_tag": "v2.122.0"
42
+ "pp_release_tag": "v2.124.0"
43
43
  }
package/src/gdcmaf.rs CHANGED
@@ -10,16 +10,16 @@
10
10
  echo '{"host": "https://api.gdc.cancer.gov/data/","columns": ["Hugo_Symbol", "Entrez_Gene_Id", "Center", "NCBI_Build", "Chromosome", "Start_Position"], "fileIdLst": ["8b31d6d1-56f7-4aa8-b026-c64bafd531e7", "b429fcc1-2b59-4b4c-a472-fb27758f6249"]}'|./target/release/gdcmaf
11
11
  */
12
12
 
13
+ use flate2::Compression;
13
14
  use flate2::read::GzDecoder;
14
15
  use flate2::write::GzEncoder;
15
- use flate2::Compression;
16
- use serde_json::{Value};
17
16
  use futures::StreamExt;
18
- use std::io::{self,Read,Write};
17
+ use serde_json::Value;
18
+ use std::io::{self, Read, Write};
19
+ use std::sync::{Arc, Mutex};
19
20
  use std::time::Duration;
20
21
  use tokio::io::{AsyncReadExt, BufReader};
21
22
  use tokio::time::timeout;
22
- use std::sync::{Arc, Mutex};
23
23
 
24
24
  // Struct to hold error information
25
25
  #[derive(serde::Serialize)]
@@ -28,14 +28,14 @@ struct ErrorEntry {
28
28
  error: String,
29
29
  }
30
30
 
31
- fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32), (String, String)> {
31
+ fn select_maf_col(d: String, columns: &Vec<String>, url: &str) -> Result<(Vec<u8>, i32), (String, String)> {
32
32
  let mut maf_str: String = String::new();
33
33
  let mut header_indices: Vec<usize> = Vec::new();
34
34
  let lines = d.trim_end().split("\n");
35
35
  let mut mafrows = 0;
36
36
  for line in lines {
37
37
  if line.starts_with("#") {
38
- continue
38
+ continue;
39
39
  } else if line.contains("Hugo_Symbol") {
40
40
  let header: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
41
41
  for col in columns {
@@ -48,7 +48,7 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32
48
48
  return Err((url.to_string(), error_msg));
49
49
  }
50
50
  }
51
- };
51
+ }
52
52
  if header_indices.is_empty() {
53
53
  return Err((url.to_string(), "No matching columns found".to_string()));
54
54
  }
@@ -57,19 +57,17 @@ fn select_maf_col(d:String,columns:&Vec<String>,url:&str) -> Result<(Vec<u8>,i32
57
57
  let mut maf_out_lst: Vec<String> = Vec::new();
58
58
  for x in header_indices.iter() {
59
59
  maf_out_lst.push(maf_cont_lst[*x].to_string());
60
- };
60
+ }
61
61
  maf_str.push_str(maf_out_lst.join("\t").as_str());
62
62
  maf_str.push_str("\n");
63
63
  mafrows += 1;
64
64
  }
65
- };
66
- Ok((maf_str.as_bytes().to_vec(),mafrows))
65
+ }
66
+ Ok((maf_str.as_bytes().to_vec(), mafrows))
67
67
  }
68
68
 
69
-
70
-
71
69
  #[tokio::main]
72
- async fn main() -> Result<(),Box<dyn std::error::Error>> {
70
+ async fn main() -> Result<(), Box<dyn std::error::Error>> {
73
71
  // Accepting the piped input json from jodejs and assign to the variable
74
72
  // host: GDC host
75
73
  // url: urls to download single maf files
@@ -84,23 +82,21 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
84
82
  })
85
83
  .await;
86
84
  // Handle the result of the timeout operation
87
- let file_id_lst_js: Value = match result {
88
- Ok(Ok(buffer)) => {
89
- match serde_json::from_str(&buffer) {
90
- Ok(js) => js,
91
- Err(e) => {
92
- let stdin_error = ErrorEntry {
93
- url: String::new(),
94
- error: format!("JSON parsing error: {}", e),
95
- };
96
- writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
97
- return Err(Box::new(std::io::Error::new(
98
- std::io::ErrorKind::InvalidInput,
99
- "JSON parsing error!",
100
- )) as Box<dyn std::error::Error>);
101
- }
85
+ let file_id_lst_js: Value = match result {
86
+ Ok(Ok(buffer)) => match serde_json::from_str(&buffer) {
87
+ Ok(js) => js,
88
+ Err(e) => {
89
+ let stdin_error = ErrorEntry {
90
+ url: String::new(),
91
+ error: format!("JSON parsing error: {}", e),
92
+ };
93
+ writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
94
+ return Err(Box::new(std::io::Error::new(
95
+ std::io::ErrorKind::InvalidInput,
96
+ "JSON parsing error!",
97
+ )) as Box<dyn std::error::Error>);
102
98
  }
103
- }
99
+ },
104
100
  Ok(Err(_e)) => {
105
101
  let stdin_error = ErrorEntry {
106
102
  url: String::new(),
@@ -128,22 +124,30 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
128
124
  };
129
125
 
130
126
  // reading the input from PP
131
- let host = file_id_lst_js.get("host").expect("Host was not provided").as_str().expect("Host is not a string");
127
+ let host = file_id_lst_js
128
+ .get("host")
129
+ .expect("Host was not provided")
130
+ .as_str()
131
+ .expect("Host is not a string");
132
132
  let mut url: Vec<String> = Vec::new();
133
- let file_id_lst = file_id_lst_js.get("fileIdLst").expect("File ID list is missed!").as_array().expect("File ID list is not an array");
133
+ let file_id_lst = file_id_lst_js
134
+ .get("fileIdLst")
135
+ .expect("File ID list is missed!")
136
+ .as_array()
137
+ .expect("File ID list is not an array");
134
138
  for v in file_id_lst {
135
139
  //url.push(Path::new(&host).join(&v.as_str().unwrap()).display().to_string());
136
- url.push(format!("{}/{}",host.trim_end_matches('/'), v.as_str().unwrap()));
137
- };
140
+ url.push(format!("{}/{}", host.trim_end_matches('/'), v.as_str().unwrap()));
141
+ }
138
142
 
139
143
  // read columns as array from input json and convert data type from Vec<Value> to Vec<String>
140
- let maf_col:Vec<String>;
144
+ let maf_col: Vec<String>;
141
145
  if let Some(maf_col_value) = file_id_lst_js.get("columns") {
142
146
  //convert Vec<Value> to Vec<String>
143
147
  if let Some(maf_col_array) = maf_col_value.as_array() {
144
148
  maf_col = maf_col_array
145
149
  .iter()
146
- .map(|v| v.to_string().replace("\"",""))
150
+ .map(|v| v.to_string().replace("\"", ""))
147
151
  .collect::<Vec<String>>();
148
152
  } else {
149
153
  let column_error = ErrorEntry {
@@ -165,62 +169,58 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
165
169
  let column_error_js = serde_json::to_string(&column_error).unwrap();
166
170
  writeln!(io::stderr(), "{}", column_error_js).expect("Failed to output stderr!");
167
171
  return Err(Box::new(std::io::Error::new(
168
- std::io::ErrorKind::InvalidInput,
169
- "Columns was not selected",
172
+ std::io::ErrorKind::InvalidInput,
173
+ "Columns was not selected",
170
174
  )) as Box<dyn std::error::Error>);
171
175
  };
172
-
176
+
173
177
  //downloading maf files parallelly and merge them into single maf file
174
- let download_futures = futures::stream::iter(
175
- url.into_iter().map(|url|{
176
- async move {
177
- let client = reqwest::Client::builder()
178
- .timeout(Duration::from_secs(60)) // 60-second timeout per request
179
- .connect_timeout(Duration::from_secs(15))
180
- .build()
181
- .map_err(|_e| {
182
- let client_error = ErrorEntry{
183
- url: url.clone(),
184
- error: "Client build error".to_string(),
185
- };
186
- let client_error_js = serde_json::to_string(&client_error).unwrap();
187
- writeln!(io::stderr(), "{}", client_error_js).expect("Failed to build reqwest client!");
188
- });
189
- match client.unwrap().get(&url).send().await {
190
- Ok(resp) if resp.status().is_success() => {
191
- match resp.bytes().await {
192
- Ok(content) => {
193
- let mut decoder = GzDecoder::new(&content[..]);
194
- let mut decompressed_content = Vec::new();
195
- match decoder.read_to_end(&mut decompressed_content) {
196
- Ok(_) => {
197
- let text = String::from_utf8_lossy(&decompressed_content).to_string();
198
- return Ok((url.clone(),text))
199
- }
200
- Err(e) => {
201
- let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
202
- Err((url.clone(), error_msg))
203
- }
204
- }
178
+ let download_futures = futures::stream::iter(url.into_iter().map(|url| {
179
+ async move {
180
+ let client = reqwest::Client::builder()
181
+ .timeout(Duration::from_secs(60)) // 60-second timeout per request
182
+ .connect_timeout(Duration::from_secs(15))
183
+ .build()
184
+ .map_err(|_e| {
185
+ let client_error = ErrorEntry {
186
+ url: url.clone(),
187
+ error: "Client build error".to_string(),
188
+ };
189
+ let client_error_js = serde_json::to_string(&client_error).unwrap();
190
+ writeln!(io::stderr(), "{}", client_error_js).expect("Failed to build reqwest client!");
191
+ });
192
+ match client.unwrap().get(&url).send().await {
193
+ Ok(resp) if resp.status().is_success() => match resp.bytes().await {
194
+ Ok(content) => {
195
+ let mut decoder = GzDecoder::new(&content[..]);
196
+ let mut decompressed_content = Vec::new();
197
+ match decoder.read_to_end(&mut decompressed_content) {
198
+ Ok(_) => {
199
+ let text = String::from_utf8_lossy(&decompressed_content).to_string();
200
+ return Ok((url.clone(), text));
205
201
  }
206
202
  Err(e) => {
207
- let error_msg = format!("Failed to decompress downloaded maf file: {}", e);
203
+ let error_msg = format!("Failed to decompress downloaded MAF file: {}", e);
208
204
  Err((url.clone(), error_msg))
209
205
  }
210
206
  }
211
207
  }
212
- Ok(resp) => {
213
- let error_msg = format!("HTTP error: {}", resp.status());
214
- Err((url.clone(), error_msg))
215
- }
216
208
  Err(e) => {
217
- let error_msg = format!("Server request failed: {}", e);
209
+ let error_msg = format!("Failed to decompress downloaded MAF file: {}", e);
218
210
  Err((url.clone(), error_msg))
219
211
  }
212
+ },
213
+ Ok(resp) => {
214
+ let error_msg = format!("HTTP error: {}", resp.status());
215
+ Err((url.clone(), error_msg))
216
+ }
217
+ Err(e) => {
218
+ let error_msg = format!("Server request failed: {}", e);
219
+ Err((url.clone(), error_msg))
220
220
  }
221
221
  }
222
- })
223
- );
222
+ }
223
+ }));
224
224
 
225
225
  // binary output
226
226
  let encoder = Arc::new(Mutex::new(GzEncoder::new(io::stdout(), Compression::default())));
@@ -228,57 +228,57 @@ async fn main() -> Result<(),Box<dyn std::error::Error>> {
228
228
  // Write the header
229
229
  {
230
230
  let mut encoder_guard = encoder.lock().unwrap(); // Lock the Mutex to get access to the inner GzEncoder
231
- encoder_guard.write_all(&maf_col.join("\t").as_bytes().to_vec()).expect("Failed to write header");
231
+ encoder_guard
232
+ .write_all(&maf_col.join("\t").as_bytes().to_vec())
233
+ .expect("Failed to write header");
232
234
  encoder_guard.write_all(b"\n").expect("Failed to write newline");
233
235
  }
234
-
235
- download_futures.buffer_unordered(20).for_each( |result| {
236
- let encoder = Arc::clone(&encoder); // Clone the Arc for each task
237
- let maf_col_cp = maf_col.clone();
238
- async move {
239
- match result {
240
- Ok((url, content)) => {
241
- match select_maf_col(content, &maf_col_cp, &url) {
242
- Ok((maf_bit,mafrows)) => {
236
+
237
+ download_futures
238
+ .buffer_unordered(20)
239
+ .for_each(|result| {
240
+ let encoder = Arc::clone(&encoder); // Clone the Arc for each task
241
+ let maf_col_cp = maf_col.clone();
242
+ async move {
243
+ match result {
244
+ Ok((url, content)) => match select_maf_col(content, &maf_col_cp, &url) {
245
+ Ok((maf_bit, mafrows)) => {
243
246
  if mafrows > 0 {
244
247
  let mut encoder_guard = encoder.lock().unwrap();
245
248
  encoder_guard.write_all(&maf_bit).expect("Failed to write file");
246
249
  } else {
247
250
  let error = ErrorEntry {
248
251
  url: url.clone(),
249
- error: "Empty maf file".to_string(),
252
+ error: "Empty MAF file".to_string(),
250
253
  };
251
254
  let error_js = serde_json::to_string(&error).unwrap();
252
255
  writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
253
256
  }
254
257
  }
255
- Err((url,error)) => {
256
- let error = ErrorEntry {
257
- url,
258
- error,
259
- };
258
+ Err((url, error)) => {
259
+ let error = ErrorEntry { url, error };
260
260
  let error_js = serde_json::to_string(&error).unwrap();
261
261
  writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
262
262
  }
263
+ },
264
+ Err((url, error)) => {
265
+ let error = ErrorEntry { url, error };
266
+ let error_js = serde_json::to_string(&error).unwrap();
267
+ writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
263
268
  }
264
- }
265
- Err((url, error)) => {
266
- let error = ErrorEntry {
267
- url,
268
- error,
269
- };
270
- let error_js = serde_json::to_string(&error).unwrap();
271
- writeln!(io::stderr(), "{}", error_js).expect("Failed to output stderr!");
272
- }
273
- };
274
- }
275
- }).await;
276
-
269
+ };
270
+ }
271
+ })
272
+ .await;
273
+
277
274
  // Finalize output
278
275
 
279
276
  // Replace the value inside the Mutex with a dummy value (e.g., None)
280
277
  let mut encoder_guard = encoder.lock().unwrap();
281
- let encoder = std::mem::replace(&mut *encoder_guard, GzEncoder::new(io::stdout(), Compression::default()));
278
+ let encoder = std::mem::replace(
279
+ &mut *encoder_guard,
280
+ GzEncoder::new(io::stdout(), Compression::default()),
281
+ );
282
282
  // Finalize the encoder
283
283
  encoder.finish().expect("Maf file output error!");
284
284
 
package/src/genesetORA.rs CHANGED
@@ -9,7 +9,7 @@ use serde_json;
9
9
  use std::cmp::Ordering;
10
10
  use std::collections::HashSet;
11
11
  use std::io;
12
- use std::time::Instant;
12
+ //use std::time::Instant;
13
13
 
14
14
  #[allow(non_camel_case_types)]
15
15
  #[allow(non_snake_case)]
@@ -37,10 +37,7 @@ fn calculate_hypergeometric_p_value(
37
37
  ) -> (f64, f64, String) {
38
38
  let mut gene_set_hits: String = "".to_string();
39
39
 
40
- let gene_intersections: HashSet<String> = genes_in_pathway
41
- .intersection(sample_genes)
42
- .cloned()
43
- .collect();
40
+ let gene_intersections: HashSet<String> = genes_in_pathway.intersection(sample_genes).cloned().collect();
44
41
  for gene in &gene_intersections {
45
42
  gene_set_hits += &(gene.to_string() + &",");
46
43
  }
@@ -78,7 +75,7 @@ fn main() -> Result<()> {
78
75
  let input_json = json::parse(&input);
79
76
  match input_json {
80
77
  Ok(json_string) => {
81
- let run_time = Instant::now();
78
+ //let run_time = Instant::now();
82
79
  let msigdb_input: &JsonValue = &json_string["msigdb"];
83
80
  let msigdb;
84
81
  match msigdb_input.as_str() {
@@ -92,8 +89,7 @@ fn main() -> Result<()> {
92
89
  None => panic!("genesetgroup is missing"),
93
90
  }
94
91
  let sample_genes_input: &JsonValue = &json_string["sample_genes"];
95
- let sample_genes: Vec<&str> =
96
- sample_genes_input.as_str().unwrap().split(",").collect();
92
+ let sample_genes: Vec<&str> = sample_genes_input.as_str().unwrap().split(",").collect();
97
93
  let mut pathway_p_values: Vec<pathway_p_value> = Vec::with_capacity(10000);
98
94
 
99
95
  let genedb_input: &JsonValue = &json_string["genedb"];
@@ -103,10 +99,8 @@ fn main() -> Result<()> {
103
99
  None => panic!("genedb file path is missing"),
104
100
  }
105
101
 
106
- let filter_non_coding_genes_input: &JsonValue =
107
- &json_string["filter_non_coding_genes"];
108
- let filter_non_coding_genes: bool =
109
- filter_non_coding_genes_input.as_bool().unwrap();
102
+ let filter_non_coding_genes_input: &JsonValue = &json_string["filter_non_coding_genes"];
103
+ let filter_non_coding_genes: bool = filter_non_coding_genes_input.as_bool().unwrap();
110
104
 
111
105
  let genedbconn = Connection::open(genedb)?;
112
106
  let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
@@ -120,8 +114,7 @@ fn main() -> Result<()> {
120
114
  //println!("coding_gene:{:?}", coding_gene);
121
115
  for sample_gene in &sample_genes {
122
116
  let code_gene: String = coding_gene.get(0).unwrap();
123
- if filter_non_coding_genes == true && code_gene == *sample_gene
124
- {
117
+ if filter_non_coding_genes == true && code_gene == *sample_gene {
125
118
  sample_coding_genes.insert(code_gene);
126
119
  } else if filter_non_coding_genes == false {
127
120
  sample_coding_genes.insert(code_gene);
@@ -160,25 +153,19 @@ fn main() -> Result<()> {
160
153
  let num_items_output = 100; // Number of top pathways to be specified in the output
161
154
 
162
155
  let msigdbconn = Connection::open(msigdb)?;
163
- let stmt_result = msigdbconn.prepare(
164
- &("select id from terms where parent_id='".to_owned()
165
- + &genesetgroup
166
- + "'"),
167
- );
156
+ let stmt_result = msigdbconn
157
+ .prepare(&("select id from terms where parent_id='".to_owned() + &genesetgroup + "'"));
168
158
  match stmt_result {
169
159
  Ok(mut stmt) => {
170
160
  #[allow(non_snake_case)]
171
- let GO_iter =
172
- stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
161
+ let GO_iter = stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
173
162
  #[allow(non_snake_case)]
174
163
  for GO_term in GO_iter {
175
164
  match GO_term {
176
165
  Ok(n) => {
177
166
  //println!("GO term {:?}", n);
178
167
  let sql_statement =
179
- "select genes from term2genes where id='".to_owned()
180
- + &n.GO_id
181
- + &"'";
168
+ "select genes from term2genes where id='".to_owned() + &n.GO_id + &"'";
182
169
  //println!("sql_statement:{}", sql_statement);
183
170
  let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
184
171
  //println!("gene_stmt:{:?}", gene_stmt);
@@ -191,26 +178,20 @@ fn main() -> Result<()> {
191
178
  match input_gene_json {
192
179
  Ok(json_genes) => {
193
180
  for json_iter in 0..json_genes.len() {
194
- names.insert(
195
- json_genes[json_iter]["symbol"]
196
- .to_string(),
197
- );
181
+ names.insert(json_genes[json_iter]["symbol"].to_string());
198
182
  }
199
183
  }
200
184
  Err(_) => {
201
- panic!(
202
- "Symbol, ensg, enstCanonical structure is missing!"
203
- )
185
+ panic!("Symbol, ensg, enstCanonical structure is missing!")
204
186
  }
205
187
  }
206
188
  }
207
189
  let gene_set_size = names.len();
208
- let (p_value, matches, gene_set_hits) =
209
- calculate_hypergeometric_p_value(
210
- &sample_coding_genes,
211
- num_background_genes,
212
- names,
213
- );
190
+ let (p_value, matches, gene_set_hits) = calculate_hypergeometric_p_value(
191
+ &sample_coding_genes,
192
+ num_background_genes,
193
+ names,
194
+ );
214
195
  if matches >= 1.0 && p_value.is_nan() == false {
215
196
  pathway_p_values.push(pathway_p_value {
216
197
  pathway_name: n.GO_id,
@@ -234,11 +215,8 @@ fn main() -> Result<()> {
234
215
  + &",\"pathways\":"
235
216
  + &adjust_p_values(pathway_p_values, num_items_output)
236
217
  + &"}";
237
- println!("pathway_p_values:{}", output_string);
238
- println!(
239
- "Time for calculating gene overrepresentation:{:?}",
240
- run_time.elapsed()
241
- );
218
+ println!("{}", output_string);
219
+ //println!("Time for calculating gene overrepresentation:{:?}", run_time.elapsed());
242
220
  }
243
221
  Err(error) => println!("Incorrect json:{}", error),
244
222
  }
@@ -248,10 +226,7 @@ fn main() -> Result<()> {
248
226
  Ok(())
249
227
  }
250
228
 
251
- fn adjust_p_values(
252
- mut original_p_values: Vec<pathway_p_value>,
253
- mut num_items_output: usize,
254
- ) -> String {
229
+ fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>, mut num_items_output: usize) -> String {
255
230
  // Sorting p-values in ascending order
256
231
  original_p_values.as_mut_slice().sort_by(|a, b| {
257
232
  (a.p_value_original)
@@ -266,8 +241,7 @@ fn adjust_p_values(
266
241
  let i = original_p_values.len() - j - 1;
267
242
 
268
243
  //println!("p_val:{}", p_val);
269
- let mut adjusted_p_val: f64 =
270
- original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
244
+ let mut adjusted_p_val: f64 = original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
271
245
  if adjusted_p_val > 1.0 {
272
246
  // p_value should NEVER be greater than 1
273
247
  adjusted_p_val = 1.0;