@sjcrh/proteinpaint-rust 2.124.0 → 2.126.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -34,6 +34,7 @@ reqwest = "0.11"
  flate2 = "1"
  futures = "0.3"
  num_cpus = "1.16.0"
+ memchr = "2"

  [profile.release]
  lto = "fat"
@@ -100,3 +101,11 @@ path="src/readHDF5.rs"
  [[bin]]
  name="validateHDF5"
  path="src/validateHDF5.rs"
+
+ [[bin]]
+ name="gdcGRIN2"
+ path="src/gdcGRIN2.rs"
+
+ [[bin]]
+ name="cerno"
+ path="src/cerno.rs"
package/package.json CHANGED
@@ -1,5 +1,5 @@
  {
- "version": "2.124.0",
+ "version": "2.126.0",
  "name": "@sjcrh/proteinpaint-rust",
  "type": "module",
  "description": "Rust-based utilities for proteinpaint",
@@ -39,5 +39,5 @@
  "devDependencies": {
  "tape": "^5.2.2"
  },
- "pp_release_tag": "v2.124.0"
+ "pp_release_tag": "v2.126.0"
  }
package/src/cerno.rs ADDED
@@ -0,0 +1,341 @@
+ // Syntax: cd .. && cargo build --release && time cat ~/sjpp/test.txt | target/release/cerno
+ #![allow(non_snake_case)]
+ use json::JsonValue;
+ use r_mathlib::chi_squared_cdf;
+ use rusqlite::{Connection, Result};
+ use serde::{Deserialize, Serialize};
+ use serde_json;
+ use std::cmp::Ordering;
+ use std::collections::HashSet;
+ use std::io;
+
+ #[allow(non_camel_case_types)]
+ #[allow(non_snake_case)]
+ #[derive(Debug)]
+ struct GO_pathway {
+     GO_id: String,
+ }
+
+ #[allow(non_camel_case_types)]
+ #[allow(non_snake_case)]
+ #[derive(Debug, Clone, PartialEq, PartialOrd)]
+ struct gene_order {
+     gene_name: String,
+     fold_change: f64,
+     rank: Option<usize>,
+ }
+
+ #[allow(non_camel_case_types)]
+ #[allow(non_snake_case)]
+ #[derive(Debug, Serialize, Deserialize)]
+ //#[allow(dead_code)]
+ struct pathway_p_value {
+     pathway_name: String,
+     p_value_original: f64,
+     p_value_adjusted: Option<f64>,
+     gene_set_hits: String,
+     auc: f64,
+     es: f64,
+     gene_set_size: usize,
+ }
+
+ #[allow(non_camel_case_types)]
+ #[allow(non_snake_case)]
+ #[derive(Debug, Serialize, Deserialize)]
+ //#[allow(dead_code)]
+ struct output_struct {
+     pval: f64,
+     fdr: f64,
+     leading_edge: String,
+     auc: f64,
+     es: f64,
+     geneset_size: usize,
+ }
+
+ fn main() -> Result<()> {
+     let mut input = String::new();
+     match io::stdin().read_line(&mut input) {
+         // Accepting the piped input from nodejs (or command line from testing)
+         Ok(_n) => {
+             let input_json = json::parse(&input);
+             match input_json {
+                 Ok(json_string) => {
+                     let msigdb_input: &JsonValue = &json_string["db"];
+                     let msigdb;
+                     match msigdb_input.as_str() {
+                         Some(db_string) => msigdb = db_string.to_string(),
+                         None => panic!("msigdb file path is missing"),
+                     }
+                     let genesetgroup;
+                     let genesetgroup_input: &JsonValue = &json_string["geneset_group"];
+                     match genesetgroup_input.as_str() {
+                         Some(genesetgroup_string) => genesetgroup = genesetgroup_string.to_string(),
+                         None => panic!("genesetgroup is missing"),
+                     }
+                     let sample_genes_input: &JsonValue = &json_string["genes"];
+                     let mut sample_genes = Vec::<&str>::new();
+                     for iter in 0..sample_genes_input.len() {
+                         let item = sample_genes_input[iter].as_str().unwrap();
+                         sample_genes.push(item);
+                     }
+                     //println!("sample_genes:{:?}", sample_genes);
+
+                     let fold_change_input: &JsonValue = &json_string["fold_change"];
+                     let mut fold_change_f64 = Vec::<f64>::new();
+                     for iter in 0..fold_change_input.len() {
+                         let item = fold_change_input[iter].as_f64().unwrap();
+                         fold_change_f64.push(item);
+                     }
+
+                     if sample_genes.len() == 0 {
+                         panic!("No sample genes provided");
+                     }
+
+                     if sample_genes.len() != fold_change_f64.len() {
+                         panic!("Length of genes array and fold change array are not equal");
+                     }
+
+                     let mut genes_vector: Vec<gene_order> = Vec::with_capacity(sample_genes.len());
+                     for i in 0..sample_genes.len() {
+                         let item: gene_order = gene_order {
+                             gene_name: sample_genes[i].to_string(),
+                             fold_change: fold_change_f64[i],
+                             rank: None, // Will be calculated later
+                         };
+                         genes_vector.push(item)
+                     }
+                     let mut pathway_p_values: Vec<pathway_p_value> = Vec::with_capacity(10000);
+
+                     let genedb_input: &JsonValue = &json_string["genedb"];
+                     let genedb;
+                     match genedb_input.as_str() {
+                         Some(gene_db_string) => genedb = gene_db_string.to_string(),
+                         None => panic!("genedb file path is missing"),
+                     }
+
+                     let filter_non_coding_genes_input: &JsonValue = &json_string["filter_non_coding_genes"];
+                     let filter_non_coding_genes: bool = filter_non_coding_genes_input.as_bool().unwrap();
+
+                     let genedbconn = Connection::open(genedb)?;
+                     let genedb_result = genedbconn.prepare(&("select * from codingGenes"));
+                     let mut sample_coding_genes: Vec<gene_order> = Vec::with_capacity(24000);
+                     match genedb_result {
+                         Ok(mut x) => {
+                             let mut genes = x.query([])?;
+                             while let Some(coding_gene) = genes.next()? {
+                                 //println!("coding_gene:{:?}", coding_gene);
+                                 for sample_gene in &genes_vector {
+                                     let code_gene: String = coding_gene.get(0).unwrap();
+                                     if filter_non_coding_genes == true && code_gene == *sample_gene.gene_name {
+                                         sample_coding_genes.push(sample_gene.clone());
+                                     } else if filter_non_coding_genes == false {
+                                         sample_coding_genes.push(sample_gene.clone());
+                                     }
+                                 }
+                             }
+                         }
+                         Err(_) => {}
+                     }
+
+                     if sample_coding_genes.len() == 0 {
+                         panic!("All query genes are non-coding");
+                     }
+
+                     // Sort sample_coding_gene in descending order
+                     sample_coding_genes
+                         .as_mut_slice()
+                         .sort_by(|a, b| (b.fold_change).partial_cmp(&a.fold_change).unwrap_or(Ordering::Equal));
+
+                     // Assign ranks to each gene
+                     for i in 0..sample_coding_genes.len() {
+                         sample_coding_genes[i].rank = Some(i)
+                     }
+
+                     //println!("sample_genes:{:?}", sample_genes);
+                     //println!("background_genes:{:?}", background_genes);
+
+                     let msigdbconn = Connection::open(msigdb)?;
+                     let stmt_result = msigdbconn
+                         .prepare(&("select id from terms where parent_id='".to_owned() + &genesetgroup + "'"));
+                     match stmt_result {
+                         Ok(mut stmt) => {
+                             #[allow(non_snake_case)]
+                             let GO_iter = stmt.query_map([], |row| Ok(GO_pathway { GO_id: row.get(0)? }))?;
+                             #[allow(non_snake_case)]
+                             for GO_term in GO_iter {
+                                 match GO_term {
+                                     Ok(n) => {
+                                         //println!("GO term {:?}", n);
+                                         let sql_statement =
+                                             "select genes from term2genes where id='".to_owned() + &n.GO_id + &"'";
+                                         //println!("sql_statement:{}", sql_statement);
+                                         let mut gene_stmt = msigdbconn.prepare(&(sql_statement))?;
+                                         //println!("gene_stmt:{:?}", gene_stmt);
+
+                                         let mut rows = gene_stmt.query([])?;
+                                         let mut names = HashSet::<String>::new();
+                                         while let Some(row) = rows.next()? {
+                                             let a: String = row.get(0)?;
+                                             let input_gene_json = json::parse(&a);
+                                             match input_gene_json {
+                                                 Ok(json_genes) => {
+                                                     for json_iter in 0..json_genes.len() {
+                                                         names.insert(json_genes[json_iter]["symbol"].to_string());
+                                                     }
+                                                 }
+                                                 Err(_) => {
+                                                     panic!("Symbol, ensg, enstCanonical structure is missing!")
+                                                 }
+                                             }
+                                         }
+                                         let gene_set_size = names.len();
+                                         let (p_value, auc, es, matches, gene_set_hits) =
+                                             cerno(&sample_coding_genes, names);
+
+                                         if matches >= 1.0
+                                             && p_value.is_nan() == false
+                                             && es.is_nan() == false
+                                             && es != f64::INFINITY
+                                             && auc != f64::INFINITY
+                                             && auc.is_nan() == false
+                                         {
+                                             pathway_p_values.push(pathway_p_value {
+                                                 pathway_name: n.GO_id,
+                                                 p_value_original: p_value,
+                                                 p_value_adjusted: None,
+                                                 auc: auc,
+                                                 es: es,
+                                                 gene_set_hits: gene_set_hits,
+                                                 gene_set_size: gene_set_size,
+                                             })
+                                         }
+                                     }
+                                     Err(_) => {
+                                         println!("GO term not found!")
+                                     }
+                                 }
+                             }
+                         }
+                         Err(_) => panic!("sqlite database file not found"),
+                     }
+                     let output_string =
+                         "result: {".to_string() + &"\"data\":" + &adjust_p_values(pathway_p_values) + &"}";
+                     println!("{}", output_string);
+                 }
+                 Err(error) => println!("Incorrect json:{}", error),
+             }
+         }
+         Err(error) => println!("Piping error: {}", error),
+     }
+     Ok(())
+ }
+
+ fn cerno(sample_coding_genes: &Vec<gene_order>, genes_in_pathway: HashSet<String>) -> (f64, f64, f64, f64, String) {
+     // Filter the sample_coding_genes vector to only include those whose gene_names are in the HashSet genes_in_pathway
+     let gene_intersections: Vec<&gene_order> = sample_coding_genes
+         .iter()
+         .filter(|sample_coding_genes| genes_in_pathway.contains(&sample_coding_genes.gene_name)) // Check if name is in the HashSet genes_in_pathway
+         .collect(); // Collect the results into a new vector
+
+     let N1 = gene_intersections.len() as f64;
+     let N = sample_coding_genes.len() as f64;
+     let mut gene_set_hits: String = "".to_string();
+     for gene in &gene_intersections {
+         gene_set_hits += &(gene.gene_name.to_string() + &",");
+     }
+     if gene_intersections.len() > 0 {
+         // Remove the last "," in string
+         gene_set_hits.pop();
+     }
+
+     let ranks: Vec<usize> = gene_intersections // x <- l %in% mset$gs2gv[[m]] ; ranks <- c(1:N)[x]
+         .iter()
+         .map(|x| x.rank.unwrap())
+         .collect::<Vec<usize>>();
+
+     let cerno: f64 = ranks // -2 * sum( log(ranks/N) )
+         .iter()
+         .map(|x| ((*x as f64) / N).ln())
+         .collect::<Vec<f64>>()
+         .iter()
+         .sum::<f64>()
+         * (-2.0);
+
+     let cES: f64 = cerno / (2.0 * (N1 as f64)); // cES <- cerno/(2*N1)
+     let N2 = N - N1; // N2 = N - N1
+     let R1 = ranks.iter().sum::<usize>() as f64; // R1 <- sum(ranks)
+     let U = N1 * N2 + N1 * (N1 + 1.0) / 2.0 - R1; // U <- N1*N2+N1*(N1+1)/2-R1
+     let AUC = U / (N1 * N2); // AUC <- U/(N1*N2)
+     let p_value = chi_squared_cdf(cerno, 2.0 * N1, false, false); // pchisq(ret$cerno, 2*N1, lower.tail=FALSE)
+     (p_value, AUC, cES, N1, gene_set_hits)
+ }
+
+ fn adjust_p_values(mut original_p_values: Vec<pathway_p_value>) -> String {
+     // Sorting p-values in ascending order
+     original_p_values.as_mut_slice().sort_by(|a, b| {
+         (a.p_value_original)
+             .partial_cmp(&b.p_value_original)
+             .unwrap_or(Ordering::Equal)
+     });
+
+     let mut adjusted_p_values: Vec<pathway_p_value> = Vec::with_capacity(original_p_values.len());
+     let mut old_p_value: f64 = 0.0;
+     let mut rank: f64 = original_p_values.len() as f64;
+     for j in 0..original_p_values.len() {
+         let i = original_p_values.len() - j - 1;
+
+         //println!("p_val:{}", p_val);
+         let mut adjusted_p_val: f64 = original_p_values[i].p_value_original * (original_p_values.len() as f64 / rank); // adjusted p-value = original_p_value * (N/rank)
+         if adjusted_p_val > 1.0 {
+             // p_value should NEVER be greater than 1
+             adjusted_p_val = 1.0;
+         }
+         //println!("Original p_value:{}", original_p_values[i].p_value);
+         //println!("Raw adjusted p_value:{}", adjusted_p_value);
+         if i != original_p_values.len() - 1 {
+             if adjusted_p_val > old_p_value {
+                 adjusted_p_val = old_p_value;
+             }
+         }
+         old_p_value = adjusted_p_val;
+         //println!("adjusted_p_value:{}", adjusted_p_val);
+         rank -= 1.0;
+
+         adjusted_p_values.push(pathway_p_value {
+             pathway_name: original_p_values[i].pathway_name.clone(),
+             p_value_original: original_p_values[i].p_value_original,
+             p_value_adjusted: Some(adjusted_p_val),
+             auc: original_p_values[i].auc,
+             es: original_p_values[i].es,
+             gene_set_hits: original_p_values[i].gene_set_hits.clone(),
+             gene_set_size: original_p_values[i].gene_set_size,
+         });
+     }
+     adjusted_p_values.as_mut_slice().sort_by(|a, b| {
+         (a.p_value_adjusted.unwrap())
+             .partial_cmp(&b.p_value_adjusted.unwrap())
+             .unwrap_or(Ordering::Equal)
+     });
+
+     let mut output_string = "{".to_string();
+     for i in 0..adjusted_p_values.len() {
+         let item = output_struct {
+             pval: adjusted_p_values[i].p_value_original,
+             fdr: adjusted_p_values[i].p_value_adjusted.unwrap(),
+             leading_edge: adjusted_p_values[i].gene_set_hits.clone(),
+             geneset_size: adjusted_p_values[i].gene_set_size,
+             es: adjusted_p_values[i].es,
+             auc: adjusted_p_values[i].auc,
+         };
+         output_string += &format!(
+             "\"{}\":{}",
+             adjusted_p_values[i].pathway_name.clone(),
+             serde_json::to_string(&item).unwrap()
+         );
+         if i < adjusted_p_values.len() - 1 {
+             output_string += &",".to_string();
+         }
+     }
+     output_string += &"}".to_string();
+     output_string
+ }
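
The new cerno binary implements the CERNO gene-set enrichment test from the tmod R package; the R formulas it mirrors are quoted in the comments above. Genes are ranked by descending fold change, and for a gene set with N1 hits among N ranked genes the statistic -2 * sum(ln(rank/N)) is referred to a chi-squared distribution with 2*N1 degrees of freedom; the AUC is derived from the Mann-Whitney U statistic, and the fdr field from a Benjamini-Hochberg adjustment. Below is a minimal standalone sketch of that arithmetic, with hypothetical ranks and 1-based ranking as in the quoted R code; because 2*N1 is always even, the chi-squared upper tail has a closed form, so this sketch needs no stats crate:

// Minimal sketch of the CERNO statistic computed by the new `cerno` binary
// (hypothetical standalone example; the shipped binary reads its gene list,
// fold changes, and SQLite paths as one JSON line on stdin instead).

// Upper-tail chi-squared CDF for an even number of degrees of freedom 2m:
// P(X > x) = exp(-x/2) * sum_{j=0}^{m-1} (x/2)^j / j!
fn chi_sq_upper_tail_even_df(x: f64, m: usize) -> f64 {
    let half = x / 2.0;
    let mut term = 1.0; // (x/2)^0 / 0!
    let mut sum = 1.0;
    for j in 1..m {
        term *= half / j as f64;
        sum += term;
    }
    (-half).exp() * sum
}

fn main() {
    // Hypothetical data: 1-based ranks of the gene-set hits among N ranked genes.
    let n: f64 = 1000.0; // total ranked genes (N)
    let ranks: [f64; 4] = [3.0, 10.0, 25.0, 400.0]; // gene-set hits (N1 = 4)
    let n1 = ranks.len() as f64;

    // CERNO statistic: -2 * sum(ln(rank/N)), chi-squared with 2*N1 df under the null
    let cerno: f64 = -2.0 * ranks.iter().map(|r| (r / n).ln()).sum::<f64>();
    let p_value = chi_sq_upper_tail_even_df(cerno, ranks.len());

    // Effect sizes reported alongside the p-value:
    let es = cerno / (2.0 * n1); // cES; expected value 1 under the null
    let n2 = n - n1;
    let r1: f64 = ranks.iter().sum();
    let u = n1 * n2 + n1 * (n1 + 1.0) / 2.0 - r1; // Mann-Whitney U
    let auc = u / (n1 * n2);

    println!("cerno={:.3} p={:.3e} es={:.3} auc={:.3}", cerno, p_value, es, auc);
}

The shipped binary wires this math to real inputs, repeating the computation for every gene set stored under the requested geneset_group in the msigdb SQLite file and emitting the Benjamini-Hochberg-adjusted results as a single JSON object.
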
package/src/gdcGRIN2.rs ADDED
@@ -0,0 +1,295 @@
+ use flate2::read::GzDecoder;
+ use futures::StreamExt;
+ use memchr::memchr;
+ use serde::Deserialize;
+ use serde_json;
+ use std::collections::HashMap;
+ use std::io::{self, Read, Write};
+ use std::time::Duration;
+ use tokio::io::{AsyncReadExt, BufReader};
+ use tokio::time::timeout;
+
+ // Struct to hold error information
+ #[derive(serde::Serialize)]
+ struct ErrorEntry {
+     case: String,
+     error: String,
+ }
+
+ // Define the structure for datadd
+ #[derive(Deserialize, Debug)]
+ struct DataType {
+     cnv: Option<String>,
+     maf: Option<String>,
+ }
+
+ // Function to parse TSV content
+ // CNV:
+ //   Select cnv columns ["Chromosome","Start","End","Segment_Mean"]
+ //   Segment_Mean >= 0.2 => gain; Segment_Mean <= -0.2 => loss
+ // MAF:
+ //   Select MAF columns ["Chromosome","Start_Position","End_Position"]
+ fn parse_content(content: &str, case_id: &str, data_type: &str) -> Result<String, (String, String, String)> {
+     let lines = content.lines();
+     //let mut parsed_data = Vec::new();
+     let mut parsed_data: String = String::new();
+     let mut columns_indices: Vec<usize> = Vec::new();
+     let mut header_mk: &str = "";
+     let mut columns = Vec::new(); // columns selected from GDC file
+     if data_type == "cnv" {
+         header_mk = "GDC_Aliquot_ID";
+         columns = vec!["Chromosome", "Start", "End", "Segment_Mean"]
+     } else if data_type == "maf" {
+         header_mk = "Hugo_Symbol";
+         columns = vec!["Chromosome", "Start_Position", "End_Position"]
+     };
+     let mut header: Vec<String> = Vec::new(); // GDC file header
+     for line in lines {
+         if line.starts_with("#") {
+             continue;
+         } else if line.contains(&header_mk) {
+             // header line
+             header = line.split("\t").map(|s| s.to_string()).collect();
+             for col in &columns {
+                 match header.iter().position(|x| x == col) {
+                     Some(index) => {
+                         columns_indices.push(index);
+                     }
+                     None => {
+                         let error_msg = format!("Column {} was not found", col);
+                         return Err((case_id.to_string(), data_type.to_string(), error_msg));
+                     }
+                 }
+             }
+         } else {
+             let mut keep_ck: bool = true;
+             let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
+             let mut out_lst: Vec<String> = Vec::new();
+             // add sample ID first
+             out_lst.push(case_id.to_string());
+             for x in columns_indices.iter() {
+                 let mut element = cont_lst[*x].to_string();
+                 if data_type == "cnv" && &header[*x] == "Segment_Mean" {
+                     // convert to f32 (segment_mean)
+                     let seg_mean = match element.parse::<f32>() {
+                         Ok(val) => val,
+                         Err(_e) => {
+                             let error_msg = "Segment_Mean in cnv file is not float".to_string();
+                             return Err((case_id.to_string(), data_type.to_string(), error_msg));
+                         }
+                     };
+                     if seg_mean >= 0.2 {
+                         element = "gain".to_string();
+                     } else if seg_mean <= -0.2 {
+                         element = "loss".to_string();
+                     } else {
+                         keep_ck = false;
+                     }
+                 }
+                 out_lst.push(element);
+             }
+             // add lsn.type to snv
+             if data_type == "maf" {
+                 out_lst.push("mutation".to_string());
+             }
+             if keep_ck {
+                 parsed_data.push_str(out_lst.join("\t").as_str());
+                 parsed_data.push_str("\n");
+             }
+         }
+     }
+     if columns_indices.is_empty() {
+         return Err((
+             case_id.to_string(),
+             data_type.to_string(),
+             "No matching columns found. Problematic file!".to_string(),
+         ));
+     };
+     Ok(parsed_data)
+ }
+
+ // Function to download data
+ //async fn download_data(data4dl: HashMap<String,DataType>, host: &str) -> Vec<Result<(String, String), (String, String)>> {
+ async fn download_data(data4dl: HashMap<String, DataType>, host: &str) -> () {
+     // Generate URLs from data4dl, handling optional cnv and maf
+     let data_urls = data4dl
+         .into_iter()
+         .flat_map(|(case_id, data_types)| {
+             let mut urls = Vec::new();
+             if let Some(cnv_uuid) = &data_types.cnv {
+                 urls.push((case_id.clone(), "cnv".to_string(), format!("{}{}", host, cnv_uuid)));
+             }
+             if let Some(maf_uuid) = &data_types.maf {
+                 urls.push((case_id.clone(), "maf".to_string(), format!("{}{}", host, maf_uuid)));
+             }
+             urls
+         })
+         .collect::<Vec<_>>();
+     let download_futures = futures::stream::iter(data_urls.into_iter().map(|(case_id, data_type, url)| {
+         async move {
+             //let case_dt = format!("{}/{}",case_id,data_type).to_string();
+             // Build HTTP client with timeouts
+             let client = reqwest::Client::builder()
+                 .timeout(Duration::from_secs(60)) // 60-second timeout per request
+                 .connect_timeout(Duration::from_secs(30))
+                 .build()
+                 .map_err(|_e| "Client build error".to_string());
+             // Handle client creation result
+             match client {
+                 Ok(client) => {
+                     match client.get(&url).send().await {
+                         Ok(resp) if resp.status().is_success() => {
+                             match resp.bytes().await {
+                                 Ok(content) => {
+                                     // if data_type == "cnv" {
+                                     if !memchr(0x00, &content).is_some() {
+                                         // CNV files are plain text
+                                         let text = String::from_utf8_lossy(&content).to_string();
+                                         Ok((case_id.clone(), data_type.clone(), text))
+                                     } else {
+                                         let mut decoder = GzDecoder::new(&content[..]);
+                                         let mut decompressed_content = Vec::new();
+                                         match decoder.read_to_end(&mut decompressed_content) {
+                                             Ok(_) => {
+                                                 let text = String::from_utf8_lossy(&decompressed_content).to_string();
+                                                 Ok((case_id.clone(), data_type.clone(), text))
+                                             }
+                                             Err(e) => {
+                                                 let error_msg = format!(
+                                                     "Failed to decompress {} file for {}: {}",
+                                                     data_type, case_id, e
+                                                 );
+                                                 Err((case_id.clone(), data_type.clone(), error_msg))
+                                             }
+                                         }
+                                     }
+                                 }
+                                 Err(e) => {
+                                     let error_msg =
+                                         format!("Failed to read bytes for {} file for {}: {}", data_type, case_id, e);
+                                     Err((case_id.clone(), data_type.clone(), error_msg))
+                                 }
+                             }
+                         }
+                         Ok(resp) => {
+                             let error_msg =
+                                 format!("HTTP error for {} file for {}: {}", data_type, case_id, resp.status());
+                             Err((case_id.clone(), data_type.clone(), error_msg))
+                         }
+                         Err(e) => {
+                             let error_msg =
+                                 format!("Server request failed for {} file for {}: {}", data_type, case_id, e);
+                             Err((case_id.clone(), data_type.clone(), error_msg))
+                         }
+                     }
+                 }
+                 Err(_e) => {
+                     let error_msg = "Client build error".to_string();
+                     Err((case_id, data_type, error_msg))
+                 }
+             }
+         }
+     }));
+
+     // Execute downloads concurrently and collect results
+     download_futures
+         .buffer_unordered(10)
+         .for_each(|result| async {
+             match result {
+                 Ok((case_id, data_type, content)) => match parse_content(&content, &case_id, &data_type) {
+                     Ok(parsed_data) => match serde_json::to_string(&parsed_data) {
+                         Ok(json) => println!("{}", json),
+                         Err(e) => {
+                             let error = ErrorEntry {
+                                 case: format!("{}: {}", case_id, data_type),
+                                 error: format!("Failed to convert data to JSON {}", e),
+                             };
+                             let error_js = serde_json::to_string(&error).unwrap();
+                             eprintln!("{}", error_js);
+                         }
+                     },
+                     Err((cid, dtp, error)) => {
+                         let error = ErrorEntry {
+                             case: format!("{}: {}", cid, dtp),
+                             error,
+                         };
+                         let error_js = serde_json::to_string(&error).unwrap();
+                         eprintln!("{}", error_js);
+                     }
+                 },
+                 Err((case_id, data_type, error)) => {
+                     let error = ErrorEntry {
+                         case: format!("{}: {}", case_id, data_type),
+                         error,
+                     };
+                     let error_js = serde_json::to_string(&error).unwrap();
+                     eprintln!("{}", error_js);
+                 }
+             }
+         })
+         .await;
+ }
+
+ #[tokio::main]
+ async fn main() -> Result<(), Box<dyn std::error::Error>> {
+     const HOST: &str = "https://api.gdc.cancer.gov/data/";
+
+     // Accepting the piped input json from nodejs
+     let timeout_duration = Duration::from_secs(5); // Set a 5-second timeout
+
+     // Wrap the read operation in a timeout
+     let result = timeout(timeout_duration, async {
+         let mut buffer = String::new(); // Initialize an empty string to store input
+         let mut reader = BufReader::new(tokio::io::stdin()); // Create a buffered reader for stdin
+         reader.read_to_string(&mut buffer).await?; // Read a line asynchronously
+         Ok::<String, io::Error>(buffer) // Return the input as a Result
+     })
+     .await;
+
+     // Handle the result of the input timeout operation
+     let input_js: HashMap<String, DataType> = match result {
+         Ok(Ok(buffer)) => match serde_json::from_str(&buffer) {
+             Ok(js) => js,
+             Err(e) => {
+                 let stdin_error = ErrorEntry {
+                     case: String::new(),
+                     error: format!("Input JSON parsing error: {}", e),
+                 };
+                 writeln!(io::stderr(), "{}", serde_json::to_string(&stdin_error).unwrap()).unwrap();
+                 return Err(Box::new(std::io::Error::new(
+                     std::io::ErrorKind::InvalidInput,
+                     "Input JSON parsing Error!",
+                 )) as Box<dyn std::error::Error>);
+             }
+         },
+         Ok(Err(_e)) => {
+             let stdin_error = ErrorEntry {
+                 case: String::new(),
+                 error: "Error reading from stdin.".to_string(),
+             };
+             let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
+             writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
+             return Err(Box::new(std::io::Error::new(
+                 std::io::ErrorKind::InvalidInput,
+                 "Error reading from stdin!",
+             )) as Box<dyn std::error::Error>);
+         }
+         Err(_) => {
+             let stdin_error = ErrorEntry {
+                 case: String::new(),
+                 error: "Timeout while reading from stdin.".to_string(),
+             };
+             let stdin_error_js = serde_json::to_string(&stdin_error).unwrap();
+             writeln!(io::stderr(), "{}", stdin_error_js).expect("Failed to output stderr!");
+             return Err(Box::new(std::io::Error::new(
+                 std::io::ErrorKind::InvalidInput,
+                 "Timeout while reading from stdin.",
+             )) as Box<dyn std::error::Error>);
+         }
+     };
+
+     // Download data
+     download_data(input_js, HOST).await;
+
+     Ok(())
+ }
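
The new gdcGRIN2 binary reads a JSON map of case IDs to GDC file UUIDs on stdin (with a 5-second read timeout), downloads up to 10 files at a time from https://api.gdc.cancer.gov/data/, treats a response body containing no NUL byte as plain TSV and anything else as gzip, and reduces CNV segments to gain/loss calls at the +/-0.2 Segment_Mean cutoff while tagging MAF rows as mutations. A small sketch of the stdin contract and of the byte-sniffing rule, with a hypothetical case ID and file UUIDs (serde_json is already a dependency of this package):

// Sketch of the stdin contract for the new `gdcGRIN2` binary and of its
// gzip-vs-plaintext sniffing (hypothetical case ID and file UUIDs).
use std::collections::HashMap;

fn main() {
    // Input shape: { case_id: { "cnv": <GDC file UUID>, "maf": <GDC file UUID> } },
    // where either key may be omitted; each UUID is appended to
    // https://api.gdc.cancer.gov/data/ to form the download URL.
    let mut files = HashMap::new();
    files.insert("cnv", "00000000-0000-0000-0000-000000000000"); // hypothetical UUID
    files.insert("maf", "11111111-1111-1111-1111-111111111111"); // hypothetical UUID
    let mut input: HashMap<&str, HashMap<&str, &str>> = HashMap::new();
    input.insert("TCGA-XX-0000", files); // hypothetical case ID
    println!("{}", serde_json::to_string(&input).unwrap());

    // Sniffing, as in download_data: a body with any NUL byte is decompressed
    // as gzip, otherwise it is parsed as plain TSV. The shipped code does the
    // byte scan with memchr::memchr(0x00, &content).
    let body: &[u8] = b"Chromosome\tStart\tEnd\tSegment_Mean\n";
    let looks_gzipped = body.contains(&0x00); // stand-in for the memchr call
    assert!(!looks_gzipped);
}

Keying everything by case ID keeps failures attributable: each error path in download_data writes a one-line JSON ErrorEntry naming the case and data type to stderr, while successfully parsed tables are emitted as JSON strings on stdout.
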
package/src/readHDF5.rs CHANGED
@@ -19,9 +19,9 @@
  use hdf5::types::{FixedAscii, VarLenAscii};
  use hdf5::{File, Result};
  use ndarray::Dim;
- use ndarray::{Array1, s};
+ use ndarray::{s, Array1};
  use rayon::prelude::*;
- use serde_json::{Map, Value, json};
+ use serde_json::{json, Map, Value};
  use std::io;
  use std::sync::Arc;
  use std::time::Instant;
package/src/test.rs DELETED
@@ -1,3 +0,0 @@
- fn main() {
-     println!("Hello, world!");
- }