@sjcrh/proteinpaint-rust 2.81.5 → 2.99.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2) hide show
  1. package/package.json +2 -2
  2. package/src/DEanalysis.rs +80 -76
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.81.5",
2
+ "version": "2.99.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.81.5"
41
+ "pp_release_tag": "v2.99.0"
42
42
  }
package/src/DEanalysis.rs CHANGED
@@ -11,7 +11,7 @@ use nalgebra::base::Matrix;
11
11
  use nalgebra::base::VecStorage;
12
12
  use nalgebra::DMatrix;
13
13
  use nalgebra::ViewStorage;
14
- use ndarray::Array1;
14
+ //use ndarray::Array1;
15
15
  use ndarray::Array2;
16
16
  use ndarray::Dim;
17
17
  use serde::{Deserialize, Serialize};
@@ -25,7 +25,7 @@ use std::io::Read;
25
25
  use std::str::FromStr;
26
26
  use std::sync::{Arc, Mutex}; // Multithreading library
27
27
  use std::thread;
28
- use std::time::Instant;
28
+ //use std::time::Instant;
29
29
  //use std::cmp::Ordering;
30
30
  //use std::env;
31
31
  use std::io;
@@ -73,43 +73,45 @@ fn input_data_from_HDF5(
73
73
  Vec<String>,
74
74
  ) {
75
75
  let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
76
- let ds_dim = file.dataset("dims").unwrap(); // open the dataset
76
+
77
+ //let ds_dim = file.dataset("dims").unwrap(); // open the dataset
77
78
  let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
78
79
  let mut case_indexes: Vec<usize> = Vec::with_capacity(case_list.len());
79
80
  let mut control_indexes: Vec<usize> = Vec::with_capacity(control_list.len());
80
81
  // Check the data type and read the dataset accordingly
81
- let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
82
- let num_samples = data_dim[0]; // Number of total columns in the dataset
83
- let num_genes = data_dim[1]; // Number of total rows in the dataset
84
- println!("num_samples bulk:{}", num_samples);
85
- println!("num_genes bulk:{}", num_genes);
82
+ //let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
83
+ //let num_samples = data_dim[0]; // Number of total columns in the dataset
84
+ //let num_genes = data_dim[1]; // Number of total rows in the dataset
85
+
86
+ //println!("num_samples bulk:{}", num_samples);
87
+ //println!("num_genes bulk:{}", num_genes);
86
88
 
87
- let now_gene_names = Instant::now();
89
+ //let now_gene_names = Instant::now();
88
90
  let ds_gene_names = file.dataset("gene_names").unwrap();
89
- println!("ds_gene_names:{:?}", ds_gene_names);
91
+ //println!("ds_gene_names:{:?}", ds_gene_names);
90
92
  let gene_names = ds_gene_names
91
93
  .read::<VarLenAscii, Dim<[usize; 1]>>()
92
94
  .unwrap();
93
- println!("\tgene_names = {:?}", gene_names);
94
- println!("\tgene_names.shape() = {:?}", gene_names.shape());
95
- println!("\tgene_names.strides() = {:?}", gene_names.strides());
96
- println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
97
- println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
95
+ //println!("\tgene_names = {:?}", gene_names);
96
+ //println!("\tgene_names.shape() = {:?}", gene_names.shape());
97
+ //println!("\tgene_names.strides() = {:?}", gene_names.strides());
98
+ //println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
99
+ //println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
98
100
 
99
- let now_gene_symbols = Instant::now();
101
+ //let now_gene_symbols = Instant::now();
100
102
  let ds_gene_symbols = file.dataset("gene_symbols").unwrap();
101
- println!("ds_gene_symbols:{:?}", ds_gene_symbols);
103
+ //println!("ds_gene_symbols:{:?}", ds_gene_symbols);
102
104
  let gene_symbols = ds_gene_symbols
103
105
  .read::<VarLenAscii, Dim<[usize; 1]>>()
104
106
  .unwrap();
105
- println!("\tgene_symbols = {:?}", gene_symbols);
106
- println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
107
- println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
108
- println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
109
- println!(
110
- "Time for parsing gene symbols:{:?}",
111
- now_gene_symbols.elapsed()
112
- );
107
+ //println!("\tgene_symbols = {:?}", gene_symbols);
108
+ //println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
109
+ //println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
110
+ //println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
111
+ //println!(
112
+ // "Time for parsing gene symbols:{:?}",
113
+ // now_gene_symbols.elapsed()
114
+ //);
113
115
 
114
116
  let mut gene_names_string: Vec<String> = Vec::with_capacity(gene_names.len());
115
117
  let mut gene_symbols_string: Vec<String> = Vec::with_capacity(gene_symbols.len());
@@ -118,17 +120,17 @@ fn input_data_from_HDF5(
118
120
  gene_symbols_string.push(gene_symbols[i].to_string());
119
121
  }
120
122
 
121
- let now_samples = Instant::now();
123
+ //let now_samples = Instant::now();
122
124
  let ds_samples = file.dataset("samples").unwrap();
123
125
  let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
124
- println!("\tsamples = {:?}", samples);
125
- println!("\tsamples.shape() = {:?}", samples.shape());
126
- println!("\tsamples.strides() = {:?}", samples.strides());
127
- println!("\tsamples.ndim() = {:?}", samples.ndim());
128
- println!("Time for parsing samples:{:?}", now_samples.elapsed());
126
+ //println!("\tsamples = {:?}", samples);
127
+ //println!("\tsamples.shape() = {:?}", samples.shape());
128
+ //println!("\tsamples.strides() = {:?}", samples.strides());
129
+ //println!("\tsamples.ndim() = {:?}", samples.ndim());
130
+ //println!("Time for parsing samples:{:?}", now_samples.elapsed());
129
131
 
130
132
  //Find all columns values that are populated for the given gene
131
- let now_counts = Instant::now();
133
+ //let now_counts = Instant::now();
132
134
  let ds_counts = file.dataset("counts").unwrap(); // open the dataset
133
135
 
134
136
  let mut global_sample_index = 0;
@@ -189,7 +191,7 @@ fn input_data_from_HDF5(
189
191
  global_sample_index += 1;
190
192
  }
191
193
 
192
- println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
194
+ //println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
193
195
  //println!(
194
196
  // "case + control length:{}",
195
197
  // case_list.len() + control_list.len()
@@ -221,7 +223,7 @@ fn input_data_from_text(
221
223
  Vec<String>,
222
224
  Vec<String>,
223
225
  ) {
224
- let input_time = Instant::now();
226
+ //let input_time = Instant::now();
225
227
  let mut file = File::open(filename).unwrap();
226
228
  let mut num_lines: usize = 0;
227
229
  let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
@@ -235,7 +237,8 @@ fn input_data_from_text(
235
237
  // Check headers for samples
236
238
  let lines: Vec<&str> = buffer.split('\n').collect::<Vec<&str>>();
237
239
  let total_lines = lines.len();
238
- let headers: Vec<&str> = lines[0].split('\t').collect::<Vec<&str>>();
240
+ let header_binding = lines[0].replace("\r", "");
241
+ let headers: Vec<&str> = header_binding.split('\t').collect::<Vec<&str>>();
239
242
  //println!("headers:{:?}", headers);
240
243
  let mut case_indexes_original: Vec<usize> = Vec::with_capacity(case_list.len());
241
244
  let mut control_indexes_original: Vec<usize> = Vec::with_capacity(control_list.len());
@@ -282,7 +285,7 @@ fn input_data_from_text(
282
285
  let lines_slice = &lines[..];
283
286
  for line_iter in 1..lines_slice.len() - 1 {
284
287
  // Subtracting 1 from total length of lines_slice because the last one will be empty
285
- let line = lines_slice[line_iter];
288
+ let line = lines_slice[line_iter].replace("\r", "");
286
289
  let mut index = 0;
287
290
  for field in line.split('\t').collect::<Vec<&str>>() {
288
291
  if index == gene_name_index.unwrap() {
@@ -349,7 +352,7 @@ fn input_data_from_text(
349
352
  let genes_symbols_temp = Arc::new(Mutex::new(Vec::<String>::new()));
350
353
  let input_vector_temp = Arc::new(Mutex::new(Vec::<f64>::new()));
351
354
  let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
352
- println!("Number of threads used:{}", max_threads);
355
+ //println!("Number of threads used:{}", max_threads);
353
356
  for thread_num in 0..max_threads {
354
357
  let case_indexes_original = Arc::clone(&case_indexes_original);
355
358
  let control_indexes_original = Arc::clone(&control_indexes_original);
@@ -377,7 +380,7 @@ fn input_data_from_text(
377
380
  if remainder == thread_num {
378
381
  //println!("buffer:{}", buffer);
379
382
  // Thread analyzing a particular line must have the same remainder as the thread_num, this avoids multiple threads from parsing the same line
380
- let line = lines[line_iter];
383
+ let line = lines[line_iter].replace("\r", "");
381
384
  let mut index = 0;
382
385
  for field in line.split('\t').collect::<Vec<&str>>() {
383
386
  if index == gene_name_index.unwrap() {
@@ -484,7 +487,7 @@ fn input_data_from_text(
484
487
  //println!("num_columns:{}", num_columns);
485
488
  //println!("num_lines * num_columns:{}", num_lines * num_columns);
486
489
  //println!("input_vector:{:?}", input_vector.len());
487
- println!("Time for inputting data:{:?}", input_time.elapsed());
490
+ //println!("Time for inputting data:{:?}", input_time.elapsed());
488
491
  let dm = DMatrix::from_row_slice(num_lines, num_columns, &input_vector);
489
492
  //println!("dm:{:?}", dm);
490
493
  (dm, case_indexes, control_indexes, gene_names, gene_symbols)
@@ -512,14 +515,15 @@ struct PValueIndexes {
512
515
  // Used to get the sample names from HDF5 file at PP server startup
513
516
  fn get_DE_samples(hdf5_filename: &String) {
514
517
  let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
515
- let now_samples = Instant::now();
518
+
519
+ //let now_samples = Instant::now();
516
520
  let ds_samples = file.dataset("samples").unwrap();
517
521
  let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
518
- println!("\tsamples = {:?}", samples);
519
- println!("\tsamples.shape() = {:?}", samples.shape());
520
- println!("\tsamples.strides() = {:?}", samples.strides());
521
- println!("\tsamples.ndim() = {:?}", samples.ndim());
522
- println!("Time for parsing samples:{:?}", now_samples.elapsed());
522
+ //println!("\tsamples = {:?}", samples);
523
+ //println!("\tsamples.shape() = {:?}", samples.shape());
524
+ //println!("\tsamples.strides() = {:?}", samples.strides());
525
+ //println!("\tsamples.ndim() = {:?}", samples.ndim());
526
+ //println!("Time for parsing samples:{:?}", now_samples.elapsed());
523
527
 
524
528
  let mut output_string = "".to_string();
525
529
  for i in 0..samples.len() {
@@ -543,7 +547,7 @@ fn get_DE_samples(hdf5_filename: &String) {
543
547
  output_string += &",";
544
548
  }
545
549
  }
546
- println!("output_string:{}", output_string);
550
+ println!("{}", output_string);
547
551
  }
548
552
 
549
553
  fn main() {
@@ -558,7 +562,7 @@ fn main() {
558
562
  let input_json = json::parse(&input);
559
563
  match input_json {
560
564
  Ok(json_string) => {
561
- let now = Instant::now();
565
+ //let now = Instant::now();
562
566
  let file_name = &json_string["input_file"]
563
567
  .to_owned()
564
568
  .as_str()
@@ -566,7 +570,7 @@ fn main() {
566
570
  .to_string()
567
571
  .split(",")
568
572
  .collect();
569
- println!("file_name:{}", file_name);
573
+ //println!("file_name:{}", file_name);
570
574
  let data_type_option = json_string["data_type"].as_str().to_owned();
571
575
  match data_type_option {
572
576
  Some(x) => {
@@ -642,7 +646,7 @@ fn main() {
642
646
  gene_symbols,
643
647
  ) = input_data_from_HDF5(file_name, &case_list, &control_list);
644
648
  }
645
- let filtering_time = Instant::now();
649
+ //let filtering_time = Instant::now();
646
650
  let (
647
651
  filtered_matrix,
648
652
  lib_sizes,
@@ -657,21 +661,21 @@ fn main() {
657
661
  gene_names,
658
662
  gene_symbols,
659
663
  );
660
- println!("filtering time:{:?}", filtering_time.elapsed());
664
+ //println!("filtering time:{:?}", filtering_time.elapsed());
661
665
  //println!("filtered_matrix_rows:{:?}", filtered_matrix.nrows());
662
666
  //println!("filtered_matrix_cols:{:?}", filtered_matrix.ncols());
663
- let cpm_normalization_time = Instant::now();
667
+ //let cpm_normalization_time = Instant::now();
664
668
  let mut normalized_matrix = cpm(&filtered_matrix);
665
- println!(
666
- "cpm normalization time:{:?}",
667
- cpm_normalization_time.elapsed()
668
- );
669
- let tmm_normalization_time = Instant::now();
669
+ //println!(
670
+ // "cpm normalization time:{:?}",
671
+ // cpm_normalization_time.elapsed()
672
+ //);
673
+ //let tmm_normalization_time = Instant::now();
670
674
  let norm_factors = tmm_normalization(filtered_matrix, &lib_sizes);
671
- println!(
672
- "tmm normalization time:{:?}",
673
- tmm_normalization_time.elapsed()
674
- );
675
+ //println!(
676
+ // "tmm normalization time:{:?}",
677
+ // tmm_normalization_time.elapsed()
678
+ //);
675
679
  //println!("norm_factors:{:?}", norm_factors);
676
680
 
677
681
  for col in 0..normalized_matrix.ncols() {
@@ -682,19 +686,19 @@ fn main() {
682
686
  }
683
687
  }
684
688
  //println!("normalized_matrix:{:?}", normalized_matrix);
685
- println!("Number of cases:{}", case_list.len());
686
- println!("Number of controls:{}", control_list.len());
687
- println!("Time for pre-processing:{:?}", now.elapsed());
689
+ //println!("Number of cases:{}", case_list.len());
690
+ //println!("Number of controls:{}", control_list.len());
691
+ //println!("Time for pre-processing:{:?}", now.elapsed());
688
692
  // Using Wilcoxon test for differential gene expression
689
693
 
690
- let now2 = Instant::now();
694
+ //let now2 = Instant::now();
691
695
  let mut p_values: Vec<PValueIndexes> =
692
696
  Vec::with_capacity(normalized_matrix.nrows());
693
697
  const THRESHOLD: usize = 50; // This determines whether the Wilcoxon exact test or the normal test will be used based on sample size.
694
698
 
695
699
  //println!("case_indexes:{:?}", case_indexes);
696
700
  //println!("control_indexes:{:?}", control_indexes);
697
- let num_normalized_rows = normalized_matrix.nrows();
701
+ //let num_normalized_rows = normalized_matrix.nrows();
698
702
  if normalized_matrix.nrows() * normalized_matrix.ncols()
699
703
  < PAR_CUTOFF
700
704
  {
@@ -856,13 +860,13 @@ fn main() {
856
860
  p_values.append(&mut *p_values_temp.lock().unwrap());
857
861
  }
858
862
  //println!("p_values:{:?}", p_values);
859
- println!(
860
- "Time for running {} wilcoxon tests:{:?}",
861
- num_normalized_rows,
862
- now2.elapsed()
863
- );
863
+ //println!(
864
+ // "Time for running {} wilcoxon tests:{:?}",
865
+ // num_normalized_rows,
866
+ // now2.elapsed()
867
+ //);
864
868
  let adjusted_p_values = adjust_p_values(p_values);
865
- println!("adjusted_p_values:{}", adjusted_p_values);
869
+ println!("{}", adjusted_p_values);
866
870
  //let fold_changes =
867
871
  // calculate_fold_change(normalized_matrix, case_indexes, control_indexes);
868
872
  }
@@ -872,10 +876,10 @@ fn main() {
872
876
  }
873
877
  }
874
878
  }
875
- Err(error) => println!("Incorrect json: {}", error),
879
+ Err(error) => panic!("Incorrect json: {}", error),
876
880
  }
877
881
  }
878
- Err(error) => println!("Piping error: {}", error),
882
+ Err(error) => panic!("Piping error: {}", error),
879
883
  }
880
884
  }
881
885
 
@@ -1320,7 +1324,7 @@ fn filter_by_expr(
1320
1324
  positives.push(row);
1321
1325
  }
1322
1326
  }
1323
- println!("positives length:{}", positives.len());
1327
+ //println!("positives length:{}", positives.len());
1324
1328
  //println!("row_sums:{:?}", row_sums);
1325
1329
  //println!("keep_cpm:{:?}", keep_cpm);
1326
1330
  //println!("positive_cpm:{}", positive_cpm);
@@ -1336,8 +1340,8 @@ fn filter_by_expr(
1336
1340
  let mut filtered_genes: Vec<String> = Vec::with_capacity(positives.len());
1337
1341
  let mut filtered_gene_symbols: Vec<String> = Vec::with_capacity(positives.len());
1338
1342
  let mut i = 0;
1339
- println!("filtered_matrix rows:{}", filtered_matrix.nrows());
1340
- println!("filtered_matrix cols:{}", filtered_matrix.ncols());
1343
+ //println!("filtered_matrix rows:{}", filtered_matrix.nrows());
1344
+ //println!("filtered_matrix cols:{}", filtered_matrix.ncols());
1341
1345
  for index in positives {
1342
1346
  let row = raw_data.row(index);
1343
1347
  filtered_genes.push(gene_names[index].to_owned());