@sjcrh/proteinpaint-rust 2.84.0 → 2.99.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +2 -2
  2. package/src/DEanalysis.rs +76 -73
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.84.0",
2
+ "version": "2.99.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "description": "Rust-based utilities for proteinpaint",
5
5
  "main": "index.js",
@@ -38,5 +38,5 @@
38
38
  "devDependencies": {
39
39
  "tape": "^5.2.2"
40
40
  },
41
- "pp_release_tag": "v2.84.0"
41
+ "pp_release_tag": "v2.99.0"
42
42
  }
package/src/DEanalysis.rs CHANGED
@@ -11,7 +11,7 @@ use nalgebra::base::Matrix;
11
11
  use nalgebra::base::VecStorage;
12
12
  use nalgebra::DMatrix;
13
13
  use nalgebra::ViewStorage;
14
- use ndarray::Array1;
14
+ //use ndarray::Array1;
15
15
  use ndarray::Array2;
16
16
  use ndarray::Dim;
17
17
  use serde::{Deserialize, Serialize};
@@ -25,7 +25,7 @@ use std::io::Read;
25
25
  use std::str::FromStr;
26
26
  use std::sync::{Arc, Mutex}; // Multithreading library
27
27
  use std::thread;
28
- use std::time::Instant;
28
+ //use std::time::Instant;
29
29
  //use std::cmp::Ordering;
30
30
  //use std::env;
31
31
  use std::io;
@@ -73,43 +73,45 @@ fn input_data_from_HDF5(
73
73
  Vec<String>,
74
74
  ) {
75
75
  let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
76
- let ds_dim = file.dataset("dims").unwrap(); // open the dataset
76
+
77
+ //let ds_dim = file.dataset("dims").unwrap(); // open the dataset
77
78
  let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
78
79
  let mut case_indexes: Vec<usize> = Vec::with_capacity(case_list.len());
79
80
  let mut control_indexes: Vec<usize> = Vec::with_capacity(control_list.len());
80
81
  // Check the data type and read the dataset accordingly
81
- let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
82
- let num_samples = data_dim[0]; // Number of total columns in the dataset
83
- let num_genes = data_dim[1]; // Number of total rows in the dataset
84
- println!("num_samples bulk:{}", num_samples);
85
- println!("num_genes bulk:{}", num_genes);
82
+ //let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>().unwrap();
83
+ //let num_samples = data_dim[0]; // Number of total columns in the dataset
84
+ //let num_genes = data_dim[1]; // Number of total rows in the dataset
85
+
86
+ //println!("num_samples bulk:{}", num_samples);
87
+ //println!("num_genes bulk:{}", num_genes);
86
88
 
87
- let now_gene_names = Instant::now();
89
+ //let now_gene_names = Instant::now();
88
90
  let ds_gene_names = file.dataset("gene_names").unwrap();
89
- println!("ds_gene_names:{:?}", ds_gene_names);
91
+ //println!("ds_gene_names:{:?}", ds_gene_names);
90
92
  let gene_names = ds_gene_names
91
93
  .read::<VarLenAscii, Dim<[usize; 1]>>()
92
94
  .unwrap();
93
- println!("\tgene_names = {:?}", gene_names);
94
- println!("\tgene_names.shape() = {:?}", gene_names.shape());
95
- println!("\tgene_names.strides() = {:?}", gene_names.strides());
96
- println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
97
- println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
95
+ //println!("\tgene_names = {:?}", gene_names);
96
+ //println!("\tgene_names.shape() = {:?}", gene_names.shape());
97
+ //println!("\tgene_names.strides() = {:?}", gene_names.strides());
98
+ //println!("\tgene_names.ndim() = {:?}", gene_names.ndim());
99
+ //println!("Time for parsing gene names:{:?}", now_gene_names.elapsed());
98
100
 
99
- let now_gene_symbols = Instant::now();
101
+ //let now_gene_symbols = Instant::now();
100
102
  let ds_gene_symbols = file.dataset("gene_symbols").unwrap();
101
- println!("ds_gene_symbols:{:?}", ds_gene_symbols);
103
+ //println!("ds_gene_symbols:{:?}", ds_gene_symbols);
102
104
  let gene_symbols = ds_gene_symbols
103
105
  .read::<VarLenAscii, Dim<[usize; 1]>>()
104
106
  .unwrap();
105
- println!("\tgene_symbols = {:?}", gene_symbols);
106
- println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
107
- println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
108
- println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
109
- println!(
110
- "Time for parsing gene symbols:{:?}",
111
- now_gene_symbols.elapsed()
112
- );
107
+ //println!("\tgene_symbols = {:?}", gene_symbols);
108
+ //println!("\tgene_symbols.shape() = {:?}", gene_symbols.shape());
109
+ //println!("\tgene_symbols.strides() = {:?}", gene_symbols.strides());
110
+ //println!("\tgene_symbols.ndim() = {:?}", gene_symbols.ndim());
111
+ //println!(
112
+ // "Time for parsing gene symbols:{:?}",
113
+ // now_gene_symbols.elapsed()
114
+ //);
113
115
 
114
116
  let mut gene_names_string: Vec<String> = Vec::with_capacity(gene_names.len());
115
117
  let mut gene_symbols_string: Vec<String> = Vec::with_capacity(gene_symbols.len());
@@ -118,17 +120,17 @@ fn input_data_from_HDF5(
118
120
  gene_symbols_string.push(gene_symbols[i].to_string());
119
121
  }
120
122
 
121
- let now_samples = Instant::now();
123
+ //let now_samples = Instant::now();
122
124
  let ds_samples = file.dataset("samples").unwrap();
123
125
  let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
124
- println!("\tsamples = {:?}", samples);
125
- println!("\tsamples.shape() = {:?}", samples.shape());
126
- println!("\tsamples.strides() = {:?}", samples.strides());
127
- println!("\tsamples.ndim() = {:?}", samples.ndim());
128
- println!("Time for parsing samples:{:?}", now_samples.elapsed());
126
+ //println!("\tsamples = {:?}", samples);
127
+ //println!("\tsamples.shape() = {:?}", samples.shape());
128
+ //println!("\tsamples.strides() = {:?}", samples.strides());
129
+ //println!("\tsamples.ndim() = {:?}", samples.ndim());
130
+ //println!("Time for parsing samples:{:?}", now_samples.elapsed());
129
131
 
130
132
  //Find all columns values that are populated for the given gene
131
- let now_counts = Instant::now();
133
+ //let now_counts = Instant::now();
132
134
  let ds_counts = file.dataset("counts").unwrap(); // open the dataset
133
135
 
134
136
  let mut global_sample_index = 0;
@@ -189,7 +191,7 @@ fn input_data_from_HDF5(
189
191
  global_sample_index += 1;
190
192
  }
191
193
 
192
- println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
194
+ //println!("Time for parsing HDF5 data:{:?}", now_counts.elapsed());
193
195
  //println!(
194
196
  // "case + control length:{}",
195
197
  // case_list.len() + control_list.len()
@@ -221,7 +223,7 @@ fn input_data_from_text(
221
223
  Vec<String>,
222
224
  Vec<String>,
223
225
  ) {
224
- let input_time = Instant::now();
226
+ //let input_time = Instant::now();
225
227
  let mut file = File::open(filename).unwrap();
226
228
  let mut num_lines: usize = 0;
227
229
  let mut input_vector: Vec<f64> = Vec::with_capacity(500 * 65000);
@@ -350,7 +352,7 @@ fn input_data_from_text(
350
352
  let genes_symbols_temp = Arc::new(Mutex::new(Vec::<String>::new()));
351
353
  let input_vector_temp = Arc::new(Mutex::new(Vec::<f64>::new()));
352
354
  let mut handles = vec![]; // Vector to store handle which is used to prevent one thread going ahead of another
353
- println!("Number of threads used:{}", max_threads);
355
+ //println!("Number of threads used:{}", max_threads);
354
356
  for thread_num in 0..max_threads {
355
357
  let case_indexes_original = Arc::clone(&case_indexes_original);
356
358
  let control_indexes_original = Arc::clone(&control_indexes_original);
@@ -485,7 +487,7 @@ fn input_data_from_text(
485
487
  //println!("num_columns:{}", num_columns);
486
488
  //println!("num_lines * num_columns:{}", num_lines * num_columns);
487
489
  //println!("input_vector:{:?}", input_vector.len());
488
- println!("Time for inputting data:{:?}", input_time.elapsed());
490
+ //println!("Time for inputting data:{:?}", input_time.elapsed());
489
491
  let dm = DMatrix::from_row_slice(num_lines, num_columns, &input_vector);
490
492
  //println!("dm:{:?}", dm);
491
493
  (dm, case_indexes, control_indexes, gene_names, gene_symbols)
@@ -513,14 +515,15 @@ struct PValueIndexes {
513
515
  // Used to get the sample names from HDF5 file at PP server startup
514
516
  fn get_DE_samples(hdf5_filename: &String) {
515
517
  let file = HDF5File::open(&hdf5_filename).unwrap(); // open for reading
516
- let now_samples = Instant::now();
518
+
519
+ //let now_samples = Instant::now();
517
520
  let ds_samples = file.dataset("samples").unwrap();
518
521
  let samples = ds_samples.read::<VarLenAscii, Dim<[usize; 1]>>().unwrap();
519
- println!("\tsamples = {:?}", samples);
520
- println!("\tsamples.shape() = {:?}", samples.shape());
521
- println!("\tsamples.strides() = {:?}", samples.strides());
522
- println!("\tsamples.ndim() = {:?}", samples.ndim());
523
- println!("Time for parsing samples:{:?}", now_samples.elapsed());
522
+ //println!("\tsamples = {:?}", samples);
523
+ //println!("\tsamples.shape() = {:?}", samples.shape());
524
+ //println!("\tsamples.strides() = {:?}", samples.strides());
525
+ //println!("\tsamples.ndim() = {:?}", samples.ndim());
526
+ //println!("Time for parsing samples:{:?}", now_samples.elapsed());
524
527
 
525
528
  let mut output_string = "".to_string();
526
529
  for i in 0..samples.len() {
@@ -544,7 +547,7 @@ fn get_DE_samples(hdf5_filename: &String) {
544
547
  output_string += &",";
545
548
  }
546
549
  }
547
- println!("output_string:{}", output_string);
550
+ println!("{}", output_string);
548
551
  }
549
552
 
550
553
  fn main() {
@@ -559,7 +562,7 @@ fn main() {
559
562
  let input_json = json::parse(&input);
560
563
  match input_json {
561
564
  Ok(json_string) => {
562
- let now = Instant::now();
565
+ //let now = Instant::now();
563
566
  let file_name = &json_string["input_file"]
564
567
  .to_owned()
565
568
  .as_str()
@@ -567,7 +570,7 @@ fn main() {
567
570
  .to_string()
568
571
  .split(",")
569
572
  .collect();
570
- println!("file_name:{}", file_name);
573
+ //println!("file_name:{}", file_name);
571
574
  let data_type_option = json_string["data_type"].as_str().to_owned();
572
575
  match data_type_option {
573
576
  Some(x) => {
@@ -643,7 +646,7 @@ fn main() {
643
646
  gene_symbols,
644
647
  ) = input_data_from_HDF5(file_name, &case_list, &control_list);
645
648
  }
646
- let filtering_time = Instant::now();
649
+ //let filtering_time = Instant::now();
647
650
  let (
648
651
  filtered_matrix,
649
652
  lib_sizes,
@@ -658,21 +661,21 @@ fn main() {
658
661
  gene_names,
659
662
  gene_symbols,
660
663
  );
661
- println!("filtering time:{:?}", filtering_time.elapsed());
664
+ //println!("filtering time:{:?}", filtering_time.elapsed());
662
665
  //println!("filtered_matrix_rows:{:?}", filtered_matrix.nrows());
663
666
  //println!("filtered_matrix_cols:{:?}", filtered_matrix.ncols());
664
- let cpm_normalization_time = Instant::now();
667
+ //let cpm_normalization_time = Instant::now();
665
668
  let mut normalized_matrix = cpm(&filtered_matrix);
666
- println!(
667
- "cpm normalization time:{:?}",
668
- cpm_normalization_time.elapsed()
669
- );
670
- let tmm_normalization_time = Instant::now();
669
+ //println!(
670
+ // "cpm normalization time:{:?}",
671
+ // cpm_normalization_time.elapsed()
672
+ //);
673
+ //let tmm_normalization_time = Instant::now();
671
674
  let norm_factors = tmm_normalization(filtered_matrix, &lib_sizes);
672
- println!(
673
- "tmm normalization time:{:?}",
674
- tmm_normalization_time.elapsed()
675
- );
675
+ //println!(
676
+ // "tmm normalization time:{:?}",
677
+ // tmm_normalization_time.elapsed()
678
+ //);
676
679
  //println!("norm_factors:{:?}", norm_factors);
677
680
 
678
681
  for col in 0..normalized_matrix.ncols() {
@@ -683,19 +686,19 @@ fn main() {
683
686
  }
684
687
  }
685
688
  //println!("normalized_matrix:{:?}", normalized_matrix);
686
- println!("Number of cases:{}", case_list.len());
687
- println!("Number of controls:{}", control_list.len());
688
- println!("Time for pre-processing:{:?}", now.elapsed());
689
+ //println!("Number of cases:{}", case_list.len());
690
+ //println!("Number of controls:{}", control_list.len());
691
+ //println!("Time for pre-processing:{:?}", now.elapsed());
689
692
  // Using Wilcoxon test for differential gene expression
690
693
 
691
- let now2 = Instant::now();
694
+ //let now2 = Instant::now();
692
695
  let mut p_values: Vec<PValueIndexes> =
693
696
  Vec::with_capacity(normalized_matrix.nrows());
694
697
  const THRESHOLD: usize = 50; // This determines whether the Wilcoxon exact test or the normal test will be used based on sample size.
695
698
 
696
699
  //println!("case_indexes:{:?}", case_indexes);
697
700
  //println!("control_indexes:{:?}", control_indexes);
698
- let num_normalized_rows = normalized_matrix.nrows();
701
+ //let num_normalized_rows = normalized_matrix.nrows();
699
702
  if normalized_matrix.nrows() * normalized_matrix.ncols()
700
703
  < PAR_CUTOFF
701
704
  {
@@ -857,13 +860,13 @@ fn main() {
857
860
  p_values.append(&mut *p_values_temp.lock().unwrap());
858
861
  }
859
862
  //println!("p_values:{:?}", p_values);
860
- println!(
861
- "Time for running {} wilcoxon tests:{:?}",
862
- num_normalized_rows,
863
- now2.elapsed()
864
- );
863
+ //println!(
864
+ // "Time for running {} wilcoxon tests:{:?}",
865
+ // num_normalized_rows,
866
+ // now2.elapsed()
867
+ //);
865
868
  let adjusted_p_values = adjust_p_values(p_values);
866
- println!("adjusted_p_values:{}", adjusted_p_values);
869
+ println!("{}", adjusted_p_values);
867
870
  //let fold_changes =
868
871
  // calculate_fold_change(normalized_matrix, case_indexes, control_indexes);
869
872
  }
@@ -873,10 +876,10 @@ fn main() {
873
876
  }
874
877
  }
875
878
  }
876
- Err(error) => println!("Incorrect json: {}", error),
879
+ Err(error) => panic!("Incorrect json: {}", error),
877
880
  }
878
881
  }
879
- Err(error) => println!("Piping error: {}", error),
882
+ Err(error) => panic!("Piping error: {}", error),
880
883
  }
881
884
  }
882
885
 
@@ -1321,7 +1324,7 @@ fn filter_by_expr(
1321
1324
  positives.push(row);
1322
1325
  }
1323
1326
  }
1324
- println!("positives length:{}", positives.len());
1327
+ //println!("positives length:{}", positives.len());
1325
1328
  //println!("row_sums:{:?}", row_sums);
1326
1329
  //println!("keep_cpm:{:?}", keep_cpm);
1327
1330
  //println!("positive_cpm:{}", positive_cpm);
@@ -1337,8 +1340,8 @@ fn filter_by_expr(
1337
1340
  let mut filtered_genes: Vec<String> = Vec::with_capacity(positives.len());
1338
1341
  let mut filtered_gene_symbols: Vec<String> = Vec::with_capacity(positives.len());
1339
1342
  let mut i = 0;
1340
- println!("filtered_matrix rows:{}", filtered_matrix.nrows());
1341
- println!("filtered_matrix cols:{}", filtered_matrix.ncols());
1343
+ //println!("filtered_matrix rows:{}", filtered_matrix.nrows());
1344
+ //println!("filtered_matrix cols:{}", filtered_matrix.ncols());
1342
1345
  for index in positives {
1343
1346
  let row = raw_data.row(index);
1344
1347
  filtered_genes.push(gene_names[index].to_owned());