@sjcrh/proteinpaint-rust 2.122.0 → 2.124.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/readHDF5.rs CHANGED
@@ -1,9 +1,9 @@
1
1
  //------------------------------------------------------------------------------
2
2
  // readHDF5.rs - HDF5 Gene Expression Data Reader
3
3
  //------------------------------------------------------------------------------
4
- //
4
+ //
5
5
  // Extracts gene expression values from HDF5 files in dense or sparse formats.
6
- // Supports single genes with memory optimization and multiple genes with
6
+ // Supports single genes with memory optimization and multiple genes with
7
7
  // parallel processing.
8
8
  //
9
9
  // Features:
@@ -12,8 +12,8 @@
12
12
  // - Parallel processing for multiple genes
13
13
  // - JSON output with timing metrics
14
14
  //
15
- // Usage:
16
- // HDF5_DIR=/usr/local/Homebrew/Cellar/hdf5/1.14.3_1 &&
15
+ // Usage:
16
+ // HDF5_DIR=/usr/local/Homebrew/Cellar/hdf5/1.14.3_1 &&
17
17
  // echo '{"gene":"TP53","hdf5_file":"matrix.h5"}' | target/release/readHDF5
18
18
  //------------------------------------------------------------------------------
19
19
  use hdf5::types::{FixedAscii, VarLenAscii};
@@ -259,11 +259,7 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
259
259
  // Create direct key-value pairs where sample names are the keys
260
260
  for i in 0..gene_expression.len() {
261
261
  // Add each sample name as a key pointing directly to its expression value
262
- output_string += &format!(
263
- "\"{}\":{}",
264
- samples[i].to_string(),
265
- gene_expression[i].to_string()
266
- );
262
+ output_string += &format!("\"{}\":{}", samples[i].to_string(), gene_expression[i].to_string());
267
263
 
268
264
  // Add comma if not the last item
269
265
  if i < gene_expression.len() - 1 {
@@ -296,10 +292,7 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
296
292
  Value::Null
297
293
  };
298
294
 
299
- samples_map.insert(
300
- sample.replace("\\", ""),
301
- value,
302
- );
295
+ samples_map.insert(sample.replace("\\", ""), value);
303
296
  }
304
297
  }
305
298
 
@@ -317,7 +310,7 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
317
310
 
318
311
  /// Reads expression data for a specific gene from a sparse format HDF5 file
319
312
  ///
320
- /// Extracts expression values from sparse matrix HDF5 files using Compressed
313
+ /// Extracts expression values from sparse matrix HDF5 files using Compressed
321
314
  /// Sparse Column (CSC) structure.
322
315
  ///
323
316
  /// # Arguments
@@ -391,15 +384,13 @@ fn query_gene_sparse(hdf5_filename: String, gene_name: String) -> Result<()> {
391
384
  // Find all columns indices that are populated for the given gene
392
385
  let now_i = Instant::now();
393
386
  let ds_i = file.dataset("data/i")?;
394
- let populated_column_ids: Array1<usize> =
395
- ds_i.read_slice_1d(array_start_point..array_stop_point)?;
387
+ let populated_column_ids: Array1<usize> = ds_i.read_slice_1d(array_start_point..array_stop_point)?;
396
388
  println!("Time for i dataset:{:?}", now_i.elapsed());
397
389
 
398
390
  // Find all columns values that are populated for the given gene
399
391
  let now_x = Instant::now();
400
392
  let ds_x = file.dataset("data/x")?;
401
- let populated_column_values: Array1<f64> =
402
- ds_x.read_slice_1d(array_start_point..array_stop_point)?;
393
+ let populated_column_values: Array1<f64> = ds_x.read_slice_1d(array_start_point..array_stop_point)?;
403
394
  println!("Time for x dataset:{:?}", now_x.elapsed());
404
395
 
405
396
  // Generate the complete array from the sparse array
@@ -425,10 +416,7 @@ fn query_gene_sparse(hdf5_filename: String, gene_name: String) -> Result<()> {
425
416
  }
426
417
  output_string += &"}".to_string();
427
418
 
428
- println!(
429
- "Time generating full array:{:?}",
430
- time_generating_full_array.elapsed()
431
- );
419
+ println!("Time generating full array:{:?}", time_generating_full_array.elapsed());
432
420
  println!("output_string:{}", output_string);
433
421
 
434
422
  Ok(())
@@ -465,7 +453,6 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
465
453
  }
466
454
  };
467
455
 
468
-
469
456
  let genes_dataset = match file.dataset("gene_ids") {
470
457
  Ok(ds) => ds,
471
458
  Err(err) => {
@@ -495,7 +482,7 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
495
482
  };
496
483
 
497
484
  let genes: Vec<String> = genes_varlen.iter().map(|g| g.to_string()).collect();
498
-
485
+
499
486
  // Only create HashMap for multiple gene queries
500
487
  let gene_to_index: Option<std::collections::HashMap<String, usize>> = if gene_names.len() > 1 {
501
488
  let hashmap_start_time = Instant::now();
@@ -504,8 +491,8 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
504
491
  map.insert(gene.clone(), idx);
505
492
  }
506
493
  timings.insert(
507
- "build_hashmap_ms".to_string(),
508
- Value::from(hashmap_start_time.elapsed().as_millis() as u64)
494
+ "build_hashmap_ms".to_string(),
495
+ Value::from(hashmap_start_time.elapsed().as_millis() as u64),
509
496
  );
510
497
  Some(map)
511
498
  } else {
@@ -586,14 +573,11 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
586
573
  };
587
574
 
588
575
  // Configurable thread count for testing
589
- let thread_count = 2;
576
+ let thread_count = 2;
590
577
  timings.insert("thread_count".to_string(), Value::from(thread_count));
591
578
 
592
579
  // Create a scoped thread pool with specified number of threads
593
- match rayon::ThreadPoolBuilder::new()
594
- .num_threads(thread_count)
595
- .build()
596
- {
580
+ match rayon::ThreadPoolBuilder::new().num_threads(thread_count).build() {
597
581
  Ok(pool) => {
598
582
  // Use the pool for this specific work
599
583
  pool.install(|| {
@@ -650,26 +634,20 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
650
634
  genes_map.insert(gene_name.clone(), gene_data);
651
635
  } else {
652
636
  // Fallback to per-gene reading if bulk load failed
653
- match counts_dataset
654
- .read_slice_1d::<f64, _>(s![gene_index, ..])
655
- {
637
+ match counts_dataset.read_slice_1d::<f64, _>(s![gene_index, ..]) {
656
638
  Ok(gene_expression) => {
657
639
  // Create samples map for this gene
658
640
  let mut samples_map = Map::new();
659
641
  for (i, sample) in samples.iter().enumerate() {
660
642
  if i < gene_expression.len() {
661
643
  // Handle potential NaN or infinity values
662
- let value =
663
- if gene_expression[i].is_finite() {
664
- Value::from(gene_expression[i])
665
- } else {
666
- Value::Null
667
- };
668
-
669
- samples_map.insert(
670
- sample.replace("\\", ""),
671
- value,
672
- );
644
+ let value = if gene_expression[i].is_finite() {
645
+ Value::from(gene_expression[i])
646
+ } else {
647
+ Value::Null
648
+ };
649
+
650
+ samples_map.insert(sample.replace("\\", ""), value);
673
651
  }
674
652
  }
675
653
 
@@ -693,10 +671,7 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
693
671
  );
694
672
 
695
673
  let mut genes_map = genes_map.lock().unwrap();
696
- genes_map.insert(
697
- gene_name.clone(),
698
- Value::Object(error_map),
699
- );
674
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
700
675
  }
701
676
  }
702
677
  }
@@ -736,7 +711,7 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
736
711
  &counts_dataset,
737
712
  &all_gene_data,
738
713
  &samples,
739
- &genes_map
714
+ &genes_map,
740
715
  );
741
716
  }
742
717
  }
@@ -758,7 +733,6 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
758
733
  // Read just this single gene's data directly
759
734
  match counts_dataset.read_slice_1d::<f64, _>(s![gene_index, ..]) {
760
735
  Ok(gene_expression) => {
761
-
762
736
  // Create samples map for this gene
763
737
  let mut samples_map = Map::new();
764
738
  for (i, sample) in samples.iter().enumerate() {
@@ -786,10 +760,7 @@ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) ->
786
760
  let mut error_map = Map::new();
787
761
  error_map.insert(
788
762
  "error".to_string(),
789
- Value::String(format!(
790
- "Failed to read expression values: {:?}",
791
- err
792
- )),
763
+ Value::String(format!("Failed to read expression values: {:?}", err)),
793
764
  );
794
765
 
795
766
  let mut genes_map = genes_map.lock().unwrap();
@@ -833,7 +804,7 @@ fn process_genes_sequentially(
833
804
  counts_dataset: &hdf5::Dataset,
834
805
  all_gene_data: &Option<ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 2]>>>,
835
806
  samples: &Vec<String>,
836
- genes_map: &Arc<std::sync::Mutex<Map<String, Value>>>
807
+ genes_map: &Arc<std::sync::Mutex<Map<String, Value>>>,
837
808
  ) {
838
809
  for gene_name in gene_names {
839
810
  // Find the index of the requested gene, using HashMap if available
@@ -911,10 +882,7 @@ fn process_genes_sequentially(
911
882
  let mut error_map = Map::new();
912
883
  error_map.insert(
913
884
  "error".to_string(),
914
- Value::String(format!(
915
- "Failed to read expression values: {:?}",
916
- err1
917
- )),
885
+ Value::String(format!("Failed to read expression values: {:?}", err1)),
918
886
  );
919
887
 
920
888
  let mut genes_map = genes_map.lock().unwrap();
@@ -935,7 +903,6 @@ fn process_genes_sequentially(
935
903
  genes_map.insert(gene_name.clone(), Value::Object(error_map));
936
904
  }
937
905
  }
938
-
939
906
  }
940
907
  }
941
908
  /// Queries expression data for multiple genes from a sparse format HDF5 file
@@ -1006,7 +973,6 @@ fn query_multiple_genes_sparse(hdf5_filename: String, gene_names: Vec<String>) -
1006
973
  let num_threads = num_cpus::get();
1007
974
  timings.insert("num_threads".to_string(), Value::from(num_threads as u64));
1008
975
 
1009
-
1010
976
  // Thread-safe maps for results
1011
977
  let genes_map = Arc::new(std::sync::Mutex::new(Map::new()));
1012
978
  let gene_timings = Arc::new(std::sync::Mutex::new(Map::new()));
@@ -1041,8 +1007,7 @@ fn query_multiple_genes_sparse(hdf5_filename: String, gene_names: Vec<String>) -
1041
1007
  // Read data for this gene
1042
1008
  match ds_i.read_slice_1d::<usize, _>(array_start_point..array_stop_point) {
1043
1009
  Ok(populated_column_ids) => {
1044
- match ds_x.read_slice_1d::<f64, _>(array_start_point..array_stop_point)
1045
- {
1010
+ match ds_x.read_slice_1d::<f64, _>(array_start_point..array_stop_point) {
1046
1011
  Ok(populated_column_values) => {
1047
1012
  // Generate the complete array from sparse representation
1048
1013
  let mut gene_array: Array1<f64> = Array1::zeros(num_samples);
@@ -1061,8 +1026,7 @@ fn query_multiple_genes_sparse(hdf5_filename: String, gene_names: Vec<String>) -
1061
1026
  Value::Null
1062
1027
  };
1063
1028
 
1064
- samples_map
1065
- .insert(sample.to_string().replace("\\", ""), value);
1029
+ samples_map.insert(sample.to_string().replace("\\", ""), value);
1066
1030
  }
1067
1031
 
1068
1032
  let gene_data = json!({
@@ -1077,10 +1041,7 @@ fn query_multiple_genes_sparse(hdf5_filename: String, gene_names: Vec<String>) -
1077
1041
  let mut error_map = Map::new();
1078
1042
  error_map.insert(
1079
1043
  "error".to_string(),
1080
- Value::String(format!(
1081
- "Failed to read x dataset: {:?}",
1082
- err
1083
- )),
1044
+ Value::String(format!("Failed to read x dataset: {:?}", err)),
1084
1045
  );
1085
1046
 
1086
1047
  let mut genes_map = genes_map.lock().unwrap();
package/src/test.rs ADDED
@@ -0,0 +1,3 @@
1
+ fn main() {
2
+ println!("Hello, world!");
3
+ }