@sjcrh/proteinpaint-rust 2.116.0 → 2.119.0-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/readHDF5.rs CHANGED
@@ -1,76 +1,43 @@
1
- // Need to set HDF5_DIR and LD_LIBRARY_PATH in ~/.bash_profile
2
- // Syntax: HDF5_DIR=/usr/local/Homebrew/Cellar/hdf5/1.14.3_1 && echo $HDF5_DIR && cd .. && cargo build --release && json='{"gene":"TP53","hdf5_file":"matrix_with_na_comp_9.h5"}' && time echo $json | target/release/rust_hdf5
3
-
4
- // Imports
1
+ //------------------------------------------------------------------------------
2
+ // readHDF5.rs - HDF5 Gene Expression Data Reader
3
+ //------------------------------------------------------------------------------
4
+ //
5
+ // Extracts gene expression values from HDF5 files in dense or sparse formats.
6
+ // Supports single genes with memory optimization and multiple genes with
7
+ // parallel processing.
8
+ //
9
+ // Features:
10
+ // - Auto format detection (dense/sparse)
11
+ // - Optimized single and multi-gene queries
12
+ // - Parallel processing for multiple genes
13
+ // - JSON output with timing metrics
14
+ //
15
+ // Usage:
16
+ // HDF5_DIR=/usr/local/Homebrew/Cellar/hdf5/1.14.3_1 &&
17
+ // echo '{"gene":"TP53","hdf5_file":"matrix.h5"}' | target/release/readHDF5
18
+ //------------------------------------------------------------------------------
5
19
  use hdf5::types::{FixedAscii, VarLenAscii};
6
20
  use hdf5::{File, Result};
7
21
  use ndarray::Dim;
8
22
  use ndarray::{Array1, s};
23
+ use rayon::prelude::*;
9
24
  use serde_json::{Map, Value, json};
10
25
  use std::io;
26
+ use std::sync::Arc;
11
27
  use std::time::Instant;
12
28
 
13
29
  /// Determines the format of an HDF5 gene expression file
14
30
  ///
15
- /// This function examines the structure of an HDF5 file to determine its format.
16
- /// It detects whether the file uses a dense matrix representation, a sparse matrix
17
- /// representation, or an unknown format by checking for the presence of specific
18
- /// datasets and groups.
19
- ///
20
- /// # HDF5 Format Specifications
21
- ///
22
- /// The function identifies the following formats:
23
- ///
24
- /// - **Dense format**:
25
- /// - Contains a "counts" dataset (2D matrix of gene expression values)
26
- /// - Contains a "gene_names" dataset (gene identifiers)
27
- /// - Contains a "samples" dataset (sample identifiers)
28
- ///
29
- /// - **Sparse format**:
30
- /// - Contains a "data" group with sparse matrix components
31
- /// - Contains a "sample_names" dataset
32
- ///
33
- /// - **Unknown format**:
34
- /// - Does not match either the dense or sparse format criteria
31
+ /// Examines the structure of an HDF5 file to detect its format:
32
+ /// - "dense": Contains "counts", "gene_names", and "samples" datasets
33
+ /// - "sparse": Contains "data" group and "sample_names" dataset
34
+ /// - "unknown": Does not match either format
35
35
  ///
36
36
  /// # Arguments
37
- ///
38
37
  /// * `hdf5_filename` - Path to the HDF5 file to analyze
39
38
  ///
40
39
  /// # Returns
41
- ///
42
- /// A result containing one of the following static string values:
43
- /// - `"dense"` - If the file is in dense matrix format
44
- /// - `"sparse"` - If the file is in sparse matrix format
45
- /// - `"unknown"` - If the file format cannot be determined
46
- ///
47
- /// # Errors
48
- ///
49
- /// This function will return an error if:
50
- /// - The file cannot be opened
51
- /// - The file is not a valid HDF5 file
52
- ///
53
- /// # Algorithm
54
- ///
55
- /// The detection algorithm works by checking for the presence of specific datasets
56
- /// and groups that are characteristic of each format:
57
- ///
58
- /// 1. Opens the HDF5 file
59
- /// 2. Checks for datasets/groups that indicate dense format
60
- /// 3. Checks for datasets/groups that indicate sparse format
61
- /// 4. Returns the detected format or "unknown"
62
- ///
63
- /// # Examples
64
- ///
65
- /// ```rust
66
- /// // Example usage (not runnable)
67
- /// match detect_hdf5_format("expression_data.h5") {
68
- /// Ok("dense") => println!("Dense format detected"),
69
- /// Ok("sparse") => println!("Sparse format detected"),
70
- /// Ok("unknown") => println!("Unknown format detected"),
71
- /// Err(e) => println!("Error: {}", e),
72
- /// }
73
- /// ```
40
+ /// The detected format as a static string: "dense", "sparse", or "unknown"
74
41
  fn detect_hdf5_format(hdf5_filename: &str) -> Result<&'static str> {
75
42
  let file = File::open(hdf5_filename)?;
76
43
 
@@ -95,63 +62,16 @@ fn detect_hdf5_format(hdf5_filename: &str) -> Result<&'static str> {
95
62
  }
96
63
  }
97
64
 
98
- /// Unified function for querying gene expression data from any supported HDF5 file format
65
+ /// Unified function for querying gene expression data from an HDF5 file
99
66
  ///
100
- /// This function serves as the central entry point for extracting expression values for a specified gene
101
- /// from an HDF5 file. It automatically detects the format of the provided file (dense or sparse)
102
- /// and routes the query to the appropriate specialized handler function.
103
- ///
104
- /// # Supported HDF5 Formats
105
- ///
106
- /// - **Dense format**: Contains explicit "gene_ids", "samples", and "counts" datasets where
107
- /// the expression matrix is stored as a direct 2D array
108
- /// - **Sparse format**: Contains a "data" group with "p", "i", "x" datasets using the
109
- /// Compressed Sparse Column (CSC) representation for the expression matrix
67
+ /// Automatically detects file format (dense or sparse) and routes to the appropriate handler.
110
68
  ///
111
69
  /// # Arguments
112
- ///
113
70
  /// * `hdf5_filename` - Path to the HDF5 file containing gene expression data
114
71
  /// * `gene_name` - Name of the gene whose expression values to extract
115
72
  ///
116
73
  /// # Returns
117
- ///
118
- /// A result indicating success or error. On success, the function prints the gene
119
- /// expression data in JSON format to stdout for dense matrix HDF5 files. For spare matrix files it
120
- /// sends the expression data in JSON format with "output_string:" prefix to stdout.
121
- ///
122
- /// # Example Output Format
123
- ///
124
- /// ```json
125
- /// {
126
- /// "gene": "TP53",
127
- /// "dataId": "TP53",
128
- /// "samples": {
129
- /// "sample1": 10.5,
130
- /// "sample2": 8.2,
131
- /// "sample3": 15.7
132
- /// }
133
- /// }
134
- /// ```
135
- ///
136
- /// # Error Handling
137
- ///
138
- /// The function handles several types of errors:
139
- /// - File format detection failures
140
- /// - Unsupported or unknown file formats
141
- /// - Errors from the format-specific query functions
142
- ///
143
- /// When an error occurs, the function returns a structured JSON error message.
144
- ///
145
- /// # Processing Flow
146
- ///
147
- /// 1. Detects the format of the HDF5 file using `detect_hdf5_format`
148
- /// 2. Routes to the appropriate specialized function:
149
- /// - `query_gene_dense` for dense matrix files
150
- /// - `query_gene_sparse` for sparse matrix files
151
- /// 3. Returns an error for unsupported formats
152
- ///
153
- /// This unified approach allows client code to work with either format without needing
154
- /// to know the specific structure of the underlying HDF5 file.
74
+ /// Outputs gene expression data in JSON format to stdout
155
75
  fn query_gene(hdf5_filename: String, gene_name: String) -> Result<()> {
156
76
  // First, detect the file format
157
77
  let file_format = detect_hdf5_format(&hdf5_filename)?;
@@ -161,7 +81,6 @@ fn query_gene(hdf5_filename: String, gene_name: String) -> Result<()> {
161
81
  "dense" => query_gene_dense(hdf5_filename, gene_name),
162
82
  "sparse" => query_gene_sparse(hdf5_filename, gene_name),
163
83
  _ => {
164
- // For unknown format, return an error
165
84
  println!(
166
85
  "{}",
167
86
  serde_json::json!({
@@ -179,62 +98,18 @@ fn query_gene(hdf5_filename: String, gene_name: String) -> Result<()> {
179
98
 
180
99
  /// Reads expression data for a specific gene from a dense format HDF5 file
181
100
  ///
182
- /// This function extracts expression values for a specified gene from an HDF5 file
183
- /// that follows the dense matrix format. The dense format is characterized by:
184
- /// - A "gene_ids" dataset containing gene identifiers
185
- /// - A "samples" dataset containing sample identifiers
186
- /// - A "counts" dataset containing a gene × sample expression matrix
187
- ///
188
- /// The function returns the expression values in a JSON format where sample names
189
- /// are keys and their corresponding expression values are the values.
101
+ /// Dense format contains "gene_ids", "samples", and "counts" datasets.
190
102
  ///
191
103
  /// # Arguments
192
- ///
193
104
  /// * `hdf5_filename` - Path to the HDF5 file
194
105
  /// * `gene_name` - Name of the gene to query
195
106
  ///
196
107
  /// # Returns
197
- ///
198
- /// A result indicating success or error. On success, the function prints the gene
199
- /// expression data in JSON format to stdout.
200
- ///
201
- /// # Output Format
202
- ///
203
- /// ```json
204
- /// {
205
- /// "gene": "GENE_NAME",
206
- /// "dataId": "GENE_NAME",
207
- /// "samples": {
208
- /// "SAMPLE1": VALUE1,
209
- /// "SAMPLE2": VALUE2,
210
- /// ...
211
- /// }
212
- /// }
213
- /// ```
108
+ /// Prints gene expression data in JSON format to stdout
214
109
  ///
215
110
  /// # Error Handling
216
- ///
217
- /// The function handles several potential errors:
218
- /// - File opening errors
219
- /// - Missing or inaccessible datasets ("gene_ids", "samples", "counts")
220
- /// - Gene not found in the dataset
221
- /// - Out of bounds gene index
222
- /// - Expression data reading failures
223
- ///
224
- /// If an error occurs, the function returns an explanatory error message in JSON format.
225
- ///
226
- /// # Reading Strategy
227
- ///
228
- /// The function tries two methods to read expression data:
229
- /// 1. First attempts to read a 1D slice directly from the counts dataset
230
- /// 2. If that fails, tries reading the entire dataset and extracting the row of interest
231
- ///
232
- /// This dual approach ensures compatibility with different HDF5 library implementations
233
- /// and dataset configurations.
111
+ /// Handles file access issues, missing datasets, and gene not found scenarios
234
112
  fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
235
- // let start_time = Instant::now();
236
-
237
- // Open the HDF5 file
238
113
  let file = match File::open(hdf5_filename) {
239
114
  Ok(f) => f,
240
115
  Err(err) => {
@@ -249,7 +124,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
249
124
  }
250
125
  };
251
126
 
252
- // Read gene ids using VarLenAscii
253
127
  let genes_dataset = match file.dataset("gene_ids") {
254
128
  Ok(ds) => ds,
255
129
  Err(err) => {
@@ -264,7 +138,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
264
138
  }
265
139
  };
266
140
 
267
- // Read genes as VarLenAscii
268
141
  let genes_varlen = match genes_dataset.read_1d::<VarLenAscii>() {
269
142
  Ok(g) => g,
270
143
  Err(err) => {
@@ -282,7 +155,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
282
155
  // Convert to Vec<String> for easier handling
283
156
  let genes: Vec<String> = genes_varlen.iter().map(|g| g.to_string()).collect();
284
157
 
285
- // Read sample names using VarLenAscii
286
158
  let samples_dataset = match file.dataset("samples") {
287
159
  Ok(ds) => ds,
288
160
  Err(err) => {
@@ -297,7 +169,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
297
169
  }
298
170
  };
299
171
 
300
- // Read samples as VarLenAscii
301
172
  let samples_varlen = match samples_dataset.read_1d::<VarLenAscii>() {
302
173
  Ok(s) => s,
303
174
  Err(err) => {
@@ -330,7 +201,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
330
201
  }
331
202
  };
332
203
 
333
- // Read the expression data for the gene
334
204
  let counts_dataset = match file.dataset("counts") {
335
205
  Ok(ds) => ds,
336
206
  Err(err) => {
@@ -345,7 +215,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
345
215
  }
346
216
  };
347
217
 
348
- // Make sure the gene index is valid for this dataset
349
218
  if gene_index >= counts_dataset.shape()[0] {
350
219
  println!(
351
220
  "{}",
@@ -357,7 +226,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
357
226
  return Ok(());
358
227
  }
359
228
 
360
- // Try to read the expression data
361
229
  let gene_expression: Array1<f64>;
362
230
 
363
231
  // Method 1: Try to read a 1D slice directly (for 2D datasets)
@@ -367,8 +235,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
367
235
  }
368
236
  Err(err1) => {
369
237
  // Method 2: Try a different approach
370
-
371
- // First get the dimensions
372
238
  let dataset_shape = counts_dataset.shape();
373
239
  if dataset_shape.len() != 2 {
374
240
  println!(
@@ -388,7 +254,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
388
254
  let row = all_data.slice(s![gene_index, ..]).to_owned();
389
255
  gene_expression = row;
390
256
 
391
- // Start building a flatter JSON structure
392
257
  let mut output_string = String::from("{\"samples\":{");
393
258
 
394
259
  // Create direct key-value pairs where sample names are the keys
@@ -408,8 +273,6 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
408
273
 
409
274
  // Close the JSON object
410
275
  output_string += "}}";
411
-
412
- // println!("{}", output_string);
413
276
  }
414
277
  Err(err2) => {
415
278
  println!(
@@ -424,26 +287,22 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
424
287
  }
425
288
  }
426
289
  }
427
- // Create samples map
428
290
  let mut samples_map = Map::new();
429
291
  for (i, sample) in samples.iter().enumerate() {
430
292
  if i < gene_expression.len() {
431
- // Add each sample to the map, clean the sample name and convert value to JSON Number
432
- // Note: We need to handle potential NaN or infinity values that aren't valid in JSON
433
293
  let value = if gene_expression[i].is_finite() {
434
294
  Value::from(gene_expression[i])
435
295
  } else {
436
- Value::Null // Or choose a different representation for non-finite values
296
+ Value::Null
437
297
  };
438
298
 
439
299
  samples_map.insert(
440
- sample.replace("\\", ""), // Clean the sample name
300
+ sample.replace("\\", ""),
441
301
  value,
442
302
  );
443
303
  }
444
304
  }
445
305
 
446
- // Build the complete JSON structure
447
306
  let output_json = json!({
448
307
  "gene": gene_name,
449
308
  "dataId": gene_name,
@@ -456,74 +315,24 @@ fn query_gene_dense(hdf5_filename: String, gene_name: String) -> Result<()> {
456
315
  Ok(())
457
316
  }
458
317
 
459
- /// Reads expression data for a specific gene from a sparse format HDF5 file (from original readHD5.rs)
460
- ///
461
- /// This function extracts expression values for a specified gene from an HDF5 file
462
- /// that uses a sparse matrix representation. Sparse matrices are efficient for storing
463
- /// genomic data where many genes have zero expression in many samples. The sparse
464
- /// format follows the Compressed Sparse Column (CSC) structure with:
318
+ /// Reads expression data for a specific gene from a sparse format HDF5 file
465
319
  ///
466
- /// - A "data/dim" dataset containing matrix dimensions
467
- /// - A "gene_names" dataset containing gene identifiers
468
- /// - A "sample_names" dataset containing sample identifiers
469
- /// - A "data/p" dataset containing pointers to where each gene's data starts and ends
470
- /// - A "data/i" dataset containing column indices for non-zero values
471
- /// - A "data/x" dataset containing the actual non-zero expression values
320
+ /// Extracts expression values from sparse matrix HDF5 files using Compressed
321
+ /// Sparse Column (CSC) structure.
472
322
  ///
473
323
  /// # Arguments
474
- ///
475
324
  /// * `hdf5_filename` - Path to the HDF5 file
476
325
  /// * `gene_name` - Name of the gene to query
477
326
  ///
478
327
  /// # Returns
328
+ /// Prints gene expression data as JSON to stdout with "output_string:" prefix.
329
+ /// Sample names are keys, expression values are values.
479
330
  ///
480
- /// A result indicating success or error. On success, the function prints the gene
481
- /// expression data in JSON format to stdout with "output_string:" prefix.
482
- ///
483
- /// # Output Format
484
- ///
485
- /// The function outputs a JSON object where sample names are keys and their
486
- /// corresponding expression values are the values:
487
- ///
488
- /// ```json
489
- /// {
490
- /// "sample1": 0.0,
491
- /// "sample2": 4.5,
492
- /// "sample3": 0.0,
493
- /// "sample4": 7.2,
494
- /// ...
495
- /// }
496
- /// ```
497
- ///
498
- /// # Algorithm
499
- ///
500
- /// 1. Opens the HDF5 file and reads matrix dimensions
501
- /// 2. Reads gene and sample names
502
- /// 3. Finds the index of the requested gene
503
- /// 4. Reads the sparse representation:
504
- /// - Gets pointers from "data/p" to determine which values belong to the gene
505
- /// - Reads column indices from "data/i" to know which samples have non-zero values
506
- /// - Reads actual values from "data/x"
507
- /// 5. Reconstructs a dense vector from the sparse representation
508
- /// 6. Formats and outputs the result as JSON
509
- ///
510
- /// # Performance Tracking
511
- ///
512
- /// The function tracks performance at various stages using timestamps:
513
- /// - Time spent parsing genes
514
- /// - Time spent parsing samples
515
- /// - Time spent reading the p, i, and x datasets
516
- /// - Time spent generating the full array from sparse representation
517
- ///
518
- /// # Error Handling
519
- ///
520
- /// The function handles several potential errors:
521
- /// - File opening failures
522
- /// - Dataset access failures
523
- /// - Gene not found in the dataset
524
- /// - Sparse matrix reading failures
525
- ///
526
- /// If an error occurs, the function returns a structured JSON error message.
331
+ /// The sparse format includes:
332
+ /// - "data/dim" - Matrix dimensions
333
+ /// - "gene_names" - Gene identifiers
334
+ /// - "sample_names" - Sample identifiers
335
+ /// - "data/p", "data/i", "data/x" - CSC matrix components
527
336
  fn query_gene_sparse(hdf5_filename: String, gene_name: String) -> Result<()> {
528
337
  let file = File::open(&hdf5_filename)?;
529
338
  let ds_dim = file.dataset("data/dim")?;
@@ -602,7 +411,6 @@ fn query_gene_sparse(hdf5_filename: String, gene_name: String) -> Result<()> {
602
411
  gene_array[col_id] = populated_column_values[idx];
603
412
  }
604
413
 
605
- // Format output as JSON
606
414
  let mut output_string = "{".to_string();
607
415
  for i in 0..gene_array.len() {
608
416
  output_string += &format!(
@@ -626,7 +434,705 @@ fn query_gene_sparse(hdf5_filename: String, gene_name: String) -> Result<()> {
626
434
  Ok(())
627
435
  }
628
436
 
629
- // Main function
437
+ /// Queries expression data for multiple genes from a dense format HDF5 file
438
+ ///
439
+ /// Extracts expression values for multiple genes from a dense matrix HDF5 file,
440
+ /// optimizing for both single gene (linear search) and multi-gene (hashmap) queries.
441
+ ///
442
+ /// # Arguments
443
+ /// * `hdf5_filename` - Path to the HDF5 file
444
+ /// * `gene_names` - Vector of gene names to query
445
+ ///
446
+ /// # Returns
447
+ /// Prints a JSON object with expression data for all requested genes to stdout.
448
+ fn query_multiple_genes_dense(hdf5_filename: String, gene_names: Vec<String>) -> Result<()> {
449
+ let overall_start_time = Instant::now();
450
+
451
+ // Create timing map to store all timing data
452
+ let mut timings = Map::new();
453
+
454
+ let file = match File::open(&hdf5_filename) {
455
+ Ok(f) => f,
456
+ Err(err) => {
457
+ println!(
458
+ "{}",
459
+ serde_json::json!({
460
+ "status": "error",
461
+ "message": format!("Failed to open HDF5 file: {}", err)
462
+ })
463
+ );
464
+ return Ok(());
465
+ }
466
+ };
467
+
468
+
469
+ let genes_dataset = match file.dataset("gene_ids") {
470
+ Ok(ds) => ds,
471
+ Err(err) => {
472
+ println!(
473
+ "{}",
474
+ serde_json::json!({
475
+ "status": "error",
476
+ "message": format!("Failed to open gene_ids dataset: {}", err)
477
+ })
478
+ );
479
+ return Ok(());
480
+ }
481
+ };
482
+
483
+ let genes_varlen = match genes_dataset.read_1d::<VarLenAscii>() {
484
+ Ok(g) => g,
485
+ Err(err) => {
486
+ println!(
487
+ "{}",
488
+ serde_json::json!({
489
+ "status": "error",
490
+ "message": format!("Failed to read gene names as VarLenAscii: {}", err)
491
+ })
492
+ );
493
+ return Ok(());
494
+ }
495
+ };
496
+
497
+ let genes: Vec<String> = genes_varlen.iter().map(|g| g.to_string()).collect();
498
+
499
+ // Only create HashMap for multiple gene queries
500
+ let gene_to_index: Option<std::collections::HashMap<String, usize>> = if gene_names.len() > 1 {
501
+ let hashmap_start_time = Instant::now();
502
+ let mut map = std::collections::HashMap::with_capacity(genes.len());
503
+ for (idx, gene) in genes.iter().enumerate() {
504
+ map.insert(gene.clone(), idx);
505
+ }
506
+ timings.insert(
507
+ "build_hashmap_ms".to_string(),
508
+ Value::from(hashmap_start_time.elapsed().as_millis() as u64)
509
+ );
510
+ Some(map)
511
+ } else {
512
+ // Skip HashMap creation for single gene queries
513
+ None
514
+ };
515
+
516
+ let samples_dataset = match file.dataset("samples") {
517
+ Ok(ds) => ds,
518
+ Err(err) => {
519
+ println!(
520
+ "{}",
521
+ serde_json::json!({
522
+ "status": "error",
523
+ "message": format!("Failed to open samples dataset: {}", err)
524
+ })
525
+ );
526
+ return Ok(());
527
+ }
528
+ };
529
+
530
+ let samples_varlen = match samples_dataset.read_1d::<VarLenAscii>() {
531
+ Ok(s) => s,
532
+ Err(err) => {
533
+ println!(
534
+ "{}",
535
+ serde_json::json!({
536
+ "status": "error",
537
+ "message": format!("Failed to read samples as VarLenAscii: {}", err)
538
+ })
539
+ );
540
+ return Ok(());
541
+ }
542
+ };
543
+
544
+ let samples: Vec<String> = samples_varlen.iter().map(|s| s.to_string()).collect();
545
+
546
+ let counts_dataset = match file.dataset("counts") {
547
+ Ok(ds) => ds,
548
+ Err(err) => {
549
+ println!(
550
+ "{}",
551
+ serde_json::json!({
552
+ "status": "error",
553
+ "message": format!("Failed to open counts dataset: {}", err)
554
+ })
555
+ );
556
+ return Ok(());
557
+ }
558
+ };
559
+
560
+ // Create shared, mutex-protected maps that collect results from all worker threads
561
+ let genes_map = Arc::new(std::sync::Mutex::new(Map::new()));
562
+ let gene_timings = Arc::new(std::sync::Mutex::new(Map::new()));
563
+
564
+ if gene_names.len() > 1 {
565
+ // For multiple genes: preload all data and use parallel processing
566
+ timings.insert("parallel_processing".to_string(), Value::from(true));
567
+
568
+ // Load all gene data upfront only when processing multiple genes
569
+ let all_data_start_time = Instant::now();
570
+ let all_gene_data = match counts_dataset.read::<f64, Dim<[usize; 2]>>() {
571
+ Ok(data) => {
572
+ timings.insert(
573
+ "read_all_gene_data_ms".to_string(),
574
+ Value::from(all_data_start_time.elapsed().as_millis() as u64),
575
+ );
576
+ Some(data)
577
+ }
578
+ Err(err) => {
579
+ // Failed to read all data at once, will fallback to per-gene reading
580
+ timings.insert(
581
+ "read_all_gene_data_error".to_string(),
582
+ Value::String(format!("{:?}", err)),
583
+ );
584
+ None
585
+ }
586
+ };
587
+
588
+ // Number of worker threads; currently hard-coded to 2 rather than configurable
589
+ let thread_count = 2;
590
+ timings.insert("thread_count".to_string(), Value::from(thread_count));
591
+
592
+ // Create a scoped thread pool with specified number of threads
593
+ match rayon::ThreadPoolBuilder::new()
594
+ .num_threads(thread_count)
595
+ .build()
596
+ {
597
+ Ok(pool) => {
598
+ // Use the pool for this specific work
599
+ pool.install(|| {
600
+ gene_names.par_iter().for_each(|gene_name| {
601
+ let gene_start_time = Instant::now();
602
+
603
+ // Use HashMap for O(1) lookup for multiple genes
604
+ let gene_index = match &gene_to_index {
605
+ Some(map) => map.get(gene_name).cloned(),
606
+ None => genes.iter().position(|x| *x == *gene_name),
607
+ };
608
+
609
+ match gene_index {
610
+ Some(gene_index) => {
611
+ // Make sure the gene index is valid for this dataset
612
+ if gene_index >= counts_dataset.shape()[0] {
613
+ let mut error_map = Map::new();
614
+ error_map.insert(
615
+ "error".to_string(),
616
+ Value::String("Gene index out of bounds".to_string()),
617
+ );
618
+
619
+ // Store the error result
620
+ let mut genes_map = genes_map.lock().unwrap();
621
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
622
+ } else {
623
+ // Use pre-loaded data if available
624
+ if let Some(ref all_data) = all_gene_data {
625
+ // Extract the row directly from pre-loaded data
626
+ let gene_expression = all_data.slice(s![gene_index, ..]);
627
+
628
+ // Create samples map for this gene
629
+ let mut samples_map = Map::new();
630
+ for (i, sample) in samples.iter().enumerate() {
631
+ if i < gene_expression.len() {
632
+ // Handle potential NaN or infinity values
633
+ let value = if gene_expression[i].is_finite() {
634
+ Value::from(gene_expression[i])
635
+ } else {
636
+ Value::Null
637
+ };
638
+
639
+ samples_map.insert(sample.replace("\\", ""), value);
640
+ }
641
+ }
642
+
643
+ // Create gene data and store it
644
+ let gene_data = json!({
645
+ "dataId": gene_name,
646
+ "samples": samples_map
647
+ });
648
+
649
+ let mut genes_map = genes_map.lock().unwrap();
650
+ genes_map.insert(gene_name.clone(), gene_data);
651
+ } else {
652
+ // Fallback to per-gene reading if bulk load failed
653
+ match counts_dataset
654
+ .read_slice_1d::<f64, _>(s![gene_index, ..])
655
+ {
656
+ Ok(gene_expression) => {
657
+ // Create samples map for this gene
658
+ let mut samples_map = Map::new();
659
+ for (i, sample) in samples.iter().enumerate() {
660
+ if i < gene_expression.len() {
661
+ // Handle potential NaN or infinity values
662
+ let value =
663
+ if gene_expression[i].is_finite() {
664
+ Value::from(gene_expression[i])
665
+ } else {
666
+ Value::Null
667
+ };
668
+
669
+ samples_map.insert(
670
+ sample.replace("\\", ""),
671
+ value,
672
+ );
673
+ }
674
+ }
675
+
676
+ // Create gene data and store it
677
+ let gene_data = json!({
678
+ "dataId": gene_name,
679
+ "samples": samples_map
680
+ });
681
+
682
+ let mut genes_map = genes_map.lock().unwrap();
683
+ genes_map.insert(gene_name.clone(), gene_data);
684
+ }
685
+ Err(err1) => {
686
+ let mut error_map = Map::new();
687
+ error_map.insert(
688
+ "error".to_string(),
689
+ Value::String(format!(
690
+ "Failed to read expression values: {:?}",
691
+ err1
692
+ )),
693
+ );
694
+
695
+ let mut genes_map = genes_map.lock().unwrap();
696
+ genes_map.insert(
697
+ gene_name.clone(),
698
+ Value::Object(error_map),
699
+ );
700
+ }
701
+ }
702
+ }
703
+ }
704
+ }
705
+ None => {
706
+ // Gene not found
707
+ let mut error_map = Map::new();
708
+ error_map.insert(
709
+ "error".to_string(),
710
+ Value::String("Gene not found in dataset".to_string()),
711
+ );
712
+
713
+ let mut genes_map = genes_map.lock().unwrap();
714
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
715
+ }
716
+ }
717
+
718
+ // Record timing
719
+ let elapsed_time = gene_start_time.elapsed().as_millis() as u64;
720
+ let mut gene_timings = gene_timings.lock().unwrap();
721
+ gene_timings.insert(gene_name.clone(), Value::from(elapsed_time));
722
+ });
723
+ });
724
+ }
725
+ Err(err) => {
726
+ // If thread pool creation fails, fall back to sequential processing
727
+ timings.insert(
728
+ "thread_pool_error".to_string(),
729
+ Value::String(format!("Failed to create thread pool: {:?}", err)),
730
+ );
731
+
732
+ process_genes_sequentially(
733
+ &gene_names,
734
+ &genes,
735
+ &gene_to_index,
736
+ &counts_dataset,
737
+ &all_gene_data,
738
+ &samples,
739
+ &genes_map
740
+ );
741
+ }
742
+ }
743
+ } else if gene_names.len() == 1 {
744
+ let gene_name = &gene_names[0];
745
+
746
+ match genes.iter().position(|x| *x == *gene_name) {
747
+ Some(gene_index) => {
748
+ if gene_index >= counts_dataset.shape()[0] {
749
+ let mut error_map = Map::new();
750
+ error_map.insert(
751
+ "error".to_string(),
752
+ Value::String("Gene index out of bounds".to_string()),
753
+ );
754
+
755
+ let mut genes_map = genes_map.lock().unwrap();
756
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
757
+ } else {
758
+ // Read just this single gene's data directly
759
+ match counts_dataset.read_slice_1d::<f64, _>(s![gene_index, ..]) {
760
+ Ok(gene_expression) => {
761
+
762
+ // Create samples map for this gene
763
+ let mut samples_map = Map::new();
764
+ for (i, sample) in samples.iter().enumerate() {
765
+ if i < gene_expression.len() {
766
+ // Handle potential NaN or infinity values
767
+ let value = if gene_expression[i].is_finite() {
768
+ Value::from(gene_expression[i])
769
+ } else {
770
+ Value::Null
771
+ };
772
+
773
+ samples_map.insert(sample.replace("\\", ""), value);
774
+ }
775
+ }
776
+
777
+ let gene_data = json!({
778
+ "dataId": gene_name,
779
+ "samples": samples_map
780
+ });
781
+
782
+ let mut genes_map = genes_map.lock().unwrap();
783
+ genes_map.insert(gene_name.clone(), gene_data);
784
+ }
785
+ Err(err) => {
786
+ let mut error_map = Map::new();
787
+ error_map.insert(
788
+ "error".to_string(),
789
+ Value::String(format!(
790
+ "Failed to read expression values: {:?}",
791
+ err
792
+ )),
793
+ );
794
+
795
+ let mut genes_map = genes_map.lock().unwrap();
796
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
797
+ }
798
+ }
799
+ }
800
+ }
801
+ None => {
802
+ let mut error_map = Map::new();
803
+ error_map.insert(
804
+ "error".to_string(),
805
+ Value::String("Gene not found in dataset".to_string()),
806
+ );
807
+
808
+ let mut genes_map = genes_map.lock().unwrap();
809
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
810
+ }
811
+ }
812
+ }
813
+
814
+ // Take the collected gene results out of the Arc<Mutex<>> (gene_timings is not part of the output)
815
+ let genes_map = Arc::try_unwrap(genes_map).unwrap().into_inner().unwrap();
816
+
817
+ let output_json = json!({
818
+ "genes": genes_map,
819
+ "timings": timings,
820
+ "total_time_ms": overall_start_time.elapsed().as_millis() as u64
821
+ });
822
+
823
+ println!("{}", output_json);
824
+
825
+ Ok(())
826
+ }
827
+
828
+ // Helper function to process genes sequentially with optional HashMap lookup
829
+ fn process_genes_sequentially(
830
+ gene_names: &Vec<String>,
831
+ genes: &Vec<String>,
832
+ gene_to_index: &Option<std::collections::HashMap<String, usize>>,
833
+ counts_dataset: &hdf5::Dataset,
834
+ all_gene_data: &Option<ndarray::ArrayBase<ndarray::OwnedRepr<f64>, ndarray::Dim<[usize; 2]>>>,
835
+ samples: &Vec<String>,
836
+ genes_map: &Arc<std::sync::Mutex<Map<String, Value>>>
837
+ ) {
838
+ for gene_name in gene_names {
839
+ // Find the index of the requested gene, using HashMap if available
840
+ let gene_index = match gene_to_index {
841
+ Some(map) => map.get(gene_name).cloned(),
842
+ None => genes.iter().position(|x| *x == *gene_name),
843
+ };
844
+
845
+ match gene_index {
846
+ Some(gene_index) => {
847
+ // Make sure the gene index is valid for this dataset
848
+ if gene_index >= counts_dataset.shape()[0] {
849
+ let mut error_map = Map::new();
850
+ error_map.insert(
851
+ "error".to_string(),
852
+ Value::String("Gene index out of bounds".to_string()),
853
+ );
854
+
855
+ // Store the error result
856
+ let mut genes_map = genes_map.lock().unwrap();
857
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
858
+ } else {
859
+ // Use pre-loaded data if available
860
+ if let Some(ref all_data) = all_gene_data {
861
+ let gene_expression = all_data.slice(s![gene_index, ..]);
862
+
863
+ // Create samples map for this gene
864
+ let mut samples_map = Map::new();
865
+ for (i, sample) in samples.iter().enumerate() {
866
+ if i < gene_expression.len() {
867
+ let value = if gene_expression[i].is_finite() {
868
+ Value::from(gene_expression[i])
869
+ } else {
870
+ Value::Null
871
+ };
872
+
873
+ samples_map.insert(sample.replace("\\", ""), value);
874
+ }
875
+ }
876
+
877
+ let gene_data = json!({
878
+ "dataId": gene_name,
879
+ "samples": samples_map
880
+ });
881
+
882
+ let mut genes_map = genes_map.lock().unwrap();
883
+ genes_map.insert(gene_name.clone(), gene_data);
884
+ } else {
885
+ // Fallback to per-gene reading if bulk load failed
886
+ match counts_dataset.read_slice_1d::<f64, _>(s![gene_index, ..]) {
887
+ Ok(gene_expression) => {
888
+ // Create samples map for this gene
889
+ let mut samples_map = Map::new();
890
+ for (i, sample) in samples.iter().enumerate() {
891
+ if i < gene_expression.len() {
892
+ let value = if gene_expression[i].is_finite() {
893
+ Value::from(gene_expression[i])
894
+ } else {
895
+ Value::Null
896
+ };
897
+
898
+ samples_map.insert(sample.replace("\\", ""), value);
899
+ }
900
+ }
901
+
902
+ let gene_data = json!({
903
+ "dataId": gene_name,
904
+ "samples": samples_map
905
+ });
906
+
907
+ let mut genes_map = genes_map.lock().unwrap();
908
+ genes_map.insert(gene_name.clone(), gene_data);
909
+ }
910
+ Err(err1) => {
911
+ let mut error_map = Map::new();
912
+ error_map.insert(
913
+ "error".to_string(),
914
+ Value::String(format!(
915
+ "Failed to read expression values: {:?}",
916
+ err1
917
+ )),
918
+ );
919
+
920
+ let mut genes_map = genes_map.lock().unwrap();
921
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
922
+ }
923
+ }
924
+ }
925
+ }
926
+ }
927
+ None => {
928
+ let mut error_map = Map::new();
929
+ error_map.insert(
930
+ "error".to_string(),
931
+ Value::String("Gene not found in dataset".to_string()),
932
+ );
933
+
934
+ let mut genes_map = genes_map.lock().unwrap();
935
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
936
+ }
937
+ }
938
+
939
+ }
940
+ }
941
+ /// Queries expression data for multiple genes from a sparse format HDF5 file
942
+ ///
943
+ /// This function extracts expression values for multiple specified genes from an HDF5 file
944
+ /// that uses a sparse matrix representation. It optimizes the query by reading shared datasets only once.
945
+ ///
946
+ /// # Arguments
947
+ ///
948
+ /// * `hdf5_filename` - Path to the HDF5 file
949
+ /// * `gene_names` - Vector of gene names to query
950
+ ///
951
+ /// # Returns
952
+ ///
953
+ /// A result indicating success or error. On success, the function prints a JSON object
954
+ /// containing expression data for all requested genes to stdout.
955
+ fn query_multiple_genes_sparse(hdf5_filename: String, gene_names: Vec<String>) -> Result<()> {
956
+ let overall_start_time = Instant::now();
957
+
958
+ // Create timing map
959
+ let mut timings = Map::new();
960
+ timings.insert("gene_count".to_string(), Value::from(gene_names.len()));
961
+ timings.insert("format".to_string(), Value::String("sparse".to_string()));
962
+
963
+ // Open file and read datasets
964
+ let file_open_start = Instant::now();
965
+ let file = File::open(&hdf5_filename)?;
966
+ timings.insert(
967
+ "file_open_ms".to_string(),
968
+ Value::from(file_open_start.elapsed().as_millis() as u64),
969
+ );
970
+
971
+ let dim_start = Instant::now();
972
+ let ds_dim = file.dataset("data/dim")?;
973
+ let data_dim: Array1<_> = ds_dim.read::<usize, Dim<[usize; 1]>>()?;
974
+ let num_samples = data_dim[0];
975
+ let _num_genes = data_dim[1];
976
+ timings.insert(
977
+ "read_dims_ms".to_string(),
978
+ Value::from(dim_start.elapsed().as_millis() as u64),
979
+ );
980
+
981
+ let ds_genes = file.dataset("gene_names")?;
982
+ let genes = ds_genes.read_1d::<FixedAscii<104>>()?;
983
+
984
+ let ds_samples = file.dataset("sample_names")?;
985
+ let samples = ds_samples.read_1d::<FixedAscii<104>>()?;
986
+
987
+ // Read p dataset (contains pointers for all genes)
988
+ let p_start_time = Instant::now();
989
+ let ds_p = file.dataset("data/p")?;
990
+ let data_p: Array1<usize> = ds_p.read_1d::<usize>()?;
991
+ timings.insert(
992
+ "read_p_dataset_ms".to_string(),
993
+ Value::from(p_start_time.elapsed().as_millis() as u64),
994
+ );
995
+
996
+ // Open i and x datasets
997
+ let ds_start_time = Instant::now();
998
+ let ds_i = file.dataset("data/i")?;
999
+ let ds_x = file.dataset("data/x")?;
1000
+ timings.insert(
1001
+ "open_i_x_datasets_ms".to_string(),
1002
+ Value::from(ds_start_time.elapsed().as_millis() as u64),
1003
+ );
1004
+
1005
+ // Determine number of threads to use
1006
+ let num_threads = num_cpus::get();
1007
+ timings.insert("num_threads".to_string(), Value::from(num_threads as u64));
1008
+
1009
+
1010
+ // Thread-safe maps for results
1011
+ let genes_map = Arc::new(std::sync::Mutex::new(Map::new()));
1012
+ let gene_timings = Arc::new(std::sync::Mutex::new(Map::new()));
1013
+
1014
+ // Use rayon for parallel processing
1015
+ gene_names.par_iter().for_each(|gene_name| {
1016
+ let gene_start_time = Instant::now();
1017
+
1018
+ // Find the index of the requested gene
1019
+ match genes.iter().position(|&x| x == *gene_name) {
1020
+ Some(gene_index) => {
1021
+ // Find start and end points for this gene's data
1022
+ let array_start_point = data_p[gene_index];
1023
+ let array_stop_point = data_p[gene_index + 1];
1024
+ let num_populated_cells = array_stop_point - array_start_point;
1025
+
1026
+ if num_populated_cells == 0 {
1027
+ // Gene has no data, create array of zeros
1028
+ let mut samples_map = Map::new();
1029
+ for (_i, sample) in samples.iter().enumerate() {
1030
+ samples_map.insert(sample.to_string().replace("\\", ""), Value::from(0.0));
1031
+ }
1032
+
1033
+ let gene_data = json!({
1034
+ "dataId": gene_name,
1035
+ "samples": samples_map
1036
+ });
1037
+
1038
+ let mut genes_map = genes_map.lock().unwrap();
1039
+ genes_map.insert(gene_name.clone(), gene_data);
1040
+ } else {
1041
+ // Read data for this gene
1042
+ match ds_i.read_slice_1d::<usize, _>(array_start_point..array_stop_point) {
1043
+ Ok(populated_column_ids) => {
1044
+ match ds_x.read_slice_1d::<f64, _>(array_start_point..array_stop_point)
1045
+ {
1046
+ Ok(populated_column_values) => {
1047
+ // Generate the complete array from sparse representation
1048
+ let mut gene_array: Array1<f64> = Array1::zeros(num_samples);
1049
+
1050
+ // Fill in values at populated column indices
1051
+ for (idx, &col_id) in populated_column_ids.iter().enumerate() {
1052
+ gene_array[col_id] = populated_column_values[idx];
1053
+ }
1054
+
1055
+ // Create samples map
1056
+ let mut samples_map = Map::new();
1057
+ for (_i, sample) in samples.iter().enumerate() {
1058
+ let value = if gene_array[_i].is_finite() {
1059
+ Value::from(gene_array[_i])
1060
+ } else {
1061
+ Value::Null
1062
+ };
1063
+
1064
+ samples_map
1065
+ .insert(sample.to_string().replace("\\", ""), value);
1066
+ }
1067
+
1068
+ let gene_data = json!({
1069
+ "dataId": gene_name,
1070
+ "samples": samples_map
1071
+ });
1072
+
1073
+ let mut genes_map = genes_map.lock().unwrap();
1074
+ genes_map.insert(gene_name.clone(), gene_data);
1075
+ }
1076
+ Err(err) => {
1077
+ let mut error_map = Map::new();
1078
+ error_map.insert(
1079
+ "error".to_string(),
1080
+ Value::String(format!(
1081
+ "Failed to read x dataset: {:?}",
1082
+ err
1083
+ )),
1084
+ );
1085
+
1086
+ let mut genes_map = genes_map.lock().unwrap();
1087
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
1088
+ }
1089
+ }
1090
+ }
1091
+ Err(err) => {
1092
+ let mut error_map = Map::new();
1093
+ error_map.insert(
1094
+ "error".to_string(),
1095
+ Value::String(format!("Failed to read i dataset: {:?}", err)),
1096
+ );
1097
+
1098
+ let mut genes_map = genes_map.lock().unwrap();
1099
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
1100
+ }
1101
+ }
1102
+ }
1103
+ }
1104
+ None => {
1105
+ let mut error_map = Map::new();
1106
+ error_map.insert(
1107
+ "error".to_string(),
1108
+ Value::String("Gene not found in dataset".to_string()),
1109
+ );
1110
+
1111
+ let mut genes_map = genes_map.lock().unwrap();
1112
+ genes_map.insert(gene_name.clone(), Value::Object(error_map));
1113
+ }
1114
+ }
1115
+
1116
+ // Record timing
1117
+ let elapsed_time = gene_start_time.elapsed().as_millis() as u64;
1118
+ let mut gene_timings = gene_timings.lock().unwrap();
1119
+ gene_timings.insert(gene_name.clone(), Value::from(elapsed_time));
1120
+ });
1121
+
1122
+ // Get the final maps from the Arc<Mutex<>>
1123
+ let genes_map = Arc::try_unwrap(genes_map).unwrap().into_inner().unwrap();
1124
+
1125
+ let output_json = json!({
1126
+ "genes": genes_map,
1127
+ "timings": timings,
1128
+ "parallel": true,
1129
+ "total_time_ms": overall_start_time.elapsed().as_millis() as u64
1130
+ });
1131
+
1132
+ println!("{}", output_json);
1133
+
1134
+ Ok(())
1135
+ }
630
1136
  fn main() -> Result<()> {
631
1137
  let mut input = String::new();
632
1138
  match io::stdin().read_line(&mut input) {
@@ -642,12 +1148,75 @@ fn main() -> Result<()> {
642
1148
  }
643
1149
  };
644
1150
 
645
- // Then, check if we have a gene to query
646
- if let Some(gene_name) = json_string["gene"].as_str() {
647
- // let gene_query_time = Instant::now();
1151
+ // Case 1: Check if "genes" field exists and is an array
1152
+ if json_string["genes"].is_array() {
1153
+ // Convert the JsonValue array to a Vec<String>
1154
+ let mut gene_names: Vec<String> = Vec::new();
1155
+ for gene_value in json_string["genes"].members() {
1156
+ if let Some(gene_str) = gene_value.as_str() {
1157
+ gene_names.push(gene_str.to_string());
1158
+ }
1159
+ }
1160
+
1161
+ if !gene_names.is_empty() {
1162
+ match detect_hdf5_format(&hdf5_filename)? {
1163
+ "dense" => query_multiple_genes_dense(hdf5_filename, gene_names)?,
1164
+ "sparse" => query_multiple_genes_sparse(hdf5_filename, gene_names)?,
1165
+ _ => {
1166
+ println!(
1167
+ "{}",
1168
+ serde_json::json!({
1169
+ "status": "failure",
1170
+ "message": "Cannot query genes in unknown file format.",
1171
+ "file_path": hdf5_filename
1172
+ })
1173
+ );
1174
+ }
1175
+ }
1176
+ return Ok(());
1177
+ }
1178
+ }
1179
+ // Case 2: Check if "gene" field exists and is an array (this handles the case we're seeing)
1180
+ else if json_string["gene"].is_array() {
1181
+ // Convert the JsonValue array to a Vec<String>
1182
+ let mut gene_names: Vec<String> = Vec::new();
1183
+ for gene_value in json_string["gene"].members() {
1184
+ if let Some(gene_str) = gene_value.as_str() {
1185
+ gene_names.push(gene_str.to_string());
1186
+ }
1187
+ }
1188
+
1189
+ if !gene_names.is_empty() {
1190
+ // Process multiple genes
1191
+ match detect_hdf5_format(&hdf5_filename)? {
1192
+ "dense" => query_multiple_genes_dense(hdf5_filename, gene_names)?,
1193
+ "sparse" => query_multiple_genes_sparse(hdf5_filename, gene_names)?,
1194
+ _ => {
1195
+ println!(
1196
+ "{}",
1197
+ serde_json::json!({
1198
+ "status": "failure",
1199
+ "message": "Cannot query genes in unknown file format.",
1200
+ "file_path": hdf5_filename
1201
+ })
1202
+ );
1203
+ }
1204
+ }
1205
+ return Ok(());
1206
+ }
1207
+ }
1208
+ // Case 3: Check if "gene" field exists and is a string (original single gene case)
1209
+ else if let Some(gene_name) = json_string["gene"].as_str() {
648
1210
  query_gene(hdf5_filename, gene_name.to_string())?;
649
- // println!("Time for querying gene: {:?}", gene_query_time.elapsed());
1211
+ return Ok(());
650
1212
  }
1213
+ println!(
1214
+ "{}",
1215
+ serde_json::json!({
1216
+ "status": "error",
1217
+ "message": "Neither gene nor genes array provided in input"
1218
+ })
1219
+ );
651
1220
  }
652
1221
  Err(error) => println!("Incorrect json: {}", error),
653
1222
  }