@sjcrh/proteinpaint-rust 2.137.2-0 → 2.138.3-7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +5 -0
- package/package.json +1 -1
- package/src/readH5.rs +617 -0
package/Cargo.toml
CHANGED
package/package.json
CHANGED
package/src/readH5.rs
ADDED
@@ -0,0 +1,617 @@
// readH5.rs - validate/read HDF5 file
//
// READ:
// Extracts matrix data from HDF5 files.
// The matrix dataset is hardcoded as "matrix"; row_dataset ("samples") and col_dataset (the genesets) hold the axis labels.
// row_dataset and col_dataset are stored as VarLenUnicode.
// Supports f32, f64, i32 and i64 matrix datatypes.
//
// Features:
// - Hardcoded "matrix" dataset
// - Supports f32, f64, i32 and i64 matrix datatypes
// - Parallel processing with dynamic thread count
// - JSON output with timing metrics

// Usage
// echo '{"query":["HALLMARK_ADIPOGENESIS", "HALLMARK_ANGIOGENESIS"],"hdf5_file":"matrix.h5"}' | ./target/release/readH5
//
//
// VALIDATE:
// output: JSON with {"samples":[]}
// Usage
// echo '{"validate":true,"hdf5_file":"matrix.h5"}' | ./target/release/readH5
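//
// Example validate response (illustrative values; the field shapes follow
// the json! object built in validate_hdf5_file below):
// {"status":"success","message":"HDF5 matrix file loaded successfully",
//  "file_path":"matrix.h5","format":"matrix",
//  "matrix_dimensions":{"num_rows":50,"num_columns":100},
//  "samples":["sample1","sample2"]}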

use hdf5::types::VarLenUnicode;
use hdf5::{File, Result, Selection};
use json::JsonValue;
use rayon::prelude::*;
use serde_json::{Map, Value, json};
use std::io;
use std::sync::Arc;
use std::time::Instant;

/// Creates an error JSON response
fn error_response(message: impl Into<String>) -> Value {
    json!({
        "status": "error",
        "message": message.into()
    })
}
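// e.g. error_response("bad input") serializes to
// {"status":"error","message":"bad input"}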

/// h5 file validation
// Detects whether the HDF5 file contains a valid matrix dataset
pub fn detect_hdf5_format(
    hdf5_filename: &str,
    matrix_name: &str,
    row_dataset: &str,
    col_dataset: &str,
) -> Result<&'static str> {
    let file = File::open(hdf5_filename)?;

    // Check for matrix dataset (must be 2D)
    let matrix_ok = file
        .dataset(matrix_name)
        .map(|dataset| dataset.shape().len() == 2)
        .unwrap_or(false);

    // Check for row dataset (must exist and be readable as VarLenUnicode)
    let row_ok = file
        .dataset(row_dataset)
        .and_then(|ds| ds.read_1d::<hdf5::types::VarLenUnicode>())
        .is_ok();

    // Check for column dataset (must exist and be readable as VarLenUnicode)
    let col_ok = file
        .dataset(col_dataset)
        .and_then(|ds| ds.read_1d::<hdf5::types::VarLenUnicode>())
        .is_ok();

    if matrix_ok && row_ok && col_ok {
        Ok("matrix")
    } else {
        Ok("unknown")
    }
}
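// Illustrative use (hypothetical call site, not part of this file):
// match detect_hdf5_format("matrix.h5", "matrix", "samples", "item")? {
//     "matrix" => { /* datasets present and readable; safe to query */ }
//     _ => { /* report unknown format */ }
// }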

pub fn validate_hdf5_file(hdf5_filename: String) -> Result<()> {
    let file = File::open(&hdf5_filename)?;
    let matrix_name = "matrix";
    let row_dataset = "samples";
    let col_dataset = "item";
    let file_format = detect_hdf5_format(&hdf5_filename, matrix_name, row_dataset, col_dataset)?;

    let output = match file_format {
        "matrix" => {
            let dataset = file.dataset(matrix_name)?;
            let matrix_shape = dataset.shape();
            let datatype = dataset.dtype()?;

            // Read row_dataset as VarLenUnicode
            let row_dataset_data = file.dataset(row_dataset)?;
            let row_data: Vec<String> = row_dataset_data
                .read_1d::<VarLenUnicode>()?
                .iter()
                .map(|s| s.to_string())
                .collect();

            // Validate matrix data
            let matrix_valid = if matrix_shape.len() == 2 && matrix_shape[0] > 0 && matrix_shape[1] > 0 {
                // Create a selection for a 1x1 slice at (0,0)
                let selection = hdf5::Selection::from((0..1, 0..1));

                if datatype.is::<f64>() {
                    dataset.read_slice_2d::<f64, _>(selection).is_ok()
                } else if datatype.is::<f32>() {
                    dataset.read_slice_2d::<f32, _>(selection).is_ok()
                } else if datatype.is::<i32>() {
                    dataset.read_slice_2d::<i32, _>(selection).is_ok()
                } else if datatype.is::<i64>() {
                    dataset.read_slice_2d::<i64, _>(selection).is_ok()
                } else {
                    false
                }
            } else {
                false
            };

            json!({
                "status": if matrix_valid { "success" } else { "failure" },
                "message": if matrix_valid {
                    "HDF5 matrix file loaded successfully"
                } else {
                    "Invalid matrix structure"
                },
                "file_path": hdf5_filename,
                "format": "matrix",
                "matrix_dimensions": {
                    "num_rows": matrix_shape.get(0).unwrap_or(&0),
                    "num_columns": matrix_shape.get(1).unwrap_or(&0)
                },
                row_dataset.to_string(): row_data
            })
        }
        _ => {
            json!({
                "status": "failure",
                "message": format!(
                    "Missing or invalid required datasets: matrix='{}', row_dataset='{}', col_dataset='{}'",
                    matrix_name, row_dataset, col_dataset
                ),
                "file_path": hdf5_filename,
                "format": "unknown",
                "matrix_dimensions": {
                    "num_rows": 0,
                    "num_columns": 0
                }
            })
        }
    };
    println!("{}", output);
    Ok(())
}
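// Note: validation probes only a 1x1 slice at (0,0), confirming the matrix
// is readable without loading it fully into memory.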

/// read h5 file

// Trait for converting types to f64, allowing lossy conversions
trait ToF64 {
    fn to_f64(&self) -> f64;
}

impl ToF64 for f32 {
    fn to_f64(&self) -> f64 {
        *self as f64
    }
}

impl ToF64 for f64 {
    fn to_f64(&self) -> f64 {
        *self
    }
}

impl ToF64 for i32 {
    fn to_f64(&self) -> f64 {
        *self as f64
    }
}

impl ToF64 for i64 {
    fn to_f64(&self) -> f64 {
        if self.abs() > (1 << 53) {
            eprintln!("Warning: i64 value {} may lose precision when converted to f64", self);
        }
        *self as f64
    }
}
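// Why 2^53: an f64 carries a 53-bit significand, so every integer up to
// 2^53 = 9_007_199_254_740_992 converts exactly, while larger magnitudes
// may round; hence the precision warning in the i64 impl above.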

// Process matrix data for a given type
fn process_data<T: ToF64 + Copy>(data: &[T], row_data: &[String]) -> Map<String, Value> {
    let mut row_data_map = Map::new();
    for (i, row) in row_data.iter().enumerate() {
        if i < data.len() {
            let value = data[i].to_f64();
            row_data_map.insert(
                row.replace("\\", ""),
                if value.is_finite() {
                    Value::from(value)
                } else {
                    Value::Null
                },
            );
        }
    }
    row_data_map
}
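// Illustrative example (not part of this file):
// process_data(&[1.5f64, f64::NAN], &["s1".to_string(), "s2".to_string()])
// yields {"s1": 1.5, "s2": null}; non-finite values become JSON null.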

// Data query
// Supports f32, f64, i32 and i64 datatypes for the matrix dataset.
// Uses hardcoded "matrix", "samples" and "item" datasets.
// "samples" and "item" datasets are read as VarLenUnicode.
//
// # Arguments
// * `hdf5_filename` - Path to the HDF5 file
// * `qry` - Query (non-empty array)
//
// # Returns
// Prints a JSON object with matrix data for query data to stdout
fn query_dataset(hdf5_filename: String, qry: Vec<String>) -> Result<()> {
    let overall_start_time = Instant::now();
    let mut timings = Map::new();
    timings.insert("query_count".to_string(), Value::from(qry.len()));

    let file = match File::open(&hdf5_filename) {
        Ok(f) => f,
        Err(err) => {
            println!("{}", error_response(format!("Failed to open HDF5 file: {}", err)));
            return Ok(());
        }
    };

    let col_dataset_name = String::from("item");
    let col_dataset = match file.dataset(&col_dataset_name) {
        Ok(ds) => ds,
        Err(err) => {
            println!(
                "{}",
                error_response(format!("Failed to open {} dataset: {}", col_dataset_name, err))
            );
            return Ok(());
        }
    };

    let col_dataset_varlen = match col_dataset.read_1d::<VarLenUnicode>() {
        Ok(g) => g,
        Err(err) => {
            println!(
                "{}",
                error_response(format!("Failed to read {}: {}", col_dataset_name, err))
            );
            return Ok(());
        }
    };
    let col_data: Vec<String> = col_dataset_varlen.iter().map(|g| g.to_string()).collect();

    let hashmap_start_time = Instant::now();
    let col_data_to_index: std::collections::HashMap<String, usize> =
        col_data.iter().enumerate().map(|(i, g)| (g.clone(), i)).collect();
    timings.insert(
        "build_hashmap_ms".to_string(),
        Value::from(hashmap_start_time.elapsed().as_millis() as u64),
    );
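    // The HashMap gives O(1) lookup from query name to matrix row index,
    // so each query below avoids a linear scan of col_data.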

    let row_dataset_name = String::from("samples");
    let row_dataset = match file.dataset(&row_dataset_name) {
        Ok(ds) => ds,
        Err(err) => {
            println!(
                "{}",
                error_response(format!("Failed to open {} dataset: {}", row_dataset_name, err))
            );
            return Ok(());
        }
    };
    let row_dataset_varlen = match row_dataset.read_1d::<VarLenUnicode>() {
        Ok(s) => s,
        Err(err) => {
            println!(
                "{}",
                error_response(format!("Failed to read {}: {}", row_dataset_name, err))
            );
            return Ok(());
        }
    };
    let row_data: Vec<String> = row_dataset_varlen.iter().map(|s| s.to_string()).collect();
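    // row_data holds the sample names; each selected matrix row carries one
    // value per sample, and process_data pairs them positionally.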

    let matrix_dataset = match file.dataset("matrix") {
        Ok(ds) => ds,
        Err(err) => {
            println!("{}", error_response(format!("Failed to open matrix dataset: {}", err)));
            return Ok(());
        }
    };

    let datatype = match matrix_dataset.dtype() {
        Ok(dt) => dt,
        Err(err) => {
            println!("{}", error_response(format!("Failed to read matrix datatype: {}", err)));
            return Ok(());
        }
    };

    let col_data_map = Arc::new(std::sync::Mutex::new(Map::new()));
    let thread_count = std::cmp::min(4, qry.len());
    timings.insert("thread_count".to_string(), Value::from(thread_count));
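    // "Dynamic thread count" here means min(4, qry.len()): the pool never
    // exceeds four threads and shrinks for short query lists.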

    let results: Vec<(String, Value)> = match rayon::ThreadPoolBuilder::new()
        .num_threads(thread_count)
        .build()
    {
        Ok(pool) => pool.install(|| {
            qry.par_iter()
                .map(|query| {
                    let query_start_time = Instant::now();
                    let result = match col_data_to_index.get(query) {
                        Some(&index) => {
                            if index >= matrix_dataset.shape()[0] {
                                let mut error_map = Map::new();
                                error_map.insert(
                                    "error".to_string(),
                                    Value::String("Query index out of bounds".to_string()),
                                );
                                (query.clone(), Value::Object(error_map))
                            } else {
                                let selection = Selection::from((index..index + 1, ..));
                                if datatype.is::<f64>() {
                                    match matrix_dataset.read_slice_2d::<f64, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read f64 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else if datatype.is::<f32>() {
                                    match matrix_dataset.read_slice_2d::<f32, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read f32 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else if datatype.is::<i32>() {
                                    match matrix_dataset.read_slice_2d::<i32, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read i32 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else if datatype.is::<i64>() {
                                    match matrix_dataset.read_slice_2d::<i64, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read i64 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else {
                                    let mut error_map = Map::new();
                                    error_map.insert(
                                        "error".to_string(),
                                        Value::String("Unsupported matrix datatype (expected f64, f32, i64 or i32)".to_string()),
                                    );
                                    (query.clone(), Value::Object(error_map))
                                }
                            }
                        }
                        None => {
                            let mut error_map = Map::new();
                            error_map.insert(
                                "error".to_string(),
                                Value::String(format!("Query '{}' not found in {} dataset", query, col_dataset_name)),
                            );
                            (query.clone(), Value::Object(error_map))
                        }
                    };
                    let elapsed_time = query_start_time.elapsed().as_millis() as u64;
                    let mut query_timings = col_data_map.lock().unwrap();
                    query_timings.insert(
                        format!("{}_ms", query),
                        Value::from(elapsed_time),
                    );
                    result
                })
                .collect()
        }),
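        // If the pool cannot be built, the same per-query logic runs
        // sequentially below; note this fallback reads with read_slice_1d
        // where the parallel path above uses read_slice_2d.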
        Err(err) => {
            timings.insert(
                "thread_pool_error".to_string(),
                Value::String(format!("Failed to create thread pool: {}", err)),
            );
            qry.iter()
                .map(|query| {
                    let query_start_time = Instant::now();
                    let result = match col_data_to_index.get(query) {
                        Some(&index) => {
                            if index >= matrix_dataset.shape()[0] {
                                let mut error_map = Map::new();
                                error_map.insert(
                                    "error".to_string(),
                                    Value::String("Query index out of bounds".to_string()),
                                );
                                (query.clone(), Value::Object(error_map))
                            } else {
                                let selection = Selection::from((index..index + 1, ..));
                                if datatype.is::<f64>() {
                                    match matrix_dataset.read_slice_1d::<f64, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read f64 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else if datatype.is::<f32>() {
                                    match matrix_dataset.read_slice_1d::<f32, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read f32 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else if datatype.is::<i32>() {
                                    match matrix_dataset.read_slice_1d::<i32, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read i32 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else if datatype.is::<i64>() {
                                    match matrix_dataset.read_slice_1d::<i64, _>(selection) {
                                        Ok(data) => (
                                            query.clone(),
                                            json!({
                                                "dataId": query,
                                                row_dataset_name.clone(): process_data(data.as_slice().unwrap(), &row_data)
                                            }),
                                        ),
                                        Err(err) => {
                                            let mut error_map = Map::new();
                                            error_map.insert(
                                                "error".to_string(),
                                                Value::String(format!("Failed to read i64 matrix values: {}", err)),
                                            );
                                            (query.clone(), Value::Object(error_map))
                                        }
                                    }
                                } else {
                                    let mut error_map = Map::new();
                                    error_map.insert(
                                        "error".to_string(),
                                        Value::String("Unsupported matrix datatype (expected f64, f32, i64 or i32)".to_string()),
                                    );
                                    (query.clone(), Value::Object(error_map))
                                }
                            }
                        }
                        None => {
                            let mut error_map = Map::new();
                            error_map.insert(
                                "error".to_string(),
                                Value::String(format!("Query '{}' not found in {} dataset", query, col_dataset_name)),
                            );
                            (query.clone(), Value::Object(error_map))
                        }
                    };
                    let elapsed_time = query_start_time.elapsed().as_millis() as u64;
                    let mut query_timings = col_data_map.lock().unwrap();
                    query_timings.insert(
                        format!("{}_ms", query),
                        Value::from(elapsed_time),
                    );
                    result
                })
                .collect()
        }
    };

    let mut col_data_map = col_data_map.lock().unwrap();
    for (query, query_data) in results {
        col_data_map.insert(query, query_data);
    }

    let output_json = json!({
        "query_output": *col_data_map,
        "timings": timings,
        "total_time_ms": overall_start_time.elapsed().as_millis() as u64
    });
    println!("{}", output_json);
    Ok(())
}
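// Review sketch (hypothetical, not part of this release): the four per-type
// match arms repeated in both paths above could collapse into one generic
// helper, assuming the same hdf5 crate API used throughout this file:
//
// fn read_row_as_f64<T: hdf5::H5Type + ToF64 + Copy>(
//     ds: &hdf5::Dataset,
//     index: usize,
// ) -> hdf5::Result<Vec<f64>> {
//     let data = ds.read_slice_2d::<T, _>(Selection::from((index..index + 1, ..)))?;
//     Ok(data.iter().map(|v| v.to_f64()).collect())
// }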

/// Main function to handle both validation and read of h5 file
fn main() -> Result<()> {
    let mut input = String::new();
    match io::stdin().read_line(&mut input) {
        Ok(_bytes_read) => {
            let input_json = match json::parse(&input) {
                Ok(json) => json,
                Err(_err) => {
                    panic!("Invalid JSON input");
                }
            };

            // Extract HDF5 filename
            let hdf5_filename = match input_json["hdf5_file"].as_str() {
                Some(x) => x.to_string(),
                None => {
                    println!("{}", error_response("HDF5 filename not provided"));
                    return Ok(());
                }
            };

            // h5 file validation
            if input_json.has_key("validate") {
                let v: bool = match input_json["validate"].as_bool() {
                    Some(x) => x,
                    None => false,
                };
                if !v {
                    println!("{}", error_response("The value of validate is invalid"));
                    return Ok(());
                }
                let _ = validate_hdf5_file(hdf5_filename);
            } else if input_json.has_key("query") {
                let qry: Vec<String> = match &input_json["query"] {
                    JsonValue::Array(arr) => arr.iter().filter_map(|v| v.as_str().map(|s| s.to_string())).collect(),
                    _ => vec![],
                };
                if !qry.is_empty() {
                    query_dataset(hdf5_filename, qry)?;
                } else {
                    println!("{}", error_response("query is empty"));
                }
            } else {
                println!(
                    "{}",
                    error_response("validate or query has to be provided in input JSON.")
                );
            }
        }
        Err(error) => {
            println!("{}", error_response(format!("Error reading input: {}", error)));
        }
    }
    Ok(())
}
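// Example query response (illustrative values; the per-query "_ms" timings
// are merged into query_output alongside the data):
// {"query_output":{"HALLMARK_ADIPOGENESIS":{"dataId":"HALLMARK_ADIPOGENESIS",
//   "samples":{"sample1":1.5,"sample2":null}},"HALLMARK_ADIPOGENESIS_ms":3},
//  "timings":{"query_count":1,"build_hashmap_ms":1,"thread_count":1},
//  "total_time_ms":10}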