@sjcrh/proteinpaint-rust 2.111.0 → 2.114.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +4 -0
- package/index.js +142 -32
- package/package.json +2 -2
- package/src/DEanalysis.rs +4 -4
- package/src/gdcmaf.rs +130 -46
- package/src/readHDF5.rs +561 -130
- package/src/validateHDF5.rs +222 -0
|
@@ -0,0 +1,222 @@
|
|
|
1
|
+
// syntax:
|
|
2
|
+
// echo '{"hdf5_file":"/path/to/my/local/file.h5"}' | ./target/release/validateHDF5
|
|
3
|
+
|
|
4
|
+
use hdf5::types::VarLenAscii;
|
|
5
|
+
use hdf5::{File, Result};
|
|
6
|
+
use ndarray::Array1;
|
|
7
|
+
use ndarray::Dim;
|
|
8
|
+
use std::io;
|
|
9
|
+
use serde_json;
|
|
10
|
+
|
|
11
|
+
/// Detects the format of the HDF5 file
|
|
12
|
+
pub fn detect_hdf5_format(hdf5_filename: &str) -> Result<&'static str> {
|
|
13
|
+
let file = File::open(hdf5_filename)?;
|
|
14
|
+
|
|
15
|
+
// Check for dense format (has counts, gene_names, and samples datasets)
|
|
16
|
+
let has_counts = file.dataset("counts").is_ok();
|
|
17
|
+
let has_gene_names = file.dataset("gene_names").is_ok();
|
|
18
|
+
let has_samples = file.dataset("samples").is_ok();
|
|
19
|
+
|
|
20
|
+
// Check for sparse matrix format (has data group and sample_names)
|
|
21
|
+
let has_data_group = file.group("data").is_ok();
|
|
22
|
+
let has_sample_names = file.dataset("sample_names").is_ok();
|
|
23
|
+
|
|
24
|
+
if has_counts && has_gene_names && has_samples {
|
|
25
|
+
// eprintln!("Dense format detected");
|
|
26
|
+
Ok("dense")
|
|
27
|
+
} else if has_data_group && has_sample_names {
|
|
28
|
+
// eprintln!("Sparse format detected");
|
|
29
|
+
Ok("sparse")
|
|
30
|
+
} else {
|
|
31
|
+
// eprintln!("Unknown format detected");
|
|
32
|
+
Ok("unknown")
|
|
33
|
+
}
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
/// Validates and loads the HDF5 file
|
|
37
|
+
pub fn validate_hdf5_file(hdf5_filename: String) -> Result<()> {
|
|
38
|
+
// Open the HDF5 file
|
|
39
|
+
let file = File::open(&hdf5_filename)?;
|
|
40
|
+
|
|
41
|
+
// Detect file format
|
|
42
|
+
let file_format = detect_hdf5_format(&hdf5_filename)?;
|
|
43
|
+
|
|
44
|
+
// Get basic information about the file depending on format
|
|
45
|
+
let output = match file_format {
|
|
46
|
+
"dense" => {
|
|
47
|
+
// For dense format, get dimensions from the counts dataset
|
|
48
|
+
let ds_counts = file.dataset("counts")?;
|
|
49
|
+
let data_shape = ds_counts.shape();
|
|
50
|
+
|
|
51
|
+
// Read sample names using VarLenAscii
|
|
52
|
+
let mut sample_names: Vec<String> = Vec::new();
|
|
53
|
+
if let Ok(ds_samples) = file.dataset("samples") {
|
|
54
|
+
if let Ok(samples) = ds_samples.read_1d::<VarLenAscii>() {
|
|
55
|
+
for sample in samples.iter() {
|
|
56
|
+
sample_names.push(sample.to_string());
|
|
57
|
+
}
|
|
58
|
+
} else {
|
|
59
|
+
eprintln!("Error reading samples as VarLenAscii");
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
|
|
63
|
+
// Read gene names using VarLenAscii
|
|
64
|
+
let mut gene_names: Vec<String> = Vec::new();
|
|
65
|
+
if let Ok(ds_genes) = file.dataset("gene_ids") {
|
|
66
|
+
if let Ok(genes) = ds_genes.read_1d::<VarLenAscii>() {
|
|
67
|
+
for gene in genes.iter() {
|
|
68
|
+
gene_names.push(gene.to_string());
|
|
69
|
+
}
|
|
70
|
+
} else {
|
|
71
|
+
eprintln!("Error reading gene_ids as VarLenAscii");
|
|
72
|
+
}
|
|
73
|
+
} else {
|
|
74
|
+
eprintln!("Could not find 'gene_ids' dataset");
|
|
75
|
+
}
|
|
76
|
+
|
|
77
|
+
// Create JSON with both sample names and gene names
|
|
78
|
+
serde_json::json!({
|
|
79
|
+
"status": "success",
|
|
80
|
+
"message": "HDF5 file loaded successfully",
|
|
81
|
+
"file_path": hdf5_filename,
|
|
82
|
+
"format": "dense",
|
|
83
|
+
"sampleNames": sample_names,
|
|
84
|
+
"matrix_dimensions": {
|
|
85
|
+
"num_genes": data_shape[0],
|
|
86
|
+
"num_samples": data_shape[1]
|
|
87
|
+
}
|
|
88
|
+
})
|
|
89
|
+
}
|
|
90
|
+
"sparse" => {
|
|
91
|
+
// For sparse format, get dimensions from the data/dim dataset
|
|
92
|
+
let ds_dim = file.dataset("data/dim")?;
|
|
93
|
+
let data_dim: Array1<usize> = ds_dim.read::<usize, Dim<[usize; 1]>>()?;
|
|
94
|
+
let num_samples = data_dim[0];
|
|
95
|
+
let num_genes = data_dim[1];
|
|
96
|
+
|
|
97
|
+
// Read sample names using VarLenAscii
|
|
98
|
+
let mut sample_names: Vec<String> = Vec::new();
|
|
99
|
+
if let Ok(ds_samples) = file.dataset("sample_names") {
|
|
100
|
+
if let Ok(samples) = ds_samples.read_1d::<VarLenAscii>() {
|
|
101
|
+
for sample in samples.iter() {
|
|
102
|
+
sample_names.push(sample.to_string());
|
|
103
|
+
}
|
|
104
|
+
} else {
|
|
105
|
+
eprintln!("Error reading sample_names as VarLenAscii");
|
|
106
|
+
}
|
|
107
|
+
}
|
|
108
|
+
|
|
109
|
+
// Read gene names using VarLenAscii
|
|
110
|
+
let mut gene_names: Vec<String> = Vec::new();
|
|
111
|
+
if let Ok(ds_genes) = file.dataset("gene_names") {
|
|
112
|
+
if let Ok(genes) = ds_genes.read_1d::<VarLenAscii>() {
|
|
113
|
+
for gene in genes.iter() {
|
|
114
|
+
gene_names.push(gene.to_string());
|
|
115
|
+
}
|
|
116
|
+
} else {
|
|
117
|
+
eprintln!("Error reading gene_names as VarLenAscii");
|
|
118
|
+
}
|
|
119
|
+
} else {
|
|
120
|
+
eprintln!("Could not find 'gene_names' dataset, trying alternatives");
|
|
121
|
+
}
|
|
122
|
+
|
|
123
|
+
// Create JSON with the same structure as dense format
|
|
124
|
+
serde_json::json!({
|
|
125
|
+
"status": "success",
|
|
126
|
+
"message": "HDF5 file loaded successfully",
|
|
127
|
+
"file_path": hdf5_filename,
|
|
128
|
+
"format": "sparse",
|
|
129
|
+
"sampleNames": sample_names,
|
|
130
|
+
"matrix_dimensions": {
|
|
131
|
+
"num_genes": num_genes,
|
|
132
|
+
"num_samples": num_samples
|
|
133
|
+
}
|
|
134
|
+
})
|
|
135
|
+
}
|
|
136
|
+
_ => {
|
|
137
|
+
// For unknown format
|
|
138
|
+
serde_json::json!({
|
|
139
|
+
"status": "failure",
|
|
140
|
+
"message": "Unknown file format cannot be loaded successfully",
|
|
141
|
+
"file_path": hdf5_filename,
|
|
142
|
+
"format": "unknown",
|
|
143
|
+
"sampleNames": [],
|
|
144
|
+
"geneNames": []
|
|
145
|
+
})
|
|
146
|
+
}
|
|
147
|
+
};
|
|
148
|
+
|
|
149
|
+
// Print the output
|
|
150
|
+
println!("{}", output);
|
|
151
|
+
|
|
152
|
+
Ok(())
|
|
153
|
+
}
|
|
154
|
+
|
|
155
|
+
/// Main function to handle the validation process
|
|
156
|
+
fn main() -> Result<()> {
|
|
157
|
+
let mut input = String::new();
|
|
158
|
+
match io::stdin().read_line(&mut input) {
|
|
159
|
+
Ok(_bytes_read) => {
|
|
160
|
+
let input_json = json::parse(&input);
|
|
161
|
+
match input_json {
|
|
162
|
+
Ok(json_string) => {
|
|
163
|
+
// Extract HDF5 filename
|
|
164
|
+
let hdf5_filename = match json_string["hdf5_file"].as_str() {
|
|
165
|
+
Some(x) => x.to_string(),
|
|
166
|
+
None => {
|
|
167
|
+
eprintln!("HDF5 filename not provided");
|
|
168
|
+
println!(
|
|
169
|
+
"{}",
|
|
170
|
+
serde_json::json!({
|
|
171
|
+
"status": "error",
|
|
172
|
+
"message": "HDF5 filename not provided"
|
|
173
|
+
})
|
|
174
|
+
);
|
|
175
|
+
return Ok(());
|
|
176
|
+
}
|
|
177
|
+
};
|
|
178
|
+
|
|
179
|
+
// Log the start of validation
|
|
180
|
+
// let start_time = Instant::now();
|
|
181
|
+
// eprintln!("Starting validation of file: {}", hdf5_filename);
|
|
182
|
+
|
|
183
|
+
// Run the validation
|
|
184
|
+
if let Err(err) = validate_hdf5_file(hdf5_filename.clone()) {
|
|
185
|
+
eprintln!("Error validating HDF5 file: {:?}", err);
|
|
186
|
+
println!(
|
|
187
|
+
"{}",
|
|
188
|
+
serde_json::json!({
|
|
189
|
+
"status": "error",
|
|
190
|
+
"message": format!("Error validating HDF5 file: {}", err)
|
|
191
|
+
})
|
|
192
|
+
);
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
// Log completion time
|
|
196
|
+
// eprintln!("Validation completed in: {:?}", start_time.elapsed());
|
|
197
|
+
}
|
|
198
|
+
Err(error) => {
|
|
199
|
+
eprintln!("Incorrect JSON: {}", error);
|
|
200
|
+
println!(
|
|
201
|
+
"{}",
|
|
202
|
+
serde_json::json!({
|
|
203
|
+
"status": "error",
|
|
204
|
+
"message": format!("Invalid JSON input: {}", error)
|
|
205
|
+
})
|
|
206
|
+
);
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
210
|
+
Err(error) => {
|
|
211
|
+
eprintln!("Piping error: {}", error);
|
|
212
|
+
println!(
|
|
213
|
+
"{}",
|
|
214
|
+
serde_json::json!({
|
|
215
|
+
"status": "error",
|
|
216
|
+
"message": format!("Error reading input: {}", error)
|
|
217
|
+
})
|
|
218
|
+
);
|
|
219
|
+
}
|
|
220
|
+
}
|
|
221
|
+
Ok(())
|
|
222
|
+
}
|