@sjcrh/proteinpaint-rust 2.171.0 → 2.175.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.171.0",
2
+ "version": "2.175.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
@@ -34,7 +34,7 @@ use std::io;
34
34
  use std::io::Read;
35
35
  use std::str::FromStr;
36
36
  // use std::time::Instant;
37
- use hdf5::types::{VarLenAscii, VarLenUnicode};
37
+ use hdf5::types::VarLenUnicode;
38
38
  use hdf5::{File, Result};
39
39
  use ndarray::Dim;
40
40
 
@@ -57,259 +57,6 @@ use ndarray::Dim;
57
57
  fn input_data_hdf5(
58
58
  filename: &String,
59
59
  sample_list: &Vec<&str>,
60
- ) -> Result<(Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>, Vec<String>)> {
61
- // let now = Instant::now();
62
- // eprintln!("Reading HDF5 file: {}", filename);
63
-
64
- // Open the HDF5 file
65
- let file = match File::open(filename) {
66
- Ok(f) => f,
67
- Err(err) => {
68
- // eprintln!("Failed to open HDF5 file: {}", err);
69
- // println!(
70
- // "{}",
71
- // serde_json::json!({
72
- // "status": "error",
73
- // "message": format!("Failed to open HDF5 file: {}", err),
74
- // "file_path": filename
75
- // })
76
- // );
77
- return Err(hdf5::Error::Internal(format!("Failed to open HDF5 file: {}", err)));
78
- }
79
- };
80
-
81
- // Read gene symbols dataset
82
- let genes_dataset = match file.dataset("gene_names") {
83
- Ok(ds) => ds,
84
- Err(err) => {
85
- // eprintln!("Failed to open gene_names dataset: {}", err);
86
- // println!(
87
- // "{}",
88
- // serde_json::json!({
89
- // "status": "error",
90
- // "message": format!("Failed to open gene_names dataset: {}", err),
91
- // "file_path": filename
92
- // })
93
- // );
94
- return Err(hdf5::Error::Internal(format!(
95
- "Failed to open gene_names dataset: {}",
96
- err
97
- )));
98
- }
99
- };
100
-
101
- // Read genes as VarLenAscii
102
- let genes_varlen = match genes_dataset.read_1d::<VarLenAscii>() {
103
- Ok(g) => g,
104
- Err(err) => {
105
- // eprintln!("Failed to read gene symbols: {}", err);
106
- // println!(
107
- // "{}",
108
- // serde_json::json!({
109
- // "status": "error",
110
- // "message": format!("Failed to read gene symbols: {}", err),
111
- // "file_path": filename
112
- // })
113
- // );
114
- return Err(hdf5::Error::Internal(format!("Failed to read gene symbols: {}", err)));
115
- }
116
- };
117
-
118
- // Convert to Vec<String> for easier handling
119
- let gene_names: Vec<String> = genes_varlen.iter().map(|g| g.to_string()).collect();
120
- let num_genes = gene_names.len();
121
- // eprintln!("Found {} gene symbols", num_genes);
122
-
123
- // Read sample names
124
- let samples_dataset = match file.dataset("samples") {
125
- Ok(ds) => ds,
126
- Err(err) => {
127
- // eprintln!("Failed to open samples dataset: {}", err);
128
- println!(
129
- "{}",
130
- serde_json::json!({
131
- "status": "error",
132
- "message": format!("Failed to open samples dataset: {}", err),
133
- "file_path": filename
134
- })
135
- );
136
- return Err(hdf5::Error::Internal(format!(
137
- "Failed to open samples dataset: {}",
138
- err
139
- )));
140
- }
141
- };
142
-
143
- // Read samples as VarLenAscii
144
- let samples_varlen = match samples_dataset.read_1d::<VarLenAscii>() {
145
- Ok(s) => s,
146
- Err(err) => {
147
- // eprintln!("Failed to read sample names: {}", err);
148
- println!(
149
- "{}",
150
- serde_json::json!({
151
- "status": "error",
152
- "message": format!("Failed to read sample names: {}", err),
153
- "file_path": filename
154
- })
155
- );
156
- return Err(hdf5::Error::Internal(format!("Failed to read sample names: {}", err)));
157
- }
158
- };
159
-
160
- // Convert to Vec<String> for easier handling
161
- let all_samples: Vec<String> = samples_varlen.iter().map(|s| s.to_string()).collect();
162
- // eprintln!("Found {} total samples", all_samples.len());
163
-
164
- // Find indices of requested samples
165
- let mut column_indices: Vec<usize> = Vec::with_capacity(sample_list.len());
166
- for sample in sample_list {
167
- if let Some(index) = all_samples.iter().position(|s| s == sample) {
168
- column_indices.push(index);
169
- } else {
170
- // eprintln!("Sample {} not found in the dataset", sample);
171
- // println!(
172
- // "{}",
173
- // serde_json::json!({
174
- // "status": "error",
175
- // "message": format!("Sample '{}' not found in the dataset", sample),
176
- // "file_path": filename,
177
- // "available_samples": all_samples
178
- // })
179
- // );
180
- return Err(hdf5::Error::Internal(format!(
181
- "Sample '{}' not found in the dataset",
182
- sample
183
- )));
184
- }
185
- }
186
-
187
- // Read the counts dataset
188
- let counts_dataset = match file.dataset("counts") {
189
- Ok(ds) => ds,
190
- Err(err) => {
191
- // eprintln!("Failed to open counts dataset: {}", err);
192
- // println!(
193
- // "{}",
194
- // serde_json::json!({
195
- // "status": "error",
196
- // "message": format!("Failed to open counts dataset: {}", err),
197
- // "file_path": filename
198
- // })
199
- // );
200
- return Err(hdf5::Error::Internal(format!("Failed to open counts dataset: {}", err)));
201
- }
202
- };
203
-
204
- // Get dataset dimensions for validation
205
- let dataset_shape = counts_dataset.shape();
206
- if dataset_shape.len() != 2 {
207
- // eprintln!("Counts dataset does not have the expected 2D shape");
208
- // println!(
209
- // "{}",
210
- // serde_json::json!({
211
- // "status": "error",
212
- // "message": "Expected a 2D dataset for counts",
213
- // "file_path": filename,
214
- // "actual_shape": dataset_shape
215
- // })
216
- // );
217
- return Err(hdf5::Error::Internal("Expected a 2D dataset for counts".to_string()));
218
- }
219
-
220
- // Check dimensions match expected values
221
- if dataset_shape[0] != num_genes {
222
- // eprintln!(
223
- // "Counts dataset first dimension ({}) doesn't match number of genes ({})",
224
- // dataset_shape[0], num_genes
225
- // );
226
- // println!(
227
- // "{}",
228
- // serde_json::json!({
229
- // "status": "error",
230
- // "message": format!("Counts dataset first dimension ({}) doesn't match number of genes ({})",
231
- // dataset_shape[0], num_genes),
232
- // "file_path": filename
233
- // })
234
- // );
235
- return Err(hdf5::Error::Internal(format!(
236
- "Counts dataset first dimension ({}) doesn't match number of genes ({})",
237
- dataset_shape[0], num_genes
238
- )));
239
- }
240
-
241
- if dataset_shape[1] != all_samples.len() {
242
- // eprintln!(
243
- // "Counts dataset second dimension ({}) doesn't match number of samples ({})",
244
- // dataset_shape[1],
245
- // all_samples.len()
246
- // );
247
- // println!(
248
- // "{}",
249
- // serde_json::json!({
250
- // "status": "error",
251
- // "message": format!("Counts dataset second dimension ({}) doesn't match number of samples ({})",
252
- // dataset_shape[1], all_samples.len()),
253
- // "file_path": filename
254
- // })
255
- // );
256
- return Err(hdf5::Error::Internal(format!(
257
- "Counts dataset second dimension ({}) doesn't match number of samples ({})",
258
- dataset_shape[1],
259
- all_samples.len()
260
- )));
261
- }
262
-
263
- // Read the counts dataset
264
- let all_counts = match counts_dataset.read::<f64, Dim<[usize; 2]>>() {
265
- Ok(data) => data,
266
- Err(err) => {
267
- // eprintln!("Failed to read expression data: {}", err);
268
- // println!(
269
- // "{}",
270
- // serde_json::json!({
271
- // "status": "error",
272
- // "message": format!("Failed to read expression data: {}", err),
273
- // "file_path": filename
274
- // })
275
- // );
276
- return Err(hdf5::Error::Internal(format!(
277
- "Failed to read expression data: {}",
278
- err
279
- )));
280
- }
281
- };
282
-
283
- // Extract only the columns corresponding to the requested samples
284
- // eprintln!(
285
- // "Extracting data for {} requested samples",
286
- // sample_list.len()
287
- // );
288
- let mut input_vector: Vec<f64> = Vec::with_capacity(num_genes * sample_list.len());
289
-
290
- for gene_idx in 0..num_genes {
291
- for &col_idx in &column_indices {
292
- input_vector.push(all_counts[[gene_idx, col_idx]]);
293
- }
294
- }
295
-
296
- // Create matrix from the extracted data
297
- let dm = DMatrix::from_row_slice(num_genes, sample_list.len(), &input_vector);
298
-
299
- // eprintln!("Time for reading HDF5 data: {:?}", now.elapsed());
300
- // eprintln!(
301
- // "Successfully extracted expression data matrix of size {}x{}",
302
- // dm.nrows(),
303
- // dm.ncols()
304
- // );
305
-
306
- Ok((dm, gene_names))
307
- }
308
-
309
- // Similar to input_data_hdf5, but specifically for new H5 format
310
- fn input_data_hdf5_newformat(
311
- filename: &String,
312
- sample_list: &Vec<&str>,
313
60
  ) -> Result<(Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>, Vec<String>)> {
314
61
  // Open the HDF5 file
315
62
  let file = match File::open(filename) {
@@ -724,12 +471,12 @@ fn main() {
724
471
  }
725
472
 
726
473
  // Determine if the H5 file is new format
727
- let new_format: bool = match &json_string {
728
- json::JsonValue::Object(ref obj) => {
729
- obj.get("newformat").and_then(|v| v.as_bool()).map_or(false, |b| b)
730
- }
731
- _ => false,
732
- };
474
+ //let new_format: bool = match &json_string {
475
+ // json::JsonValue::Object(ref obj) => {
476
+ // obj.get("newformat").and_then(|v| v.as_bool()).map_or(false, |b| b)
477
+ // }
478
+ // _ => false,
479
+ //};
733
480
 
734
481
  let rank_type = &json_string["rank_type"] // Value provide must be either "var" or "iqr"
735
482
  .to_owned()
@@ -817,25 +564,11 @@ fn main() {
817
564
  // eprintln!("Reading data from {} file: {}", file_type, file_name);
818
565
  let (input_matrix, gene_names) = if file_type == "hdf5" {
819
566
  // eprintln!("Using HDF5 reader function...");
820
- if new_format {
821
- match input_data_hdf5_newformat(&file_name, &samples_list) {
822
- Ok(result) => result,
823
- Err(err) => {
824
- eprintln!("ERROR in HDF5 new format reader: {:?}", err);
825
- return;
826
- }
827
- }
828
- } else {
829
- match input_data_hdf5(&file_name, &samples_list) {
830
- Ok(result) => {
831
- // eprintln!("Successfully read HDF5 data");
832
- result
833
- }
834
- Err(err) => {
835
- eprintln!("ERROR in HDF5 reader: {:?}", err);
836
- // Error has already been printed to stdout in JSON format by the function
837
- return;
838
- }
567
+ match input_data_hdf5(&file_name, &samples_list) {
568
+ Ok(result) => result,
569
+ Err(err) => {
570
+ eprintln!("ERROR in HDF5 reader: {:?}", err);
571
+ return;
839
572
  }
840
573
  }
841
574
  } else {