@sjcrh/proteinpaint-rust 2.180.1-0 → 2.181.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -22,6 +22,7 @@ plotters = "0.3.4"
22
22
  tiny-skia = "0.11"
23
23
  colorgrad = "0.6.2"
24
24
  statrs = "^0.16.0"
25
+ libc = "0.2"
25
26
  fishers_exact="^1.0.1"
26
27
  bio = "1.5.0"
27
28
  bigtools = "^0.1.11"
@@ -133,4 +134,8 @@ path="src/query_classification.rs"
133
134
 
134
135
  [[bin]]
135
136
  name="summary_agent"
136
- path="src/summary_agent.rs"
137
+ path="src/summary_agent.rs"
138
+
139
+ [[bin]]
140
+ name="dmrcate"
141
+ path="src/dmrcate.rs"
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.180.1-0",
2
+ "version": "2.181.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/dmrcate.rs ADDED
@@ -0,0 +1,996 @@
1
+ // dmrcate.rs — Genome-wide empirical Bayes moderated t-test + DMRCate kernel smoothing
2
+ // Smyth 2004 (limma), Peters et al. 2015 (DMRCate), Phipson et al. 2016 (robust eBayes)
3
+ //
4
+ // Reads probe-level beta values from HDF5, runs chromosome-chunked OLS → genome-wide
5
+ // eBayes → regional kernel smoothing → DMR segmentation with proximity fallback.
6
+ // Usage: echo '{"probe_h5_file":"beta.h5","chr":"chr14","start":100000,"stop":105000,
7
+ // "case":"s1,s2","control":"s3,s4"}' | target/release/dmrcate
8
+
9
+ use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
10
+ use hdf5::File;
11
+ use hdf5::types::VarLenUnicode;
12
+ use serde_json::{Value, json};
13
+ use statrs::distribution::{ContinuousCDF, StudentsT};
14
+ use statrs::function::gamma::{digamma, gamma_ur, ln_gamma};
15
+ use std::collections::HashMap;
16
+ use std::io;
17
+ use std::time::Instant;
18
+ use tiny_skia::{FillRule, Paint, PathBuilder, Pixmap, Stroke, StrokeDash, Transform};
19
+
20
/// Peak resident set size of this process in megabytes, obtained via
/// `getrusage(2)`. Used only for the diagnostic memory figures in the
/// JSON output of `main`.
fn get_rss_mb() -> f64 {
    // SAFETY: `libc::rusage` is plain-old-data, so a zeroed value is a valid
    // out-parameter for getrusage(RUSAGE_SELF).
    unsafe {
        let mut usage: libc::rusage = std::mem::zeroed();
        libc::getrusage(libc::RUSAGE_SELF, &mut usage);
        // ru_maxrss units differ by platform: macOS reports bytes,
        // Linux reports kilobytes.
        #[cfg(target_os = "macos")]
        {
            usage.ru_maxrss as f64 / 1_048_576.0
        } // bytes → MB
        #[cfg(not(target_os = "macos"))]
        {
            usage.ru_maxrss as f64 / 1024.0
        } // KB → MB
    }
}
34
+
35
/// Trigamma function ψ₁(x) = d²/dx² ln Γ(x).
///
/// Small arguments are shifted upward using the recurrence
/// ψ₁(x) = ψ₁(x + 1) + 1/x²; once the argument reaches 6, the asymptotic
/// series (terms through x⁻⁷) is applied. Returns NaN for x ≤ 0.
fn trigamma(x: f64) -> f64 {
    if x <= 0.0 {
        return f64::NAN;
    }
    // Shift into the asymptotic regime, accumulating the recurrence terms.
    let mut z = x;
    let mut acc = 0.0;
    while z < 6.0 {
        acc += 1.0 / (z * z);
        z += 1.0;
    }
    // Asymptotic expansion at the shifted argument, summed left to right
    // in the same order as the closed-form expression.
    let z2 = z * z;
    let tail = [
        1.0 / z,
        1.0 / (2.0 * z2),
        1.0 / (6.0 * z2 * z),
        -(1.0 / (30.0 * z2 * z2 * z)),
        1.0 / (42.0 * z2 * z2 * z2 * z),
    ];
    tail.iter().fold(acc, |sum, term| sum + term)
}
47
+
48
/// Derivative of the trigamma function (tetragamma, ψ₂).
/// Same shift-then-asymptotic-series scheme as `trigamma`; used as the
/// Newton-step denominator in `trigamma_inverse`.
fn trigamma_deriv(x: f64) -> f64 {
    // Recurrence: ψ₂(x) = ψ₂(x + 1) - 2/x³, applied until the argument
    // is large enough for the asymptotic series.
    let mut z = x;
    let mut acc = 0.0;
    while z < 6.0 {
        acc -= 2.0 / (z * z * z);
        z += 1.0;
    }
    let z2 = z * z;
    let tail = [
        -(1.0 / z2),
        -(1.0 / (z2 * z)),
        -(1.0 / (2.0 * z2 * z2)),
        1.0 / (6.0 * z2 * z2 * z2),
    ];
    tail.iter().fold(acc, |sum, term| sum + term)
}
57
+
58
+ fn trigamma_inverse(x: f64) -> f64 {
59
+ if x.is_nan() || x <= 0.0 {
60
+ return f64::NAN;
61
+ }
62
+ let mut y = if x > 1e-6 { 1.0 / x.sqrt() } else { 1.0 / x };
63
+ for _ in 0..8 {
64
+ let delta = (trigamma(y) - x) / trigamma_deriv(y);
65
+ y -= delta;
66
+ if y <= 0.0 {
67
+ y = 0.5 * (y + delta);
68
+ }
69
+ if delta.abs() < 1e-12 * y.abs() {
70
+ break;
71
+ }
72
+ }
73
+ y
74
+ }
75
+
76
/// Benjamini–Hochberg FDR adjustment.
///
/// For the probe with ascending-significance rank r (1-based),
/// adj = min(1, cummin over less-significant probes of p · n / r).
/// Output is in the same order as the input slice.
fn bh_adjust(pvalues: &[f64]) -> Vec<f64> {
    let n = pvalues.len();
    if n == 0 {
        return Vec::new();
    }
    // Indices ordered from smallest (most significant) to largest p-value.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| pvalues[a].partial_cmp(&pvalues[b]).unwrap_or(std::cmp::Ordering::Equal));
    let mut adjusted = vec![0.0; n];
    let mut running_min = f64::INFINITY;
    // Walk from the least significant probe downward, carrying the
    // cumulative minimum of the scaled p-values.
    for (rank0, &i) in order.iter().enumerate().rev() {
        let scaled = pvalues[i] * n as f64 / (rank0 + 1) as f64;
        running_min = running_min.min(scaled);
        adjusted[i] = running_min.min(1.0);
    }
    adjusted
}
91
+
92
/// Per-probe OLS summary produced by `process_chromosome` and consumed by
/// the genome-wide empirical-Bayes step in `main`.
struct ProbeStats {
    // Chromosome name this probe belongs to.
    chr: String,
    // Genomic start coordinate of the probe.
    start: i64,
    // Probe identifier from meta/probe/probeID.
    probe_id: String,
    // Difference of group means on the M-value (log2 odds) scale: case − control.
    log_fc: f64,
    // Pooled residual variance of the two-group fit on the M-value scale.
    residual_var: f64,
    // Residual degrees of freedom: n_case + n_control − 2.
    df_residual: f64,
    // Unscaled standard deviation of the contrast: sqrt(1/n1 + 1/n2).
    stdev_unscaled: f64,
}
101
+
102
/// Read the metadata needed for the analysis from the HDF5 file:
/// returns (chromosome names, chromosome probe counts, sample names,
/// probe start positions, probe IDs). Errors are stringified so the caller
/// can forward them in the JSON error response.
fn read_h5_metadata(file: &File) -> Result<(Vec<String>, Vec<usize>, Vec<String>, Vec<i64>, Vec<String>), String> {
    // Sample (column) names of the beta matrix.
    let samples: Vec<String> = file
        .dataset("meta/samples/names")
        .map_err(|e| e.to_string())?
        .read_1d::<VarLenUnicode>()
        .map_err(|e| e.to_string())?
        .iter()
        .map(|s| s.to_string())
        .collect();
    // Per-probe genomic start positions, in matrix row order.
    let starts: Vec<i64> = file
        .dataset("meta/start")
        .map_err(|e| e.to_string())?
        .read_1d::<i64>()
        .map_err(|e| e.to_string())?
        .to_vec();
    // Per-probe IDs, in matrix row order.
    let probes: Vec<String> = file
        .dataset("meta/probe/probeID")
        .map_err(|e| e.to_string())?
        .read_1d::<VarLenUnicode>()
        .map_err(|e| e.to_string())?
        .iter()
        .map(|s| s.to_string())
        .collect();
    // Chromosome layout is stored as a JSON object in a root attribute;
    // its key order defines the row order of the matrix.
    let root = file.group("/").map_err(|e| e.to_string())?;
    let cl_json: String = root
        .attr("chrom_lengths")
        .map_err(|e| e.to_string())?
        .read_scalar::<VarLenUnicode>()
        .map_err(|e| e.to_string())?
        .to_string();
    // json crate preserves key order; serde_json::Map sorts alphabetically (wrong for chromosomes)
    let cl_parsed = json::parse(&cl_json).map_err(|e| format!("Failed to parse chrom_lengths: {}", e))?;
    let mut names = Vec::new();
    let mut lens = Vec::new();
    for (k, v) in cl_parsed.entries() {
        names.push(k.to_string());
        // NOTE(review): non-numeric values silently become 0 probes here —
        // such chromosomes are skipped by the caller's `cl == 0` check.
        lens.push(v.as_u64().unwrap_or(0) as usize);
    }
    Ok((names, lens, samples, starts, probes))
}
142
+
143
/// Two-group OLS fit for every probe in the half-open row range
/// [`row_start`, `row_end`) of `beta/values`, reading the matrix in chunks
/// of 1000 rows to bound memory.
///
/// For each probe: beta values are clamped to [0.001, 0.999] and converted
/// to M-values (log2 odds); probes with fewer than `min_spg` finite samples
/// in either group, or with zero/non-finite variance, are skipped.
/// Returns the per-probe summaries used later by the eBayes step.
fn process_chromosome(
    file: &File,
    row_start: usize,
    row_end: usize,
    case_idx: &[usize],
    ctrl_idx: &[usize],
    chr: &str,
    starts: &[i64],
    probe_ids: &[String],
    min_spg: usize,
) -> Result<Vec<ProbeStats>, String> {
    let n_probes = row_end - row_start;
    if n_probes == 0 {
        return Ok(vec![]);
    }
    let ds = file.dataset("beta/values").map_err(|e| format!("beta/values: {}", e))?;
    let mut results = Vec::with_capacity(n_probes);
    // Rows are read 1000 at a time so only a small slab of the (potentially
    // genome-sized) matrix is resident at once.
    const CHUNK: usize = 1000;
    for chunk_i in 0..((n_probes + CHUNK - 1) / CHUNK) {
        let cs = chunk_i * CHUNK;
        let ce = std::cmp::min(cs + CHUNK, n_probes);
        let sel = hdf5::Selection::from((row_start + cs..row_start + ce, ..));
        let data = ds
            .read_slice_2d::<f32, _>(sel)
            .map_err(|e| format!("HDF5 read: {}", e))?;
        for lp in 0..(ce - cs) {
            let row = data.row(lp);
            // Collect finite beta values per group (cv = case, kv = control).
            let (mut cv, mut kv) = (Vec::with_capacity(case_idx.len()), Vec::with_capacity(ctrl_idx.len()));
            for &si in case_idx {
                if si < row.len() {
                    let v = row[si] as f64;
                    if v.is_finite() {
                        cv.push(v);
                    }
                }
            }
            for &si in ctrl_idx {
                if si < row.len() {
                    let v = row[si] as f64;
                    if v.is_finite() {
                        kv.push(v);
                    }
                }
            }
            // Require a minimum number of usable samples per group.
            if cv.len() < min_spg || kv.len() < min_spg {
                continue;
            }
            // Beta → M-value (log2 odds), clamped away from 0/1 to keep it finite.
            let to_m = |b: f64| {
                let c = b.clamp(0.001, 0.999);
                (c / (1.0 - c)).log2()
            };
            let cm: Vec<f64> = cv.iter().map(|&b| to_m(b)).collect();
            let km: Vec<f64> = kv.iter().map(|&b| to_m(b)).collect();
            // Overall variance check: skip constant probes.
            let all: Vec<f64> = cm.iter().chain(km.iter()).copied().collect();
            let mean_all = all.iter().sum::<f64>() / all.len() as f64;
            let var = all.iter().map(|&x| (x - mean_all).powi(2)).sum::<f64>() / (all.len() as f64 - 1.0);
            if var <= 0.0 || !var.is_finite() {
                continue;
            }
            // Pooled two-group residual variance and degrees of freedom.
            let (n1, n2) = (cm.len() as f64, km.len() as f64);
            let (mc, mk) = (cm.iter().sum::<f64>() / n1, km.iter().sum::<f64>() / n2);
            let ss: f64 =
                cm.iter().map(|&x| (x - mc).powi(2)).sum::<f64>() + km.iter().map(|&x| (x - mk).powi(2)).sum::<f64>();
            let df = n1 + n2 - 2.0;
            let rv = ss / df;
            if !rv.is_finite() || rv <= 0.0 {
                continue;
            }
            // Unscaled SD of the mean-difference contrast.
            let su = (1.0 / n1 + 1.0 / n2).sqrt();
            let idx = row_start + cs + lp;
            results.push(ProbeStats {
                chr: chr.to_string(),
                start: starts[idx],
                probe_id: probe_ids[idx].clone(),
                log_fc: mc - mk,
                residual_var: rv,
                df_residual: df,
                stdev_unscaled: su,
            });
        }
    }
    Ok(results)
}
226
+
227
/// Fit a scaled F-distribution to the per-probe residual variances,
/// estimating the empirical-Bayes prior `(s2_0, df_0)` by the method of
/// moments on log-variances — a port of limma's `fitFDist` (Smyth 2004).
/// Returns `(1.0, 0.0)` when there are too few usable probes and
/// `df_0 = INFINITY` when the moment estimate is degenerate.
fn fit_f_dist(vars: &[f64], dfs: &[f64]) -> (f64, f64) {
    if vars.len() < 3 {
        return (1.0, 0.0);
    }
    // Match R's fitFDist pre-processing:
    // 1. Filter to ok probes (finite df > 1e-15, finite var > -1e-15)
    // 2. Clamp var to max(var, 0), then floor at 1e-5 * median(var)
    let ok: Vec<usize> = (0..vars.len())
        .filter(|&i| dfs[i].is_finite() && dfs[i] > 1e-15 && vars[i].is_finite() && vars[i] > -1e-15)
        .collect();
    if ok.len() < 3 {
        return (1.0, 0.0);
    }
    let mut xv: Vec<f64> = ok.iter().map(|&i| vars[i].max(0.0)).collect();
    let xdf: Vec<f64> = ok.iter().map(|&i| dfs[i]).collect();
    // Median of variances
    let mut sorted_v = xv.clone();
    sorted_v.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
    let median_v = if sorted_v.len() % 2 == 0 {
        (sorted_v[sorted_v.len() / 2 - 1] + sorted_v[sorted_v.len() / 2]) / 2.0
    } else {
        sorted_v[sorted_v.len() / 2]
    };
    // All-zero variances: no information in the data; infinite prior df.
    if median_v == 0.0 {
        return (1.0, f64::INFINITY);
    }
    // Floor small variances at 1e-5 * median (matches R's fitFDist)
    let floor = 1e-5 * median_v;
    for v in &mut xv {
        if *v < floor {
            *v = floor;
        }
    }
    let n = xv.len() as f64;
    // e = log(var) + logmdigamma(df/2) where logmdigamma(a) = log(a) - digamma(a)
    let e: Vec<f64> = xv
        .iter()
        .zip(xdf.iter())
        .map(|(&v, &d)| v.ln() + (d / 2.0).ln() - digamma(d / 2.0))
        .collect();
    let me = e.iter().sum::<f64>() / n;
    let ve = e.iter().map(|&ei| (ei - me).powi(2)).sum::<f64>() / (n - 1.0);
    let mean_tri: f64 = xdf.iter().map(|&d| trigamma(d / 2.0)).sum::<f64>() / n;
    // Excess variance of e beyond what the residual dfs explain determines
    // the prior degrees of freedom; non-positive excess → infinite df0.
    let target = ve - mean_tri;
    let df0 = if target > 0.0 {
        2.0 * trigamma_inverse(target)
    } else {
        f64::INFINITY
    };
    // Prior variance: back-transform the mean of e under the fitted df0,
    // or fall back to the plain mean of variances when df0 is infinite.
    let s20 = if df0.is_finite() {
        (me - (df0 / 2.0).ln() + digamma(df0 / 2.0)).exp()
    } else {
        xv.iter().sum::<f64>() / n
    };
    (s20, df0)
}
283
+
284
/// Log of the upper regularized incomplete gamma function Q(a, x).
/// Uses the continued fraction representation (Numerical Recipes / TOMS 708),
/// evaluated via modified Lentz's method. Returns the result in log space
/// so it never underflows, even for Q values as small as exp(-1e6).
/// This matches R's pgamma(x, a, lower.tail=FALSE, log.p=TRUE).
///
/// NOTE(review): the continued fraction converges well for x ≳ a + 1;
/// the single caller (`log_chisq_sf`) only reaches this path for extreme
/// upper tails, where that holds.
fn log_gamma_upper_cf(a: f64, x: f64) -> f64 {
    // Q(a, x) = exp(-x + a*ln(x) - lgamma(a)) * h
    // where h is the continued fraction. h is O(1/x) and well-behaved.
    let eps = 3e-14;
    let tiny = 1e-300;

    // Modified Lentz initialization: c holds the "numerator" running value,
    // d the "denominator"; tiny guards against division by ~0.
    let mut b = x + 1.0 - a;
    let mut c = 1.0 / tiny;
    let mut d = 1.0 / b;
    let mut h = d;

    // Iterate the continued fraction until the multiplicative update is
    // within eps of 1 (or a fixed 300-term cap).
    for i in 1..=300 {
        let an = -(i as f64) * (i as f64 - a);
        b += 2.0;
        d = an * d + b;
        if d.abs() < tiny {
            d = tiny;
        }
        c = b + an / c;
        if c.abs() < tiny {
            c = tiny;
        }
        d = 1.0 / d;
        let del = d * c;
        h *= del;
        if (del - 1.0).abs() < eps {
            break;
        }
    }

    // Assemble in log space: log Q = -x + a·ln x − ln Γ(a) + ln h.
    -x + a * x.ln() - ln_gamma(a) + h.ln()
}
321
+
322
+ /// Log chi-squared survival function: returns log P(X > x) for X ~ chi^2(df).
323
+ /// Uses statrs gamma_ur for moderate tails, continued fraction in log space
324
+ /// for extreme tails. Matches R's pchisq(x, df, lower.tail=FALSE, log.p=TRUE).
325
+ fn log_chisq_sf(x: f64, df: f64) -> f64 {
326
+ if x <= 0.0 || !x.is_finite() {
327
+ return 0.0; // log(1) = 0
328
+ }
329
+ let a = df / 2.0;
330
+ let z = x / 2.0;
331
+ // For moderate tails, use statrs (accurate and fast)
332
+ let sf = gamma_ur(a, z);
333
+ if sf > 1e-300 {
334
+ return sf.ln();
335
+ }
336
+ // For extreme tails, use continued fraction in log space
337
+ log_gamma_upper_cf(a, z)
338
+ }
339
+
340
/// Kernel smoothing returning LOG p-values (not p-values) to avoid underflow.
///
/// For each probe, squared t-statistics of neighbors within 5σ (σ = λ/C)
/// are combined with Gaussian weights; the weighted sum is compared to a
/// moment-matched scaled chi-squared null (matching its mean `sk` and
/// variance `2·skk`), giving one smoothed log p-value per probe.
/// `pos` is assumed sorted ascending — TODO confirm at the call site
/// (the caller builds it from chromosome-ordered rows).
fn kernel_smooth_log(pos: &[i64], t: &[f64], lambda: f64, c: f64) -> Vec<f64> {
    let sigma = lambda / c;
    // Truncate the Gaussian at 5σ — weights beyond that are negligible.
    let max_d = (5.0 * sigma) as i64;
    let two_s2 = 2.0 * sigma * sigma;
    // l..r is a sliding window of probes within max_d of probe i;
    // both bounds only ever move forward, so the scan is O(n + total window).
    let (n, mut l, mut r) = (pos.len(), 0usize, 0usize);
    let mut out = Vec::with_capacity(n);
    for i in 0..n {
        while r < n && (pos[r] - pos[i]).abs() <= max_d {
            r += 1;
        }
        while l < n && pos[i] - pos[l] > max_d {
            l += 1;
        }
        // sky = Σ w·t², sk = Σ w, skk = Σ w² over the window.
        let (mut sky, mut sk, mut skk) = (0.0, 0.0, 0.0);
        for j in l..r {
            let dx = (pos[i] - pos[j]) as f64;
            let w = (-dx * dx / two_s2).exp();
            sky += w * t[j] * t[j];
            sk += w;
            skk += w * w;
        }
        let log_p = if sk > 0.0 && skk > 0.0 {
            // Match mean/variance of the weighted sum to a·χ²(b):
            // b = 2·E²/Var, a = Var/(2·E).
            let (exp, var) = (sk, 2.0 * skk);
            let (b, a) = (2.0 * exp * exp / var, var / (2.0 * exp));
            if b > 0.0 && a > 0.0 {
                log_chisq_sf(sky / a, b)
            } else {
                0.0
            }
        } else {
            0.0 // degenerate window → p = 1
        };
        out.push(log_p);
    }
    out
}
377
+
378
/// Benjamini–Hochberg adjustment carried out entirely in log space so that
/// p-values far below f64's smallest positive number survive the correction.
/// Input and output are natural-log p-values; results are capped at
/// ln(1) = 0. adj_log_p[i] = cummin(log_p[i] + ln(n) − ln(rank)).
fn bh_adjust_log(log_pvalues: &[f64]) -> Vec<f64> {
    let n = log_pvalues.len();
    if n == 0 {
        return Vec::new();
    }
    let ln_n = (n as f64).ln();
    // Indices ordered from most negative (most significant) log p upward.
    let mut order: Vec<usize> = (0..n).collect();
    order.sort_by(|&a, &b| {
        log_pvalues[a]
            .partial_cmp(&log_pvalues[b])
            .unwrap_or(std::cmp::Ordering::Equal)
    });
    let mut adjusted = vec![0.0f64; n];
    let mut running_min = 0.0f64; // log(1) = 0
    // Walk from the least significant probe downward; rank0 + 1 is the
    // 1-based ascending rank of probe i.
    for (rank0, &i) in order.iter().enumerate().rev() {
        let scaled = log_pvalues[i] + ln_n - ((rank0 + 1) as f64).ln();
        running_min = running_min.min(scaled);
        adjusted[i] = running_min.min(0.0); // cap at log(1) = 0
    }
    adjusted
}
403
+
404
+ fn build_dmrs(
405
+ chr: &str,
406
+ pos: &[i64],
407
+ fdr: &[f64],
408
+ lfc: &[f64],
409
+ mg1: &[f64],
410
+ mg2: &[f64],
411
+ cutoff: f64,
412
+ lambda: f64,
413
+ min_cpgs: usize,
414
+ min_db: Option<f64>,
415
+ check_direction: bool,
416
+ ) -> Vec<Value> {
417
+ let n = pos.len();
418
+ let sig: Vec<usize> = (0..n)
419
+ .filter(|&i| {
420
+ if fdr[i] > cutoff {
421
+ return false;
422
+ }
423
+ if let Some(db) = min_db {
424
+ (mg2[i] - mg1[i]).abs() >= db
425
+ } else {
426
+ true
427
+ }
428
+ })
429
+ .collect();
430
+ if sig.len() < min_cpgs {
431
+ return vec![];
432
+ }
433
+ let mut groups: Vec<Vec<usize>> = Vec::new();
434
+ let mut grp = vec![sig[0]];
435
+ for k in 1..sig.len() {
436
+ let (p, c) = (*grp.last().unwrap(), sig[k]);
437
+ let same_dir = !check_direction || (lfc[c] >= 0.0) == (lfc[p] >= 0.0);
438
+ if same_dir && (pos[c] - pos[p]) <= lambda as i64 {
439
+ grp.push(c);
440
+ } else {
441
+ groups.push(grp);
442
+ grp = vec![c];
443
+ }
444
+ }
445
+ groups.push(grp);
446
+ groups
447
+ .iter()
448
+ .filter(|g| g.len() >= min_cpgs)
449
+ .map(|g| {
450
+ let deltas: Vec<f64> = g.iter().map(|&j| mg2[j] - mg1[j]).collect();
451
+ let fdrs: Vec<f64> = g.iter().map(|&j| fdr[j]).collect();
452
+ let md = deltas.iter().sum::<f64>() / deltas.len() as f64;
453
+ let mxd = if md >= 0.0 {
454
+ deltas.iter().cloned().fold(f64::NEG_INFINITY, f64::max)
455
+ } else {
456
+ deltas.iter().cloned().fold(f64::INFINITY, f64::min)
457
+ };
458
+ json!({ "chr": chr, "start": pos[*g.first().unwrap()], "stop": pos[*g.last().unwrap()],
459
+ "no_cpgs": g.len(), "min_smoothed_fdr": fdrs.iter().cloned().fold(f64::INFINITY, f64::min),
460
+ "HMFDR": fdrs.len() as f64 / fdrs.iter().map(|&f| 1.0/f.max(1e-300)).sum::<f64>(),
461
+ "maxdiff": mxd, "meandiff": md, "direction": if md >= 0.0 {"hyper"} else {"hypo"},
462
+ "overlapping_genes": null })
463
+ })
464
+ .collect()
465
+ }
466
+
467
+ macro_rules! bail { ($($t:tt)*) => { { println!("{}", json!({"error": format!($($t)*)})); return; } } }
468
+
469
/// LOESS (locally weighted scatterplot smoothing) with tricube weights and local linear fit.
/// Returns (fitted, ci_lower, ci_upper) evaluated at `eval_at` positions, clamped to [0,1].
///
/// `span` is the fraction of points used for each local fit (k nearest
/// neighbors, at least 3). NaN `vals` are skipped; returns None when fewer
/// than 4 finite points remain. All outputs are rounded to 4 decimals;
/// CIs use a normal approximation with an effective sample size derived
/// from the weights.
fn loess_fit(pos: &[i64], vals: &[f64], eval_at: &[f64], span: f64) -> Option<(Vec<f64>, Vec<f64>, Vec<f64>)> {
    // Collect valid (x, y) pairs (skip NaN)
    let mut xs = Vec::new();
    let mut ys = Vec::new();
    for i in 0..pos.len() {
        if vals[i].is_finite() {
            xs.push(pos[i] as f64);
            ys.push(vals[i]);
        }
    }
    let n = xs.len();
    if n < 4 {
        return None;
    }

    // Neighborhood size: span fraction of the data, clamped to [3, n].
    let k = ((span * n as f64).ceil() as usize).max(3).min(n);

    let mut fitted = Vec::with_capacity(eval_at.len());
    let mut ci_lo = Vec::with_capacity(eval_at.len());
    let mut ci_hi = Vec::with_capacity(eval_at.len());

    for &x0 in eval_at {
        // Find k nearest neighbors by distance
        let mut dists: Vec<(usize, f64)> = xs.iter().enumerate().map(|(i, &xi)| (i, (xi - x0).abs())).collect();
        dists.sort_by(|a, b| a.1.partial_cmp(&b.1).unwrap());
        let h = dists[k - 1].1.max(1.0); // bandwidth = distance to k-th nearest

        // Tricube weights
        let mut w = vec![0.0; n];
        for &(i, d) in dists.iter().take(k) {
            let u = d / h;
            if u < 1.0 {
                let t = 1.0 - u * u * u;
                w[i] = t * t * t;
            }
        }

        // Weighted linear regression: y = a + b*(x - x0)
        let mut sw = 0.0;
        let mut swx = 0.0;
        let mut swy = 0.0;
        let mut swxx = 0.0;
        let mut swxy = 0.0;
        for i in 0..n {
            if w[i] == 0.0 {
                continue;
            }
            let dx = xs[i] - x0;
            sw += w[i];
            swx += w[i] * dx;
            swy += w[i] * ys[i];
            swxx += w[i] * dx * dx;
            swxy += w[i] * dx * ys[i];
        }
        // All weights vanished (can happen when the k-th distance is 0 and
        // clamped): emit NaN for this evaluation point.
        if sw == 0.0 {
            fitted.push(f64::NAN);
            ci_lo.push(f64::NAN);
            ci_hi.push(f64::NAN);
            continue;
        }
        // Normal-equation solution; fall back to the weighted mean when the
        // design matrix is numerically singular.
        let det = sw * swxx - swx * swx;
        let (a, _b) = if det.abs() < 1e-20 {
            (swy / sw, 0.0)
        } else {
            ((swxx * swy - swx * swxy) / det, (sw * swxy - swx * swy) / det)
        };
        let y_hat = a; // at x = x0, dx = 0, so y = a

        // Weighted residual variance for CI
        let mut sse = 0.0;
        let mut sw2 = 0.0;
        for i in 0..n {
            if w[i] == 0.0 {
                continue;
            }
            let dx = xs[i] - x0;
            let pred = a + _b * dx;
            let e = ys[i] - pred;
            sse += w[i] * e * e;
            sw2 += w[i] * w[i];
        }
        // Effective df ≈ sum(w)^2 / sum(w^2) - 2
        let eff_n = (sw * sw / sw2).max(3.0);
        let sigma2 = sse / (eff_n - 2.0).max(1.0);
        let se = (sigma2 / sw).sqrt();

        // 95% CI with normal approximation (effective n is typically large for LOESS)
        let margin = 1.96 * se;
        fitted.push((y_hat * 10000.0).round() / 10000.0);
        ci_lo.push(((y_hat - margin).max(0.0).min(1.0) * 10000.0).round() / 10000.0);
        ci_hi.push(((y_hat + margin).max(0.0).min(1.0) * 10000.0).round() / 10000.0);
    }

    // Clamp fitted values
    for v in fitted.iter_mut() {
        *v = v.max(0.0).min(1.0);
    }

    Some((fitted, ci_lo, ci_hi))
}
571
+
572
+ fn hex_to_rgba(hex: &str, alpha: u8) -> (u8, u8, u8, u8) {
573
+ let hex = hex.trim_start_matches('#');
574
+ let r = u8::from_str_radix(&hex[0..2], 16).unwrap_or(128);
575
+ let g = u8::from_str_radix(&hex[2..4], 16).unwrap_or(128);
576
+ let b = u8::from_str_radix(&hex[4..6], 16).unwrap_or(128);
577
+ (r, g, b, alpha)
578
+ }
579
+
580
/// Render the complete Per-CpG Means track as a transparent PNG.
///
/// Draws (in order): LOESS fitted curves with dashed 95% CI bands for both
/// groups (only when the region is within `max_loess_region` bp), then
/// per-probe scatter dots for both group means, with higher opacity for
/// probes whose FDR is below `fdr_cutoff`. Returns the image as a
/// `data:image/png;base64,…` URI, or None if the pixmap or PNG encoding
/// fails. `dpr` scales all pixel dimensions for high-DPI displays.
fn render_track_png(
    rpos: &[i64],
    mg1: &[f64],
    mg2: &[f64],
    fdr: &[f64],
    _dmrs: &[Value],
    loess_g1: &Option<(Vec<f64>, Vec<f64>, Vec<f64>)>,
    loess_g2: &Option<(Vec<f64>, Vec<f64>, Vec<f64>)>,
    eval_pos: &[f64],
    xmin: f64,
    xmax: f64,
    width: u32,
    height: u32,
    dpr: f32,
    fdr_cutoff: f64,
    max_loess_region: f64,
    colors: &HashMap<String, String>,
) -> Option<String> {
    let w = (width as f32 * dpr) as u32;
    let h = (height as f32 * dpr) as u32;
    let mut pixmap = Pixmap::new(w, h)?;
    // Transparent background (default)

    // Map genomic position → x pixel and beta value [0,1] → y pixel
    // (y axis inverted: beta 1.0 is at the top).
    let wf = w as f32;
    let hf = h as f32;
    let x_range = (xmax - xmin).max(1.0) as f32;
    let scale_x = |pos: f64| -> f32 { ((pos - xmin) as f32 / x_range) * wf };
    let scale_y = |beta: f64| -> f32 { hf - (beta as f32) * hf };

    let c_g1 = colors.get("group1").map(|s| s.as_str()).unwrap_or("#3b5ee6");
    let c_g2 = colors.get("group2").map(|s| s.as_str()).unwrap_or("#c04e00");

    // DMR region shading omitted — already shown as a bedj track above

    // 1. LOESS curves (if region small enough)
    let region_size = xmax - xmin;
    if region_size <= max_loess_region {
        let loess_groups: [(&Option<(Vec<f64>, Vec<f64>, Vec<f64>)>, &str); 2] = [(&loess_g1, c_g1), (&loess_g2, c_g2)];
        for (loess_opt, color_hex) in &loess_groups {
            if let Some((fitted, ci_lo, ci_hi)) = loess_opt {
                if fitted.is_empty() {
                    continue;
                }
                let (r, g, b, _) = hex_to_rgba(color_hex, 255);

                // CI bounds as dashed lines
                for ci_band in [ci_hi, ci_lo] {
                    let mut pb = PathBuilder::new();
                    let mut started = false;
                    for (i, &pos) in eval_pos.iter().enumerate() {
                        let px = scale_x(pos);
                        let py = scale_y(ci_band[i].max(0.0).min(1.0));
                        if !started {
                            pb.move_to(px, py);
                            started = true;
                        } else {
                            pb.line_to(px, py);
                        }
                    }
                    if let Some(path) = pb.finish() {
                        let mut paint = Paint::default();
                        paint.set_color_rgba8(r, g, b, 128); // ~0.5 alpha
                        paint.anti_alias = true;
                        let mut stroke = Stroke::default();
                        stroke.width = 1.0 * dpr;
                        stroke.dash = StrokeDash::new(vec![4.0 * dpr, 4.0 * dpr], 0.0);
                        pixmap.stroke_path(&path, &paint, &stroke, Transform::identity(), None);
                    }
                }

                // Fitted curve as solid line
                let mut pb = PathBuilder::new();
                let mut started = false;
                for (i, &pos) in eval_pos.iter().enumerate() {
                    let px = scale_x(pos);
                    let py = scale_y(fitted[i].max(0.0).min(1.0));
                    if !started {
                        pb.move_to(px, py);
                        started = true;
                    } else {
                        pb.line_to(px, py);
                    }
                }
                if let Some(path) = pb.finish() {
                    let mut paint = Paint::default();
                    paint.set_color_rgba8(r, g, b, 204); // ~0.8 alpha
                    paint.anti_alias = true;
                    let mut stroke = Stroke::default();
                    stroke.width = 2.0 * dpr;
                    pixmap.stroke_path(&path, &paint, &stroke, Transform::identity(), None);
                }
            }
        }
    }

    // 3. Scatter dots
    let dot_radius = 4.0 * dpr;
    for i in 0..rpos.len() {
        let px = scale_x(rpos[i] as f64);
        let is_sig = fdr[i] < fdr_cutoff;
        let alpha = if is_sig { 217u8 } else { 77u8 }; // 0.85 * 255, 0.3 * 255

        // Group 1
        if mg1[i].is_finite() {
            let py = scale_y(mg1[i]);
            let (r, g, b, _) = hex_to_rgba(c_g1, alpha);
            let mut paint = Paint::default();
            paint.set_color_rgba8(r, g, b, alpha);
            paint.anti_alias = true;
            let mut pb = PathBuilder::new();
            pb.push_circle(px, py, dot_radius);
            if let Some(path) = pb.finish() {
                pixmap.fill_path(&path, &paint, FillRule::Winding, Transform::identity(), None);
            }
        }

        // Group 2
        if mg2[i].is_finite() {
            let py = scale_y(mg2[i]);
            let (r, g, b, _) = hex_to_rgba(c_g2, alpha);
            let mut paint = Paint::default();
            paint.set_color_rgba8(r, g, b, alpha);
            paint.anti_alias = true;
            let mut pb = PathBuilder::new();
            pb.push_circle(px, py, dot_radius);
            if let Some(path) = pb.finish() {
                pixmap.fill_path(&path, &paint, FillRule::Winding, Transform::identity(), None);
            }
        }
    }

    // Encode as base64 data URI for direct embedding in the JSON response.
    let png_bytes = pixmap.encode_png().ok()?;
    Some(format!("data:image/png;base64,{}", BASE64.encode(&png_bytes)))
}
715
+
716
/// Entry point. Reads one JSON request line from stdin, runs the full
/// pipeline (chromosome-chunked OLS → genome-wide eBayes moderated t →
/// BH FDR → regional kernel smoothing → DMR segmentation → LOESS + PNG
/// rendering for the queried region) and prints a single JSON response
/// to stdout. All errors are reported as `{"error": …}` via `bail!`.
fn main() {
    let t0 = Instant::now();
    let rss_start = get_rss_mb();
    // The request is a single JSON object on one line of stdin.
    let mut input = String::new();
    if io::stdin().read_line(&mut input).is_err() {
        bail!("Failed to read stdin");
    }
    let p: Value = match serde_json::from_str(&input) {
        Ok(v) => v,
        Err(e) => bail!("Invalid JSON: {}", e),
    };

    // --- Request parameters (with defaults) ---
    let h5_path = p["probe_h5_file"].as_str().unwrap_or("");
    let cachedir = p["cachedir"].as_str().unwrap_or("/tmp");
    let dmrcate_dir = format!("{}/dmrcate", cachedir);
    let _ = std::fs::create_dir_all(&dmrcate_dir);
    let qchr = p["chr"].as_str().unwrap_or("");
    let (qstart, qstop) = (p["start"].as_i64().unwrap_or(0), p["stop"].as_i64().unwrap_or(0));
    let cases: Vec<&str> = p["case"]
        .as_str()
        .unwrap_or("")
        .split(',')
        .filter(|s| !s.is_empty())
        .collect();
    let ctrls: Vec<&str> = p["control"]
        .as_str()
        .unwrap_or("")
        .split(',')
        .filter(|s| !s.is_empty())
        .collect();
    let fdr_cut = p["fdr_cutoff"].as_f64().unwrap_or(0.05);
    // lambda/C are the DMRcate smoothing parameters (kernel σ = λ/C).
    let lambda = p["lambda"].as_f64().unwrap_or(1000.0);
    let c_param = p["C"].as_f64().unwrap_or(2.0);
    let min_db = p["min_delta_beta"].as_f64().unwrap_or(0.05);
    let min_spg = p["min_samples_per_group"].as_u64().unwrap_or(3) as usize;
    let block_width = p["blockWidth"].as_u64().unwrap_or(800) as u32;
    let device_pixel_ratio = p["devicePixelRatio"].as_f64().unwrap_or(1.0) as f32;
    let max_loess_region = p["maxLoessRegion"].as_f64().unwrap_or(50000.0);
    let track_height = 150u32;
    let mut render_colors: HashMap<String, String> = HashMap::new();
    if let Some(obj) = p["colors"].as_object() {
        for (k, v) in obj {
            if let Some(s) = v.as_str() {
                render_colors.insert(k.clone(), s.to_string());
            }
        }
    }

    if h5_path.is_empty() || qchr.is_empty() || cases.is_empty() || ctrls.is_empty() {
        bail!("Missing required parameters");
    }

    // --- Load metadata and resolve sample names to matrix column indices ---
    let file = match File::open(h5_path) {
        Ok(f) => f,
        Err(e) => bail!("HDF5 open: {}", e),
    };
    let (chr_names, chr_lens, sample_names, starts, probe_ids) = match read_h5_metadata(&file) {
        Ok(m) => m,
        Err(e) => bail!("{}", e),
    };
    let smap: HashMap<&str, usize> = sample_names.iter().enumerate().map(|(i, s)| (s.as_str(), i)).collect();
    let ci: Vec<usize> = cases.iter().filter_map(|s| smap.get(s).copied()).collect();
    let ki: Vec<usize> = ctrls.iter().filter_map(|s| smap.get(s).copied()).collect();
    if ci.len() < min_spg || ki.len() < min_spg {
        bail!("Not enough samples: case={}, control={}", ci.len(), ki.len());
    }
    // --- Genome-wide per-probe OLS, one chromosome (row range) at a time ---
    let mut all: Vec<ProbeStats> = Vec::new();
    let mut pfx = 0usize;
    for (i, &cl) in chr_lens.iter().enumerate() {
        if cl == 0 {
            pfx += cl;
            continue;
        }
        // Per-chromosome read errors are deliberately swallowed so one bad
        // chromosome does not abort the genome-wide fit.
        match process_chromosome(
            &file,
            pfx,
            pfx + cl,
            &ci,
            &ki,
            &chr_names[i],
            &starts,
            &probe_ids,
            min_spg,
        ) {
            Ok(s) => all.extend(s),
            Err(_e) => {}
        }
        pfx += cl;
    }
    if all.len() < 3 {
        bail!("Too few probes after filtering ({})", all.len());
    }

    // --- Empirical Bayes moderated t (Smyth 2004): squeeze per-probe
    // variances toward the fitted prior (s20, df0) ---
    let all_vars: Vec<f64> = all.iter().map(|s| s.residual_var).collect();
    let all_dfs: Vec<f64> = all.iter().map(|s| s.df_residual).collect();
    let (s20, df0) = fit_f_dist(&all_vars, &all_dfs);
    let mut mod_t = Vec::with_capacity(all.len());
    let mut raw_p = Vec::with_capacity(all.len());
    for s in &all {
        let s2p = if df0.is_finite() {
            (df0 * s20 + s.df_residual * s.residual_var) / (df0 + s.df_residual)
        } else {
            s.residual_var
        };
        let t = s.log_fc / (s2p.sqrt() * s.stdev_unscaled);
        let df_tot = s.df_residual + df0;
        // Infinite df_tot cannot construct a StudentsT; 100 df is an
        // adequate stand-in for the normal limit.
        let tdist = StudentsT::new(0.0, 1.0, df_tot).unwrap_or_else(|_| StudentsT::new(0.0, 1.0, 100.0).unwrap());
        mod_t.push(t);
        raw_p.push(2.0 * tdist.sf(t.abs()));
    }
    // Genome-wide BH adjustment of the per-probe p-values.
    let adj_p = bh_adjust(&raw_p);

    // --- Restrict to the queried region ---
    let ri: Vec<usize> = (0..all.len())
        .filter(|&i| all[i].chr == qchr && all[i].start >= qstart && all[i].start <= qstop)
        .collect();
    if ri.is_empty() {
        println!(
            "{}",
            json!({"dmrs":[],"diagnostic":{"probes":{"positions":[],"mean_group1":[],"mean_group2":[],"fdr":[],"logFC":[]},"probe_spacings":[]}})
        );
        return;
    }
    let rpos: Vec<i64> = ri.iter().map(|&i| all[i].start).collect();
    let rt: Vec<f64> = ri.iter().map(|&i| mod_t[i]).collect();
    let rfdr: Vec<f64> = ri.iter().map(|&i| adj_p[i]).collect();
    let rlfc: Vec<f64> = ri.iter().map(|&i| all[i].log_fc).collect();

    // --- Per-probe group means on the beta scale (mg1 = control, mg2 = case),
    // re-read one probe row at a time for the queried region only ---
    let (mut mg1, mut mg2) = (Vec::new(), Vec::new());
    let ds = file.dataset("beta/values").ok();
    for &idx in &ri {
        let pid = &all[idx].probe_id;
        if let (Some(abs), Some(ref d)) = (probe_ids.iter().position(|p| p == pid), &ds) {
            let sel = hdf5::Selection::from((abs..abs + 1, ..));
            if let Ok(r2d) = d.read_slice_2d::<f32, _>(sel) {
                let row = r2d.into_raw_vec_and_offset().0;
                let (mut cs, mut cc, mut ks, mut kc) = (0.0, 0, 0.0, 0);
                for &si in &ki {
                    if si < row.len() {
                        let v = row[si] as f64;
                        if v.is_finite() {
                            ks += v;
                            kc += 1;
                        }
                    }
                }
                for &si in &ci {
                    if si < row.len() {
                        let v = row[si] as f64;
                        if v.is_finite() {
                            cs += v;
                            cc += 1;
                        }
                    }
                }
                mg1.push(if kc > 0 { ks / kc as f64 } else { f64::NAN });
                mg2.push(if cc > 0 { cs / cc as f64 } else { f64::NAN });
                continue;
            }
        }
        // Probe not found or read failure: NaN placeholder keeps the vectors
        // aligned with rpos.
        mg1.push(f64::NAN);
        mg2.push(f64::NAN);
    }

    // Kernel smoothing in log space to avoid underflow for extreme t-statistics
    let log_smoothed = kernel_smooth_log(&rpos, &rt, lambda, c_param);
    let log_sfdr = bh_adjust_log(&log_smoothed);
    // Convert log FDR to linear for diagnostic output and Sig. CpGs track
    let sfdr: Vec<f64> = log_sfdr.iter().map(|&v| v.exp()).collect();
    // Adaptive threshold matching R's dmrcate(): select the same NUMBER of CpGs
    // as are per-CpG significant, but ranked by smoothed FDR instead.
    // Work in log space so extreme p-values maintain proper ordering.
    let nsig = rfdr.iter().filter(|&&f| f < fdr_cut).count();
    let adaptive_log_cut = if nsig > 0 && nsig <= log_sfdr.len() {
        let mut sorted_log: Vec<f64> = log_sfdr.clone();
        sorted_log.sort_by(|a, b| a.partial_cmp(b).unwrap_or(std::cmp::Ordering::Equal));
        sorted_log[nsig - 1] // nsig-th smallest log FDR (most negative = most significant)
    } else {
        fdr_cut.ln()
    };
    // Build sig_fdr: probes with log_sfdr <= adaptive_log_cut get 0 (significant), others get 1
    let sig_fdr: Vec<f64> = log_sfdr
        .iter()
        .map(|&v| if v <= adaptive_log_cut { 0.0 } else { 1.0 })
        .collect();
    // Primary segmentation: smoothed-FDR-selected probes, no direction or
    // delta-beta constraint (cutoff 0.5 separates the 0/1 indicator).
    let mut dmrs = build_dmrs(qchr, &rpos, &sig_fdr, &rlfc, &mg1, &mg2, 0.5, lambda, 2, None, false);
    // Replace the indicator-based min_smoothed_fdr with the true minimum
    // smoothed FDR over each DMR's span.
    for dmr in &mut dmrs {
        if let (Some(s), Some(e)) = (dmr["start"].as_i64(), dmr["stop"].as_i64()) {
            let min_sfdr = rpos
                .iter()
                .zip(sfdr.iter())
                .filter(|(&p, _)| p >= s && p <= e)
                .map(|(_, &f)| f)
                .fold(f64::INFINITY, f64::min);
            dmr["min_smoothed_fdr"] = json!(min_sfdr);
        }
    }
    // Proximity fallback: if smoothing produced no DMRs, segment directly on
    // the per-probe FDRs with delta-beta and direction constraints.
    if dmrs.is_empty() {
        dmrs = build_dmrs(
            qchr,
            &rpos,
            &rfdr,
            &rlfc,
            &mg1,
            &mg2,
            fdr_cut,
            lambda,
            2,
            Some(min_db),
            true,
        );
    }

    // LOESS curves for both groups
    let n_eval = 200usize;
    let eval_pos: Vec<f64> = (0..n_eval)
        .map(|i| qstart as f64 + (qstop as f64 - qstart as f64) * i as f64 / (n_eval - 1) as f64)
        .collect();
    let loess_g1 = loess_fit(&rpos, &mg1, &eval_pos, 0.75);
    let loess_g2 = loess_fit(&rpos, &mg2, &eval_pos, 0.75);
    let loess_json = json!({
        "positions": eval_pos.iter().map(|&x| x.round() as i64).collect::<Vec<_>>(),
        "group1_fitted": loess_g1.as_ref().map_or(vec![], |l| l.0.clone()),
        "group1_ci_lower": loess_g1.as_ref().map_or(vec![], |l| l.1.clone()),
        "group1_ci_upper": loess_g1.as_ref().map_or(vec![], |l| l.2.clone()),
        "group2_fitted": loess_g2.as_ref().map_or(vec![], |l| l.0.clone()),
        "group2_ci_lower": loess_g2.as_ref().map_or(vec![], |l| l.1.clone()),
        "group2_ci_upper": loess_g2.as_ref().map_or(vec![], |l| l.2.clone()),
    });

    // Render the complete track as a transparent PNG
    let track_png = render_track_png(
        &rpos,
        &mg1,
        &mg2,
        &rfdr,
        &dmrs,
        &loess_g1,
        &loess_g2,
        &eval_pos,
        qstart as f64,
        qstop as f64,
        block_width,
        track_height,
        device_pixel_ratio,
        fdr_cut,
        max_loess_region,
        &render_colors,
    );

    // --- Assemble the JSON response (values rounded to 4 decimals) ---
    let rss_peak = get_rss_mb();
    let elapsed_ms = t0.elapsed().as_millis();
    let r4 = |v: f64| -> Value {
        if v.is_finite() {
            json!((v * 10000.0).round() / 10000.0)
        } else {
            Value::Null
        }
    };
    let spacings: Vec<i64> = if rpos.len() > 1 {
        rpos.windows(2).map(|w| w[1] - w[0]).collect()
    } else {
        vec![]
    };
    println!(
        "{}",
        json!({
            "dmrs": dmrs,
            "diagnostic": { "probes": { "positions": rpos,
                "mean_group1": mg1.iter().map(|&v| r4(v)).collect::<Vec<_>>(),
                "mean_group2": mg2.iter().map(|&v| r4(v)).collect::<Vec<_>>(),
                "fdr": rfdr, "logFC": rlfc.iter().map(|&v| r4(v)).collect::<Vec<_>>() },
            "loess": loess_json,
            "probe_spacings": spacings,
            "total_probes_analyzed": all.len(),
            "peak_memory_mb": (rss_peak * 10.0).round() / 10.0,
            "start_memory_mb": (rss_start * 10.0).round() / 10.0,
            "elapsed_ms": elapsed_ms,
            "track_png": track_png }
        })
    );
}
package/src/readH5.rs CHANGED
@@ -200,7 +200,7 @@ pub fn detect_hdf5_format(
200
200
  }
201
201
  }
202
202
 
203
- pub fn validate_hdf5_file(hdf5_filename: String) -> Result<()> {
203
+ pub fn validate_hdf5_file(hdf5_filename: String, include_items: bool) -> Result<()> {
204
204
  let file = File::open(&hdf5_filename)?;
205
205
  let matrix_name = "matrix";
206
206
  let row_dataset = "samples";
@@ -241,7 +241,7 @@ pub fn validate_hdf5_file(hdf5_filename: String) -> Result<()> {
241
241
  false
242
242
  };
243
243
 
244
- json!({
244
+ let mut result = json!({
245
245
  "status": if matrix_valid { "success" } else { "failure" },
246
246
  "message": if matrix_valid {
247
247
  "HDF5 matrix file loaded successfully"
@@ -255,7 +255,20 @@ pub fn validate_hdf5_file(hdf5_filename: String) -> Result<()> {
255
255
  "num_columns": matrix_shape.get(1).unwrap_or(&0)
256
256
  },
257
257
  row_dataset.to_string(): row_data
258
- })
258
+ });
259
+
260
+ // Optionally include item names (e.g. isoform IDs) in the response
261
+ if include_items {
262
+ let col_dataset_data = file.dataset(col_dataset)?;
263
+ let col_data: Vec<String> = col_dataset_data
264
+ .read_1d::<VarLenUnicode>()?
265
+ .iter()
266
+ .map(|s| s.to_string())
267
+ .collect();
268
+ result["items"] = json!(col_data);
269
+ }
270
+
271
+ result
259
272
  }
260
273
  _ => {
261
274
  json!({
@@ -717,7 +730,8 @@ fn main() -> Result<()> {
717
730
  None => false,
718
731
  };
719
732
  if v {
720
- let _ = validate_hdf5_file(hdf5_filename);
733
+ let include_items = input_json["include_items"].as_bool().unwrap_or(false);
734
+ let _ = validate_hdf5_file(hdf5_filename, include_items);
721
735
  } else {
722
736
  println!("{}", error_response("The value of validate is invalid"));
723
737
  }