@sjcrh/proteinpaint-rust 2.183.0 → 2.186.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -138,4 +138,8 @@ path="src/summary_agent.rs"
138
138
 
139
139
  [[bin]]
140
140
  name="dmrcate"
141
- path="src/dmrcate.rs"
141
+ path="src/dmrcate.rs"
142
+
143
+ [[bin]]
144
+ name="volcano"
145
+ path="src/volcano.rs"
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.183.0",
2
+ "version": "2.186.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
@@ -302,26 +302,33 @@ fn calculate_variance(
302
302
  min_sample_size = LARGE_N + (min_sample_size - LARGE_N) * MIN_PROP;
303
303
  }
304
304
 
305
- let mut lib_sizes = Vec::<f64>::new();
306
- let lib_sizes_vector = input_matrix.row_sum();
307
- //println!("lib_sizes_vector:{:?}", lib_sizes_vector);
308
- for i in 0..lib_sizes_vector.ncols() {
309
- lib_sizes.push(lib_sizes_vector[(0, i)].into());
305
+ // Per-sample library sizes as nansum — a single NaN gene doesn't
306
+ // poison the whole sample's total.
307
+ let mut lib_sizes = Vec::<f64>::with_capacity(input_matrix.ncols());
308
+ for col in 0..input_matrix.ncols() {
309
+ let mut s = 0.0_f64;
310
+ for row in 0..input_matrix.nrows() {
311
+ let v = input_matrix[(row, col)];
312
+ if v.is_finite() {
313
+ s += v;
314
+ }
315
+ }
316
+ lib_sizes.push(s);
310
317
  }
311
- //println!("lib_sizes:{:?}", lib_sizes);
312
- //println!("min_sample_size:{}", min_sample_size);
318
+
313
319
  let median_lib_size = Data::new(lib_sizes.clone()).median();
314
320
  let cpm_cutoff = (min_count / median_lib_size) * 1000000.0;
315
321
  //println!("cpm_cutoff:{}", cpm_cutoff);
316
- let cpm_matrix = cpm(&input_matrix);
322
+ let cpm_matrix = cpm(&input_matrix, &lib_sizes);
317
323
  const TOL: f64 = 1e-14; // Value of constant from R implementation
318
324
 
319
325
  let mut gene_infos = Vec::<GeneInfo>::new();
320
- let row_sums = input_matrix.column_sum();
321
326
  for row in 0..input_matrix.nrows() {
322
327
  let mut trues = 0.0;
328
+ // CPM filter (NaN-safe)
323
329
  for col in 0..cpm_matrix.ncols() {
324
- if cpm_matrix[(row, col)] >= cpm_cutoff {
330
+ let v = cpm_matrix[(row, col)];
331
+ if v.is_finite() && v >= cpm_cutoff {
325
332
  trues += 1.0;
326
333
  }
327
334
  }
@@ -332,17 +339,32 @@ fn calculate_variance(
332
339
  //positive_cpm += 1;
333
340
  }
334
341
 
342
+ let mut row_sum_finite = 0.0_f64;
343
+ for col in 0..input_matrix.ncols() {
344
+ let v = input_matrix[(row, col)];
345
+ if v.is_finite() {
346
+ row_sum_finite += v;
347
+ }
348
+ }
335
349
  let mut keep_total_bool = false;
336
- if row_sums[(row, 0)] as f64 >= min_total_count - TOL {
350
+ if row_sum_finite >= min_total_count - TOL {
337
351
  keep_total_bool = true;
338
- //keep_total.push(keep_total_bool);
339
- //positive_total += 1;
340
352
  }
341
353
 
342
354
  let mut gene_counts: Vec<f64> = Vec::with_capacity(input_matrix.ncols());
343
355
  for col in 0..input_matrix.ncols() {
344
- gene_counts.push(input_matrix[(row, col)]);
356
+ let v = input_matrix[(row, col)];
357
+ if v.is_finite() {
358
+ gene_counts.push(v);
359
+ }
360
+ }
361
+
362
+ // Skip genes with too few observations to produce a stable statistic
363
+ let min_required = if rank_type == "var" { 2 } else { 4 };
364
+ if gene_counts.len() < min_required {
365
+ continue;
345
366
  }
367
+
346
368
  if rank_type == "var" {
347
369
  // Calculating variance
348
370
  if gene_counts.clone().variance().is_nan() == true {
@@ -382,21 +404,21 @@ fn calculate_variance(
382
404
 
383
405
  fn cpm(
384
406
  input_matrix: &Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
407
+ col_sums: &[f64],
385
408
  ) -> Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>> {
386
- //let mut blank = Vec::<f64>::new();
387
- let mut blank = Vec::with_capacity(input_matrix.nrows() * input_matrix.ncols());
388
- for _i in 0..input_matrix.nrows() * input_matrix.ncols() {
389
- blank.push(0.0);
390
- }
391
- let mut output_matrix = DMatrix::from_vec(input_matrix.nrows(), input_matrix.ncols(), blank);
392
- let column_sums = input_matrix.row_sum();
409
+ let mut output_matrix = DMatrix::from_element(input_matrix.nrows(), input_matrix.ncols(), 0.0);
410
+
393
411
  for col in 0..input_matrix.ncols() {
394
- let norm_factor = column_sums[(0, col)];
412
+ let norm = col_sums[col];
395
413
  for row in 0..input_matrix.nrows() {
396
- output_matrix[(row, col)] = (input_matrix[(row, col)] as f64 * 1000000.0) / norm_factor as f64;
414
+ let v = input_matrix[(row, col)];
415
+ output_matrix[(row, col)] = if v.is_finite() && norm > 0.0 {
416
+ v * 1_000_000.0 / norm
417
+ } else {
418
+ f64::NAN
419
+ };
397
420
  }
398
421
  }
399
- //println!("output_matrix:{:?}", output_matrix);
400
422
  output_matrix
401
423
  }
402
424
 
package/src/volcano.rs ADDED
@@ -0,0 +1,314 @@
1
+ // Server-side volcano plot renderer.
2
+ //
3
+ // Reads all DA rows + significance thresholds + render params on stdin (JSON),
4
+ // rasterizes the full scatter to a base64 PNG, and in the same pass emits the
5
+ // threshold-passing rows back sorted ascending by the chosen p-value column.
6
+ // This makes the Rust pass the single source of truth for both the colored
7
+ // dots in the PNG and the interactive top-significant overlay on the client.
8
+
9
+ use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
10
+ use plotters::prelude::*;
11
+ use plotters::style::ShapeStyle;
12
+ use serde::{Deserialize, Serialize};
13
+ use serde_json::Value;
14
+ use std::error::Error;
15
+ use std::io::{self, Read};
16
+
17
+ #[derive(Deserialize)]
18
+ struct Input {
19
+ /// DA rows; each must carry fold_change + original_p_value + adjusted_p_value.
20
+ /// Route-specific extras (gene_name, promoter_id, etc.) are preserved via Value.
21
+ rows: Vec<Value>,
22
+ /// "adjusted" or "original" — which p-value column to threshold and sort by.
23
+ p_value_type: String,
24
+ /// Cutoff on the -log10 scale.
25
+ p_value_cutoff: f64,
26
+ /// Log2 fold-change magnitude cutoff.
27
+ fold_change_cutoff: f64,
28
+ pixel_width: u32,
29
+ pixel_height: u32,
30
+ color_significant: String,
31
+ color_significant_up: Option<String>,
32
+ color_significant_down: Option<String>,
33
+ color_nonsignificant: String,
34
+ dot_radius: f64,
35
+ /// Optional cap on the returned `dots`. The PNG still shows every row;
36
+ /// only the overlay list is truncated to the most-significant N.
37
+ #[serde(default)]
38
+ max_interactive_dots: Option<usize>,
39
+ }
40
+
41
+ #[derive(Serialize)]
42
+ struct PlotExtent {
43
+ /// Padded data extents — used to position overlay dots so points near the
44
+ /// real-data edge stay fully visible (mirror of manhattan's yPlot domain).
45
+ x_min: f64,
46
+ x_max: f64,
47
+ y_min: f64,
48
+ y_max: f64,
49
+ /// Unpadded data extents — used for the visible axis labels/ticks so the
50
+ /// axis only spans the real data region (mirror of manhattan's yAxisScale).
51
+ x_min_unpadded: f64,
52
+ x_max_unpadded: f64,
53
+ y_min_unpadded: f64,
54
+ y_max_unpadded: f64,
55
+ /// Dot radius in pixels (echoed back so the client can size overlay rings
56
+ /// to match the PNG without recomputing the heuristic).
57
+ dot_radius_px: f64,
58
+ pixel_width: u32,
59
+ pixel_height: u32,
60
+ /// Inner drawing rect inside the PNG. Client overlay circles are
61
+ /// positioned against this rect, not the full canvas.
62
+ plot_left: i32,
63
+ plot_top: i32,
64
+ plot_right: i32,
65
+ plot_bottom: i32,
66
+ /// Smallest non-zero p observed; rows with p==0 were positioned at
67
+ /// -log10(min_nonzero_p) so the client must reuse this cap to align.
68
+ min_nonzero_p: f64,
69
+ }
70
+
71
+ #[derive(Serialize)]
72
+ struct Output {
73
+ png: String,
74
+ plot_extent: PlotExtent,
75
+ /// Threshold-passing rows, sorted asc by the chosen p-value column. These
76
+ /// are the only rows sent back — the PNG carries every row.
77
+ dots: Vec<Value>,
78
+ /// Total rows rendered into the PNG; used client-side for stats.
79
+ total_rows: usize,
80
+ /// Total rows that passed the significance thresholds, before any
81
+ /// `max_interactive_dots` truncation. Use this for "% significant" stats.
82
+ total_significant_rows: usize,
83
+ }
84
+
85
+ fn rgb(hex: &str, fallback: (u8, u8, u8)) -> RGBColor {
86
+ let h = hex.trim_start_matches('#');
87
+ let parse = |i: usize| u8::from_str_radix(&h.get(i..i + 2).unwrap_or(""), 16).ok();
88
+ match (h.len(), parse(0), parse(2), parse(4)) {
89
+ (6, Some(r), Some(g), Some(b)) => RGBColor(r, g, b),
90
+ _ => RGBColor(fallback.0, fallback.1, fallback.2),
91
+ }
92
+ }
93
+
94
/// Numeric summary of one input row, filled in over two passes by `main`.
struct Point {
    /// Position of the source row within `Input::rows`.
    idx: usize,
    /// The row's `fold_change`; the x coordinate of the dot.
    fc: f64,
    /// Raw p-value from the selected column; the sort key for returned dots.
    p: f64,
    /// -log10(p), the y coordinate of the dot. Rows with p == 0 use
    /// `min_nonzero_p` in place of p.
    y: f64,
    /// Whether the row passes both the p-value and fold-change cutoffs.
    significant: bool,
}
103
+
104
+ fn main() -> Result<(), Box<dyn Error>> {
105
+ let mut buf = String::new();
106
+ io::stdin().read_to_string(&mut buf)?;
107
+ let input: Input = serde_json::from_str(&buf)?;
108
+
109
+ let p_field = match input.p_value_type.as_str() {
110
+ "adjusted" => "adjusted_p_value",
111
+ "original" => "original_p_value",
112
+ other => return Err(format!("invalid p_value_type: {other}").into()),
113
+ };
114
+
115
+ // One pass: pull numeric summaries and find the smallest non-zero p so we
116
+ // can cap y for rows with p == 0 (matching the client behavior).
117
+ let mut points: Vec<Point> = Vec::with_capacity(input.rows.len());
118
+ let mut min_nonzero_p = f64::INFINITY;
119
+ for (idx, row) in input.rows.iter().enumerate() {
120
+ let fc = row
121
+ .get("fold_change")
122
+ .and_then(|v| v.as_f64())
123
+ .ok_or_else(|| format!("row {idx} missing numeric fold_change"))?;
124
+ if !fc.is_finite() {
125
+ return Err(format!("row {idx} fold_change is not finite ({fc})").into());
126
+ }
127
+ let p = row
128
+ .get(p_field)
129
+ .and_then(|v| v.as_f64())
130
+ .ok_or_else(|| format!("row {idx} missing numeric {p_field}"))?;
131
+ if !p.is_finite() || p < 0.0 {
132
+ return Err(format!("row {idx} {p_field} must be a finite value >= 0 (got {p})").into());
133
+ }
134
+ if p > 0.0 && p < min_nonzero_p {
135
+ min_nonzero_p = p;
136
+ }
137
+ points.push(Point {
138
+ idx,
139
+ fc,
140
+ p,
141
+ y: 0.0,
142
+ significant: false,
143
+ });
144
+ }
145
+ if !min_nonzero_p.is_finite() {
146
+ min_nonzero_p = 1e-300;
147
+ }
148
+
149
+ // Classify + compute y; track axis extents in the same pass.
150
+ let (mut x_abs_max, mut y_max_data) = (0f64, 0f64);
151
+ for pt in points.iter_mut() {
152
+ let p_for_y = if pt.p <= 0.0 { min_nonzero_p } else { pt.p };
153
+ pt.y = -p_for_y.log10();
154
+ pt.significant = pt.y > input.p_value_cutoff && pt.fc.abs() > input.fold_change_cutoff;
155
+ x_abs_max = x_abs_max.max(pt.fc.abs());
156
+ y_max_data = y_max_data.max(pt.y);
157
+ }
158
+
159
+ // Unpadded axis extents — symmetric on x, raw data bounds. The dot-radius
160
+ // pad below provides pixel-level headroom so we don't need extra data-range
161
+ // breathing room (mirrors manhattan_plot.rs's tighter feel). Fallback to 1.0
162
+ // when the data has zero spread to keep the chart range valid.
163
+ let x_span = if x_abs_max > 0.0 { x_abs_max } else { 1.0 };
164
+ let (x_min_unpadded, x_max_unpadded) = (-x_span, x_span);
165
+ let y_min_unpadded = 0f64;
166
+ let y_max_unpadded = if y_max_data > 0.0 { y_max_data } else { 1.0 };
167
+
168
+ // Normalize the dot radius once into the integer pixel count plotters will
169
+ // actually draw, with a min of 1 so sub-pixel inputs don't collapse to a
170
+ // zero-radius dot. This single value drives both PNG padding and circle
171
+ // rendering, keeping the geometry self-consistent (matches manhattan).
172
+ let radius_px = (input.dot_radius as i32).max(1);
173
+ // Pad PNG by 2*radius_px so dots near the data edges stay fully visible.
174
+ let pad_px = (2 * radius_px) as u32;
175
+ let (w, h) = (input.pixel_width + pad_px, input.pixel_height + pad_px);
176
+ if w == 0 || h == 0 || w > 4000 || h > 4000 {
177
+ return Err(format!("pixel dimensions {}x{} out of range (1–4000)", w, h).into());
178
+ }
179
+
180
+ // Convert pixel padding to data units using the unpadded extents and the
181
+ // unpadded pixel dimensions. Per-axis pad in data space = radius_px * (data
182
+ // range / pixel range) — keeps the data/pixel ratio identical between
183
+ // padded and unpadded space.
184
+ let x_data_per_px = (x_max_unpadded - x_min_unpadded) / input.pixel_width as f64;
185
+ let y_data_per_px = (y_max_unpadded - y_min_unpadded) / input.pixel_height as f64;
186
+ let x_pad_data = radius_px as f64 * x_data_per_px;
187
+ let y_pad_data = radius_px as f64 * y_data_per_px;
188
+ let x_min = x_min_unpadded - x_pad_data;
189
+ let x_max = x_max_unpadded + x_pad_data;
190
+ let y_min = y_min_unpadded - y_pad_data;
191
+ let y_max = y_max_unpadded + y_pad_data;
192
+ let mut buffer = vec![0u8; (w as usize) * (h as usize) * 3];
193
+ // Per-point pixel coords as plotters actually rasterizes them. Returned to
194
+ // the client so the SVG overlay rings sit exactly on top of the PNG dots
195
+ // instead of being recomputed from data coords (which loses sub-pixel
196
+ // precision under plotters' integer truncation).
197
+ let mut all_pixel_coords: Vec<(f64, f64)> = Vec::with_capacity(points.len());
198
+ {
199
+ let backend = BitMapBackend::with_buffer(&mut buffer, (w, h));
200
+ let root = backend.into_drawing_area();
201
+ root.fill(&WHITE)?;
202
+
203
+ let mut chart = ChartBuilder::on(&root)
204
+ .margin(0)
205
+ .set_all_label_area_size(0)
206
+ .build_cartesian_2d(x_min..x_max, y_min..y_max)?;
207
+ chart
208
+ .configure_mesh()
209
+ .disable_x_mesh()
210
+ .disable_y_mesh()
211
+ .disable_axes()
212
+ .draw()?;
213
+
214
+ // Threshold guide lines are drawn by the SVG overlay on the client, not
215
+ // here — double-drawing them would add stray lines offset by axis padding.
216
+
217
+ // Resolve colors once. Up/down fall back to `color_sig` when absent.
218
+ let color_sig = rgb(&input.color_significant, (214, 39, 40));
219
+ let color_non = rgb(&input.color_nonsignificant, (0, 0, 0));
220
+ let resolve = |o: &Option<String>| o.as_deref().map(|s| rgb(s, (214, 39, 40))).unwrap_or(color_sig);
221
+ let color_up = resolve(&input.color_significant_up);
222
+ let color_down = resolve(&input.color_significant_down);
223
+
224
+ // Stroke-only rings at full opacity so each ring is the exact configured
225
+ // group color — matching the hue the SVG overlay uses.
226
+ let ring = |c: RGBColor| ShapeStyle {
227
+ color: c.into(),
228
+ filled: false,
229
+ stroke_width: 1,
230
+ };
231
+
232
+ // Draw non-significant first so significant rings overlay on top.
233
+ chart.draw_series(
234
+ points
235
+ .iter()
236
+ .filter(|p| !p.significant)
237
+ .map(|p| Circle::new((p.fc, p.y), radius_px, ring(color_non))),
238
+ )?;
239
+ chart.draw_series(points.iter().filter(|p| p.significant).map(|p| {
240
+ let c = if p.fc > 0.0 { color_up } else { color_down };
241
+ Circle::new((p.fc, p.y), radius_px, ring(c))
242
+ }))?;
243
+
244
+ // Mirror manhattan_plot.rs: capture the exact pixel coords plotters
245
+ // used for each point so the client overlay can land on them precisely.
246
+ for p in points.iter() {
247
+ let (px, py) = chart.backend_coord(&(p.fc, p.y));
248
+ all_pixel_coords.push((px as f64, py as f64));
249
+ }
250
+
251
+ root.present()?;
252
+ }
253
+
254
+ // Build the interactive `dots` list: threshold-passers sorted asc by the
255
+ // chosen p-value column, optionally capped at `max_interactive_dots`.
256
+ let mut sig_points: Vec<&Point> = points.iter().filter(|p| p.significant).collect();
257
+ sig_points.sort_by(|a, b| a.p.partial_cmp(&b.p).unwrap_or(std::cmp::Ordering::Equal));
258
+ let total_significant_rows = sig_points.len();
259
+ if let Some(cap) = input.max_interactive_dots {
260
+ sig_points.truncate(cap);
261
+ }
262
+ let dots: Vec<Value> = sig_points
263
+ .iter()
264
+ .map(|p| {
265
+ let mut row = input.rows[p.idx].clone();
266
+ let (px, py) = all_pixel_coords[p.idx];
267
+ if let Value::Object(ref mut m) = row {
268
+ m.insert("pixel_x".to_string(), Value::from(px));
269
+ m.insert("pixel_y".to_string(), Value::from(py));
270
+ }
271
+ row
272
+ })
273
+ .collect();
274
+
275
+ let output = Output {
276
+ png: BASE64.encode(&encode_rgb_to_png(&buffer, w, h)?),
277
+ plot_extent: PlotExtent {
278
+ x_min,
279
+ x_max,
280
+ y_min,
281
+ y_max,
282
+ x_min_unpadded,
283
+ x_max_unpadded,
284
+ y_min_unpadded,
285
+ y_max_unpadded,
286
+ // Echo the normalized integer radius plotters actually drew so the
287
+ // SVG overlay sizes its rings to match the rasterized PNG dots.
288
+ dot_radius_px: radius_px as f64,
289
+ pixel_width: w,
290
+ pixel_height: h,
291
+ plot_left: 0,
292
+ plot_top: 0,
293
+ plot_right: w as i32,
294
+ plot_bottom: h as i32,
295
+ min_nonzero_p,
296
+ },
297
+ dots,
298
+ total_rows: input.rows.len(),
299
+ total_significant_rows,
300
+ };
301
+
302
+ println!("{}", serde_json::to_string(&output)?);
303
+ Ok(())
304
+ }
305
+
306
+ /// Convert a plotters RGB buffer (3 bytes/px) to a PNG via tiny-skia (4 bytes/px).
307
+ fn encode_rgb_to_png(rgb: &[u8], w: u32, h: u32) -> Result<Vec<u8>, Box<dyn Error>> {
308
+ let mut pixmap = tiny_skia::Pixmap::new(w, h).ok_or("failed to create pixmap")?;
309
+ for (src, dst) in rgb.chunks_exact(3).zip(pixmap.data_mut().chunks_exact_mut(4)) {
310
+ dst[..3].copy_from_slice(src);
311
+ dst[3] = 255;
312
+ }
313
+ Ok(pixmap.encode_png()?)
314
+ }