@sjcrh/proteinpaint-rust 2.182.2 → 2.185.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/Cargo.toml CHANGED
@@ -138,4 +138,8 @@ path="src/summary_agent.rs"
138
138
 
139
139
  [[bin]]
140
140
  name="dmrcate"
141
- path="src/dmrcate.rs"
141
+ path="src/dmrcate.rs"
142
+
143
+ [[bin]]
144
+ name="volcano"
145
+ path="src/volcano.rs"
package/index.js CHANGED
@@ -26,7 +26,6 @@ const binaryDir = path.join(__dirname, '/target/release/')
26
26
  if (!fs.existsSync(binaryDir)) throw `missing rust binary directory='${binaryDir}'`
27
27
  if (!fs.readdirSync(binaryDir).length) throw `empty rust binary directory='${binaryDir}'`
28
28
 
29
- // use this for non-streamed input/output
30
29
  export function run_rust(binfile, input_data, args = [], { signal } = {}) {
31
30
  return new Promise((resolve, reject) => {
32
31
  const binpath = path.join(__dirname, '/target/release/', binfile)
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.182.2",
2
+ "version": "2.185.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
@@ -302,26 +302,33 @@ fn calculate_variance(
302
302
  min_sample_size = LARGE_N + (min_sample_size - LARGE_N) * MIN_PROP;
303
303
  }
304
304
 
305
- let mut lib_sizes = Vec::<f64>::new();
306
- let lib_sizes_vector = input_matrix.row_sum();
307
- //println!("lib_sizes_vector:{:?}", lib_sizes_vector);
308
- for i in 0..lib_sizes_vector.ncols() {
309
- lib_sizes.push(lib_sizes_vector[(0, i)].into());
305
+ // Per-sample library sizes as nansum — a single NaN gene doesn't
306
+ // poison the whole sample's total.
307
+ let mut lib_sizes = Vec::<f64>::with_capacity(input_matrix.ncols());
308
+ for col in 0..input_matrix.ncols() {
309
+ let mut s = 0.0_f64;
310
+ for row in 0..input_matrix.nrows() {
311
+ let v = input_matrix[(row, col)];
312
+ if v.is_finite() {
313
+ s += v;
314
+ }
315
+ }
316
+ lib_sizes.push(s);
310
317
  }
311
- //println!("lib_sizes:{:?}", lib_sizes);
312
- //println!("min_sample_size:{}", min_sample_size);
318
+
313
319
  let median_lib_size = Data::new(lib_sizes.clone()).median();
314
320
  let cpm_cutoff = (min_count / median_lib_size) * 1000000.0;
315
321
  //println!("cpm_cutoff:{}", cpm_cutoff);
316
- let cpm_matrix = cpm(&input_matrix);
322
+ let cpm_matrix = cpm(&input_matrix, &lib_sizes);
317
323
  const TOL: f64 = 1e-14; // Value of constant from R implementation
318
324
 
319
325
  let mut gene_infos = Vec::<GeneInfo>::new();
320
- let row_sums = input_matrix.column_sum();
321
326
  for row in 0..input_matrix.nrows() {
322
327
  let mut trues = 0.0;
328
+ // CPM filter (NaN-safe)
323
329
  for col in 0..cpm_matrix.ncols() {
324
- if cpm_matrix[(row, col)] >= cpm_cutoff {
330
+ let v = cpm_matrix[(row, col)];
331
+ if v.is_finite() && v >= cpm_cutoff {
325
332
  trues += 1.0;
326
333
  }
327
334
  }
@@ -332,17 +339,32 @@ fn calculate_variance(
332
339
  //positive_cpm += 1;
333
340
  }
334
341
 
342
+ let mut row_sum_finite = 0.0_f64;
343
+ for col in 0..input_matrix.ncols() {
344
+ let v = input_matrix[(row, col)];
345
+ if v.is_finite() {
346
+ row_sum_finite += v;
347
+ }
348
+ }
335
349
  let mut keep_total_bool = false;
336
- if row_sums[(row, 0)] as f64 >= min_total_count - TOL {
350
+ if row_sum_finite >= min_total_count - TOL {
337
351
  keep_total_bool = true;
338
- //keep_total.push(keep_total_bool);
339
- //positive_total += 1;
340
352
  }
341
353
 
342
354
  let mut gene_counts: Vec<f64> = Vec::with_capacity(input_matrix.ncols());
343
355
  for col in 0..input_matrix.ncols() {
344
- gene_counts.push(input_matrix[(row, col)]);
356
+ let v = input_matrix[(row, col)];
357
+ if v.is_finite() {
358
+ gene_counts.push(v);
359
+ }
360
+ }
361
+
362
+ // Skip genes with too few observations to produce a stable statistic
363
+ let min_required = if rank_type == "var" { 2 } else { 4 };
364
+ if gene_counts.len() < min_required {
365
+ continue;
345
366
  }
367
+
346
368
  if rank_type == "var" {
347
369
  // Calculating variance
348
370
  if gene_counts.clone().variance().is_nan() == true {
@@ -382,21 +404,21 @@ fn calculate_variance(
382
404
 
383
405
  fn cpm(
384
406
  input_matrix: &Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>>,
407
+ col_sums: &[f64],
385
408
  ) -> Matrix<f64, Dyn, Dyn, VecStorage<f64, Dyn, Dyn>> {
386
- //let mut blank = Vec::<f64>::new();
387
- let mut blank = Vec::with_capacity(input_matrix.nrows() * input_matrix.ncols());
388
- for _i in 0..input_matrix.nrows() * input_matrix.ncols() {
389
- blank.push(0.0);
390
- }
391
- let mut output_matrix = DMatrix::from_vec(input_matrix.nrows(), input_matrix.ncols(), blank);
392
- let column_sums = input_matrix.row_sum();
409
+ let mut output_matrix = DMatrix::from_element(input_matrix.nrows(), input_matrix.ncols(), 0.0);
410
+
393
411
  for col in 0..input_matrix.ncols() {
394
- let norm_factor = column_sums[(0, col)];
412
+ let norm = col_sums[col];
395
413
  for row in 0..input_matrix.nrows() {
396
- output_matrix[(row, col)] = (input_matrix[(row, col)] as f64 * 1000000.0) / norm_factor as f64;
414
+ let v = input_matrix[(row, col)];
415
+ output_matrix[(row, col)] = if v.is_finite() && norm > 0.0 {
416
+ v * 1_000_000.0 / norm
417
+ } else {
418
+ f64::NAN
419
+ };
397
420
  }
398
421
  }
399
- //println!("output_matrix:{:?}", output_matrix);
400
422
  output_matrix
401
423
  }
402
424
 
package/src/volcano.rs ADDED
@@ -0,0 +1,253 @@
1
+ // Server-side volcano plot renderer.
2
+ //
3
+ // Reads all DA rows + significance thresholds + render params on stdin (JSON),
4
+ // rasterizes the full scatter to a base64 PNG, and in the same pass emits the
5
+ // threshold-passing rows back sorted ascending by the chosen p-value column.
6
+ // This makes the Rust pass the single source of truth for both the colored
7
+ // dots in the PNG and the interactive top-significant overlay on the client.
8
+
9
+ use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
10
+ use plotters::prelude::*;
11
+ use plotters::style::ShapeStyle;
12
+ use serde::{Deserialize, Serialize};
13
+ use serde_json::Value;
14
+ use std::error::Error;
15
+ use std::io::{self, Read};
16
+
17
/// Request payload for the volcano renderer, deserialized from the JSON
/// document that `main` reads from stdin.
#[derive(Deserialize)]
struct Input {
    /// DA rows. `main` requires every row to carry a numeric `fold_change` and
    /// a numeric value in the p-value column selected by `p_value_type`.
    /// Route-specific extras (gene_name, promoter_id, etc.) are preserved via Value.
    rows: Vec<Value>,
    /// "adjusted" or "original" — which p-value column to threshold and sort by
    /// (`adjusted_p_value` or `original_p_value`). Any other string is an error.
    p_value_type: String,
    /// Cutoff on the -log10 scale; a row passes only if its -log10(p) is
    /// strictly greater than this value.
    p_value_cutoff: f64,
    /// Log2 fold-change magnitude cutoff; a row passes only if
    /// `|fold_change|` is strictly greater than this value.
    fold_change_cutoff: f64,
    /// Output image width; `main` rejects values outside 1..=4000.
    pixel_width: u32,
    /// Output image height; `main` rejects values outside 1..=4000.
    pixel_height: u32,
    /// Hex color (parsed by `rgb`) for significant rows; an unparseable value
    /// falls back to (214, 39, 40).
    color_significant: String,
    /// Optional hex color for significant rows with `fold_change > 0`.
    /// When absent, the resolved `color_significant` is used.
    color_significant_up: Option<String>,
    /// Optional hex color for the remaining significant rows
    /// (`fold_change <= 0`). When absent, the resolved `color_significant` is used.
    color_significant_down: Option<String>,
    /// Hex color for non-significant rows; an unparseable value falls back
    /// to (0, 0, 0).
    color_nonsignificant: String,
    /// Circle size handed to plotters; converted with `as i32` (fraction is
    /// dropped) before drawing.
    dot_radius: f64,
    /// Optional cap on the returned `dots`. The PNG still shows every row;
    /// only the overlay list is truncated to the most-significant N.
    #[serde(default)]
    max_interactive_dots: Option<usize>,
}
40
+
41
/// Data-space and pixel-space geometry of the rendered PNG, returned so the
/// caller can place its own overlay on top of the image.
#[derive(Serialize)]
struct PlotExtent {
    /// Left edge of the x axis; `main` sets this to `-x_max` (symmetric axis).
    x_min: f64,
    /// Right edge of the x axis: 1.05 × the largest `|fold_change|`, or 1.0
    /// when that maximum is 0.
    x_max: f64,
    /// Bottom of the y axis; `main` always sets this to 0.
    y_min: f64,
    /// Top of the y axis: 1.05 × the largest -log10(p), or 1.0 when that
    /// maximum is not positive.
    y_max: f64,
    /// Width of the PNG, echoed from the request.
    pixel_width: u32,
    /// Height of the PNG, echoed from the request.
    pixel_height: u32,
    /// Inner drawing rect inside the PNG. Client overlay circles are
    /// positioned against this rect, not the full canvas.
    /// `main` currently reports the full canvas: (0, 0) to (width, height).
    plot_left: i32,
    plot_top: i32,
    plot_right: i32,
    plot_bottom: i32,
    /// Smallest non-zero p observed; rows with p==0 were positioned at
    /// -log10(min_nonzero_p) so the client must reuse this cap to align.
    /// Set to 1e-300 when no row has p > 0.
    min_nonzero_p: f64,
}
59
+
60
/// Response document that `main` serializes to JSON and prints to stdout.
#[derive(Serialize)]
struct Output {
    /// The rendered scatter as a base64-encoded PNG.
    png: String,
    /// Axis ranges and pixel geometry of `png`.
    plot_extent: PlotExtent,
    /// Threshold-passing rows, sorted asc by the chosen p-value column. These
    /// are the only rows sent back — the PNG carries every row.
    /// Truncated to `max_interactive_dots` entries when that cap is supplied.
    dots: Vec<Value>,
    /// Total rows rendered into the PNG; used client-side for stats.
    total_rows: usize,
    /// Total rows that passed the significance thresholds, before any
    /// `max_interactive_dots` truncation. Use this for "% significant" stats.
    total_significant_rows: usize,
}
73
+
74
+ fn rgb(hex: &str, fallback: (u8, u8, u8)) -> RGBColor {
75
+ let h = hex.trim_start_matches('#');
76
+ let parse = |i: usize| u8::from_str_radix(&h.get(i..i + 2).unwrap_or(""), 16).ok();
77
+ match (h.len(), parse(0), parse(2), parse(4)) {
78
+ (6, Some(r), Some(g), Some(b)) => RGBColor(r, g, b),
79
+ _ => RGBColor(fallback.0, fallback.1, fallback.2),
80
+ }
81
+ }
82
+
83
/// Numeric summary of one input row, built by `main` for classification,
/// drawing and sorting.
struct Point {
    /// Position of the source row in `Input::rows`; used to look the row up
    /// again when building the returned `dots`.
    idx: usize,
    /// The row's `fold_change` value (x coordinate).
    fc: f64,
    /// raw p-value (used for sorting)
    p: f64,
    /// -log10(p) with p==0 capped to min_nonzero_p
    y: f64,
    /// True when `y` exceeds the p-value cutoff and `|fc|` exceeds the
    /// fold-change cutoff.
    significant: bool,
}
92
+
93
+ fn main() -> Result<(), Box<dyn Error>> {
94
+ let mut buf = String::new();
95
+ io::stdin().read_to_string(&mut buf)?;
96
+ let input: Input = serde_json::from_str(&buf)?;
97
+
98
+ let p_field = match input.p_value_type.as_str() {
99
+ "adjusted" => "adjusted_p_value",
100
+ "original" => "original_p_value",
101
+ other => return Err(format!("invalid p_value_type: {other}").into()),
102
+ };
103
+
104
+ // One pass: pull numeric summaries and find the smallest non-zero p so we
105
+ // can cap y for rows with p == 0 (matching the client behavior).
106
+ let mut points: Vec<Point> = Vec::with_capacity(input.rows.len());
107
+ let mut min_nonzero_p = f64::INFINITY;
108
+ for (idx, row) in input.rows.iter().enumerate() {
109
+ let fc = row
110
+ .get("fold_change")
111
+ .and_then(|v| v.as_f64())
112
+ .ok_or_else(|| format!("row {idx} missing numeric fold_change"))?;
113
+ if !fc.is_finite() {
114
+ return Err(format!("row {idx} fold_change is not finite ({fc})").into());
115
+ }
116
+ let p = row
117
+ .get(p_field)
118
+ .and_then(|v| v.as_f64())
119
+ .ok_or_else(|| format!("row {idx} missing numeric {p_field}"))?;
120
+ if !p.is_finite() || p < 0.0 {
121
+ return Err(format!("row {idx} {p_field} must be a finite value >= 0 (got {p})").into());
122
+ }
123
+ if p > 0.0 && p < min_nonzero_p {
124
+ min_nonzero_p = p;
125
+ }
126
+ points.push(Point {
127
+ idx,
128
+ fc,
129
+ p,
130
+ y: 0.0,
131
+ significant: false,
132
+ });
133
+ }
134
+ if !min_nonzero_p.is_finite() {
135
+ min_nonzero_p = 1e-300;
136
+ }
137
+
138
+ // Classify + compute y; track axis extents in the same pass.
139
+ let (mut x_abs_max, mut y_max_data) = (0f64, 0f64);
140
+ for pt in points.iter_mut() {
141
+ let p_for_y = if pt.p <= 0.0 { min_nonzero_p } else { pt.p };
142
+ pt.y = -p_for_y.log10();
143
+ pt.significant = pt.y > input.p_value_cutoff && pt.fc.abs() > input.fold_change_cutoff;
144
+ x_abs_max = x_abs_max.max(pt.fc.abs());
145
+ y_max_data = y_max_data.max(pt.y);
146
+ }
147
+
148
+ // Axis extents — symmetric on x, padded 5%.
149
+ let x_span = if x_abs_max > 0.0 { x_abs_max * 1.05 } else { 1.0 };
150
+ let (x_min, x_max, y_min) = (-x_span, x_span, 0f64);
151
+ let y_max = if y_max_data > 0.0 { y_max_data * 1.05 } else { 1.0 };
152
+
153
+ // Render — borderless scatter. No axes/labels/margins. The client owns
154
+ // axes and positions the PNG exactly over its plot rect, so the inner
155
+ // drawing area fills the whole canvas.
156
+ let (w, h) = (input.pixel_width, input.pixel_height);
157
+ if w == 0 || h == 0 || w > 4000 || h > 4000 {
158
+ return Err(format!("pixel dimensions {}x{} out of range (1–4000)", w, h).into());
159
+ }
160
+ let mut buffer = vec![0u8; (w as usize) * (h as usize) * 3];
161
+ {
162
+ let backend = BitMapBackend::with_buffer(&mut buffer, (w, h));
163
+ let root = backend.into_drawing_area();
164
+ root.fill(&WHITE)?;
165
+
166
+ let mut chart = ChartBuilder::on(&root)
167
+ .margin(0)
168
+ .set_all_label_area_size(0)
169
+ .build_cartesian_2d(x_min..x_max, y_min..y_max)?;
170
+ chart
171
+ .configure_mesh()
172
+ .disable_x_mesh()
173
+ .disable_y_mesh()
174
+ .disable_axes()
175
+ .draw()?;
176
+
177
+ // Threshold guide lines are drawn by the SVG overlay on the client, not
178
+ // here — double-drawing them would add stray lines offset by axis padding.
179
+
180
+ // Resolve colors once. Up/down fall back to `color_sig` when absent.
181
+ let color_sig = rgb(&input.color_significant, (214, 39, 40));
182
+ let color_non = rgb(&input.color_nonsignificant, (0, 0, 0));
183
+ let resolve = |o: &Option<String>| o.as_deref().map(|s| rgb(s, (214, 39, 40))).unwrap_or(color_sig);
184
+ let color_up = resolve(&input.color_significant_up);
185
+ let color_down = resolve(&input.color_significant_down);
186
+
187
+ // Stroke-only rings at full opacity so each ring is the exact configured
188
+ // group color — matching the hue the SVG overlay uses.
189
+ let ring = |c: RGBColor| ShapeStyle {
190
+ color: c.into(),
191
+ filled: false,
192
+ stroke_width: 1,
193
+ };
194
+ let radius = input.dot_radius as i32;
195
+
196
+ // Draw non-significant first so significant rings overlay on top.
197
+ chart.draw_series(
198
+ points
199
+ .iter()
200
+ .filter(|p| !p.significant)
201
+ .map(|p| Circle::new((p.fc, p.y), radius, ring(color_non))),
202
+ )?;
203
+ chart.draw_series(points.iter().filter(|p| p.significant).map(|p| {
204
+ let c = if p.fc > 0.0 { color_up } else { color_down };
205
+ Circle::new((p.fc, p.y), radius, ring(c))
206
+ }))?;
207
+
208
+ root.present()?;
209
+ }
210
+
211
+ // Build the interactive `dots` list: threshold-passers sorted asc by the
212
+ // chosen p-value column, optionally capped at `max_interactive_dots`.
213
+ let mut sig_points: Vec<&Point> = points.iter().filter(|p| p.significant).collect();
214
+ sig_points.sort_by(|a, b| a.p.partial_cmp(&b.p).unwrap_or(std::cmp::Ordering::Equal));
215
+ let total_significant_rows = sig_points.len();
216
+ if let Some(cap) = input.max_interactive_dots {
217
+ sig_points.truncate(cap);
218
+ }
219
+ let dots: Vec<Value> = sig_points.iter().map(|p| input.rows[p.idx].clone()).collect();
220
+
221
+ let output = Output {
222
+ png: BASE64.encode(&encode_rgb_to_png(&buffer, w, h)?),
223
+ plot_extent: PlotExtent {
224
+ x_min,
225
+ x_max,
226
+ y_min,
227
+ y_max,
228
+ pixel_width: w,
229
+ pixel_height: h,
230
+ plot_left: 0,
231
+ plot_top: 0,
232
+ plot_right: w as i32,
233
+ plot_bottom: h as i32,
234
+ min_nonzero_p,
235
+ },
236
+ dots,
237
+ total_rows: input.rows.len(),
238
+ total_significant_rows,
239
+ };
240
+
241
+ println!("{}", serde_json::to_string(&output)?);
242
+ Ok(())
243
+ }
244
+
245
+ /// Convert a plotters RGB buffer (3 bytes/px) to a PNG via tiny-skia (4 bytes/px).
246
+ fn encode_rgb_to_png(rgb: &[u8], w: u32, h: u32) -> Result<Vec<u8>, Box<dyn Error>> {
247
+ let mut pixmap = tiny_skia::Pixmap::new(w, h).ok_or("failed to create pixmap")?;
248
+ for (src, dst) in rgb.chunks_exact(3).zip(pixmap.data_mut().chunks_exact_mut(4)) {
249
+ dst[..3].copy_from_slice(src);
250
+ dst[3] = 255;
251
+ }
252
+ Ok(pixmap.encode_png()?)
253
+ }