@sjcrh/proteinpaint-rust 2.189.0 → 2.191.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/Cargo.toml +0 -17
- package/package.json +1 -1
- package/src/aichatbot.rs +0 -1554
- package/src/manhattan_plot.rs +0 -725
- package/src/query_classification.rs +0 -152
- package/src/summary_agent.rs +0 -201
- package/src/test_ai.rs +0 -193
- package/src/volcano.rs +0 -354
package/src/manhattan_plot.rs
DELETED
|
@@ -1,725 +0,0 @@
|
|
|
1
|
-
use base64::{Engine as _, engine::general_purpose::STANDARD as BASE64};
|
|
2
|
-
use plotters::prelude::*;
|
|
3
|
-
use plotters::style::ShapeStyle;
|
|
4
|
-
use serde::{Deserialize, Serialize};
|
|
5
|
-
use serde_json;
|
|
6
|
-
use std::collections::{HashMap, HashSet};
|
|
7
|
-
use std::convert::TryInto;
|
|
8
|
-
use std::error::Error;
|
|
9
|
-
use std::fs::File;
|
|
10
|
-
use std::io::{self, BufRead, BufReader};
|
|
11
|
-
use tiny_skia::{FillRule, PathBuilder, Pixmap, Transform};
|
|
12
|
-
|
|
13
|
-
// Define the JSON input structure
|
|
14
|
-
#[derive(Deserialize, Debug)]
|
|
15
|
-
struct Input {
|
|
16
|
-
file: String,
|
|
17
|
-
#[serde(rename = "type")]
|
|
18
|
-
plot_type: String,
|
|
19
|
-
#[serde(rename = "chrSizes")]
|
|
20
|
-
chromosomelist: HashMap<String, u64>,
|
|
21
|
-
plot_width: u64,
|
|
22
|
-
plot_height: u64,
|
|
23
|
-
device_pixel_ratio: f64,
|
|
24
|
-
png_dot_radius: u64,
|
|
25
|
-
max_capped_points: u64,
|
|
26
|
-
hard_cap: f64,
|
|
27
|
-
bin_size: f64,
|
|
28
|
-
q_value_threshold: f64,
|
|
29
|
-
}
|
|
30
|
-
|
|
31
|
-
// chromosome info
|
|
32
|
-
#[derive(Serialize)]
|
|
33
|
-
struct ChromInfo {
|
|
34
|
-
start: u64,
|
|
35
|
-
size: u64,
|
|
36
|
-
center: u64,
|
|
37
|
-
}
|
|
38
|
-
|
|
39
|
-
#[derive(Serialize)]
|
|
40
|
-
struct PointDetail {
|
|
41
|
-
x: u64,
|
|
42
|
-
y: f64,
|
|
43
|
-
color: String,
|
|
44
|
-
r#type: String,
|
|
45
|
-
gene: String,
|
|
46
|
-
chrom: String,
|
|
47
|
-
start: u64,
|
|
48
|
-
end: u64,
|
|
49
|
-
pos: u64,
|
|
50
|
-
q_value: f64,
|
|
51
|
-
nsubj: Option<i64>,
|
|
52
|
-
pixel_x: f64,
|
|
53
|
-
pixel_y: f64,
|
|
54
|
-
}
|
|
55
|
-
|
|
56
|
-
#[derive(Serialize)]
|
|
57
|
-
struct InteractiveData {
|
|
58
|
-
points: Vec<PointDetail>,
|
|
59
|
-
chrom_data: HashMap<String, ChromInfo>,
|
|
60
|
-
total_genome_length: i64,
|
|
61
|
-
x_buffer: i64,
|
|
62
|
-
y_min: f64,
|
|
63
|
-
y_max: f64,
|
|
64
|
-
device_pixel_ratio: f64,
|
|
65
|
-
default_log_cutoff: f64,
|
|
66
|
-
has_capped_points: bool,
|
|
67
|
-
}
|
|
68
|
-
|
|
69
|
-
#[derive(Serialize)]
|
|
70
|
-
struct Output {
|
|
71
|
-
png: String,
|
|
72
|
-
plot_data: InteractiveData,
|
|
73
|
-
}
|
|
74
|
-
|
|
75
|
-
/// Converts an `RRGGBB` hex colour (optionally prefixed with `#`) into RGB bytes.
///
/// Leading `#` characters are stripped first. Returns `None` unless exactly six
/// ASCII hexadecimal digits remain.
fn hex_to_rgb(hex: &str) -> Option<(u8, u8, u8)> {
    let hex = hex.trim_start_matches('#');

    // Validate every byte before slicing:
    // - a six-byte string holding multi-byte UTF-8 characters would otherwise
    //   panic when sliced in the middle of a character;
    // - `from_str_radix` accepts a leading '+', so "+F" would parse as 15.
    if hex.len() != 6 || !hex.bytes().all(|b| b.is_ascii_hexdigit()) {
        return None;
    }

    // All bytes are ASCII here, so every two-byte range is a valid `str` slice.
    let channel = |range: std::ops::Range<usize>| u8::from_str_radix(&hex[range], 16).ok();
    Some((channel(0..2)?, channel(2..4)?, channel(4..6)?))
}
|
|
86
|
-
|
|
87
|
-
/// Derives the default -log10(q) cutoff from the data.
///
/// The cutoff is the mean of all `ys` that are strictly below `hard_cap` and whose
/// index is not listed in `exclude_indices`, raised to a floor of 40. The floor
/// exists because a very low cutoff can break the histogram bin setup in the
/// dynamic y-cap calculation.
///
/// `exclude_indices` lets the caller skip placeholder entries (such as the 0.0
/// stand-ins for zero q-values) so they do not drag the mean down.
///
/// Returns `hard_cap` when no value qualifies.
fn get_log_cutoff(ys: &[f64], hard_cap: f64, exclude_indices: &HashSet<usize>) -> f64 {
    let mut kept = 0usize;
    let mut total = 0.0f64;

    for (idx, &value) in ys.iter().enumerate() {
        if value < hard_cap && !exclude_indices.contains(&idx) {
            kept += 1;
            total += value;
        }
    }

    // Everything was at/above the hard cap or excluded: fall back to the hard cap.
    if kept == 0 {
        return hard_cap;
    }

    (total / kept as f64).max(40.0)
}
|
|
111
|
-
|
|
112
|
-
/// Calculates a dynamic y-axis cap for Manhattan plots to handle outliers gracefully.
///
/// # Problem
/// Manhattan plots often have a few extreme outliers (very significant p-values) that
/// compress the visual range for the majority of points. This function finds a
/// y-axis cap that:
/// - Shows most data at true scale
/// - Caps only a small number of extreme outliers
/// - Ensures visible outliers (at or below the hard cap) render at their true positions
///
/// # Algorithm
/// 1. **No outliers**: If `max_y <= default_cap`, return `max_y` (no capping needed)
/// 2. **Histogram binning**: Partition the range `(default_cap, hard_cap]` into fixed-size
///    bins. Values above `hard_cap`, and values that would index past the last bin
///    (for example exactly `hard_cap`), are counted in the last bin.
/// 3. **Walk up**: Starting from the lowest bin, find the first bin for which at most
///    `max_capped_points` points lie above its lower edge
/// 4. **Preserve visible outliers**: If the highest y-value not above `hard_cap` lies
///    beyond that bin, extend the cap by one bin past that value (never beyond `hard_cap`)
///
/// # Parameters
/// - `ys`: All y-values (-log10 q-values) in the plot
/// - `max_capped_points`: Maximum points allowed to be clamped to the cap (e.g., 5)
/// - `default_cap`: Starting threshold; points at or below this are never capped
/// - `hard_cap`: Absolute maximum y-axis value; points above are always clamped (e.g., 200)
/// - `bin_size`: Histogram bin width on -log10 scale (e.g., 10). A non-positive or NaN
///   width collapses the histogram into a single bin instead of failing.
///
/// # Returns
/// `max_y` when nothing exceeds `default_cap` (this is `f64::NEG_INFINITY` for an
/// empty `ys`); otherwise a cap that is never larger than `hard_cap`.
///
/// # Example
/// With `default_cap=40`, `hard_cap=200`, `bin_size=10`, `max_capped_points=5`:
/// - If 7 points are above 40, with two at 83 and 183 and five at/above 200:
///   Returns 200, so the points at 83 and 183 display at their true positions while
///   the 5 extreme outliers are clamped to 200
fn calculate_dynamic_y_cap(
    ys: &[f64],
    max_capped_points: usize,
    default_cap: f64,
    hard_cap: f64,
    bin_size: f64,
) -> f64 {
    // Float -> usize casts saturate (negative and NaN become 0), so the only input
    // that needs an explicit guard is a non-positive bin width, which would
    // otherwise ask for an effectively infinite number of bins.
    let num_bins = if bin_size > 0.0 {
        (((hard_cap - default_cap) / bin_size) as usize).max(1)
    } else {
        1
    };
    let last_bin = num_bins - 1;

    let mut histogram = vec![0usize; num_bins];
    let mut max_y = f64::NEG_INFINITY;
    // Highest value that is above the default cap but not above the hard cap.
    let mut max_y_below_hard_cap = f64::NEG_INFINITY;
    let mut points_above_default = 0usize;

    // Single pass: find the maxima and build the histogram simultaneously.
    for &y in ys {
        if y > max_y {
            max_y = y;
        }
        if y > default_cap {
            points_above_default += 1;
            if y > hard_cap {
                histogram[last_bin] += 1;
            } else {
                if y > max_y_below_hard_cap {
                    max_y_below_hard_cap = y;
                }
                // Clamp the index: `y == hard_cap` on an evenly divisible range, or a
                // bin count that was truncated, would land one past the last bin.
                let bin_idx = (((y - default_cap) / bin_size) as usize).min(last_bin);
                histogram[bin_idx] += 1;
            }
        }
    }

    // Case 1: no point exceeds the default cap - use the actual maximum.
    if max_y <= default_cap {
        return max_y;
    }

    // Walk up from default_cap towards hard_cap. At bin `i`, `points_above` is the
    // number of points above that bin's lower edge.
    let mut points_above = points_above_default;
    for (i, &count) in histogram.iter().enumerate() {
        if points_above <= max_capped_points {
            let bin_upper_bound = default_cap + ((i + 1) as f64) * bin_size;

            return if max_y_below_hard_cap > bin_upper_bound {
                // A visible outlier sits above this bin - extend the cap to show it.
                (max_y_below_hard_cap + bin_size).min(hard_cap)
            } else {
                bin_upper_bound.min(hard_cap)
            };
        }
        points_above -= count;
    }

    // Too many points remain even in the last bin: fall back to the hard cap.
    hard_cap
}
|
|
214
|
-
|
|
215
|
-
// Function to Build cumulative chromosome map
|
|
216
|
-
fn cumulative_chrom(
|
|
217
|
-
chrom_size: &HashMap<String, u64>,
|
|
218
|
-
) -> Result<(HashMap<String, ChromInfo>, u64, Vec<String>), Box<dyn Error>> {
|
|
219
|
-
let mut chrom_data: HashMap<String, ChromInfo> = HashMap::new();
|
|
220
|
-
let mut cumulative_pos: u64 = 0;
|
|
221
|
-
|
|
222
|
-
// Sort chromosomes
|
|
223
|
-
let mut sorted_chroms: Vec<String> = chrom_size.keys().cloned().collect();
|
|
224
|
-
sorted_chroms.sort_by_key(|chr| {
|
|
225
|
-
let s = chr.trim_start_matches("chr");
|
|
226
|
-
match s.parse::<u32>() {
|
|
227
|
-
Ok(n) => (0, n),
|
|
228
|
-
Err(_) => match s {
|
|
229
|
-
"X" => (1, 23),
|
|
230
|
-
"Y" => (1, 24),
|
|
231
|
-
"M" | "MT" => (1, 100),
|
|
232
|
-
_ => (2, u32::MAX),
|
|
233
|
-
},
|
|
234
|
-
}
|
|
235
|
-
});
|
|
236
|
-
|
|
237
|
-
for chrom in &sorted_chroms {
|
|
238
|
-
if let Some(&size) = chrom_size.get(chrom) {
|
|
239
|
-
chrom_data.insert(
|
|
240
|
-
chrom.clone(),
|
|
241
|
-
ChromInfo {
|
|
242
|
-
start: cumulative_pos,
|
|
243
|
-
size: size,
|
|
244
|
-
center: cumulative_pos + size / 2,
|
|
245
|
-
},
|
|
246
|
-
);
|
|
247
|
-
cumulative_pos += size;
|
|
248
|
-
}
|
|
249
|
-
}
|
|
250
|
-
Ok((chrom_data, cumulative_pos, sorted_chroms))
|
|
251
|
-
}
|
|
252
|
-
|
|
253
|
-
// Function to read the GRIN2 file
|
|
254
|
-
fn grin2_file_read(
|
|
255
|
-
grin2_file: &str,
|
|
256
|
-
chrom_data: &HashMap<String, ChromInfo>,
|
|
257
|
-
q_value_threshold: f64,
|
|
258
|
-
) -> Result<
|
|
259
|
-
(
|
|
260
|
-
Vec<u64>,
|
|
261
|
-
Vec<f64>,
|
|
262
|
-
Vec<String>,
|
|
263
|
-
Vec<PointDetail>,
|
|
264
|
-
Vec<usize>,
|
|
265
|
-
Vec<usize>,
|
|
266
|
-
),
|
|
267
|
-
Box<dyn Error>,
|
|
268
|
-
> {
|
|
269
|
-
// Default colours
|
|
270
|
-
let mut colors: HashMap<String, String> = HashMap::new();
|
|
271
|
-
colors.insert("gain".into(), "#FF4444".into());
|
|
272
|
-
colors.insert("loss".into(), "#4444FF".into());
|
|
273
|
-
colors.insert("mutation".into(), "#44AA44".into());
|
|
274
|
-
colors.insert("fusion".into(), "#FFA500".into());
|
|
275
|
-
colors.insert("sv".into(), "#9932CC".into());
|
|
276
|
-
|
|
277
|
-
let mut xs = Vec::new();
|
|
278
|
-
let mut ys = Vec::new();
|
|
279
|
-
let mut colors_vec = Vec::new();
|
|
280
|
-
let mut point_details = Vec::new();
|
|
281
|
-
let mut sig_indices: Vec<usize> = Vec::new();
|
|
282
|
-
let mut zero_q_indices: Vec<usize> = Vec::new();
|
|
283
|
-
|
|
284
|
-
let grin2_file = File::open(grin2_file).expect("Failed to open grin2_result_file");
|
|
285
|
-
let mut reader = BufReader::new(grin2_file);
|
|
286
|
-
// get the first line (header line)
|
|
287
|
-
let mut header_line = String::new();
|
|
288
|
-
reader
|
|
289
|
-
.read_line(&mut header_line)
|
|
290
|
-
.expect("Failed to read the first line of grin2_result_file");
|
|
291
|
-
let header: Vec<String> = header_line
|
|
292
|
-
.trim_end()
|
|
293
|
-
.split('\t')
|
|
294
|
-
.map(|s| s.trim().to_string())
|
|
295
|
-
.collect();
|
|
296
|
-
|
|
297
|
-
// define the mutation types from the header of grin2 result file
|
|
298
|
-
let mutation_types = ["gain", "loss", "mutation", "fusion", "sv"];
|
|
299
|
-
let mut mutation_indices: HashMap<&str, (usize, Option<usize>)> = HashMap::new();
|
|
300
|
-
for name in &mutation_types {
|
|
301
|
-
let q_col = format!("q.nsubj.{name}");
|
|
302
|
-
let n_col = format!("nsubj.{name}");
|
|
303
|
-
if let Some(q_idx) = header.iter().position(|h| h == &q_col) {
|
|
304
|
-
let n_idx = header.iter().position(|h| h == &n_col);
|
|
305
|
-
mutation_indices.insert(*name, (q_idx, n_idx));
|
|
306
|
-
}
|
|
307
|
-
}
|
|
308
|
-
|
|
309
|
-
// extract the index for each required info
|
|
310
|
-
let chrom_idx = header
|
|
311
|
-
.iter()
|
|
312
|
-
.position(|h| h == "chrom")
|
|
313
|
-
.expect("Missing 'chrom' column");
|
|
314
|
-
let gene_idx = header.iter().position(|h| h == "gene").expect("Missing 'gene' column");
|
|
315
|
-
let loc_start_idx = header
|
|
316
|
-
.iter()
|
|
317
|
-
.position(|h| h == "loc.start")
|
|
318
|
-
.expect("Missing 'loc.start' column");
|
|
319
|
-
let loc_end_idx = header
|
|
320
|
-
.iter()
|
|
321
|
-
.position(|h| h == "loc.end")
|
|
322
|
-
.expect("Missing 'loc.end' column");
|
|
323
|
-
|
|
324
|
-
// loop all lines
|
|
325
|
-
let mut mut_num: usize = 0;
|
|
326
|
-
for line_result in reader.lines() {
|
|
327
|
-
let line = match line_result {
|
|
328
|
-
Ok(l) => l,
|
|
329
|
-
Err(e) => {
|
|
330
|
-
eprintln!("Error reading line: {}", e);
|
|
331
|
-
continue;
|
|
332
|
-
}
|
|
333
|
-
};
|
|
334
|
-
|
|
335
|
-
let fields: Vec<&str> = line.trim_end().split('\t').collect();
|
|
336
|
-
let chrom = match fields.get(chrom_idx).map(|s| s.trim()) {
|
|
337
|
-
Some(s) if !s.is_empty() => s,
|
|
338
|
-
_ => continue,
|
|
339
|
-
};
|
|
340
|
-
let chrom_info = match chrom_data.get(chrom) {
|
|
341
|
-
Some(info) => info,
|
|
342
|
-
None => continue,
|
|
343
|
-
};
|
|
344
|
-
let gene_name = fields.get(gene_idx).unwrap_or(&"").to_string();
|
|
345
|
-
let loc_start_str = match fields.get(loc_start_idx).map(|s| s.trim()) {
|
|
346
|
-
Some(s) if !s.is_empty() => s,
|
|
347
|
-
_ => continue,
|
|
348
|
-
};
|
|
349
|
-
let gene_start: u64 = loc_start_str
|
|
350
|
-
.parse()
|
|
351
|
-
.unwrap_or_else(|_| panic!("Invalid integer for loc.start: '{}' in line: {}", loc_start_str, line));
|
|
352
|
-
let loc_end_str = match fields.get(loc_end_idx).map(|s| s.trim()) {
|
|
353
|
-
Some(s) if !s.is_empty() => s,
|
|
354
|
-
_ => continue,
|
|
355
|
-
};
|
|
356
|
-
let gene_end: u64 = loc_end_str
|
|
357
|
-
.parse()
|
|
358
|
-
.unwrap_or_else(|_| panic!("Invalid integer for loc.end: '{}' in line: {}", loc_end_str, line));
|
|
359
|
-
let x_pos = chrom_info.start + gene_start as u64;
|
|
360
|
-
|
|
361
|
-
for (mtype, (q_idx, n_idx_opt)) in &mutation_indices {
|
|
362
|
-
let q_val_str = match fields.get(*q_idx) {
|
|
363
|
-
Some(q) => q,
|
|
364
|
-
None => continue,
|
|
365
|
-
};
|
|
366
|
-
let original_q_val: f64 = match q_val_str.parse() {
|
|
367
|
-
Ok(v) if v >= 0.0 => v,
|
|
368
|
-
_ => continue,
|
|
369
|
-
};
|
|
370
|
-
|
|
371
|
-
// Use a placeholder for zero q-values - these will be updated later
|
|
372
|
-
// after we calculate the dynamic y_cap from the full dataset
|
|
373
|
-
let neg_log10_q = if original_q_val == 0.0 {
|
|
374
|
-
zero_q_indices.push(mut_num);
|
|
375
|
-
0.0 // Placeholder - will be set to y_cap later in plot_grin2_manhattan
|
|
376
|
-
} else {
|
|
377
|
-
-original_q_val.log10()
|
|
378
|
-
};
|
|
379
|
-
|
|
380
|
-
let n_subj_count: Option<i64> = n_idx_opt
|
|
381
|
-
.and_then(|i| fields.get(i))
|
|
382
|
-
.and_then(|s| s.parse::<i64>().ok());
|
|
383
|
-
let color = colors.get(*mtype).unwrap_or(&"#888888".to_string()).clone();
|
|
384
|
-
// Add to plotting vectors
|
|
385
|
-
xs.push(x_pos);
|
|
386
|
-
ys.push(neg_log10_q);
|
|
387
|
-
colors_vec.push(color.clone());
|
|
388
|
-
|
|
389
|
-
// only add significant points for interactivity
|
|
390
|
-
// We check against the original q-value here so we send back the correct values instead of the 1e-300 used for log transform
|
|
391
|
-
if original_q_val <= q_value_threshold {
|
|
392
|
-
point_details.push(PointDetail {
|
|
393
|
-
x: x_pos,
|
|
394
|
-
y: neg_log10_q,
|
|
395
|
-
color,
|
|
396
|
-
r#type: mtype.to_string(),
|
|
397
|
-
gene: gene_name.clone(),
|
|
398
|
-
chrom: chrom.to_string(),
|
|
399
|
-
start: gene_start,
|
|
400
|
-
end: gene_end,
|
|
401
|
-
pos: gene_start,
|
|
402
|
-
q_value: original_q_val,
|
|
403
|
-
nsubj: n_subj_count,
|
|
404
|
-
pixel_x: 0.0,
|
|
405
|
-
pixel_y: 0.0,
|
|
406
|
-
});
|
|
407
|
-
sig_indices.push(mut_num);
|
|
408
|
-
};
|
|
409
|
-
mut_num += 1;
|
|
410
|
-
}
|
|
411
|
-
}
|
|
412
|
-
|
|
413
|
-
Ok((xs, ys, colors_vec, point_details, sig_indices, zero_q_indices))
|
|
414
|
-
}
|
|
415
|
-
|
|
416
|
-
// Function to create the GRIN2 Manhattan plot
|
|
417
|
-
fn plot_grin2_manhattan(
|
|
418
|
-
grin2_result_file: String,
|
|
419
|
-
chrom_size: HashMap<String, u64>,
|
|
420
|
-
plot_width: u64,
|
|
421
|
-
plot_height: u64,
|
|
422
|
-
device_pixel_ratio: f64,
|
|
423
|
-
png_dot_radius: u64,
|
|
424
|
-
bin_size: f64,
|
|
425
|
-
max_capped_points: u64,
|
|
426
|
-
hard_cap: f64,
|
|
427
|
-
q_value_threshold: f64,
|
|
428
|
-
) -> Result<(String, InteractiveData), Box<dyn Error>> {
|
|
429
|
-
// ------------------------------------------------
|
|
430
|
-
// 1. Build cumulative chromosome map
|
|
431
|
-
// ------------------------------------------------
|
|
432
|
-
|
|
433
|
-
let mut chrom_data: HashMap<String, ChromInfo> = HashMap::new();
|
|
434
|
-
let mut cumulative_pos: u64 = 0;
|
|
435
|
-
let mut sorted_chroms: Vec<String> = Vec::new();
|
|
436
|
-
|
|
437
|
-
if let Ok((chr_data, cum_pos, chrom_sort)) = cumulative_chrom(&chrom_size) {
|
|
438
|
-
chrom_data = chr_data;
|
|
439
|
-
cumulative_pos = cum_pos;
|
|
440
|
-
sorted_chroms = chrom_sort;
|
|
441
|
-
};
|
|
442
|
-
let total_genome_length: i64 = cumulative_pos.try_into().unwrap();
|
|
443
|
-
let x_buffer = (total_genome_length as f64 * 0.005) as i64; // 0.5 % buffer
|
|
444
|
-
|
|
445
|
-
// ------------------------------------------------
|
|
446
|
-
// 2. Read file & collect points
|
|
447
|
-
// ------------------------------------------------
|
|
448
|
-
|
|
449
|
-
// Declare all data
|
|
450
|
-
let mut xs = Vec::new();
|
|
451
|
-
let mut ys = Vec::new();
|
|
452
|
-
let mut colors_vec = Vec::new();
|
|
453
|
-
let mut point_details = Vec::new();
|
|
454
|
-
let mut sig_indices = Vec::new();
|
|
455
|
-
let mut zero_q_indices: Vec<usize> = Vec::new();
|
|
456
|
-
|
|
457
|
-
if let Ok((x, y, c, pd, si, zq)) = grin2_file_read(&grin2_result_file, &chrom_data, q_value_threshold) {
|
|
458
|
-
xs = x;
|
|
459
|
-
ys = y;
|
|
460
|
-
colors_vec = c;
|
|
461
|
-
point_details = pd;
|
|
462
|
-
sig_indices = si;
|
|
463
|
-
zero_q_indices = zq;
|
|
464
|
-
}
|
|
465
|
-
|
|
466
|
-
// ------------------------------------------------
|
|
467
|
-
// 3. Calculate log_cutoff from data and update zero q-values
|
|
468
|
-
// ------------------------------------------------
|
|
469
|
-
// Convert zero_q_indices to HashSet for O(1) lookup when excluding placeholders
|
|
470
|
-
let zero_q_set: HashSet<usize> = zero_q_indices.iter().cloned().collect();
|
|
471
|
-
let log_cutoff = get_log_cutoff(&ys, hard_cap, &zero_q_set);
|
|
472
|
-
|
|
473
|
-
// ------------------------------------------------
|
|
474
|
-
// 4. Y-axis capping with dynamic cap
|
|
475
|
-
// ------------------------------------------------
|
|
476
|
-
let y_padding = png_dot_radius as f64;
|
|
477
|
-
let y_min = 0.0 - y_padding;
|
|
478
|
-
|
|
479
|
-
// Dynamic y-cap calculation:
|
|
480
|
-
// - log_cutoff: the baseline cap (calculated from data mean)
|
|
481
|
-
// - max_capped_points: maximum number of points allowed above cap before raising it
|
|
482
|
-
// - hard_cap: absolute maximum cap regardless of data distribution
|
|
483
|
-
// - bin_size: size of bins for histogram approach
|
|
484
|
-
let max_capped_points = max_capped_points as usize;
|
|
485
|
-
|
|
486
|
-
let y_cap = calculate_dynamic_y_cap(&ys, max_capped_points, log_cutoff, hard_cap, bin_size);
|
|
487
|
-
|
|
488
|
-
let (y_max, has_capped_points) = if !ys.is_empty() {
|
|
489
|
-
let max_y = ys.iter().cloned().fold(f64::MIN, f64::max);
|
|
490
|
-
|
|
491
|
-
// has_capped_points is true if any points exceed the default cap (log_cutoff)
|
|
492
|
-
let has_capped = max_y > log_cutoff;
|
|
493
|
-
|
|
494
|
-
// Set q=0 points (currently placeholders at 0.0) to y_cap so they appear at the top
|
|
495
|
-
for &idx in &zero_q_indices {
|
|
496
|
-
ys[idx] = y_cap;
|
|
497
|
-
}
|
|
498
|
-
for p in point_details.iter_mut() {
|
|
499
|
-
if p.q_value == 0.0 {
|
|
500
|
-
p.y = y_cap;
|
|
501
|
-
}
|
|
502
|
-
}
|
|
503
|
-
|
|
504
|
-
if max_y > y_cap {
|
|
505
|
-
// Clamp values above the cap
|
|
506
|
-
for y in ys.iter_mut() {
|
|
507
|
-
if *y > y_cap {
|
|
508
|
-
*y = y_cap;
|
|
509
|
-
}
|
|
510
|
-
}
|
|
511
|
-
for p in point_details.iter_mut() {
|
|
512
|
-
if p.y > y_cap {
|
|
513
|
-
p.y = y_cap;
|
|
514
|
-
}
|
|
515
|
-
}
|
|
516
|
-
(y_cap + 0.35 + y_padding, has_capped)
|
|
517
|
-
} else {
|
|
518
|
-
(max_y + 0.35 + y_padding, has_capped)
|
|
519
|
-
}
|
|
520
|
-
} else {
|
|
521
|
-
(1.0 + y_padding, false)
|
|
522
|
-
};
|
|
523
|
-
|
|
524
|
-
// ------------------------------------------------
|
|
525
|
-
// 4. Setup high-DPR bitmap dimensions
|
|
526
|
-
// ------------------------------------------------
|
|
527
|
-
|
|
528
|
-
let dpr = device_pixel_ratio.max(1.0);
|
|
529
|
-
|
|
530
|
-
let png_width = plot_width + 2 * png_dot_radius;
|
|
531
|
-
let png_height = plot_height + 2 * png_dot_radius;
|
|
532
|
-
|
|
533
|
-
let w: u32 = ((png_width as f64) * dpr) as u32;
|
|
534
|
-
let h: u32 = ((png_height as f64) * dpr) as u32;
|
|
535
|
-
|
|
536
|
-
// Create RGB buffer for Plotters
|
|
537
|
-
let mut buffer = vec![0u8; w as usize * h as usize * 3];
|
|
538
|
-
|
|
539
|
-
// Make Plotters backend that draws into the RGB buffer (scale-aware)
|
|
540
|
-
|
|
541
|
-
let mut pixel_positions: Vec<(f64, f64)> = Vec::with_capacity(xs.len());
|
|
542
|
-
{
|
|
543
|
-
let backend = BitMapBackend::with_buffer(&mut buffer, (w, h));
|
|
544
|
-
let root = backend.into_drawing_area();
|
|
545
|
-
root.fill(&WHITE)?;
|
|
546
|
-
|
|
547
|
-
// ------------------------------------------------
|
|
548
|
-
// 5. Build the chart (no axes, no margins)
|
|
549
|
-
// ------------------------------------------------
|
|
550
|
-
let mut chart = ChartBuilder::on(&root)
|
|
551
|
-
.margin(0)
|
|
552
|
-
.set_all_label_area_size(0)
|
|
553
|
-
.build_cartesian_2d((-x_buffer)..(total_genome_length + x_buffer), y_min..y_max)?;
|
|
554
|
-
|
|
555
|
-
chart
|
|
556
|
-
.configure_mesh()
|
|
557
|
-
.disable_x_mesh()
|
|
558
|
-
.disable_y_mesh()
|
|
559
|
-
.disable_axes()
|
|
560
|
-
.draw()?;
|
|
561
|
-
|
|
562
|
-
// ------------------------------------------------
|
|
563
|
-
// 6. Alternating chromosome backgrounds
|
|
564
|
-
// ------------------------------------------------
|
|
565
|
-
for (i, chrom) in sorted_chroms.iter().enumerate() {
|
|
566
|
-
if let Some(info) = chrom_data.get(chrom) {
|
|
567
|
-
let bg = if i % 2 == 0 { WHITE } else { RGBColor(211, 211, 211) };
|
|
568
|
-
let fill_style: ShapeStyle = bg.mix(0.5).filled();
|
|
569
|
-
let rect = Rectangle::new(
|
|
570
|
-
[
|
|
571
|
-
(info.start as i64, (y_min + y_padding)),
|
|
572
|
-
((info.start + info.size) as i64, (y_max - y_padding)),
|
|
573
|
-
],
|
|
574
|
-
fill_style,
|
|
575
|
-
);
|
|
576
|
-
chart.draw_series(vec![rect])?;
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
|
|
580
|
-
// ------------------------------------------------
|
|
581
|
-
// 7. Capture high-DPR pixel mapping for the points
|
|
582
|
-
// we do not draw the points with plotters (will use tiny-skia for AA)
|
|
583
|
-
// but use charts.backend_coord to map data->pixel in the high-DPR backend
|
|
584
|
-
// ------------------------------------------------
|
|
585
|
-
|
|
586
|
-
if !xs.is_empty() {
|
|
587
|
-
for (x, y) in xs.iter().zip(ys.iter()) {
|
|
588
|
-
// convert data coords -> high-DPR pixel coords
|
|
589
|
-
let (px, py) = chart.backend_coord(&(*x as i64, *y));
|
|
590
|
-
pixel_positions.push((px as f64, py as f64));
|
|
591
|
-
}
|
|
592
|
-
};
|
|
593
|
-
|
|
594
|
-
for (i, p) in point_details.iter_mut().enumerate() {
|
|
595
|
-
let (px, py) = pixel_positions[*&sig_indices[i]];
|
|
596
|
-
p.pixel_x = px / dpr;
|
|
597
|
-
p.pixel_y = py / dpr;
|
|
598
|
-
}
|
|
599
|
-
|
|
600
|
-
// flush root drawing area
|
|
601
|
-
root.present()?;
|
|
602
|
-
}
|
|
603
|
-
|
|
604
|
-
// Convert Plotters RGB buffer into tiny-skia RGBA pixmap
|
|
605
|
-
let mut pixmap = Pixmap::new(w, h).ok_or("Failed to create pixmap")?;
|
|
606
|
-
{
|
|
607
|
-
let data = pixmap.data_mut();
|
|
608
|
-
let mut src_i = 0usize;
|
|
609
|
-
let mut dst_i = 0usize;
|
|
610
|
-
for _ in 0..(w as usize * h as usize) {
|
|
611
|
-
let r = buffer[src_i];
|
|
612
|
-
let g = buffer[src_i + 1];
|
|
613
|
-
let b = buffer[src_i + 2];
|
|
614
|
-
data[dst_i] = r;
|
|
615
|
-
data[dst_i + 1] = g;
|
|
616
|
-
data[dst_i + 2] = b;
|
|
617
|
-
data[dst_i + 3] = 255u8; // opaque alpha
|
|
618
|
-
src_i += 3;
|
|
619
|
-
dst_i += 4;
|
|
620
|
-
}
|
|
621
|
-
}
|
|
622
|
-
|
|
623
|
-
// Draw anti-aliased circles using tiny-skia into the pixmap
|
|
624
|
-
// radius in HIGH-DPR pixels:
|
|
625
|
-
let radius_high_dpr = (png_dot_radius as f32) * (dpr as f32);
|
|
626
|
-
|
|
627
|
-
// Paint template
|
|
628
|
-
let mut paint = tiny_skia::Paint::default();
|
|
629
|
-
|
|
630
|
-
// for perfomance: reuse a PathBuilder to create circles
|
|
631
|
-
// will create a small path per point
|
|
632
|
-
for i in 0..xs.len() {
|
|
633
|
-
let (px, py) = pixel_positions[i]; // pixel coordinates for this point
|
|
634
|
-
let color_hex = &colors_vec[i];
|
|
635
|
-
|
|
636
|
-
let (r_u8, g_u8, b_u8) = match hex_to_rgb(color_hex) {
|
|
637
|
-
Some(rgb) => rgb,
|
|
638
|
-
None => (136u8, 136u8, 136u8),
|
|
639
|
-
};
|
|
640
|
-
paint.set_color_rgba8(r_u8, g_u8, b_u8, 255u8);
|
|
641
|
-
let mut pb = PathBuilder::new();
|
|
642
|
-
pb.push_circle(px as f32, py as f32, radius_high_dpr);
|
|
643
|
-
|
|
644
|
-
if let Some(path) = pb.finish() {
|
|
645
|
-
pixmap.fill_path(&path, &paint, FillRule::Winding, Transform::identity(), None);
|
|
646
|
-
};
|
|
647
|
-
}
|
|
648
|
-
|
|
649
|
-
// encode pixmap to PNG bytes
|
|
650
|
-
let png_bytes = pixmap.encode_png()?;
|
|
651
|
-
let png_data = BASE64.encode(&png_bytes);
|
|
652
|
-
|
|
653
|
-
// ------------------------------------------------
|
|
654
|
-
// 8. Generate interactive data
|
|
655
|
-
// ------------------------------------------------
|
|
656
|
-
let interactive_data = InteractiveData {
|
|
657
|
-
points: point_details,
|
|
658
|
-
chrom_data,
|
|
659
|
-
total_genome_length,
|
|
660
|
-
x_buffer,
|
|
661
|
-
y_min,
|
|
662
|
-
y_max,
|
|
663
|
-
device_pixel_ratio: dpr,
|
|
664
|
-
default_log_cutoff: log_cutoff,
|
|
665
|
-
has_capped_points,
|
|
666
|
-
};
|
|
667
|
-
Ok((png_data, interactive_data))
|
|
668
|
-
}
|
|
669
|
-
|
|
670
|
-
fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
671
|
-
let mut input = String::new();
|
|
672
|
-
match io::stdin().read_line(&mut input) {
|
|
673
|
-
Ok(_bytes_read) => {
|
|
674
|
-
let input_json: Input = match serde_json::from_str(&input) {
|
|
675
|
-
Ok(json) => json,
|
|
676
|
-
Err(_err) => {
|
|
677
|
-
panic!("Invalid JSON input");
|
|
678
|
-
}
|
|
679
|
-
};
|
|
680
|
-
|
|
681
|
-
// input data type
|
|
682
|
-
// *** might need to change the key later
|
|
683
|
-
let input_data = &input_json.plot_type;
|
|
684
|
-
|
|
685
|
-
if input_data == "grin2" {
|
|
686
|
-
let grin2_file = &input_json.file;
|
|
687
|
-
let chrom_size = &input_json.chromosomelist;
|
|
688
|
-
let plot_width = &input_json.plot_width;
|
|
689
|
-
let plot_height = &input_json.plot_height;
|
|
690
|
-
let device_pixel_ratio = &input_json.device_pixel_ratio;
|
|
691
|
-
let png_dot_radius = &input_json.png_dot_radius;
|
|
692
|
-
let max_capped_points = &input_json.max_capped_points;
|
|
693
|
-
let hard_cap = &input_json.hard_cap;
|
|
694
|
-
let bin_size = &input_json.bin_size;
|
|
695
|
-
let q_value_threshold = &input_json.q_value_threshold;
|
|
696
|
-
if let Ok((base64_string, plot_data)) = plot_grin2_manhattan(
|
|
697
|
-
grin2_file.clone(),
|
|
698
|
-
chrom_size.clone(),
|
|
699
|
-
plot_width.clone(),
|
|
700
|
-
plot_height.clone(),
|
|
701
|
-
device_pixel_ratio.clone(),
|
|
702
|
-
png_dot_radius.clone(),
|
|
703
|
-
bin_size.clone(),
|
|
704
|
-
max_capped_points.clone(),
|
|
705
|
-
hard_cap.clone(),
|
|
706
|
-
q_value_threshold.clone(),
|
|
707
|
-
) {
|
|
708
|
-
let output = Output {
|
|
709
|
-
png: base64_string,
|
|
710
|
-
plot_data,
|
|
711
|
-
};
|
|
712
|
-
if let Ok(json) = serde_json::to_string(&output) {
|
|
713
|
-
println!("{}", json);
|
|
714
|
-
}
|
|
715
|
-
} else {
|
|
716
|
-
eprintln!("Failed to generate Manhattan plot");
|
|
717
|
-
};
|
|
718
|
-
}
|
|
719
|
-
}
|
|
720
|
-
Err(_err) => {
|
|
721
|
-
panic!("Error reading input JSON!");
|
|
722
|
-
}
|
|
723
|
-
}
|
|
724
|
-
Ok(())
|
|
725
|
-
}
|