@sjcrh/proteinpaint-rust 2.130.0 → 2.132.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/gdcGRIN2.rs +103 -49
package/package.json
CHANGED
package/src/gdcGRIN2.rs
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
Output mutations as JSON array.
|
|
14
14
|
|
|
15
15
|
Example of usage:
|
|
16
|
-
echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
|
|
16
|
+
echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":1000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
use flate2::read::GzDecoder;
|
|
@@ -55,6 +55,8 @@ struct MafOptions {
|
|
|
55
55
|
min_total_depth: i32,
|
|
56
56
|
#[serde(rename = "minAltAlleleCount")]
|
|
57
57
|
min_alt_allele_count: i32,
|
|
58
|
+
#[serde(rename = "hyperMutator")]
|
|
59
|
+
hyper_mutator: i32,
|
|
58
60
|
consequences: Option<Vec<String>>, // Optional list of consequences to filter MAF files
|
|
59
61
|
}
|
|
60
62
|
|
|
@@ -82,7 +84,8 @@ struct SuccessfulFileOutput {
|
|
|
82
84
|
// struct for MAF filter details
|
|
83
85
|
#[derive(Clone, Serialize, Default)]
|
|
84
86
|
struct FilteredMafDetails {
|
|
85
|
-
|
|
87
|
+
matched_consequences: HashMap<String, usize>,
|
|
88
|
+
rejected_consequences: HashMap<String, usize>,
|
|
86
89
|
t_alt_count: usize,
|
|
87
90
|
t_depth: usize,
|
|
88
91
|
invalid_rows: usize,
|
|
@@ -115,7 +118,10 @@ struct FinalSummary {
|
|
|
115
118
|
filtered_records: usize,
|
|
116
119
|
filtered_maf_records: usize,
|
|
117
120
|
filtered_cnv_records: usize,
|
|
121
|
+
included_maf_records: usize,
|
|
122
|
+
included_cnv_records: usize,
|
|
118
123
|
filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
|
|
124
|
+
hyper_mutator_records: Vec<String>,
|
|
119
125
|
}
|
|
120
126
|
|
|
121
127
|
// Define the top-level input structure
|
|
@@ -143,6 +149,7 @@ async fn parse_content(
|
|
|
143
149
|
data_type: &str,
|
|
144
150
|
min_total_depth: i32,
|
|
145
151
|
min_alt_allele_count: i32,
|
|
152
|
+
hyper_mutator: i32,
|
|
146
153
|
consequences: &Option<Vec<String>>,
|
|
147
154
|
gain_threshold: f32,
|
|
148
155
|
loss_threshold: f32,
|
|
@@ -150,6 +157,9 @@ async fn parse_content(
|
|
|
150
157
|
filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
|
|
151
158
|
filtered_maf_records: &AtomicUsize,
|
|
152
159
|
filtered_cnv_records: &AtomicUsize,
|
|
160
|
+
included_maf_records: &AtomicUsize,
|
|
161
|
+
included_cnv_records: &AtomicUsize,
|
|
162
|
+
hyper_mutator_records: &Arc<Mutex<Vec<String>>>,
|
|
153
163
|
) -> Result<Vec<Vec<String>>, (String, String, String)> {
|
|
154
164
|
let config = match data_type {
|
|
155
165
|
"cnv" => DataTypeConfig {
|
|
@@ -169,6 +179,18 @@ async fn parse_content(
|
|
|
169
179
|
}
|
|
170
180
|
};
|
|
171
181
|
|
|
182
|
+
// check hyperMutator for MAF files
|
|
183
|
+
if data_type == "maf" && hyper_mutator > 0 {
|
|
184
|
+
let line_count = content.lines().count();
|
|
185
|
+
if line_count as i32 > hyper_mutator {
|
|
186
|
+
let mut hyper_records = hyper_mutator_records.lock().await;
|
|
187
|
+
if !hyper_records.contains(&case_id.to_string()) {
|
|
188
|
+
hyper_records.push(case_id.to_string());
|
|
189
|
+
}
|
|
190
|
+
return Ok(Vec::new());
|
|
191
|
+
}
|
|
192
|
+
};
|
|
193
|
+
|
|
172
194
|
let lines = content.lines();
|
|
173
195
|
let mut parsed_data = Vec::new();
|
|
174
196
|
let mut columns_indices: Vec<usize> = Vec::new();
|
|
@@ -213,6 +235,8 @@ async fn parse_content(
|
|
|
213
235
|
filtered_records,
|
|
214
236
|
filtered_maf_records,
|
|
215
237
|
filtered_cnv_records,
|
|
238
|
+
included_maf_records,
|
|
239
|
+
included_cnv_records,
|
|
216
240
|
)
|
|
217
241
|
.await?;
|
|
218
242
|
|
|
@@ -255,7 +279,7 @@ fn setup_columns(
|
|
|
255
279
|
}
|
|
256
280
|
|
|
257
281
|
if data_type == "maf" {
|
|
258
|
-
*variant_classification_index = header.iter().position(|x| x == "
|
|
282
|
+
*variant_classification_index = header.iter().position(|x| x == "One_Consequence");
|
|
259
283
|
if variant_classification_index.is_none() {
|
|
260
284
|
return Err((
|
|
261
285
|
case_id.to_string(),
|
|
@@ -285,6 +309,8 @@ async fn process_row(
|
|
|
285
309
|
filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
|
|
286
310
|
filtered_maf_records: &AtomicUsize,
|
|
287
311
|
filtered_cnv_records: &AtomicUsize,
|
|
312
|
+
included_maf_records: &AtomicUsize,
|
|
313
|
+
included_cnv_records: &AtomicUsize,
|
|
288
314
|
) -> Result<Option<Vec<String>>, (String, String, String)> {
|
|
289
315
|
let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
|
|
290
316
|
let mut out_lst = vec![case_id.to_string()];
|
|
@@ -300,11 +326,56 @@ async fn process_row(
|
|
|
300
326
|
|
|
301
327
|
let case_details = filtered_map.get_mut(case_id).unwrap();
|
|
302
328
|
|
|
303
|
-
//
|
|
304
|
-
if data_type == "maf"
|
|
305
|
-
|
|
306
|
-
|
|
307
|
-
|
|
329
|
+
// Handle consequence filtering and counting for MAF files
|
|
330
|
+
if data_type == "maf" {
|
|
331
|
+
if let Some(var_class_idx) = variant_classification_index {
|
|
332
|
+
if var_class_idx < cont_lst.len() {
|
|
333
|
+
let variant_classification = &cont_lst[var_class_idx];
|
|
334
|
+
if let Some(consequence_filter) = consequences {
|
|
335
|
+
if !consequence_filter.is_empty() {
|
|
336
|
+
if consequence_filter.contains(variant_classification) {
|
|
337
|
+
// Matched consequence
|
|
338
|
+
*case_details
|
|
339
|
+
.maf
|
|
340
|
+
.matched_consequences
|
|
341
|
+
.entry(variant_classification.to_string())
|
|
342
|
+
.or_insert(0) += 1;
|
|
343
|
+
} else {
|
|
344
|
+
// Unmatched consequence
|
|
345
|
+
*case_details
|
|
346
|
+
.maf
|
|
347
|
+
.rejected_consequences
|
|
348
|
+
.entry(variant_classification.to_string())
|
|
349
|
+
.or_insert(0) += 1;
|
|
350
|
+
filtered_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
351
|
+
return Ok(None);
|
|
352
|
+
}
|
|
353
|
+
} else {
|
|
354
|
+
// Empty filter, count as matched
|
|
355
|
+
*case_details
|
|
356
|
+
.maf
|
|
357
|
+
.matched_consequences
|
|
358
|
+
.entry(variant_classification.to_string())
|
|
359
|
+
.or_insert(0) += 1;
|
|
360
|
+
}
|
|
361
|
+
} else {
|
|
362
|
+
// No filter, count as matched
|
|
363
|
+
*case_details
|
|
364
|
+
.maf
|
|
365
|
+
.matched_consequences
|
|
366
|
+
.entry(variant_classification.to_string())
|
|
367
|
+
.or_insert(0) += 1;
|
|
368
|
+
}
|
|
369
|
+
} else {
|
|
370
|
+
case_details.maf.invalid_rows += 1;
|
|
371
|
+
filtered_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
372
|
+
return Ok(None);
|
|
373
|
+
}
|
|
374
|
+
} else {
|
|
375
|
+
case_details.maf.invalid_rows += 1;
|
|
376
|
+
filtered_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
377
|
+
return Ok(None);
|
|
378
|
+
}
|
|
308
379
|
}
|
|
309
380
|
|
|
310
381
|
// Extract relevant columns
|
|
@@ -374,6 +445,9 @@ async fn process_row(
|
|
|
374
445
|
// Keep case_id, chr, start, end, and add "mutation"
|
|
375
446
|
out_lst = out_lst[0..4].to_vec();
|
|
376
447
|
out_lst.push("mutation".to_string());
|
|
448
|
+
|
|
449
|
+
// Update counters for included MAF records
|
|
450
|
+
included_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
377
451
|
}
|
|
378
452
|
|
|
379
453
|
// filter cnvs based on segment length. Default: 0 (no filtering)
|
|
@@ -404,33 +478,12 @@ async fn process_row(
|
|
|
404
478
|
filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
|
|
405
479
|
return Ok(None);
|
|
406
480
|
}
|
|
481
|
+
included_cnv_records.fetch_add(1, Ordering::Relaxed);
|
|
407
482
|
}
|
|
408
483
|
|
|
409
484
|
Ok(Some(out_lst))
|
|
410
485
|
}
|
|
411
486
|
|
|
412
|
-
// Check if the row meets consequence filtering criteria
|
|
413
|
-
fn is_valid_consequence(
|
|
414
|
-
cont_lst: &[String],
|
|
415
|
-
variant_classification_index: Option<usize>,
|
|
416
|
-
consequences: &Option<Vec<String>>,
|
|
417
|
-
) -> bool {
|
|
418
|
-
if let Some(consequence_filter) = consequences {
|
|
419
|
-
if !consequence_filter.is_empty() {
|
|
420
|
-
if let Some(var_class_idx) = variant_classification_index {
|
|
421
|
-
if var_class_idx < cont_lst.len() {
|
|
422
|
-
let variant_classification = &cont_lst[var_class_idx];
|
|
423
|
-
if let Some(normalized_consequence) = normalize_consequence(variant_classification) {
|
|
424
|
-
return consequence_filter.contains(&normalized_consequence);
|
|
425
|
-
}
|
|
426
|
-
}
|
|
427
|
-
return false; // Invalid row or unknown consequence
|
|
428
|
-
}
|
|
429
|
-
}
|
|
430
|
-
}
|
|
431
|
-
true // No filtering or empty filter
|
|
432
|
-
}
|
|
433
|
-
|
|
434
487
|
// Process Segment_Mean for CNV files
|
|
435
488
|
fn process_segment_mean(
|
|
436
489
|
element: &str,
|
|
@@ -457,23 +510,6 @@ fn process_segment_mean(
|
|
|
457
510
|
}
|
|
458
511
|
|
|
459
512
|
/// Updated helper function to normalize MAF consequence types to frontend format
|
|
460
|
-
/// Returns None for unknown consequence types (which will be filtered out)
|
|
461
|
-
fn normalize_consequence(maf_consequence: &str) -> Option<String> {
|
|
462
|
-
match maf_consequence.to_lowercase().as_str() {
|
|
463
|
-
// Only map the consequence types we actually support
|
|
464
|
-
"missense_mutation" => Some("missense".to_string()),
|
|
465
|
-
"nonsense_mutation" | "stop_gained" | "stop_lost" => Some("nonsense".to_string()),
|
|
466
|
-
"frame_shift_del" | "frame_shift_ins" | "frameshift_variant" => Some("frameshift".to_string()),
|
|
467
|
-
"silent" | "synonymous_variant" => Some("silent".to_string()),
|
|
468
|
-
"in_frame_del" => Some("deletion".to_string()),
|
|
469
|
-
"in_frame_ins" => Some("insertion".to_string()),
|
|
470
|
-
"splice_site" | "splice_acceptor_variant" | "splice_donor_variant" => Some("splice_site".to_string()),
|
|
471
|
-
"tandem_duplication" | "duplication" => Some("duplication".to_string()),
|
|
472
|
-
"inversion" => Some("inversion".to_string()),
|
|
473
|
-
// Return None for all unknown consequence types - they will be filtered out
|
|
474
|
-
_ => None,
|
|
475
|
-
}
|
|
476
|
-
}
|
|
477
513
|
/// Downloads a single file with minimal retry logic for transient failures
|
|
478
514
|
async fn download_single_file(
|
|
479
515
|
case_id: String,
|
|
@@ -584,6 +620,7 @@ async fn download_data_streaming(
|
|
|
584
620
|
host: &str,
|
|
585
621
|
min_total_depth: i32,
|
|
586
622
|
min_alt_allele_count: i32,
|
|
623
|
+
hyper_mutator: i32,
|
|
587
624
|
consequences: &Option<Vec<String>>,
|
|
588
625
|
gain_threshold: f32,
|
|
589
626
|
loss_threshold: f32,
|
|
@@ -611,6 +648,9 @@ async fn download_data_streaming(
|
|
|
611
648
|
let filtered_maf_records = Arc::new(AtomicUsize::new(0));
|
|
612
649
|
let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
|
|
613
650
|
let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
|
|
651
|
+
let hyper_mutator_records = Arc::new(Mutex::new(Vec::<String>::new()));
|
|
652
|
+
let included_maf_records = Arc::new(AtomicUsize::new(0));
|
|
653
|
+
let included_cnv_records = Arc::new(AtomicUsize::new(0));
|
|
614
654
|
|
|
615
655
|
// Only collect errors (successful data is output immediately)
|
|
616
656
|
let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
|
|
@@ -630,6 +670,9 @@ async fn download_data_streaming(
|
|
|
630
670
|
let filtered_maf_records = Arc::clone(&filtered_maf_records);
|
|
631
671
|
let filtered_cnv_records = Arc::clone(&filtered_cnv_records);
|
|
632
672
|
let filtered_records = Arc::clone(&filtered_records);
|
|
673
|
+
let included_maf_records = Arc::clone(&included_maf_records);
|
|
674
|
+
let included_cnv_records = Arc::clone(&included_cnv_records);
|
|
675
|
+
let hyper_mutator_records = Arc::clone(&hyper_mutator_records);
|
|
633
676
|
let errors = Arc::clone(&errors);
|
|
634
677
|
|
|
635
678
|
async move {
|
|
@@ -642,6 +685,7 @@ async fn download_data_streaming(
|
|
|
642
685
|
&data_type,
|
|
643
686
|
min_total_depth,
|
|
644
687
|
min_alt_allele_count,
|
|
688
|
+
hyper_mutator,
|
|
645
689
|
&consequences,
|
|
646
690
|
gain_threshold,
|
|
647
691
|
loss_threshold,
|
|
@@ -649,6 +693,9 @@ async fn download_data_streaming(
|
|
|
649
693
|
&filtered_records,
|
|
650
694
|
&filtered_maf_records,
|
|
651
695
|
&filtered_cnv_records,
|
|
696
|
+
&included_maf_records,
|
|
697
|
+
&included_cnv_records,
|
|
698
|
+
&hyper_mutator_records,
|
|
652
699
|
)
|
|
653
700
|
.await
|
|
654
701
|
{
|
|
@@ -717,6 +764,8 @@ async fn download_data_streaming(
|
|
|
717
764
|
let failed_count = failed_downloads.load(Ordering::Relaxed);
|
|
718
765
|
let filtered_maf_count = filtered_maf_records.load(Ordering::Relaxed);
|
|
719
766
|
let filtered_cnv_count = filtered_cnv_records.load(Ordering::Relaxed);
|
|
767
|
+
let included_maf_count = included_maf_records.load(Ordering::Relaxed);
|
|
768
|
+
let included_cnv_count = included_cnv_records.load(Ordering::Relaxed);
|
|
720
769
|
|
|
721
770
|
let summary = FinalSummary {
|
|
722
771
|
output_type: "summary".to_string(),
|
|
@@ -728,6 +777,9 @@ async fn download_data_streaming(
|
|
|
728
777
|
filtered_maf_records: filtered_maf_count,
|
|
729
778
|
filtered_cnv_records: filtered_cnv_count,
|
|
730
779
|
filtered_records_by_case: filtered_records.lock().await.clone(),
|
|
780
|
+
included_maf_records: included_maf_count,
|
|
781
|
+
included_cnv_records: included_cnv_count,
|
|
782
|
+
hyper_mutator_records: hyper_mutator_records.lock().await.clone(),
|
|
731
783
|
};
|
|
732
784
|
|
|
733
785
|
// Output final summary - Node.js will know processing is complete when it sees this
|
|
@@ -781,13 +833,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
781
833
|
let case_files = input_js.case_files;
|
|
782
834
|
|
|
783
835
|
// Set default maf_options
|
|
784
|
-
let (min_total_depth, min_alt_allele_count, consequences) = match input_js.maf_options {
|
|
836
|
+
let (min_total_depth, min_alt_allele_count, hyper_mutator, consequences) = match input_js.maf_options {
|
|
785
837
|
Some(options) => (
|
|
786
838
|
options.min_total_depth,
|
|
787
839
|
options.min_alt_allele_count,
|
|
840
|
+
options.hyper_mutator,
|
|
788
841
|
options.consequences.clone(),
|
|
789
842
|
),
|
|
790
|
-
None => (10, 2, None), // Default values
|
|
843
|
+
None => (10, 2, 8000, None), // Default values
|
|
791
844
|
};
|
|
792
845
|
|
|
793
846
|
// Set default cnv_options
|
|
@@ -802,6 +855,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
802
855
|
HOST,
|
|
803
856
|
min_total_depth,
|
|
804
857
|
min_alt_allele_count,
|
|
858
|
+
hyper_mutator,
|
|
805
859
|
&consequences,
|
|
806
860
|
gain_threshold,
|
|
807
861
|
loss_threshold,
|