@sjcrh/proteinpaint-rust 2.130.0 → 2.132.1-0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/gdcGRIN2.rs +135 -49
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.130.0",
2
+ "version": "2.132.1-0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/gdcGRIN2.rs CHANGED
@@ -13,7 +13,7 @@
13
13
  Output mutations as JSON array.
14
14
 
15
15
  Example of usage:
16
- echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
16
+ echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":1000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
17
17
  */
18
18
 
19
19
  use flate2::read::GzDecoder;
@@ -55,6 +55,8 @@ struct MafOptions {
55
55
  min_total_depth: i32,
56
56
  #[serde(rename = "minAltAlleleCount")]
57
57
  min_alt_allele_count: i32,
58
+ #[serde(rename = "hyperMutator")]
59
+ hyper_mutator: i32,
58
60
  consequences: Option<Vec<String>>, // Optional list of consequences to filter MAF files
59
61
  }
60
62
 
@@ -82,10 +84,16 @@ struct SuccessfulFileOutput {
82
84
  // struct for MAF filter details
83
85
  #[derive(Clone, Serialize, Default)]
84
86
  struct FilteredMafDetails {
85
- invalid_consequences: usize,
87
+ matched_consequences: HashMap<String, usize>,
88
+ rejected_consequences: HashMap<String, usize>,
86
89
  t_alt_count: usize,
87
90
  t_depth: usize,
88
91
  invalid_rows: usize,
92
+ excluded_by_min_depth: usize,
93
+ excluded_by_min_alt_count: usize,
94
+ excluded_by_consequence_type: usize,
95
+ total_processed: usize,
96
+ total_included: usize,
89
97
  }
90
98
 
91
99
  // struct for CNV filter details
@@ -94,6 +102,11 @@ struct FilteredCnvDetails {
94
102
  segment_mean: usize,
95
103
  seg_length: usize,
96
104
  invalid_rows: usize,
105
+ excluded_by_loss_threshold: usize,
106
+ excluded_by_gain_threshold: usize,
107
+ excluded_by_segment_length: usize,
108
+ total_processed: usize,
109
+ total_included: usize,
97
110
  }
98
111
 
99
112
  // struct for per-case filter details
@@ -115,7 +128,10 @@ struct FinalSummary {
115
128
  filtered_records: usize,
116
129
  filtered_maf_records: usize,
117
130
  filtered_cnv_records: usize,
131
+ included_maf_records: usize,
132
+ included_cnv_records: usize,
118
133
  filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
134
+ hyper_mutator_records: Vec<String>,
119
135
  }
120
136
 
121
137
  // Define the top-level input structure
@@ -143,6 +159,7 @@ async fn parse_content(
143
159
  data_type: &str,
144
160
  min_total_depth: i32,
145
161
  min_alt_allele_count: i32,
162
+ hyper_mutator: i32,
146
163
  consequences: &Option<Vec<String>>,
147
164
  gain_threshold: f32,
148
165
  loss_threshold: f32,
@@ -150,6 +167,9 @@ async fn parse_content(
150
167
  filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
151
168
  filtered_maf_records: &AtomicUsize,
152
169
  filtered_cnv_records: &AtomicUsize,
170
+ included_maf_records: &AtomicUsize,
171
+ included_cnv_records: &AtomicUsize,
172
+ hyper_mutator_records: &Arc<Mutex<Vec<String>>>,
153
173
  ) -> Result<Vec<Vec<String>>, (String, String, String)> {
154
174
  let config = match data_type {
155
175
  "cnv" => DataTypeConfig {
@@ -169,6 +189,18 @@ async fn parse_content(
169
189
  }
170
190
  };
171
191
 
192
+ // check hyperMutator for MAF files
193
+ if data_type == "maf" && hyper_mutator > 0 {
194
+ let line_count = content.lines().count();
195
+ if line_count as i32 > hyper_mutator {
196
+ let mut hyper_records = hyper_mutator_records.lock().await;
197
+ if !hyper_records.contains(&case_id.to_string()) {
198
+ hyper_records.push(case_id.to_string());
199
+ }
200
+ return Ok(Vec::new());
201
+ }
202
+ };
203
+
172
204
  let lines = content.lines();
173
205
  let mut parsed_data = Vec::new();
174
206
  let mut columns_indices: Vec<usize> = Vec::new();
@@ -213,6 +245,8 @@ async fn parse_content(
213
245
  filtered_records,
214
246
  filtered_maf_records,
215
247
  filtered_cnv_records,
248
+ included_maf_records,
249
+ included_cnv_records,
216
250
  )
217
251
  .await?;
218
252
 
@@ -255,7 +289,7 @@ fn setup_columns(
255
289
  }
256
290
 
257
291
  if data_type == "maf" {
258
- *variant_classification_index = header.iter().position(|x| x == "Variant_Classification");
292
+ *variant_classification_index = header.iter().position(|x| x == "One_Consequence");
259
293
  if variant_classification_index.is_none() {
260
294
  return Err((
261
295
  case_id.to_string(),
@@ -285,6 +319,8 @@ async fn process_row(
285
319
  filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
286
320
  filtered_maf_records: &AtomicUsize,
287
321
  filtered_cnv_records: &AtomicUsize,
322
+ included_maf_records: &AtomicUsize,
323
+ included_cnv_records: &AtomicUsize,
288
324
  ) -> Result<Option<Vec<String>>, (String, String, String)> {
289
325
  let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
290
326
  let mut out_lst = vec![case_id.to_string()];
@@ -300,11 +336,64 @@ async fn process_row(
300
336
 
301
337
  let case_details = filtered_map.get_mut(case_id).unwrap();
302
338
 
303
- // Check consequence filtering for MAF files
304
- if data_type == "maf" && !is_valid_consequence(&cont_lst, variant_classification_index, consequences) {
305
- case_details.maf.invalid_consequences += 1;
306
- filtered_maf_records.fetch_add(1, Ordering::Relaxed);
307
- return Ok(None);
339
+ // Track total processed records
340
+ if data_type == "maf" {
341
+ case_details.maf.total_processed += 1;
342
+ } else if data_type == "cnv" {
343
+ case_details.cnv.total_processed += 1;
344
+ }
345
+
346
+ // Handle consequence filtering and counting for MAF files
347
+ if data_type == "maf" {
348
+ if let Some(var_class_idx) = variant_classification_index {
349
+ if var_class_idx < cont_lst.len() {
350
+ let variant_classification = &cont_lst[var_class_idx];
351
+ if let Some(consequence_filter) = consequences {
352
+ if !consequence_filter.is_empty() {
353
+ if consequence_filter.contains(variant_classification) {
354
+ // Matched consequence
355
+ *case_details
356
+ .maf
357
+ .matched_consequences
358
+ .entry(variant_classification.to_string())
359
+ .or_insert(0) += 1;
360
+ } else {
361
+ // Unmatched consequence
362
+ *case_details
363
+ .maf
364
+ .rejected_consequences
365
+ .entry(variant_classification.to_string())
366
+ .or_insert(0) += 1;
367
+ case_details.maf.excluded_by_consequence_type += 1;
368
+ filtered_maf_records.fetch_add(1, Ordering::Relaxed);
369
+ return Ok(None);
370
+ }
371
+ } else {
372
+ // Empty filter, count as matched
373
+ *case_details
374
+ .maf
375
+ .matched_consequences
376
+ .entry(variant_classification.to_string())
377
+ .or_insert(0) += 1;
378
+ }
379
+ } else {
380
+ // No filter, count as matched
381
+ *case_details
382
+ .maf
383
+ .matched_consequences
384
+ .entry(variant_classification.to_string())
385
+ .or_insert(0) += 1;
386
+ }
387
+ } else {
388
+ case_details.maf.invalid_rows += 1;
389
+ filtered_maf_records.fetch_add(1, Ordering::Relaxed);
390
+ return Ok(None);
391
+ }
392
+ } else {
393
+ case_details.maf.invalid_rows += 1;
394
+ filtered_maf_records.fetch_add(1, Ordering::Relaxed);
395
+ return Ok(None);
396
+ }
308
397
  }
309
398
 
310
399
  // Extract relevant columns
@@ -325,6 +414,15 @@ async fn process_row(
325
414
  element = process_segment_mean(&element, case_id, data_type, gain_threshold, loss_threshold)?;
326
415
  if element.is_empty() {
327
416
  case_details.cnv.segment_mean += 1;
417
+ let seg_mean = cont_lst[x].parse::<f32>().unwrap_or(0.0);
418
+ if seg_mean > loss_threshold && seg_mean < gain_threshold {
419
+ // Between thresholds - not a significant gain or loss
420
+ if seg_mean >= 0.0 {
421
+ case_details.cnv.excluded_by_gain_threshold += 1;
422
+ } else {
423
+ case_details.cnv.excluded_by_loss_threshold += 1;
424
+ }
425
+ }
328
426
  filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
329
427
  return Ok(None);
330
428
  }
@@ -362,11 +460,13 @@ async fn process_row(
362
460
 
363
461
  if alle_depth < min_total_depth {
364
462
  case_details.maf.t_depth += 1;
463
+ case_details.maf.excluded_by_min_depth += 1;
365
464
  filtered_maf_records.fetch_add(1, Ordering::Relaxed);
366
465
  return Ok(None);
367
466
  }
368
467
  if alt_count < min_alt_allele_count {
369
468
  case_details.maf.t_alt_count += 1;
469
+ case_details.maf.excluded_by_min_alt_count += 1;
370
470
  filtered_maf_records.fetch_add(1, Ordering::Relaxed);
371
471
  return Ok(None);
372
472
  }
@@ -374,6 +474,10 @@ async fn process_row(
374
474
  // Keep case_id, chr, start, end, and add "mutation"
375
475
  out_lst = out_lst[0..4].to_vec();
376
476
  out_lst.push("mutation".to_string());
477
+
478
+ // Update counters for included MAF records
479
+ case_details.maf.total_included += 1;
480
+ included_maf_records.fetch_add(1, Ordering::Relaxed);
377
481
  }
378
482
 
379
483
  // filter cnvs based on segment length. Default: 0 (no filtering)
@@ -401,36 +505,17 @@ async fn process_row(
401
505
  let cnv_length = end_position - start_position;
402
506
  if seg_length > 0 && cnv_length > seg_length {
403
507
  case_details.cnv.seg_length += 1;
508
+ case_details.cnv.excluded_by_segment_length += 1;
404
509
  filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
405
510
  return Ok(None);
406
511
  }
512
+ case_details.cnv.total_included += 1;
513
+ included_cnv_records.fetch_add(1, Ordering::Relaxed);
407
514
  }
408
515
 
409
516
  Ok(Some(out_lst))
410
517
  }
411
518
 
412
- // Check if the row meets consequence filtering criteria
413
- fn is_valid_consequence(
414
- cont_lst: &[String],
415
- variant_classification_index: Option<usize>,
416
- consequences: &Option<Vec<String>>,
417
- ) -> bool {
418
- if let Some(consequence_filter) = consequences {
419
- if !consequence_filter.is_empty() {
420
- if let Some(var_class_idx) = variant_classification_index {
421
- if var_class_idx < cont_lst.len() {
422
- let variant_classification = &cont_lst[var_class_idx];
423
- if let Some(normalized_consequence) = normalize_consequence(variant_classification) {
424
- return consequence_filter.contains(&normalized_consequence);
425
- }
426
- }
427
- return false; // Invalid row or unknown consequence
428
- }
429
- }
430
- }
431
- true // No filtering or empty filter
432
- }
433
-
434
519
  // Process Segment_Mean for CNV files
435
520
  fn process_segment_mean(
436
521
  element: &str,
@@ -457,23 +542,6 @@ fn process_segment_mean(
457
542
  }
458
543
 
459
544
  /// Updated helper function to normalize MAF consequence types to frontend format
460
- /// Returns None for unknown consequence types (which will be filtered out)
461
- fn normalize_consequence(maf_consequence: &str) -> Option<String> {
462
- match maf_consequence.to_lowercase().as_str() {
463
- // Only map the consequence types we actually support
464
- "missense_mutation" => Some("missense".to_string()),
465
- "nonsense_mutation" | "stop_gained" | "stop_lost" => Some("nonsense".to_string()),
466
- "frame_shift_del" | "frame_shift_ins" | "frameshift_variant" => Some("frameshift".to_string()),
467
- "silent" | "synonymous_variant" => Some("silent".to_string()),
468
- "in_frame_del" => Some("deletion".to_string()),
469
- "in_frame_ins" => Some("insertion".to_string()),
470
- "splice_site" | "splice_acceptor_variant" | "splice_donor_variant" => Some("splice_site".to_string()),
471
- "tandem_duplication" | "duplication" => Some("duplication".to_string()),
472
- "inversion" => Some("inversion".to_string()),
473
- // Return None for all unknown consequence types - they will be filtered out
474
- _ => None,
475
- }
476
- }
477
545
  /// Downloads a single file with minimal retry logic for transient failures
478
546
  async fn download_single_file(
479
547
  case_id: String,
@@ -584,6 +652,7 @@ async fn download_data_streaming(
584
652
  host: &str,
585
653
  min_total_depth: i32,
586
654
  min_alt_allele_count: i32,
655
+ hyper_mutator: i32,
587
656
  consequences: &Option<Vec<String>>,
588
657
  gain_threshold: f32,
589
658
  loss_threshold: f32,
@@ -611,6 +680,9 @@ async fn download_data_streaming(
611
680
  let filtered_maf_records = Arc::new(AtomicUsize::new(0));
612
681
  let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
613
682
  let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
683
+ let hyper_mutator_records = Arc::new(Mutex::new(Vec::<String>::new()));
684
+ let included_maf_records = Arc::new(AtomicUsize::new(0));
685
+ let included_cnv_records = Arc::new(AtomicUsize::new(0));
614
686
 
615
687
  // Only collect errors (successful data is output immediately)
616
688
  let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
@@ -630,6 +702,9 @@ async fn download_data_streaming(
630
702
  let filtered_maf_records = Arc::clone(&filtered_maf_records);
631
703
  let filtered_cnv_records = Arc::clone(&filtered_cnv_records);
632
704
  let filtered_records = Arc::clone(&filtered_records);
705
+ let included_maf_records = Arc::clone(&included_maf_records);
706
+ let included_cnv_records = Arc::clone(&included_cnv_records);
707
+ let hyper_mutator_records = Arc::clone(&hyper_mutator_records);
633
708
  let errors = Arc::clone(&errors);
634
709
 
635
710
  async move {
@@ -642,6 +717,7 @@ async fn download_data_streaming(
642
717
  &data_type,
643
718
  min_total_depth,
644
719
  min_alt_allele_count,
720
+ hyper_mutator,
645
721
  &consequences,
646
722
  gain_threshold,
647
723
  loss_threshold,
@@ -649,6 +725,9 @@ async fn download_data_streaming(
649
725
  &filtered_records,
650
726
  &filtered_maf_records,
651
727
  &filtered_cnv_records,
728
+ &included_maf_records,
729
+ &included_cnv_records,
730
+ &hyper_mutator_records,
652
731
  )
653
732
  .await
654
733
  {
@@ -717,6 +796,8 @@ async fn download_data_streaming(
717
796
  let failed_count = failed_downloads.load(Ordering::Relaxed);
718
797
  let filtered_maf_count = filtered_maf_records.load(Ordering::Relaxed);
719
798
  let filtered_cnv_count = filtered_cnv_records.load(Ordering::Relaxed);
799
+ let included_maf_count = included_maf_records.load(Ordering::Relaxed);
800
+ let included_cnv_count = included_cnv_records.load(Ordering::Relaxed);
720
801
 
721
802
  let summary = FinalSummary {
722
803
  output_type: "summary".to_string(),
@@ -728,6 +809,9 @@ async fn download_data_streaming(
728
809
  filtered_maf_records: filtered_maf_count,
729
810
  filtered_cnv_records: filtered_cnv_count,
730
811
  filtered_records_by_case: filtered_records.lock().await.clone(),
812
+ included_maf_records: included_maf_count,
813
+ included_cnv_records: included_cnv_count,
814
+ hyper_mutator_records: hyper_mutator_records.lock().await.clone(),
731
815
  };
732
816
 
733
817
  // Output final summary - Node.js will know processing is complete when it sees this
@@ -781,13 +865,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
781
865
  let case_files = input_js.case_files;
782
866
 
783
867
  // Set default maf_options
784
- let (min_total_depth, min_alt_allele_count, consequences) = match input_js.maf_options {
868
+ let (min_total_depth, min_alt_allele_count, hyper_mutator, consequences) = match input_js.maf_options {
785
869
  Some(options) => (
786
870
  options.min_total_depth,
787
871
  options.min_alt_allele_count,
872
+ options.hyper_mutator,
788
873
  options.consequences.clone(),
789
874
  ),
790
- None => (10, 2, None), // Default values
875
+ None => (10, 2, 8000, None), // Default values
791
876
  };
792
877
 
793
878
  // Set default cnv_options
@@ -802,6 +887,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
802
887
  HOST,
803
888
  min_total_depth,
804
889
  min_alt_allele_count,
890
+ hyper_mutator,
805
891
  &consequences,
806
892
  gain_threshold,
807
893
  loss_threshold,