@sjcrh/proteinpaint-rust 2.130.0 → 2.132.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/gdcGRIN2.rs +103 -49
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.130.0",
2
+ "version": "2.132.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/gdcGRIN2.rs CHANGED
@@ -13,7 +13,7 @@
13
13
  Output mutations as JSON array.
14
14
 
15
15
  Example of usage:
16
- echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
16
+ echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":1000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
17
17
  */
18
18
 
19
19
  use flate2::read::GzDecoder;
@@ -55,6 +55,8 @@ struct MafOptions {
55
55
  min_total_depth: i32,
56
56
  #[serde(rename = "minAltAlleleCount")]
57
57
  min_alt_allele_count: i32,
58
+ #[serde(rename = "hyperMutator")]
59
+ hyper_mutator: i32,
58
60
  consequences: Option<Vec<String>>, // Optional list of consequences to filter MAF files
59
61
  }
60
62
 
@@ -82,7 +84,8 @@ struct SuccessfulFileOutput {
82
84
  // struct for MAF filter details
83
85
  #[derive(Clone, Serialize, Default)]
84
86
  struct FilteredMafDetails {
85
- invalid_consequences: usize,
87
+ matched_consequences: HashMap<String, usize>,
88
+ rejected_consequences: HashMap<String, usize>,
86
89
  t_alt_count: usize,
87
90
  t_depth: usize,
88
91
  invalid_rows: usize,
@@ -115,7 +118,10 @@ struct FinalSummary {
115
118
  filtered_records: usize,
116
119
  filtered_maf_records: usize,
117
120
  filtered_cnv_records: usize,
121
+ included_maf_records: usize,
122
+ included_cnv_records: usize,
118
123
  filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
124
+ hyper_mutator_records: Vec<String>,
119
125
  }
120
126
 
121
127
  // Define the top-level input structure
@@ -143,6 +149,7 @@ async fn parse_content(
143
149
  data_type: &str,
144
150
  min_total_depth: i32,
145
151
  min_alt_allele_count: i32,
152
+ hyper_mutator: i32,
146
153
  consequences: &Option<Vec<String>>,
147
154
  gain_threshold: f32,
148
155
  loss_threshold: f32,
@@ -150,6 +157,9 @@ async fn parse_content(
150
157
  filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
151
158
  filtered_maf_records: &AtomicUsize,
152
159
  filtered_cnv_records: &AtomicUsize,
160
+ included_maf_records: &AtomicUsize,
161
+ included_cnv_records: &AtomicUsize,
162
+ hyper_mutator_records: &Arc<Mutex<Vec<String>>>,
153
163
  ) -> Result<Vec<Vec<String>>, (String, String, String)> {
154
164
  let config = match data_type {
155
165
  "cnv" => DataTypeConfig {
@@ -169,6 +179,18 @@ async fn parse_content(
169
179
  }
170
180
  };
171
181
 
182
+ // check hyperMutator for MAF files
183
+ if data_type == "maf" && hyper_mutator > 0 {
184
+ let line_count = content.lines().count();
185
+ if line_count as i32 > hyper_mutator {
186
+ let mut hyper_records = hyper_mutator_records.lock().await;
187
+ if !hyper_records.contains(&case_id.to_string()) {
188
+ hyper_records.push(case_id.to_string());
189
+ }
190
+ return Ok(Vec::new());
191
+ }
192
+ };
193
+
172
194
  let lines = content.lines();
173
195
  let mut parsed_data = Vec::new();
174
196
  let mut columns_indices: Vec<usize> = Vec::new();
@@ -213,6 +235,8 @@ async fn parse_content(
213
235
  filtered_records,
214
236
  filtered_maf_records,
215
237
  filtered_cnv_records,
238
+ included_maf_records,
239
+ included_cnv_records,
216
240
  )
217
241
  .await?;
218
242
 
@@ -255,7 +279,7 @@ fn setup_columns(
255
279
  }
256
280
 
257
281
  if data_type == "maf" {
258
- *variant_classification_index = header.iter().position(|x| x == "Variant_Classification");
282
+ *variant_classification_index = header.iter().position(|x| x == "One_Consequence");
259
283
  if variant_classification_index.is_none() {
260
284
  return Err((
261
285
  case_id.to_string(),
@@ -285,6 +309,8 @@ async fn process_row(
285
309
  filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
286
310
  filtered_maf_records: &AtomicUsize,
287
311
  filtered_cnv_records: &AtomicUsize,
312
+ included_maf_records: &AtomicUsize,
313
+ included_cnv_records: &AtomicUsize,
288
314
  ) -> Result<Option<Vec<String>>, (String, String, String)> {
289
315
  let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
290
316
  let mut out_lst = vec![case_id.to_string()];
@@ -300,11 +326,56 @@ async fn process_row(
300
326
 
301
327
  let case_details = filtered_map.get_mut(case_id).unwrap();
302
328
 
303
- // Check consequence filtering for MAF files
304
- if data_type == "maf" && !is_valid_consequence(&cont_lst, variant_classification_index, consequences) {
305
- case_details.maf.invalid_consequences += 1;
306
- filtered_maf_records.fetch_add(1, Ordering::Relaxed);
307
- return Ok(None);
329
+ // Handle consequence filtering and counting for MAF files
330
+ if data_type == "maf" {
331
+ if let Some(var_class_idx) = variant_classification_index {
332
+ if var_class_idx < cont_lst.len() {
333
+ let variant_classification = &cont_lst[var_class_idx];
334
+ if let Some(consequence_filter) = consequences {
335
+ if !consequence_filter.is_empty() {
336
+ if consequence_filter.contains(variant_classification) {
337
+ // Matched consequence
338
+ *case_details
339
+ .maf
340
+ .matched_consequences
341
+ .entry(variant_classification.to_string())
342
+ .or_insert(0) += 1;
343
+ } else {
344
+ // Unmatched consequence
345
+ *case_details
346
+ .maf
347
+ .rejected_consequences
348
+ .entry(variant_classification.to_string())
349
+ .or_insert(0) += 1;
350
+ filtered_maf_records.fetch_add(1, Ordering::Relaxed);
351
+ return Ok(None);
352
+ }
353
+ } else {
354
+ // Empty filter, count as matched
355
+ *case_details
356
+ .maf
357
+ .matched_consequences
358
+ .entry(variant_classification.to_string())
359
+ .or_insert(0) += 1;
360
+ }
361
+ } else {
362
+ // No filter, count as matched
363
+ *case_details
364
+ .maf
365
+ .matched_consequences
366
+ .entry(variant_classification.to_string())
367
+ .or_insert(0) += 1;
368
+ }
369
+ } else {
370
+ case_details.maf.invalid_rows += 1;
371
+ filtered_maf_records.fetch_add(1, Ordering::Relaxed);
372
+ return Ok(None);
373
+ }
374
+ } else {
375
+ case_details.maf.invalid_rows += 1;
376
+ filtered_maf_records.fetch_add(1, Ordering::Relaxed);
377
+ return Ok(None);
378
+ }
308
379
  }
309
380
 
310
381
  // Extract relevant columns
@@ -374,6 +445,9 @@ async fn process_row(
374
445
  // Keep case_id, chr, start, end, and add "mutation"
375
446
  out_lst = out_lst[0..4].to_vec();
376
447
  out_lst.push("mutation".to_string());
448
+
449
+ // Update counters for included MAF records
450
+ included_maf_records.fetch_add(1, Ordering::Relaxed);
377
451
  }
378
452
 
379
453
  // filter cnvs based on segment length. Default: 0 (no filtering)
@@ -404,33 +478,12 @@ async fn process_row(
404
478
  filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
405
479
  return Ok(None);
406
480
  }
481
+ included_cnv_records.fetch_add(1, Ordering::Relaxed);
407
482
  }
408
483
 
409
484
  Ok(Some(out_lst))
410
485
  }
411
486
 
412
- // Check if the row meets consequence filtering criteria
413
- fn is_valid_consequence(
414
- cont_lst: &[String],
415
- variant_classification_index: Option<usize>,
416
- consequences: &Option<Vec<String>>,
417
- ) -> bool {
418
- if let Some(consequence_filter) = consequences {
419
- if !consequence_filter.is_empty() {
420
- if let Some(var_class_idx) = variant_classification_index {
421
- if var_class_idx < cont_lst.len() {
422
- let variant_classification = &cont_lst[var_class_idx];
423
- if let Some(normalized_consequence) = normalize_consequence(variant_classification) {
424
- return consequence_filter.contains(&normalized_consequence);
425
- }
426
- }
427
- return false; // Invalid row or unknown consequence
428
- }
429
- }
430
- }
431
- true // No filtering or empty filter
432
- }
433
-
434
487
  // Process Segment_Mean for CNV files
435
488
  fn process_segment_mean(
436
489
  element: &str,
@@ -457,23 +510,6 @@ fn process_segment_mean(
457
510
  }
458
511
 
459
512
  /// Updated helper function to normalize MAF consequence types to frontend format
460
- /// Returns None for unknown consequence types (which will be filtered out)
461
- fn normalize_consequence(maf_consequence: &str) -> Option<String> {
462
- match maf_consequence.to_lowercase().as_str() {
463
- // Only map the consequence types we actually support
464
- "missense_mutation" => Some("missense".to_string()),
465
- "nonsense_mutation" | "stop_gained" | "stop_lost" => Some("nonsense".to_string()),
466
- "frame_shift_del" | "frame_shift_ins" | "frameshift_variant" => Some("frameshift".to_string()),
467
- "silent" | "synonymous_variant" => Some("silent".to_string()),
468
- "in_frame_del" => Some("deletion".to_string()),
469
- "in_frame_ins" => Some("insertion".to_string()),
470
- "splice_site" | "splice_acceptor_variant" | "splice_donor_variant" => Some("splice_site".to_string()),
471
- "tandem_duplication" | "duplication" => Some("duplication".to_string()),
472
- "inversion" => Some("inversion".to_string()),
473
- // Return None for all unknown consequence types - they will be filtered out
474
- _ => None,
475
- }
476
- }
477
513
  /// Downloads a single file with minimal retry logic for transient failures
478
514
  async fn download_single_file(
479
515
  case_id: String,
@@ -584,6 +620,7 @@ async fn download_data_streaming(
584
620
  host: &str,
585
621
  min_total_depth: i32,
586
622
  min_alt_allele_count: i32,
623
+ hyper_mutator: i32,
587
624
  consequences: &Option<Vec<String>>,
588
625
  gain_threshold: f32,
589
626
  loss_threshold: f32,
@@ -611,6 +648,9 @@ async fn download_data_streaming(
611
648
  let filtered_maf_records = Arc::new(AtomicUsize::new(0));
612
649
  let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
613
650
  let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
651
+ let hyper_mutator_records = Arc::new(Mutex::new(Vec::<String>::new()));
652
+ let included_maf_records = Arc::new(AtomicUsize::new(0));
653
+ let included_cnv_records = Arc::new(AtomicUsize::new(0));
614
654
 
615
655
  // Only collect errors (successful data is output immediately)
616
656
  let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
@@ -630,6 +670,9 @@ async fn download_data_streaming(
630
670
  let filtered_maf_records = Arc::clone(&filtered_maf_records);
631
671
  let filtered_cnv_records = Arc::clone(&filtered_cnv_records);
632
672
  let filtered_records = Arc::clone(&filtered_records);
673
+ let included_maf_records = Arc::clone(&included_maf_records);
674
+ let included_cnv_records = Arc::clone(&included_cnv_records);
675
+ let hyper_mutator_records = Arc::clone(&hyper_mutator_records);
633
676
  let errors = Arc::clone(&errors);
634
677
 
635
678
  async move {
@@ -642,6 +685,7 @@ async fn download_data_streaming(
642
685
  &data_type,
643
686
  min_total_depth,
644
687
  min_alt_allele_count,
688
+ hyper_mutator,
645
689
  &consequences,
646
690
  gain_threshold,
647
691
  loss_threshold,
@@ -649,6 +693,9 @@ async fn download_data_streaming(
649
693
  &filtered_records,
650
694
  &filtered_maf_records,
651
695
  &filtered_cnv_records,
696
+ &included_maf_records,
697
+ &included_cnv_records,
698
+ &hyper_mutator_records,
652
699
  )
653
700
  .await
654
701
  {
@@ -717,6 +764,8 @@ async fn download_data_streaming(
717
764
  let failed_count = failed_downloads.load(Ordering::Relaxed);
718
765
  let filtered_maf_count = filtered_maf_records.load(Ordering::Relaxed);
719
766
  let filtered_cnv_count = filtered_cnv_records.load(Ordering::Relaxed);
767
+ let included_maf_count = included_maf_records.load(Ordering::Relaxed);
768
+ let included_cnv_count = included_cnv_records.load(Ordering::Relaxed);
720
769
 
721
770
  let summary = FinalSummary {
722
771
  output_type: "summary".to_string(),
@@ -728,6 +777,9 @@ async fn download_data_streaming(
728
777
  filtered_maf_records: filtered_maf_count,
729
778
  filtered_cnv_records: filtered_cnv_count,
730
779
  filtered_records_by_case: filtered_records.lock().await.clone(),
780
+ included_maf_records: included_maf_count,
781
+ included_cnv_records: included_cnv_count,
782
+ hyper_mutator_records: hyper_mutator_records.lock().await.clone(),
731
783
  };
732
784
 
733
785
  // Output final summary - Node.js will know processing is complete when it sees this
@@ -781,13 +833,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
781
833
  let case_files = input_js.case_files;
782
834
 
783
835
  // Set default maf_options
784
- let (min_total_depth, min_alt_allele_count, consequences) = match input_js.maf_options {
836
+ let (min_total_depth, min_alt_allele_count, hyper_mutator, consequences) = match input_js.maf_options {
785
837
  Some(options) => (
786
838
  options.min_total_depth,
787
839
  options.min_alt_allele_count,
840
+ options.hyper_mutator,
788
841
  options.consequences.clone(),
789
842
  ),
790
- None => (10, 2, None), // Default values
843
+ None => (10, 2, 8000, None), // Default values
791
844
  };
792
845
 
793
846
  // Set default cnv_options
@@ -802,6 +855,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
802
855
  HOST,
803
856
  min_total_depth,
804
857
  min_alt_allele_count,
858
+ hyper_mutator,
805
859
  &consequences,
806
860
  gain_threshold,
807
861
  loss_threshold,