@sjcrh/proteinpaint-rust 2.132.0 → 2.133.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/gdcGRIN2.rs +70 -16
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.132.0",
2
+ "version": "2.133.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/gdcGRIN2.rs CHANGED
@@ -13,7 +13,7 @@
13
13
  Output mutations as JSON array.
14
14
 
15
15
  Example of usage:
16
- echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":1000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
16
+ echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":8000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000, "hyperMutator":8000}}' | ./target/release/gdcGRIN2
17
17
  */
18
18
 
19
19
  use flate2::read::GzDecoder;
@@ -69,6 +69,8 @@ struct CnvOptions {
69
69
  gain_threshold: f32,
70
70
  #[serde(rename = "segLength")]
71
71
  seg_length: i32,
72
+ #[serde(rename = "hyperMutator")]
73
+ hyper_mutator: i32,
72
74
  }
73
75
 
74
76
  // Individual successful file output (JSONL format)
@@ -89,6 +91,11 @@ struct FilteredMafDetails {
89
91
  t_alt_count: usize,
90
92
  t_depth: usize,
91
93
  invalid_rows: usize,
94
+ excluded_by_min_depth: usize,
95
+ excluded_by_min_alt_count: usize,
96
+ excluded_by_consequence_type: usize,
97
+ total_processed: usize,
98
+ total_included: usize,
92
99
  }
93
100
 
94
101
  // struct for CNV filter details
@@ -97,6 +104,11 @@ struct FilteredCnvDetails {
97
104
  segment_mean: usize,
98
105
  seg_length: usize,
99
106
  invalid_rows: usize,
107
+ excluded_by_loss_threshold: usize,
108
+ excluded_by_gain_threshold: usize,
109
+ excluded_by_segment_length: usize,
110
+ total_processed: usize,
111
+ total_included: usize,
100
112
  }
101
113
 
102
114
  // struct for per-case filter details
@@ -121,7 +133,7 @@ struct FinalSummary {
121
133
  included_maf_records: usize,
122
134
  included_cnv_records: usize,
123
135
  filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
124
- hyper_mutator_records: Vec<String>,
136
+ hyper_mutator_records: HashMap<String, Vec<String>>,
125
137
  }
126
138
 
127
139
  // Define the top-level input structure
@@ -149,17 +161,18 @@ async fn parse_content(
149
161
  data_type: &str,
150
162
  min_total_depth: i32,
151
163
  min_alt_allele_count: i32,
152
- hyper_mutator: i32,
164
+ maf_hyper_mutator: i32,
153
165
  consequences: &Option<Vec<String>>,
154
166
  gain_threshold: f32,
155
167
  loss_threshold: f32,
156
168
  seg_length: i32,
169
+ cnv_hyper_mutator: i32,
157
170
  filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
158
171
  filtered_maf_records: &AtomicUsize,
159
172
  filtered_cnv_records: &AtomicUsize,
160
173
  included_maf_records: &AtomicUsize,
161
174
  included_cnv_records: &AtomicUsize,
162
- hyper_mutator_records: &Arc<Mutex<Vec<String>>>,
175
+ hyper_mutator_records: &Arc<Mutex<HashMap<String, Vec<String>>>>,
163
176
  ) -> Result<Vec<Vec<String>>, (String, String, String)> {
164
177
  let config = match data_type {
165
178
  "cnv" => DataTypeConfig {
@@ -179,13 +192,24 @@ async fn parse_content(
179
192
  }
180
193
  };
181
194
 
182
- // check hyperMutator for MAF files
183
- if data_type == "maf" && hyper_mutator > 0 {
195
+ // check hyperMutator for MAF and CNV files
196
+ let hyper_mutator = if data_type == "maf" {
197
+ maf_hyper_mutator
198
+ } else {
199
+ cnv_hyper_mutator
200
+ };
201
+ if hyper_mutator > 0 {
184
202
  let line_count = content.lines().count();
185
203
  if line_count as i32 > hyper_mutator {
186
204
  let mut hyper_records = hyper_mutator_records.lock().await;
187
- if !hyper_records.contains(&case_id.to_string()) {
188
- hyper_records.push(case_id.to_string());
205
+ hyper_records
206
+ .entry(data_type.to_string())
207
+ .or_insert_with(Vec::new)
208
+ .push(case_id.to_string());
209
+ if data_type == "maf" {
210
+ filtered_maf_records.fetch_add(line_count, Ordering::Relaxed);
211
+ } else if data_type == "cnv" {
212
+ filtered_cnv_records.fetch_add(line_count, Ordering::Relaxed);
189
213
  }
190
214
  return Ok(Vec::new());
191
215
  }
@@ -326,6 +350,13 @@ async fn process_row(
326
350
 
327
351
  let case_details = filtered_map.get_mut(case_id).unwrap();
328
352
 
353
+ // Track total processed records
354
+ if data_type == "maf" {
355
+ case_details.maf.total_processed += 1;
356
+ } else if data_type == "cnv" {
357
+ case_details.cnv.total_processed += 1;
358
+ }
359
+
329
360
  // Handle consequence filtering and counting for MAF files
330
361
  if data_type == "maf" {
331
362
  if let Some(var_class_idx) = variant_classification_index {
@@ -347,6 +378,7 @@ async fn process_row(
347
378
  .rejected_consequences
348
379
  .entry(variant_classification.to_string())
349
380
  .or_insert(0) += 1;
381
+ case_details.maf.excluded_by_consequence_type += 1;
350
382
  filtered_maf_records.fetch_add(1, Ordering::Relaxed);
351
383
  return Ok(None);
352
384
  }
@@ -396,6 +428,15 @@ async fn process_row(
396
428
  element = process_segment_mean(&element, case_id, data_type, gain_threshold, loss_threshold)?;
397
429
  if element.is_empty() {
398
430
  case_details.cnv.segment_mean += 1;
431
+ let seg_mean = cont_lst[x].parse::<f32>().unwrap_or(0.0);
432
+ if seg_mean > loss_threshold && seg_mean < gain_threshold {
433
+ // Between thresholds - not a significant gain or loss
434
+ if seg_mean >= 0.0 {
435
+ case_details.cnv.excluded_by_gain_threshold += 1;
436
+ } else {
437
+ case_details.cnv.excluded_by_loss_threshold += 1;
438
+ }
439
+ }
399
440
  filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
400
441
  return Ok(None);
401
442
  }
@@ -433,11 +474,13 @@ async fn process_row(
433
474
 
434
475
  if alle_depth < min_total_depth {
435
476
  case_details.maf.t_depth += 1;
477
+ case_details.maf.excluded_by_min_depth += 1;
436
478
  filtered_maf_records.fetch_add(1, Ordering::Relaxed);
437
479
  return Ok(None);
438
480
  }
439
481
  if alt_count < min_alt_allele_count {
440
482
  case_details.maf.t_alt_count += 1;
483
+ case_details.maf.excluded_by_min_alt_count += 1;
441
484
  filtered_maf_records.fetch_add(1, Ordering::Relaxed);
442
485
  return Ok(None);
443
486
  }
@@ -447,6 +490,7 @@ async fn process_row(
447
490
  out_lst.push("mutation".to_string());
448
491
 
449
492
  // Update counters for included MAF records
493
+ case_details.maf.total_included += 1;
450
494
  included_maf_records.fetch_add(1, Ordering::Relaxed);
451
495
  }
452
496
 
@@ -475,9 +519,11 @@ async fn process_row(
475
519
  let cnv_length = end_position - start_position;
476
520
  if seg_length > 0 && cnv_length > seg_length {
477
521
  case_details.cnv.seg_length += 1;
522
+ case_details.cnv.excluded_by_segment_length += 1;
478
523
  filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
479
524
  return Ok(None);
480
525
  }
526
+ case_details.cnv.total_included += 1;
481
527
  included_cnv_records.fetch_add(1, Ordering::Relaxed);
482
528
  }
483
529
 
@@ -620,11 +666,12 @@ async fn download_data_streaming(
620
666
  host: &str,
621
667
  min_total_depth: i32,
622
668
  min_alt_allele_count: i32,
623
- hyper_mutator: i32,
669
+ maf_hyper_mutator: i32,
624
670
  consequences: &Option<Vec<String>>,
625
671
  gain_threshold: f32,
626
672
  loss_threshold: f32,
627
673
  seg_length: i32,
674
+ cnv_hyper_mutator: i32,
628
675
  ) {
629
676
  let data_urls: Vec<(String, String, String)> = data4dl
630
677
  .into_iter()
@@ -648,7 +695,7 @@ async fn download_data_streaming(
648
695
  let filtered_maf_records = Arc::new(AtomicUsize::new(0));
649
696
  let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
650
697
  let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
651
- let hyper_mutator_records = Arc::new(Mutex::new(Vec::<String>::new()));
698
+ let hyper_mutator_records = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
652
699
  let included_maf_records = Arc::new(AtomicUsize::new(0));
653
700
  let included_cnv_records = Arc::new(AtomicUsize::new(0));
654
701
 
@@ -685,11 +732,12 @@ async fn download_data_streaming(
685
732
  &data_type,
686
733
  min_total_depth,
687
734
  min_alt_allele_count,
688
- hyper_mutator,
735
+ maf_hyper_mutator,
689
736
  &consequences,
690
737
  gain_threshold,
691
738
  loss_threshold,
692
739
  seg_length,
740
+ cnv_hyper_mutator,
693
741
  &filtered_records,
694
742
  &filtered_maf_records,
695
743
  &filtered_cnv_records,
@@ -833,7 +881,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
833
881
  let case_files = input_js.case_files;
834
882
 
835
883
  // Set default maf_options
836
- let (min_total_depth, min_alt_allele_count, hyper_mutator, consequences) = match input_js.maf_options {
884
+ let (min_total_depth, min_alt_allele_count, maf_hyper_mutator, consequences) = match input_js.maf_options {
837
885
  Some(options) => (
838
886
  options.min_total_depth,
839
887
  options.min_alt_allele_count,
@@ -844,9 +892,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
844
892
  };
845
893
 
846
894
  // Set default cnv_options
847
- let (gain_threshold, loss_threshold, seg_length) = match input_js.cnv_options {
848
- Some(options) => (options.gain_threshold, options.loss_threshold, options.seg_length),
849
- None => (0.3, -0.4, 0), // Default values
895
+ let (gain_threshold, loss_threshold, seg_length, cnv_hyper_mutator) = match input_js.cnv_options {
896
+ Some(options) => (
897
+ options.gain_threshold,
898
+ options.loss_threshold,
899
+ options.seg_length,
900
+ options.hyper_mutator,
901
+ ),
902
+ None => (0.3, -0.4, 0, 500), // Default values
850
903
  };
851
904
 
852
905
  // Download data - this will now handle errors gracefully
@@ -855,11 +908,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
855
908
  HOST,
856
909
  min_total_depth,
857
910
  min_alt_allele_count,
858
- hyper_mutator,
911
+ maf_hyper_mutator,
859
912
  &consequences,
860
913
  gain_threshold,
861
914
  loss_threshold,
862
915
  seg_length,
916
+ cnv_hyper_mutator,
863
917
  )
864
918
  .await;
865
919