@sjcrh/proteinpaint-rust 2.132.1-0 → 2.133.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. package/package.json +1 -1
  2. package/src/gdcGRIN2.rs +38 -16
package/package.json CHANGED
@@ -1,5 +1,5 @@
1
1
  {
2
- "version": "2.132.1-0",
2
+ "version": "2.133.0",
3
3
  "name": "@sjcrh/proteinpaint-rust",
4
4
  "type": "module",
5
5
  "description": "Rust-based utilities for proteinpaint",
package/src/gdcGRIN2.rs CHANGED
@@ -13,7 +13,7 @@
13
13
  Output mutations as JSON array.
14
14
 
15
15
  Example of usage:
16
- echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":1000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
16
+ echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":8000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000, "hyperMutator":8000}}' | ./target/release/gdcGRIN2
17
17
  */
18
18
 
19
19
  use flate2::read::GzDecoder;
@@ -69,6 +69,8 @@ struct CnvOptions {
69
69
  gain_threshold: f32,
70
70
  #[serde(rename = "segLength")]
71
71
  seg_length: i32,
72
+ #[serde(rename = "hyperMutator")]
73
+ hyper_mutator: i32,
72
74
  }
73
75
 
74
76
  // Individual successful file output (JSONL format)
@@ -131,7 +133,7 @@ struct FinalSummary {
131
133
  included_maf_records: usize,
132
134
  included_cnv_records: usize,
133
135
  filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
134
- hyper_mutator_records: Vec<String>,
136
+ hyper_mutator_records: HashMap<String, Vec<String>>,
135
137
  }
136
138
 
137
139
  // Define the top-level input structure
@@ -159,17 +161,18 @@ async fn parse_content(
159
161
  data_type: &str,
160
162
  min_total_depth: i32,
161
163
  min_alt_allele_count: i32,
162
- hyper_mutator: i32,
164
+ maf_hyper_mutator: i32,
163
165
  consequences: &Option<Vec<String>>,
164
166
  gain_threshold: f32,
165
167
  loss_threshold: f32,
166
168
  seg_length: i32,
169
+ cnv_hyper_mutator: i32,
167
170
  filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
168
171
  filtered_maf_records: &AtomicUsize,
169
172
  filtered_cnv_records: &AtomicUsize,
170
173
  included_maf_records: &AtomicUsize,
171
174
  included_cnv_records: &AtomicUsize,
172
- hyper_mutator_records: &Arc<Mutex<Vec<String>>>,
175
+ hyper_mutator_records: &Arc<Mutex<HashMap<String, Vec<String>>>>,
173
176
  ) -> Result<Vec<Vec<String>>, (String, String, String)> {
174
177
  let config = match data_type {
175
178
  "cnv" => DataTypeConfig {
@@ -189,13 +192,24 @@ async fn parse_content(
189
192
  }
190
193
  };
191
194
 
192
- // check hyperMutator for MAF files
193
- if data_type == "maf" && hyper_mutator > 0 {
195
+ // check hyperMutator for MAF and CNV files
196
+ let hyper_mutator = if data_type == "maf" {
197
+ maf_hyper_mutator
198
+ } else {
199
+ cnv_hyper_mutator
200
+ };
201
+ if hyper_mutator > 0 {
194
202
  let line_count = content.lines().count();
195
203
  if line_count as i32 > hyper_mutator {
196
204
  let mut hyper_records = hyper_mutator_records.lock().await;
197
- if !hyper_records.contains(&case_id.to_string()) {
198
- hyper_records.push(case_id.to_string());
205
+ hyper_records
206
+ .entry(data_type.to_string())
207
+ .or_insert_with(Vec::new)
208
+ .push(case_id.to_string());
209
+ if data_type == "maf" {
210
+ filtered_maf_records.fetch_add(line_count, Ordering::Relaxed);
211
+ } else if data_type == "cnv" {
212
+ filtered_cnv_records.fetch_add(line_count, Ordering::Relaxed);
199
213
  }
200
214
  return Ok(Vec::new());
201
215
  }
@@ -652,11 +666,12 @@ async fn download_data_streaming(
652
666
  host: &str,
653
667
  min_total_depth: i32,
654
668
  min_alt_allele_count: i32,
655
- hyper_mutator: i32,
669
+ maf_hyper_mutator: i32,
656
670
  consequences: &Option<Vec<String>>,
657
671
  gain_threshold: f32,
658
672
  loss_threshold: f32,
659
673
  seg_length: i32,
674
+ cnv_hyper_mutator: i32,
660
675
  ) {
661
676
  let data_urls: Vec<(String, String, String)> = data4dl
662
677
  .into_iter()
@@ -680,7 +695,7 @@ async fn download_data_streaming(
680
695
  let filtered_maf_records = Arc::new(AtomicUsize::new(0));
681
696
  let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
682
697
  let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
683
- let hyper_mutator_records = Arc::new(Mutex::new(Vec::<String>::new()));
698
+ let hyper_mutator_records = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
684
699
  let included_maf_records = Arc::new(AtomicUsize::new(0));
685
700
  let included_cnv_records = Arc::new(AtomicUsize::new(0));
686
701
 
@@ -717,11 +732,12 @@ async fn download_data_streaming(
717
732
  &data_type,
718
733
  min_total_depth,
719
734
  min_alt_allele_count,
720
- hyper_mutator,
735
+ maf_hyper_mutator,
721
736
  &consequences,
722
737
  gain_threshold,
723
738
  loss_threshold,
724
739
  seg_length,
740
+ cnv_hyper_mutator,
725
741
  &filtered_records,
726
742
  &filtered_maf_records,
727
743
  &filtered_cnv_records,
@@ -865,7 +881,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
865
881
  let case_files = input_js.case_files;
866
882
 
867
883
  // Set default maf_options
868
- let (min_total_depth, min_alt_allele_count, hyper_mutator, consequences) = match input_js.maf_options {
884
+ let (min_total_depth, min_alt_allele_count, maf_hyper_mutator, consequences) = match input_js.maf_options {
869
885
  Some(options) => (
870
886
  options.min_total_depth,
871
887
  options.min_alt_allele_count,
@@ -876,9 +892,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
876
892
  };
877
893
 
878
894
  // Set default cnv_options
879
- let (gain_threshold, loss_threshold, seg_length) = match input_js.cnv_options {
880
- Some(options) => (options.gain_threshold, options.loss_threshold, options.seg_length),
881
- None => (0.3, -0.4, 0), // Default values
895
+ let (gain_threshold, loss_threshold, seg_length, cnv_hyper_mutator) = match input_js.cnv_options {
896
+ Some(options) => (
897
+ options.gain_threshold,
898
+ options.loss_threshold,
899
+ options.seg_length,
900
+ options.hyper_mutator,
901
+ ),
902
+ None => (0.3, -0.4, 0, 500), // Default values
882
903
  };
883
904
 
884
905
  // Download data - this will now handle errors gracefully
@@ -887,11 +908,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
887
908
  HOST,
888
909
  min_total_depth,
889
910
  min_alt_allele_count,
890
- hyper_mutator,
911
+ maf_hyper_mutator,
891
912
  &consequences,
892
913
  gain_threshold,
893
914
  loss_threshold,
894
915
  seg_length,
916
+ cnv_hyper_mutator,
895
917
  )
896
918
  .await;
897
919