@sjcrh/proteinpaint-rust 2.132.0 → 2.133.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/gdcGRIN2.rs +70 -16
package/package.json
CHANGED
package/src/gdcGRIN2.rs
CHANGED
|
@@ -13,7 +13,7 @@
|
|
|
13
13
|
Output mutations as JSON array.
|
|
14
14
|
|
|
15
15
|
Example of usage:
|
|
16
|
-
echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":
|
|
16
|
+
echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20,"hyperMutator":8000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000, "hyperMutator":8000}}' | ./target/release/gdcGRIN2
|
|
17
17
|
*/
|
|
18
18
|
|
|
19
19
|
use flate2::read::GzDecoder;
|
|
@@ -69,6 +69,8 @@ struct CnvOptions {
|
|
|
69
69
|
gain_threshold: f32,
|
|
70
70
|
#[serde(rename = "segLength")]
|
|
71
71
|
seg_length: i32,
|
|
72
|
+
#[serde(rename = "hyperMutator")]
|
|
73
|
+
hyper_mutator: i32,
|
|
72
74
|
}
|
|
73
75
|
|
|
74
76
|
// Individual successful file output (JSONL format)
|
|
@@ -89,6 +91,11 @@ struct FilteredMafDetails {
|
|
|
89
91
|
t_alt_count: usize,
|
|
90
92
|
t_depth: usize,
|
|
91
93
|
invalid_rows: usize,
|
|
94
|
+
excluded_by_min_depth: usize,
|
|
95
|
+
excluded_by_min_alt_count: usize,
|
|
96
|
+
excluded_by_consequence_type: usize,
|
|
97
|
+
total_processed: usize,
|
|
98
|
+
total_included: usize,
|
|
92
99
|
}
|
|
93
100
|
|
|
94
101
|
// struct for CNV filter details
|
|
@@ -97,6 +104,11 @@ struct FilteredCnvDetails {
|
|
|
97
104
|
segment_mean: usize,
|
|
98
105
|
seg_length: usize,
|
|
99
106
|
invalid_rows: usize,
|
|
107
|
+
excluded_by_loss_threshold: usize,
|
|
108
|
+
excluded_by_gain_threshold: usize,
|
|
109
|
+
excluded_by_segment_length: usize,
|
|
110
|
+
total_processed: usize,
|
|
111
|
+
total_included: usize,
|
|
100
112
|
}
|
|
101
113
|
|
|
102
114
|
// struct for per-case filter details
|
|
@@ -121,7 +133,7 @@ struct FinalSummary {
|
|
|
121
133
|
included_maf_records: usize,
|
|
122
134
|
included_cnv_records: usize,
|
|
123
135
|
filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
|
|
124
|
-
hyper_mutator_records: Vec<String>,
|
|
136
|
+
hyper_mutator_records: HashMap<String, Vec<String>>,
|
|
125
137
|
}
|
|
126
138
|
|
|
127
139
|
// Define the top-level input structure
|
|
@@ -149,17 +161,18 @@ async fn parse_content(
|
|
|
149
161
|
data_type: &str,
|
|
150
162
|
min_total_depth: i32,
|
|
151
163
|
min_alt_allele_count: i32,
|
|
152
|
-
|
|
164
|
+
maf_hyper_mutator: i32,
|
|
153
165
|
consequences: &Option<Vec<String>>,
|
|
154
166
|
gain_threshold: f32,
|
|
155
167
|
loss_threshold: f32,
|
|
156
168
|
seg_length: i32,
|
|
169
|
+
cnv_hyper_mutator: i32,
|
|
157
170
|
filtered_records: &Arc<Mutex<HashMap<String, FilteredCaseDetails>>>,
|
|
158
171
|
filtered_maf_records: &AtomicUsize,
|
|
159
172
|
filtered_cnv_records: &AtomicUsize,
|
|
160
173
|
included_maf_records: &AtomicUsize,
|
|
161
174
|
included_cnv_records: &AtomicUsize,
|
|
162
|
-
hyper_mutator_records: &Arc<Mutex<Vec<String>>>,
|
|
175
|
+
hyper_mutator_records: &Arc<Mutex<HashMap<String, Vec<String>>>>,
|
|
163
176
|
) -> Result<Vec<Vec<String>>, (String, String, String)> {
|
|
164
177
|
let config = match data_type {
|
|
165
178
|
"cnv" => DataTypeConfig {
|
|
@@ -179,13 +192,24 @@ async fn parse_content(
|
|
|
179
192
|
}
|
|
180
193
|
};
|
|
181
194
|
|
|
182
|
-
// check hyperMutator for MAF files
|
|
183
|
-
if data_type == "maf"
|
|
195
|
+
// check hyperMutator for MAF and CNV files
|
|
196
|
+
let hyper_mutator = if data_type == "maf" {
|
|
197
|
+
maf_hyper_mutator
|
|
198
|
+
} else {
|
|
199
|
+
cnv_hyper_mutator
|
|
200
|
+
};
|
|
201
|
+
if hyper_mutator > 0 {
|
|
184
202
|
let line_count = content.lines().count();
|
|
185
203
|
if line_count as i32 > hyper_mutator {
|
|
186
204
|
let mut hyper_records = hyper_mutator_records.lock().await;
|
|
187
|
-
|
|
188
|
-
|
|
205
|
+
hyper_records
|
|
206
|
+
.entry(data_type.to_string())
|
|
207
|
+
.or_insert_with(Vec::new)
|
|
208
|
+
.push(case_id.to_string());
|
|
209
|
+
if data_type == "maf" {
|
|
210
|
+
filtered_maf_records.fetch_add(line_count, Ordering::Relaxed);
|
|
211
|
+
} else if data_type == "cnv" {
|
|
212
|
+
filtered_cnv_records.fetch_add(line_count, Ordering::Relaxed);
|
|
189
213
|
}
|
|
190
214
|
return Ok(Vec::new());
|
|
191
215
|
}
|
|
@@ -326,6 +350,13 @@ async fn process_row(
|
|
|
326
350
|
|
|
327
351
|
let case_details = filtered_map.get_mut(case_id).unwrap();
|
|
328
352
|
|
|
353
|
+
// Track total processed records
|
|
354
|
+
if data_type == "maf" {
|
|
355
|
+
case_details.maf.total_processed += 1;
|
|
356
|
+
} else if data_type == "cnv" {
|
|
357
|
+
case_details.cnv.total_processed += 1;
|
|
358
|
+
}
|
|
359
|
+
|
|
329
360
|
// Handle consequence filtering and counting for MAF files
|
|
330
361
|
if data_type == "maf" {
|
|
331
362
|
if let Some(var_class_idx) = variant_classification_index {
|
|
@@ -347,6 +378,7 @@ async fn process_row(
|
|
|
347
378
|
.rejected_consequences
|
|
348
379
|
.entry(variant_classification.to_string())
|
|
349
380
|
.or_insert(0) += 1;
|
|
381
|
+
case_details.maf.excluded_by_consequence_type += 1;
|
|
350
382
|
filtered_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
351
383
|
return Ok(None);
|
|
352
384
|
}
|
|
@@ -396,6 +428,15 @@ async fn process_row(
|
|
|
396
428
|
element = process_segment_mean(&element, case_id, data_type, gain_threshold, loss_threshold)?;
|
|
397
429
|
if element.is_empty() {
|
|
398
430
|
case_details.cnv.segment_mean += 1;
|
|
431
|
+
let seg_mean = cont_lst[x].parse::<f32>().unwrap_or(0.0);
|
|
432
|
+
if seg_mean > loss_threshold && seg_mean < gain_threshold {
|
|
433
|
+
// Between thresholds - not a significant gain or loss
|
|
434
|
+
if seg_mean >= 0.0 {
|
|
435
|
+
case_details.cnv.excluded_by_gain_threshold += 1;
|
|
436
|
+
} else {
|
|
437
|
+
case_details.cnv.excluded_by_loss_threshold += 1;
|
|
438
|
+
}
|
|
439
|
+
}
|
|
399
440
|
filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
|
|
400
441
|
return Ok(None);
|
|
401
442
|
}
|
|
@@ -433,11 +474,13 @@ async fn process_row(
|
|
|
433
474
|
|
|
434
475
|
if alle_depth < min_total_depth {
|
|
435
476
|
case_details.maf.t_depth += 1;
|
|
477
|
+
case_details.maf.excluded_by_min_depth += 1;
|
|
436
478
|
filtered_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
437
479
|
return Ok(None);
|
|
438
480
|
}
|
|
439
481
|
if alt_count < min_alt_allele_count {
|
|
440
482
|
case_details.maf.t_alt_count += 1;
|
|
483
|
+
case_details.maf.excluded_by_min_alt_count += 1;
|
|
441
484
|
filtered_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
442
485
|
return Ok(None);
|
|
443
486
|
}
|
|
@@ -447,6 +490,7 @@ async fn process_row(
|
|
|
447
490
|
out_lst.push("mutation".to_string());
|
|
448
491
|
|
|
449
492
|
// Update counters for included MAF records
|
|
493
|
+
case_details.maf.total_included += 1;
|
|
450
494
|
included_maf_records.fetch_add(1, Ordering::Relaxed);
|
|
451
495
|
}
|
|
452
496
|
|
|
@@ -475,9 +519,11 @@ async fn process_row(
|
|
|
475
519
|
let cnv_length = end_position - start_position;
|
|
476
520
|
if seg_length > 0 && cnv_length > seg_length {
|
|
477
521
|
case_details.cnv.seg_length += 1;
|
|
522
|
+
case_details.cnv.excluded_by_segment_length += 1;
|
|
478
523
|
filtered_cnv_records.fetch_add(1, Ordering::Relaxed);
|
|
479
524
|
return Ok(None);
|
|
480
525
|
}
|
|
526
|
+
case_details.cnv.total_included += 1;
|
|
481
527
|
included_cnv_records.fetch_add(1, Ordering::Relaxed);
|
|
482
528
|
}
|
|
483
529
|
|
|
@@ -620,11 +666,12 @@ async fn download_data_streaming(
|
|
|
620
666
|
host: &str,
|
|
621
667
|
min_total_depth: i32,
|
|
622
668
|
min_alt_allele_count: i32,
|
|
623
|
-
|
|
669
|
+
maf_hyper_mutator: i32,
|
|
624
670
|
consequences: &Option<Vec<String>>,
|
|
625
671
|
gain_threshold: f32,
|
|
626
672
|
loss_threshold: f32,
|
|
627
673
|
seg_length: i32,
|
|
674
|
+
cnv_hyper_mutator: i32,
|
|
628
675
|
) {
|
|
629
676
|
let data_urls: Vec<(String, String, String)> = data4dl
|
|
630
677
|
.into_iter()
|
|
@@ -648,7 +695,7 @@ async fn download_data_streaming(
|
|
|
648
695
|
let filtered_maf_records = Arc::new(AtomicUsize::new(0));
|
|
649
696
|
let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
|
|
650
697
|
let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
|
|
651
|
-
let hyper_mutator_records = Arc::new(Mutex::new(
|
|
698
|
+
let hyper_mutator_records = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
|
|
652
699
|
let included_maf_records = Arc::new(AtomicUsize::new(0));
|
|
653
700
|
let included_cnv_records = Arc::new(AtomicUsize::new(0));
|
|
654
701
|
|
|
@@ -685,11 +732,12 @@ async fn download_data_streaming(
|
|
|
685
732
|
&data_type,
|
|
686
733
|
min_total_depth,
|
|
687
734
|
min_alt_allele_count,
|
|
688
|
-
|
|
735
|
+
maf_hyper_mutator,
|
|
689
736
|
&consequences,
|
|
690
737
|
gain_threshold,
|
|
691
738
|
loss_threshold,
|
|
692
739
|
seg_length,
|
|
740
|
+
cnv_hyper_mutator,
|
|
693
741
|
&filtered_records,
|
|
694
742
|
&filtered_maf_records,
|
|
695
743
|
&filtered_cnv_records,
|
|
@@ -833,7 +881,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
833
881
|
let case_files = input_js.case_files;
|
|
834
882
|
|
|
835
883
|
// Set default maf_options
|
|
836
|
-
let (min_total_depth, min_alt_allele_count, hyper_mutator, consequences) = match input_js.maf_options {
|
|
884
|
+
let (min_total_depth, min_alt_allele_count, maf_hyper_mutator, consequences) = match input_js.maf_options {
|
|
837
885
|
Some(options) => (
|
|
838
886
|
options.min_total_depth,
|
|
839
887
|
options.min_alt_allele_count,
|
|
@@ -844,9 +892,14 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
844
892
|
};
|
|
845
893
|
|
|
846
894
|
// Set default cnv_options
|
|
847
|
-
let (gain_threshold, loss_threshold, seg_length) = match input_js.cnv_options {
|
|
848
|
-
Some(options) => (
|
|
849
|
-
|
|
895
|
+
let (gain_threshold, loss_threshold, seg_length, cnv_hyper_mutator) = match input_js.cnv_options {
|
|
896
|
+
Some(options) => (
|
|
897
|
+
options.gain_threshold,
|
|
898
|
+
options.loss_threshold,
|
|
899
|
+
options.seg_length,
|
|
900
|
+
options.hyper_mutator,
|
|
901
|
+
),
|
|
902
|
+
None => (0.3, -0.4, 0, 500), // Default values
|
|
850
903
|
};
|
|
851
904
|
|
|
852
905
|
// Download data - this will now handle errors gracefully
|
|
@@ -855,11 +908,12 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
|
|
|
855
908
|
HOST,
|
|
856
909
|
min_total_depth,
|
|
857
910
|
min_alt_allele_count,
|
|
858
|
-
|
|
911
|
+
maf_hyper_mutator,
|
|
859
912
|
&consequences,
|
|
860
913
|
gain_threshold,
|
|
861
914
|
loss_threshold,
|
|
862
915
|
seg_length,
|
|
916
|
+
cnv_hyper_mutator,
|
|
863
917
|
)
|
|
864
918
|
.await;
|
|
865
919
|
|