@sjcrh/proteinpaint-rust 2.135.0 → 2.135.2-0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +1 -1
- package/src/gdcGRIN2.rs +299 -52
package/package.json (CHANGED)
package/src/gdcGRIN2.rs (CHANGED)
@@ -1,5 +1,5 @@
 /*
-This script
+This script can either download cohort maf/cnv files from GDC or read them from local files, with default behavior being to download from GDC. It gracefully handles timeout and other possible errors related to GDC API processing or file reading for use by the client file summary div.
 
 Key improvements:
 1. Graceful error handling - individual file failures don't stop the entire process
@@ -7,6 +7,10 @@
 3. More detailed error reporting
 4. Continues processing even when some files fail
 5. Added chromosome filtering
+6. Supports reading from local files with --from-file flag
+
+Command-line arguments:
+- --from-file: Read data from local files instead of downloading from GDC
 
 Input JSON:
 caseFiles
@@ -15,9 +19,16 @@
 chromosomes: chromosomes will be included:[]
 
 Output mutations as JSON array.
+{
+    grin2lesion:str,
+    summary:{}
+}
 
 Example of usage:
-echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth":
+echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 10,"minAltAlleleCount": 2,"hyperMutator":8000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-0.4, "gainThreshold": 0.3, "segLength":2000000, "hyperMutator":500}, "chromosomes":["chr1","chr2","chr3"], "max_record": 100000}' | ./target/release/gdcGRIN2
+Example of usage (read from local files):
+echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 10,"minAltAlleleCount": 2,"hyperMutator":8000,"consequences":["missense_variant","frameshift_variant"]}, "cnvOptions":{"lossThreshold":-0.4, "gainThreshold": 0.3, "segLength":2000000, "hyperMutator":500}, "chromosomes":["chr1","chr2","chr3"], "max_record": 100000}' | ./target/release/gdcGRIN2 --from-file
+
 */
 
 use flate2::read::GzDecoder;
@@ -26,10 +37,11 @@ use memchr::memchr;
 use serde::{Deserialize, Serialize};
 use serde_json;
 use std::collections::{HashMap, HashSet};
+use std::env;
+use std::fs;
 use std::io::{self, Read};
 use std::sync::Arc;
 use std::sync::atomic::{AtomicUsize, Ordering};
-use std::thread::sleep;
 use std::time::Duration;
 use tokio::io::{AsyncReadExt, BufReader};
 use tokio::sync::Mutex;
@@ -77,16 +89,6 @@ struct CnvOptions {
     hyper_mutator: i32,
 }
 
-// Individual successful file output (JSONL format)
-#[derive(serde::Serialize)]
-struct SuccessfulFileOutput {
-    #[serde(rename = "type")]
-    output_type: String, // Always "data"
-    case_id: String,
-    data_type: String,
-    data: Vec<Vec<String>>,
-}
-
 // struct for MAF filter details
 #[derive(Clone, Serialize, Default)]
 struct FilteredMafDetails {
@@ -127,8 +129,6 @@ struct FilteredCaseDetails {
 // Final summary output (JSONL format)
 #[derive(serde::Serialize)]
 struct FinalSummary {
-    #[serde(rename = "type")]
-    output_type: String, // Always "summary"
     total_files: usize,
     successful_files: usize,
     failed_files: usize,
@@ -140,6 +140,14 @@ struct FinalSummary {
     included_cnv_records: usize,
     filtered_records_by_case: HashMap<String, FilteredCaseDetails>,
     hyper_mutator_records: HashMap<String, Vec<String>>,
+    excluded_by_max_record: HashMap<String, Vec<String>>,
+}
+
+// Struct holding both the serialized grin2lesion records and the FinalSummary
+#[derive(Serialize)]
+struct Output {
+    grin2lesion: String,
+    summary: FinalSummary,
 }
 
 // Define the top-level input structure
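The new Output struct replaces the per-file JSONL stream with a single envelope, and grin2lesion holds an already-serialized JSON array: once Output itself is serialized, the lesion table arrives double-encoded (a JSON string inside the outer object) and a consumer must parse that field a second time. A minimal sketch of that shape; the reduced Summary struct here is hypothetical, and only serde/serde_json with the derive feature (both already used by this file) are assumed:

    use serde::Serialize;

    #[derive(Serialize)]
    struct Output {
        grin2lesion: String, // an already-serialized JSON array
        summary: Summary,
    }

    #[derive(Serialize)]
    struct Summary {
        total_files: usize,
    }

    fn main() {
        let records = vec![vec!["chr1", "100", "200"]];
        let out = Output {
            grin2lesion: serde_json::to_string(&records).unwrap(),
            summary: Summary { total_files: 1 },
        };
        // Prints one line; grin2lesion appears as an escaped string:
        // {"grin2lesion":"[[\"chr1\",\"100\",\"200\"]]","summary":{"total_files":1}}
        println!("{}", serde_json::to_string(&out).unwrap());
    }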
@@ -152,6 +160,7 @@ struct InputData {
     #[serde(rename = "cnvOptions")]
     cnv_options: Option<CnvOptions>,
     chromosomes: Vec<String>,
+    max_record: usize,
 }
 
 // Configuration for different data types
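Because max_record is declared as a plain usize with neither Option nor a serde default, deserializing InputData now fails outright when the input JSON omits the field. A small sketch of the trade-off, with the struct trimmed to two fields (a hypothetical reduction; serde's #[serde(default = "...")] attribute is the standard escape hatch if the field should be optional):

    use serde::Deserialize;

    #[derive(Deserialize)]
    struct InputData {
        chromosomes: Vec<String>,
        max_record: usize, // required: omitting it fails with "missing field `max_record`"
    }

    fn default_max_record() -> usize {
        100_000
    }

    #[derive(Deserialize)]
    struct InputDataLenient {
        chromosomes: Vec<String>,
        // optional variant: falls back to 100_000 when the field is absent
        #[serde(default = "default_max_record")]
        max_record: usize,
    }

    fn main() {
        let strict: Result<InputData, _> = serde_json::from_str(r#"{"chromosomes": []}"#);
        assert!(strict.is_err());
        let lenient: InputDataLenient = serde_json::from_str(r#"{"chromosomes": []}"#).unwrap();
        assert_eq!(lenient.max_record, 100_000);
    }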
@@ -744,10 +753,9 @@ async fn download_single_file(
     ))
 }
 
-///
+/// Downloading from GDC
 /// Outputs JSONL format: one JSON object per line
-
-async fn download_data_streaming(
+async fn download_data(
     data4dl: HashMap<String, DataType>,
     host: &str,
     min_total_depth: i32,
@@ -759,6 +767,7 @@ async fn download_data_streaming(
     seg_length: i32,
     cnv_hyper_mutator: i32,
     chromosomes: &HashSet<String>,
+    max_record: usize,
 ) {
     let data_urls: Vec<(String, String, String)> = data4dl
         .into_iter()
@@ -783,8 +792,11 @@ async fn download_data_streaming(
     let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
     let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
     let hyper_mutator_records = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
+    let excluded_by_max_record = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
     let included_maf_records = Arc::new(AtomicUsize::new(0));
     let included_cnv_records = Arc::new(AtomicUsize::new(0));
+    let all_records = Arc::new(Mutex::new(Vec::<Vec<String>>::new()));
+    let data_count = Arc::new(AtomicUsize::new(0));
 
     // Only collect errors (successful data is output immediately)
     let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
@@ -807,9 +819,25 @@
             let included_maf_records = Arc::clone(&included_maf_records);
             let included_cnv_records = Arc::clone(&included_cnv_records);
             let hyper_mutator_records = Arc::clone(&hyper_mutator_records);
+            let excluded_by_max_record = Arc::clone(&excluded_by_max_record);
             let errors = Arc::clone(&errors);
+            let all_records = Arc::clone(&all_records);
+            let data_count = Arc::clone(&data_count);
 
             async move {
+                let current_count = data_count.load(Ordering::Relaxed);
+                if current_count >= max_record {
+                    // Skip processing and mark as excluded by max_record
+                    if let Ok((case_id, data_type, _)) = download_result {
+                        let mut exclud_max_record = excluded_by_max_record.lock().await;
+                        exclud_max_record
+                            .entry(data_type.to_string())
+                            .or_insert_with(Vec::new)
+                            .push(case_id.to_string());
+                        successful_downloads.fetch_add(1, Ordering::Relaxed);
+                    }
+                    return;
+                }
                 match download_result {
                     Ok((case_id, data_type, content)) => {
                         // Try to parse the content
@@ -836,24 +864,18 @@ async fn download_data_streaming(
                         .await
                         {
                             Ok(parsed_data) => {
-
-
-
-
-
-
-
-
-
-
-
-                                // Force flush to ensure Node.js sees it immediately
-                                use std::io::Write;
-                                let _ = std::io::stdout().flush();
-                                // Optional: Add small delay to separate lines
-                                sleep(Duration::from_millis(10));
+                                let remaining = max_record - current_count;
+                                if parsed_data.len() <= remaining {
+                                    data_count.fetch_add(parsed_data.len(), Ordering::Relaxed);
+                                    all_records.lock().await.extend(parsed_data);
+                                } else {
+                                    // Skip file if it would exceed max_record
+                                    let mut exclud_max_record = excluded_by_max_record.lock().await;
+                                    exclud_max_record
+                                        .entry(data_type.to_string())
+                                        .or_insert_with(Vec::new)
+                                        .push(case_id.to_string());
                                 }
-
                                 successful_downloads.fetch_add(1, Ordering::Relaxed);
                             }
                             Err((cid, dtp, error)) => {
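The capping logic added here (and repeated in the new local-read path below) follows one pattern: read the running count, skip the whole file when its parsed batch would push past max_record, otherwise bump the counter and append the batch. Note that the load and the later fetch_add are separated by an await while up to three tasks run concurrently, so two tasks can observe the same count and the cap is best-effort rather than exact. A self-contained sketch of the pattern, assuming only tokio and futures as this file already does:

    use futures::StreamExt;
    use std::sync::Arc;
    use std::sync::atomic::{AtomicUsize, Ordering};
    use tokio::sync::Mutex;

    #[tokio::main]
    async fn main() {
        let max_record = 5usize;
        let data_count = Arc::new(AtomicUsize::new(0));
        let all_records = Arc::new(Mutex::new(Vec::<Vec<String>>::new()));

        // Each "file" yields a batch of two parsed rows.
        let batches: Vec<Vec<Vec<String>>> = (0..4)
            .map(|i| vec![vec![format!("row-{i}")]; 2])
            .collect();

        futures::stream::iter(batches)
            .for_each_concurrent(3, |parsed_data| {
                let data_count = Arc::clone(&data_count);
                let all_records = Arc::clone(&all_records);
                async move {
                    // Check-then-add is not atomic across the await, so the
                    // cap can be overshot slightly when tasks race.
                    let current = data_count.load(Ordering::Relaxed);
                    if current >= max_record || parsed_data.len() > max_record - current {
                        return; // whole batch skipped, mirroring excluded_by_max_record
                    }
                    data_count.fetch_add(parsed_data.len(), Ordering::Relaxed);
                    all_records.lock().await.extend(parsed_data);
                }
            })
            .await;

        println!("kept {} records", all_records.lock().await.len()); // 4 with these batch sizes
    }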
@@ -904,7 +926,6 @@ async fn download_data_streaming(
     let included_cnv_count = included_cnv_records.load(Ordering::Relaxed);
 
     let summary = FinalSummary {
-        output_type: "summary".to_string(),
         total_files,
         successful_files: success_count,
         failed_files: failed_count,
@@ -916,10 +937,214 @@ async fn download_data_streaming(
         included_maf_records: included_maf_count,
         included_cnv_records: included_cnv_count,
         hyper_mutator_records: hyper_mutator_records.lock().await.clone(),
+        excluded_by_max_record: excluded_by_max_record.lock().await.clone(),
     };
 
+    let grin2lesion = serde_json::to_string(&all_records.lock().await.drain(..).collect::<Vec<Vec<String>>>())
+        .unwrap_or_else(|_| "[]".to_string());
+    let output = Output { grin2lesion, summary };
+
     // Output final summary - Node.js will know processing is complete when it sees this
-    if let Ok(json) = serde_json::to_string(&summary) {
+    // if let Ok(json) = serde_json::to_string(&summary) {
+    if let Ok(json) = serde_json::to_string(&output) {
+        println!("{}", json);
+        use std::io::Write;
+        let _ = std::io::stdout().flush();
+    }
+}
+
+/// Read data from local file
+async fn localread_data(
+    case_files: HashMap<String, DataType>,
+    min_total_depth: i32,
+    min_alt_allele_count: i32,
+    maf_hyper_mutator: i32,
+    consequences: &Option<Vec<String>>,
+    gain_threshold: f32,
+    loss_threshold: f32,
+    seg_length: i32,
+    cnv_hyper_mutator: i32,
+    chromosomes: &HashSet<String>,
+    max_record: usize,
+) {
+    let data_files: Vec<(String, String, String)> = case_files
+        .into_iter()
+        .flat_map(|(case_id, data_types)| {
+            let mut files = Vec::new();
+            if let Some(cnv_file) = &data_types.cnv {
+                files.push((case_id.clone(), "cnv".to_string(), cnv_file.clone()));
+            }
+            if let Some(maf_file) = &data_types.maf {
+                files.push((case_id.clone(), "maf".to_string(), maf_file.clone()));
+            }
+            files
+        })
+        .collect();
+    let total_files = data_files.len();
+
+    // Counters for final summary
+    let successful_reads = Arc::new(AtomicUsize::new(0));
+    let failed_reads = Arc::new(AtomicUsize::new(0));
+    let filtered_maf_records = Arc::new(AtomicUsize::new(0));
+    let filtered_cnv_records = Arc::new(AtomicUsize::new(0));
+    let filtered_records = Arc::new(Mutex::new(HashMap::<String, FilteredCaseDetails>::new()));
+    let hyper_mutator_records = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
+    let excluded_by_max_record = Arc::new(Mutex::new(HashMap::<String, Vec<String>>::new()));
+    let included_maf_records = Arc::new(AtomicUsize::new(0));
+    let included_cnv_records = Arc::new(AtomicUsize::new(0));
+    let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));
+    let all_records = Arc::new(Mutex::new(Vec::<Vec<String>>::new()));
+    let data_count = Arc::new(AtomicUsize::new(0));
+
+    // Process files concurrently
+    let read_futures = futures::stream::iter(data_files.into_iter().map(
+        |(case_id, data_type, file_path)| async move {
+            // read the local file
+            match fs::read_to_string(&file_path) {
+                Ok(content) => Ok((case_id, data_type, content)),
+                Err(e) => Err((
+                    case_id,
+                    data_type,
+                    format!("file_read_error: {}", e),
+                    1, // Single attempt for local file reading
+                )),
+            }
+        },
+    ));
+
+    // Process files and output results
+    read_futures
+        .buffer_unordered(3)
+        .for_each(|read_result| {
+            let successful_reads = Arc::clone(&successful_reads);
+            let failed_reads = Arc::clone(&failed_reads);
+            let filtered_maf_records = Arc::clone(&filtered_maf_records);
+            let filtered_cnv_records = Arc::clone(&filtered_cnv_records);
+            let filtered_records = Arc::clone(&filtered_records);
+            let included_maf_records = Arc::clone(&included_maf_records);
+            let included_cnv_records = Arc::clone(&included_cnv_records);
+            let hyper_mutator_records = Arc::clone(&hyper_mutator_records);
+            let excluded_by_max_record = Arc::clone(&excluded_by_max_record);
+            let errors = Arc::clone(&errors);
+            let all_records = Arc::clone(&all_records);
+            let data_count = Arc::clone(&data_count);
+
+            async move {
+                let current_count = data_count.load(Ordering::Relaxed);
+                if current_count >= max_record {
+                    // Skip processing and mark as excluded by max_record
+                    if let Ok((case_id, data_type, _)) = read_result {
+                        let mut exclud_max_record = excluded_by_max_record.lock().await;
+                        exclud_max_record
+                            .entry(data_type.to_string())
+                            .or_insert_with(Vec::new)
+                            .push(case_id.to_string());
+                        successful_reads.fetch_add(1, Ordering::Relaxed);
+                    }
+                    return;
+                }
+                match read_result {
+                    Ok((case_id, data_type, content)) => {
+                        match parse_content(
+                            &content,
+                            &case_id,
+                            &data_type,
+                            min_total_depth,
+                            min_alt_allele_count,
+                            maf_hyper_mutator,
+                            consequences,
+                            gain_threshold,
+                            loss_threshold,
+                            seg_length,
+                            cnv_hyper_mutator,
+                            chromosomes,
+                            &filtered_records,
+                            &filtered_maf_records,
+                            &filtered_cnv_records,
+                            &included_maf_records,
+                            &included_cnv_records,
+                            &hyper_mutator_records,
+                        )
+                        .await
+                        {
+                            Ok(parsed_data) => {
+                                let remaining = max_record - current_count;
+                                if parsed_data.len() <= remaining {
+                                    data_count.fetch_add(parsed_data.len(), Ordering::Relaxed);
+                                    all_records.lock().await.extend(parsed_data);
+                                } else {
+                                    // Skip file if it would exceed max_record
+                                    let mut exclud_max_record = excluded_by_max_record.lock().await;
+                                    exclud_max_record
+                                        .entry(data_type.to_string())
+                                        .or_insert_with(Vec::new)
+                                        .push(case_id.to_string());
+                                }
+                                successful_reads.fetch_add(1, Ordering::Relaxed);
+                            }
+                            Err((cid, dtp, error)) => {
+                                failed_reads.fetch_add(1, Ordering::Relaxed);
+                                let error = ErrorEntry {
+                                    case_id: cid,
+                                    data_type: dtp,
+                                    error_type: "parsing_error".to_string(),
+                                    error_details: error,
+                                    attempts_made: 1,
+                                };
+                                errors.lock().await.push(error);
+                            }
+                        }
+                    }
+                    Err((case_id, data_type, error_details, attempts)) => {
+                        failed_reads.fetch_add(1, Ordering::Relaxed);
+                        let (error_type, clean_details) = if error_details.contains(":") {
+                            let parts: Vec<&str> = error_details.splitn(2, ": ").collect();
+                            (parts[0].to_string(), parts[1].to_string())
+                        } else {
+                            ("unknown_error".to_string(), error_details)
+                        };
+                        let error = ErrorEntry {
+                            case_id,
+                            data_type,
+                            error_type,
+                            error_details: clean_details,
+                            attempts_made: attempts,
+                        };
+                        errors.lock().await.push(error);
+                    }
+                }
+            }
+        })
+        .await;
+    // Output final summary as the last line
+    let success_count = successful_reads.load(Ordering::Relaxed);
+    let failed_count = failed_reads.load(Ordering::Relaxed);
+    let filtered_maf_count = filtered_maf_records.load(Ordering::Relaxed);
+    let filtered_cnv_count = filtered_cnv_records.load(Ordering::Relaxed);
+    let included_maf_count = included_maf_records.load(Ordering::Relaxed);
+    let included_cnv_count = included_cnv_records.load(Ordering::Relaxed);
+
+    let summary = FinalSummary {
+        total_files,
+        successful_files: success_count,
+        failed_files: failed_count,
+        errors: errors.lock().await.clone(),
+        filtered_records: filtered_maf_count + filtered_cnv_count,
+        filtered_maf_records: filtered_maf_count,
+        filtered_cnv_records: filtered_cnv_count,
+        filtered_records_by_case: filtered_records.lock().await.clone(),
+        included_maf_records: included_maf_count,
+        included_cnv_records: included_cnv_count,
+        hyper_mutator_records: hyper_mutator_records.lock().await.clone(),
+        excluded_by_max_record: excluded_by_max_record.lock().await.clone(),
+    };
+
+    let grin2lesion = serde_json::to_string(&all_records.lock().await.drain(..).collect::<Vec<Vec<String>>>())
+        .unwrap_or_else(|_| "[]".to_string());
+    let output = Output { grin2lesion, summary };
+
+    // Output final JSON array
+    if let Ok(json) = serde_json::to_string(&output) {
         println!("{}", json);
         use std::io::Write;
         let _ = std::io::stdout().flush();
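One fragile spot in the error classifier above: it gates on contains(":") but splits on ": " (colon plus space), so a detail string like "oops:unexpected" would pass the check, splitn would yield a single part, and parts[1] would panic. In this file the Err branch only ever carries "file_read_error: ..." strings, so the panic is latent rather than live; still, str::split_once keeps the check and the split from disagreeing. A sketch:

    fn classify(error_details: String) -> (String, String) {
        // split_once returns None unless the exact ": " separator is present,
        // so the check and the split can never disagree.
        match error_details.split_once(": ") {
            Some((error_type, rest)) => (error_type.to_string(), rest.to_string()),
            None => ("unknown_error".to_string(), error_details),
        }
    }

    fn main() {
        assert_eq!(
            classify("file_read_error: No such file or directory".to_string()),
            ("file_read_error".to_string(), "No such file or directory".to_string())
        );
        // The contains(":") + splitn(2, ": ") version would panic on this input.
        assert_eq!(
            classify("oops:unexpected".to_string()),
            ("unknown_error".to_string(), "oops:unexpected".to_string())
        );
    }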
@@ -928,6 +1153,9 @@ async fn download_data_streaming(
 
 #[tokio::main]
 async fn main() -> Result<(), Box<dyn std::error::Error>> {
+    let args: Vec<String> = env::args().collect();
+    let from_file = args.contains(&"--from-file".to_string());
+
     const HOST: &str = "https://api.gdc.cancer.gov/data/";
 
     // Read input with timeout
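A side note on the new flag check: args.contains(&"--from-file".to_string()) allocates a String only to compare against it. Iterating the arguments directly behaves the same (position-independent, no attached value) without that allocation; a sketch:

    use std::env;

    fn main() {
        // Compares each argument against the flag without building a Vec or an extra String.
        let from_file = env::args().any(|a| a == "--from-file");
        println!("from_file = {}", from_file);
    }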
@@ -967,6 +1195,7 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
     }
 
     let case_files = input_js.case_files;
+    let max_record: usize = input_js.max_record;
 
     // Set default maf_options
     let (min_total_depth, min_alt_allele_count, maf_hyper_mutator, consequences) = match input_js.maf_options {
@@ -993,21 +1222,39 @@
     // Convert Vec<String> to HashSet<String> for faster lookup
     let chromosomes = input_js.chromosomes.into_iter().collect::<HashSet<String>>();
 
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+    if from_file {
+        localread_data(
+            case_files,
+            min_total_depth,
+            min_alt_allele_count,
+            maf_hyper_mutator,
+            &consequences,
+            gain_threshold,
+            loss_threshold,
+            seg_length,
+            cnv_hyper_mutator,
+            &chromosomes,
+            max_record,
+        )
+        .await;
+    } else {
+        // Download data from GDC - this will now handle errors gracefully
+        download_data(
+            case_files,
+            HOST,
+            min_total_depth,
+            min_alt_allele_count,
+            maf_hyper_mutator,
+            &consequences,
+            gain_threshold,
+            loss_threshold,
+            seg_length,
+            cnv_hyper_mutator,
+            &chromosomes,
+            max_record,
+        )
+        .await;
+    }
 
     // Always exit successfully - individual file failures are logged but don't stop the process
     Ok(())