@sjcrh/proteinpaint-rust 2.129.2 → 2.129.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/gdcGRIN2.rs CHANGED
@@ -13,7 +13,7 @@
  Output mutations as JSON array.

  Example of usage:
- echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}},"mafOptions": {"minTotalDepth": 10,"minAltAlleleCount": 2}}' | ./target/release/gdcGRIN2
+ echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"},"MP2PRT-PAPIGD": {"maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347"}, "TCGA-CG-4300": { "cnv":"46372ec2-ff79-4d07-b375-9ba8a12c11f3", "maf":"c09b208d-2e7b-4116-9580-27f20f4c7e67"}},"mafOptions": {"minTotalDepth": 100,"minAltAlleleCount": 20}, "cnvOptions":{"lossThreshold":-1, "gainThreshold": 1.5, "segLength":2000000}}' | ./target/release/gdcGRIN2
  */

  use flate2::read::GzDecoder;
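For readability, the updated stdin payload from the usage example above, pretty-printed (identical content to the one-liner):

{
  "caseFiles": {
    "MP2PRT-PATFJE": { "maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361" },
    "MP2PRT-PAPIGD": { "maf": "653d7458-f4af-4328-a1ce-3bbf22a2e347" },
    "TCGA-CG-4300": {
      "cnv": "46372ec2-ff79-4d07-b375-9ba8a12c11f3",
      "maf": "c09b208d-2e7b-4116-9580-27f20f4c7e67"
    }
  },
  "mafOptions": { "minTotalDepth": 100, "minAltAlleleCount": 20 },
  "cnvOptions": { "lossThreshold": -1, "gainThreshold": 1.5, "segLength": 2000000 }
}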
@@ -40,21 +40,6 @@ struct ErrorEntry {
  attempts_made: u32,
  }

- // Struct for the final output that includes both successful data and errors
- #[derive(serde::Serialize)]
- struct GdcOutput {
- successful_data: Vec<Vec<Vec<String>>>, // Array of successful file data arrays
- failed_files: Vec<ErrorEntry>,
- summary: OutputSummary,
- }
-
- #[derive(serde::Serialize)]
- struct OutputSummary {
- total_files: usize,
- successful_files: usize,
- failed_files: usize,
- }
-
  // Define the structure for datadd
  #[derive(Deserialize, Debug)]
  struct DataType {
@@ -69,6 +54,39 @@ struct MafOptions {
  min_total_depth: i32,
  #[serde(rename = "minAltAlleleCount")]
  min_alt_allele_count: i32,
+ consequences: Option<Vec<String>>, // Optional list of consequences to filter MAF files
+ }
+
+ // Define the structure for cnvOptions
+ #[derive(Deserialize, Debug)]
+ struct CnvOptions {
+ #[serde(rename = "lossThreshold")]
+ loss_threshold: f32,
+ #[serde(rename = "gainThreshold")]
+ gain_threshold: f32,
+ #[serde(rename = "segLength")]
+ seg_length: i32,
+ }
+
+ // Individual successful file output (JSONL format)
+ #[derive(serde::Serialize)]
+ struct SuccessfulFileOutput {
+ #[serde(rename = "type")]
+ output_type: String, // Always "data"
+ case_id: String,
+ data_type: String,
+ data: Vec<Vec<String>>,
+ }
+
+ // Final summary output (JSONL format)
+ #[derive(serde::Serialize)]
+ struct FinalSummary {
+ #[serde(rename = "type")]
+ output_type: String, // Always "summary"
+ total_files: usize,
+ successful_files: usize,
+ failed_files: usize,
+ errors: Vec<ErrorEntry>,
  }

  // Define the top-level input structure
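Given the serde renames above, the process emits one JSON object per line: zero or more "data" records followed by exactly one "summary" record as the final line. An illustrative stream (values made up; error entries omitted for brevity):

{"type":"data","case_id":"MP2PRT-PATFJE","data_type":"maf","data":[["MP2PRT-PATFJE","chr1","1014042","1014042","mutation"]]}
{"type":"summary","total_files":3,"successful_files":3,"failed_files":0,"errors":[]}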
@@ -78,104 +96,114 @@ struct InputData {
  case_files: HashMap<String, DataType>,
  #[serde(rename = "mafOptions")]
  maf_options: Option<MafOptions>,
+ #[serde(rename = "cnvOptions")]
+ cnv_options: Option<CnvOptions>,
+ }
+
+ // Configuration for different data types
+ #[derive(Deserialize, Debug)]
+ struct DataTypeConfig {
+ header_marker: &'static str,
+ output_columns: Vec<&'static str>,
+ }
+
+ // Function to check if CNV file has Segment_Mean column
+ fn has_segment_mean_column(content: &str) -> bool {
+ for line in content.lines() {
+ // Check if this line contains Segment_Mean (likely the header)
+ if line.contains("Segment_Mean") {
+ return true;
+ }
+ // Stop checking after a few non-comment lines to avoid parsing entire file
+ if !line.trim().is_empty() {
+ break;
+ }
+ }
+ false
  }

  // Function to parse TSV content
+ // Updated parse_content function with better consequence filtering
  fn parse_content(
  content: &str,
  case_id: &str,
  data_type: &str,
  min_total_depth: i32,
  min_alt_allele_count: i32,
+ consequences: &Option<Vec<String>>,
+ gain_threshold: f32,
+ loss_threshold: f32,
+ seg_length: i32,
  ) -> Result<Vec<Vec<String>>, (String, String, String)> {
+ // Early filter for CNV files - only process files with Segment_Mean
+ if data_type == "cnv" && !has_segment_mean_column(content) {
+ return Ok(Vec::new()); // Return empty result, no error
+ }
+
+ let config = match data_type {
+ "cnv" => DataTypeConfig {
+ header_marker: "Segment_Mean",
+ output_columns: vec!["Chromosome", "Start", "End", "Segment_Mean"],
+ },
+ "maf" => DataTypeConfig {
+ header_marker: "Hugo_Symbol",
+ output_columns: vec!["Chromosome", "Start_Position", "End_Position", "t_depth", "t_alt_count"],
+ },
+ _ => {
+ return Err((
+ case_id.to_string(),
+ data_type.to_string(),
+ "Invalid data type".to_string(),
+ ));
+ }
+ };
+
  let lines = content.lines();
  let mut parsed_data = Vec::new();
  let mut columns_indices: Vec<usize> = Vec::new();
- let mut header_mk: &str = "";
- let mut columns = Vec::new();
-
- if data_type == "cnv" {
- header_mk = "GDC_Aliquot_ID";
- columns = vec!["Chromosome", "Start", "End", "Segment_Mean"]
- } else if data_type == "maf" {
- header_mk = "Hugo_Symbol";
- columns = vec!["Chromosome", "Start_Position", "End_Position", "t_depth", "t_alt_count"]
- };
+ let mut variant_classification_index: Option<usize> = None;
+ //let mut header_mk: &str = "";
+ //let mut columns = Vec::new();

  let mut header: Vec<String> = Vec::new();

  for line in lines {
  if line.starts_with("#") {
  continue;
- } else if line.contains(&header_mk) {
+ };
+ if line.contains(config.header_marker) {
  header = line.split("\t").map(|s| s.to_string()).collect();
- for col in &columns {
- match header.iter().position(|x| x == col) {
- Some(index) => {
- columns_indices.push(index);
- }
- None => {
- let error_msg = format!("Column {} was not found", col);
- return Err((case_id.to_string(), data_type.to_string(), error_msg));
- }
- }
- }
- } else {
- let mut keep_ck: bool = true;
- let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
- let mut out_lst: Vec<String> = Vec::new();
- out_lst.push(case_id.to_string());
-
- for x in columns_indices.iter() {
- let mut element = cont_lst[*x].to_string();
-
- if data_type == "cnv" && &header[*x] == "Segment_Mean" {
- let seg_mean = match element.parse::<f32>() {
- Ok(val) => val,
- Err(_e) => {
- let error_msg = "Segment_Mean in cnv file is not float".to_string();
- return Err((case_id.to_string(), data_type.to_string(), error_msg));
- }
- };
- if seg_mean >= 0.3 {
- element = "gain".to_string();
- } else if seg_mean <= -0.4 {
- element = "loss".to_string();
- } else {
- keep_ck = false;
- }
- }
- out_lst.push(element);
- }
-
- if data_type == "maf" {
- let alle_depth = match out_lst[4].parse::<i32>() {
- Ok(value) => value,
- Err(_) => {
- let error_msg = "Failed to convert t_depth to i32.".to_string();
- return Err((case_id.to_string(), data_type.to_string(), error_msg));
- }
- };
- let alt_count = match out_lst[5].parse::<i32>() {
- Ok(value) => value,
- Err(_) => {
- let error_msg = "Failed to convert t_alt_count to i32.".to_string();
- return Err((case_id.to_string(), data_type.to_string(), error_msg));
- }
- };
-
- if alle_depth >= min_total_depth && alt_count >= min_alt_allele_count {
- out_lst = out_lst[0..4].to_vec();
- out_lst.push("mutation".to_string());
- } else {
- keep_ck = false;
- }
+ if let Err(err) = setup_columns(
+ &header,
+ &config,
+ &mut columns_indices,
+ &mut variant_classification_index,
+ case_id,
+ data_type,
+ ) {
+ return Err(err);
  }
+ continue;
+ };

- if keep_ck {
- parsed_data.push(out_lst);
- }
- }
+ let row = process_row(
+ line,
+ case_id,
+ data_type,
+ &header,
+ &columns_indices,
+ variant_classification_index,
+ consequences,
+ min_total_depth,
+ min_alt_allele_count,
+ gain_threshold,
+ loss_threshold,
+ seg_length,
+ )?;
+
+ if let Some(out_lst) = row {
+ parsed_data.push(out_lst);
+ };
  }

  if columns_indices.is_empty() {
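A note on the early CNV gate above: has_segment_mean_column returns at the first non-empty line, so a CNV file is only processed when that line is the header containing Segment_Mean; any other file yields an empty result from parse_content rather than an error. A quick illustration (input strings made up):

// First non-empty line is a header containing Segment_Mean: file is processed.
assert!(has_segment_mean_column("GDC_Aliquot\tChromosome\tStart\tEnd\tSegment_Mean"));
// First non-empty line lacks the marker: the file is skipped with Ok(Vec::new()).
assert!(!has_segment_mean_column("Chromosome\tStart\tEnd\n1\t100\t200"));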
@@ -189,6 +217,204 @@ fn parse_content(
  Ok(parsed_data)
  }

+ // Set up column indices for processing
+ fn setup_columns(
+ header: &[String],
+ config: &DataTypeConfig,
+ columns_indices: &mut Vec<usize>,
+ variant_classification_index: &mut Option<usize>,
+ case_id: &str,
+ data_type: &str,
+ ) -> Result<(), (String, String, String)> {
+ for col in &config.output_columns {
+ match header.iter().position(|x| x == col) {
+ Some(index) => columns_indices.push(index),
+ None => {
+ return Err((
+ case_id.to_string(),
+ data_type.to_string(),
+ format!("Column {} was not found", col),
+ ));
+ }
+ }
+ }
+
+ if data_type == "maf" {
+ *variant_classification_index = header.iter().position(|x| x == "Variant_Classification");
+ if variant_classification_index.is_none() {
+ return Err((
+ case_id.to_string(),
+ data_type.to_string(),
+ "Column Variant_Classification was not found".to_string(),
+ ));
+ }
+ }
+
+ Ok(())
+ }
+
+ // Process a single row of data
+ fn process_row(
+ line: &str,
+ case_id: &str,
+ data_type: &str,
+ header: &[String],
+ columns_indices: &[usize],
+ variant_classification_index: Option<usize>,
+ consequences: &Option<Vec<String>>,
+ min_total_depth: i32,
+ min_alt_allele_count: i32,
+ gain_threshold: f32,
+ loss_threshold: f32,
+ seg_length: i32,
+ ) -> Result<Option<Vec<String>>, (String, String, String)> {
+ let cont_lst: Vec<String> = line.split("\t").map(|s| s.to_string()).collect();
+ let mut out_lst = vec![case_id.to_string()];
+
+ // Check consequence filtering for MAF files
+ if data_type == "maf" && !is_valid_consequence(&cont_lst, variant_classification_index, consequences) {
+ return Ok(None);
+ }
+
+ // Extract relevant columns
+ for &x in columns_indices {
+ if x >= cont_lst.len() {
+ return Ok(None); // Invalid row
+ }
+
+ let mut element = cont_lst[x].to_string();
+ if data_type == "cnv" && header[x] == "Segment_Mean" {
+ element = process_segment_mean(&element, case_id, data_type, gain_threshold, loss_threshold)?;
+ if element.is_empty() {
+ return Ok(None);
+ }
+ }
+ out_lst.push(element);
+ }
+
+ // Additional MAF-specific processing
+ if data_type == "maf" {
+ if out_lst.len() < 6 {
+ return Ok(None); // Not enough columns
+ }
+
+ let alle_depth = out_lst[4].parse::<i32>().map_err(|_| {
+ (
+ case_id.to_string(),
+ data_type.to_string(),
+ "Failed to convert t_depth to integer.".to_string(),
+ )
+ })?;
+
+ let alt_count = out_lst[5].parse::<i32>().map_err(|_| {
+ (
+ case_id.to_string(),
+ data_type.to_string(),
+ "Failed to convert t_alt_count to integer.".to_string(),
+ )
+ })?;
+
+ if alle_depth < min_total_depth || alt_count < min_alt_allele_count {
+ return Ok(None);
+ }
+
+ // Keep case_id, chr, start, end, and add "mutation"
+ out_lst = out_lst[0..4].to_vec();
+ out_lst.push("mutation".to_string());
+ }
+
+ // filter cnvs based on segment length. Default: 2000000
+ if data_type == "cnv" {
+ // calculate segment length (End_Position - Start_Position)
+ let end_position = out_lst[3].parse::<i32>().map_err(|_| {
+ (
+ case_id.to_string(),
+ data_type.to_string(),
+ "Failed to convert End Position of cnv to integer.".to_string(),
+ )
+ })?;
+
+ let start_position = out_lst[2].parse::<i32>().map_err(|_| {
+ (
+ case_id.to_string(),
+ data_type.to_string(),
+ "Failed to convert Start Position of cnv to integer.".to_string(),
+ )
+ })?;
+ let cnv_length = end_position - start_position;
+ if cnv_length > seg_length {
+ return Ok(None);
+ }
+ }
+
+ Ok(Some(out_lst))
+ }
+
+ // Check if the row meets consequence filtering criteria
+ fn is_valid_consequence(
+ cont_lst: &[String],
+ variant_classification_index: Option<usize>,
+ consequences: &Option<Vec<String>>,
+ ) -> bool {
+ if let Some(consequence_filter) = consequences {
+ if !consequence_filter.is_empty() {
+ if let Some(var_class_idx) = variant_classification_index {
+ if var_class_idx < cont_lst.len() {
+ let variant_classification = &cont_lst[var_class_idx];
+ if let Some(normalized_consequence) = normalize_consequence(variant_classification) {
+ return consequence_filter.contains(&normalized_consequence);
+ }
+ }
+ return false; // Invalid row or unknown consequence
+ }
+ }
+ }
+ true // No filtering or empty filter
+ }
+
+ // Process Segment_Mean for CNV files
+ fn process_segment_mean(
+ element: &str,
+ case_id: &str,
+ data_type: &str,
+ gain_threshold: f32,
+ loss_threshold: f32,
+ ) -> Result<String, (String, String, String)> {
+ let seg_mean = element.parse::<f32>().map_err(|_| {
+ (
+ case_id.to_string(),
+ data_type.to_string(),
+ "Segment_Mean in cnv file is not float".to_string(),
+ )
+ })?;
+
+ if seg_mean >= gain_threshold {
+ Ok("gain".to_string())
+ } else if seg_mean <= loss_threshold {
+ Ok("loss".to_string())
+ } else {
+ Ok(String::new())
+ }
+ }
+
+ /// Updated helper function to normalize MAF consequence types to frontend format
+ /// Returns None for unknown consequence types (which will be filtered out)
+ fn normalize_consequence(maf_consequence: &str) -> Option<String> {
+ match maf_consequence.to_lowercase().as_str() {
+ // Only map the consequence types we actually support
+ "missense_mutation" => Some("missense".to_string()),
+ "nonsense_mutation" | "stop_gained" | "stop_lost" => Some("nonsense".to_string()),
+ "frame_shift_del" | "frame_shift_ins" | "frameshift_variant" => Some("frameshift".to_string()),
+ "silent" | "synonymous_variant" => Some("silent".to_string()),
+ "in_frame_del" => Some("deletion".to_string()),
+ "in_frame_ins" => Some("insertion".to_string()),
+ "splice_site" | "splice_acceptor_variant" | "splice_donor_variant" => Some("splice_site".to_string()),
+ "tandem_duplication" | "duplication" => Some("duplication".to_string()),
+ "inversion" => Some("inversion".to_string()),
+ // Return None for all unknown consequence types - they will be filtered out
+ _ => None,
+ }
+ }
  /// Downloads a single file with minimal retry logic for transient failures
  async fn download_single_file(
  case_id: String,
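Hand-checked behavior of the helpers above, written as assertions for illustration (not tests shipped with the package):

// With the default thresholds (gainThreshold 0.3, lossThreshold -0.4):
assert_eq!(process_segment_mean("0.8", "case", "cnv", 0.3, -0.4), Ok("gain".to_string()));
assert_eq!(process_segment_mean("-0.9", "case", "cnv", 0.3, -0.4), Ok("loss".to_string()));
// A value between the thresholds comes back empty, so process_row drops the row.
assert_eq!(process_segment_mean("0.1", "case", "cnv", 0.3, -0.4), Ok(String::new()));
// Consequence normalization is case-insensitive and rejects unmapped types.
assert_eq!(normalize_consequence("Missense_Mutation"), Some("missense".to_string()));
assert_eq!(normalize_consequence("Intron"), None);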
@@ -291,14 +517,19 @@ async fn download_single_file(
  ))
  }

- /// Main download function with structured JSON output including errors
- async fn download_data(
+ /// NEW: Phase 1 streaming download function
+ /// Outputs JSONL format: one JSON object per line
+ /// Node.js will read this line-by-line but still wait for completion
+ async fn download_data_streaming(
  data4dl: HashMap<String, DataType>,
  host: &str,
  min_total_depth: i32,
  min_alt_allele_count: i32,
+ consequences: &Option<Vec<String>>,
+ gain_threshold: f32,
+ loss_threshold: f32,
+ seg_length: i32,
  ) {
- // Generate URLs from data4dl, handling optional cnv and maf
  let data_urls: Vec<(String, String, String)> = data4dl
  .into_iter()
  .flat_map(|(case_id, data_types)| {
@@ -315,42 +546,63 @@ async fn download_data(

  let total_files = data_urls.len();

- // Use atomic counters that can be safely shared across async closures
+ // Counters for final summary
  let successful_downloads = Arc::new(AtomicUsize::new(0));
  let failed_downloads = Arc::new(AtomicUsize::new(0));

- // Create shared vectors to collect successful data and errors
- let successful_data = Arc::new(Mutex::new(Vec::<Vec<Vec<String>>>::new()));
+ // Only collect errors (successful data is output immediately)
  let errors = Arc::new(Mutex::new(Vec::<ErrorEntry>::new()));

- // Create download futures with smart retry logic
- let download_futures = futures::stream::iter(data_urls.into_iter().map(|(case_id, data_type, url)| {
- async move {
- // Try each file up to 2 times for transient failures
- download_single_file(case_id, data_type, url, 2).await
- }
- }));
+ let download_futures = futures::stream::iter(
+ data_urls
+ .into_iter()
+ .map(|(case_id, data_type, url)| async move { download_single_file(case_id, data_type, url, 2).await }),
+ );

- // Execute downloads concurrently with high concurrency for speed
+ // Process downloads and output results immediately as JSONL
  download_futures
- .buffer_unordered(15) // Increased to 15 concurrent downloads for speed
+ .buffer_unordered(20) // Increased concurrency for better performance
  .for_each(|download_result| {
  let successful_downloads = Arc::clone(&successful_downloads);
  let failed_downloads = Arc::clone(&failed_downloads);
- let successful_data = Arc::clone(&successful_data);
  let errors = Arc::clone(&errors);

  async move {
  match download_result {
  Ok((case_id, data_type, content)) => {
- // Successfully downloaded, now try to parse
- match parse_content(&content, &case_id, &data_type, min_total_depth, min_alt_allele_count) {
+ // Try to parse the content
+ match parse_content(
+ &content,
+ &case_id,
+ &data_type,
+ min_total_depth,
+ min_alt_allele_count,
+ &consequences,
+ gain_threshold,
+ loss_threshold,
+ seg_length,
+ ) {
  Ok(parsed_data) => {
- // Store successful data
- successful_data.lock().await.push(parsed_data);
+ // SUCCESS: Output immediately as JSONL
+ let success_output = SuccessfulFileOutput {
+ output_type: "data".to_string(),
+ case_id: case_id.clone(),
+ data_type: data_type.clone(),
+ data: parsed_data,
+ };
+
+ // Output this successful result immediately - Node.js will see this in real-time
+ if let Ok(json) = serde_json::to_string(&success_output) {
+ println!("{}", json); // IMMEDIATE output to stdout
+ // Force flush to ensure Node.js sees it immediately
+ use std::io::Write;
+ let _ = std::io::stdout().flush();
+ }
+
  successful_downloads.fetch_add(1, Ordering::Relaxed);
  }
  Err((cid, dtp, error)) => {
+ // Parsing failed - add to errors
  failed_downloads.fetch_add(1, Ordering::Relaxed);
  let error = ErrorEntry {
  case_id: cid,
@@ -364,9 +616,9 @@ async fn download_data(
  }
  }
  Err((case_id, data_type, error_details, attempts)) => {
+ // Download failed - add to errors
  failed_downloads.fetch_add(1, Ordering::Relaxed);

- // Parse error type from error details
  let (error_type, clean_details) = if error_details.contains(":") {
  let parts: Vec<&str> = error_details.splitn(2, ": ").collect();
  (parts[0].to_string(), parts[1].to_string())
@@ -388,27 +640,23 @@ async fn download_data(
  })
  .await;

- // Create final output structure
+ // Output final summary as the last line
  let success_count = successful_downloads.load(Ordering::Relaxed);
  let failed_count = failed_downloads.load(Ordering::Relaxed);

- let output = GdcOutput {
- successful_data: successful_data.lock().await.clone(),
- failed_files: errors.lock().await.clone(),
- summary: OutputSummary {
- total_files,
- successful_files: success_count,
- failed_files: failed_count,
- },
+ let summary = FinalSummary {
+ output_type: "summary".to_string(),
+ total_files,
+ successful_files: success_count,
+ failed_files: failed_count,
+ errors: errors.lock().await.clone(),
  };

- // Output the complete structure as JSON
- match serde_json::to_string(&output) {
- Ok(json) => println!("{}", json),
- Err(_) => {
- // Silent failure - exit without stderr
- std::process::exit(1);
- }
+ // Output final summary - Node.js will know processing is complete when it sees this
+ if let Ok(json) = serde_json::to_string(&summary) {
+ println!("{}", json);
+ use std::io::Write;
+ let _ = std::io::stdout().flush();
  }
  }

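Since the "summary" object is guaranteed to be the last line on stdout, a consumer can stream records as they arrive and treat that line as the completion signal. A minimal line-by-line reader, sketched in Rust purely for illustration (the actual consumer in ProteinPaint is Node.js):

use std::io::{self, BufRead};

fn main() -> io::Result<()> {
    for line in io::stdin().lock().lines() {
        // Each line is one self-contained JSON object.
        let record: serde_json::Value = serde_json::from_str(&line?).expect("valid JSONL line");
        match record["type"].as_str() {
            Some("data") => { /* handle record["case_id"], record["data_type"], record["data"] */ }
            Some("summary") => break, // final record: totals plus accumulated errors
            _ => {}
        }
    }
    Ok(())
}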
@@ -455,13 +703,34 @@ async fn main() -> Result<(), Box<dyn std::error::Error>> {
  let case_files = input_js.case_files;

  // Set default maf_options
- let (min_total_depth, min_alt_allele_count) = match input_js.maf_options {
- Some(options) => (options.min_total_depth, options.min_alt_allele_count),
- None => (10, 2), // Default values
+ let (min_total_depth, min_alt_allele_count, consequences) = match input_js.maf_options {
+ Some(options) => (
+ options.min_total_depth,
+ options.min_alt_allele_count,
+ options.consequences.clone(),
+ ),
+ None => (10, 2, None), // Default values
+ };
+
+ // Set default cnv_options
+ let (gain_threshold, loss_threshold, seg_length) = match input_js.cnv_options {
+ Some(options) => (options.gain_threshold, options.loss_threshold, options.seg_length),
+ None => (0.3, -0.4, 2000000), // Default values
  };

  // Download data - this will now handle errors gracefully
- download_data(case_files, HOST, min_total_depth, min_alt_allele_count).await;
+ // download_data(case_files, HOST, min_total_depth, min_alt_allele_count, &consequences).await;
+ download_data_streaming(
+ case_files,
+ HOST,
+ min_total_depth,
+ min_alt_allele_count,
+ &consequences,
+ gain_threshold,
+ loss_threshold,
+ seg_length,
+ )
+ .await;

  // Always exit successfully - individual file failures are logged but don't stop the process
  Ok(())
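Both option blocks are optional on stdin; when omitted, main() falls back to the defaults shown above. A minimal invocation, reusing a file UUID from the usage example at the top (illustrative):

echo '{"caseFiles": {"MP2PRT-PATFJE": {"maf": "26ea7b6f-8bc4-4e83-ace1-2125b493a361"}}}' | ./target/release/gdcGRIN2
# behaves as if mafOptions were {minTotalDepth: 10, minAltAlleleCount: 2} with no consequence filter,
# and cnvOptions were {gainThreshold: 0.3, lossThreshold: -0.4, segLength: 2000000}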