viral_seq 1.2.5 → 1.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5fef5f765c741aa14885673a2f980726956ae341a39926929d10ac0c7b4a6ece
4
- data.tar.gz: d1e0cb4b691d4aff657209f5f87f02653421dc1bf9ba6dd4f87e43320ad143d5
3
+ metadata.gz: 2ccdd3f318deb458540b703ae00afcbdd195a92145c22dfb6c95ad14ac366146
4
+ data.tar.gz: 4f131cd69fef8548c1a466c2e23be008b08bed50ca78ef12195e4d4b39e052eb
5
5
  SHA512:
6
- metadata.gz: 98b18297e15a5fb0eff8706029dcfdecdc5c39df28b6ae3ea8fe5b7611f63f91276f7ed5e459a478b1af25ec25662804290decc923b1c52f946a184d197807be
7
- data.tar.gz: 7e359a05ff783971beced635cb2d913c4a9b418391425eba5ce9344b0137b989ea007d1ed5dc07a677a1775d9c73055aad552e971a89ca2207156db5233b243f
6
+ metadata.gz: 4cb44299c0061794b8c32c23c850cd9c53eb9245d7124ed75d35367225e600e2c630df9853d13a8003a8a1c6c793edea58a188871f328866043132585846e825
7
+ data.tar.gz: 92594266725b7691c5203cd478e84a0fea7fc6d08124efad6311a4e2bdcc7dbd0d6218a54e5b63d19e3c3a8ad789c49906388bac7ff59611bc5810e1c614d8ef
data/README.md CHANGED
@@ -179,11 +179,32 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
179
179
 
180
180
  ## Updates
181
181
 
182
+ ### Version 1.2.9-08022021
183
+
184
+ 1. Fixed a bug when reading input primer sequences given in lowercase.
185
+ 2. Fixed a bug in the method ViralSeq::Math::RandomGaussian.
186
+
187
+ ### Version 1.2.8-07292021
188
+
189
+ 1. Fixed an issue when reading .fastq files containing blank lines.
190
+
191
+ ### Version 1.2.7-07152021
192
+
193
+ 1. Optimized the workflow of the `tcs` pipeline on raw data with uneven lengths.
194
+ `tcs` version to v2.3.6.
195
+
196
+
197
+ ### Version 1.2.6-07122021
198
+
199
+ 1. Optimized the workflow of the `tcs` pipeline in the "end-join/QC/Trimming" section.
200
+ `tcs` version to v2.3.5.
201
+
202
+
182
203
  ### Version 1.2.5-06232021
183
204
 
184
205
  1. Add error rescue and report in the `tcs` pipeline.
185
206
  error messages are stored in the .tcs_error file. `tcs` pipeline updated to v2.3.4.
186
- 2. Use simple majority for the consensus cut-off in the default setting of the `tcs -dr` pipeline.
207
+ 2. Use simple majority for the consensus cut-off in the default setting of the `tcs -dr` pipeline.
187
208
 
188
209
  ### Version 1.2.2-05272021
189
210
 
data/bin/tcs CHANGED
@@ -137,7 +137,7 @@ begin
137
137
  end
138
138
 
139
139
  primers = params[:primer_pairs]
140
- if primers.empty?
140
+ if primers.empty? or primers.nil?
141
141
  ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
142
142
  end
143
143
 
@@ -152,8 +152,8 @@ begin
152
152
  primer[:region] ? region = primer[:region] : region = "region"
153
153
  summary_json[:primer_set_name] = region
154
154
 
155
- cdna_primer = primer[:cdna]
156
- forward_primer = primer[:forward]
155
+ cdna_primer = primer[:cdna].upcase
156
+ forward_primer = primer[:forward].upcase
157
157
 
158
158
  export_raw = primer[:export_raw]
159
159
  limit_raw = primer[:limit_raw]
@@ -353,7 +353,8 @@ begin
353
353
  r1_seq_length = consensus_filtered.values[0][0].size
354
354
  r2_seq_length = consensus_filtered.values[0][1].size
355
355
  else
356
- next
356
+ r1_seq_length = "n/a"
357
+ r2_seq_length = "n/a"
357
358
  end
358
359
  log.puts Time.now.to_s + "\t" + "R1 sequence #{r1_seq_length} bp"
359
360
  log.puts Time.now.to_s + "\t" + "R1 sequence #{r2_seq_length} bp"
@@ -415,71 +416,65 @@ begin
415
416
  joined_sh_raw = end_join(out_dir_raw, primer[:end_join_option], primer[:overlap])
416
417
  end
417
418
 
418
- else
419
- File.open(outfile_log, "w") do |f|
420
- f.puts JSON.pretty_generate(summary_json)
421
- end
422
- next
423
- end
424
-
425
- if primer[:TCS_QC]
426
- ref_start = primer[:ref_start]
427
- ref_end = primer[:ref_end]
428
- ref_genome = primer[:ref_genome].to_sym
429
- indel = primer[:indel]
430
- if ref_start == 0
431
- ref_start = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
432
- end
433
- if ref_end == 0
434
- ref_end = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
435
- end
436
- if primer[:end_join_option] == 1 and primer[:overlap] == 0
437
- r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
438
- r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
439
- r1_sh = r1_sh.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
440
- r2_sh = r2_sh.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
441
- new_r1_seq = r1_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
442
- new_r2_seq = r2_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
443
- joined_seq = {}
444
- new_r1_seq.each do |seq_name, seq|
445
- next unless seq
446
- next unless new_r2_seq[seq_name]
447
- joined_seq[seq_name] = seq + new_r2_seq[seq_name]
419
+ if primer[:TCS_QC]
420
+ ref_start = primer[:ref_start]
421
+ ref_end = primer[:ref_end]
422
+ ref_genome = primer[:ref_genome].to_sym
423
+ indel = primer[:indel]
424
+ if ref_start == 0
425
+ ref_start = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
426
+ end
427
+ if ref_end == 0
428
+ ref_end = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
448
429
  end
449
- joined_sh = ViralSeq::SeqHash.new(joined_seq)
450
-
451
- if export_raw
452
- r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
453
- r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
454
- r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
455
- r2_sh_raw = r2_sh_raw.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
456
- new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
457
- new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
458
- joined_seq_raw = {}
459
- new_r1_seq_raw.each do |seq_name, seq|
430
+ if primer[:end_join_option] == 1
431
+ r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
432
+ r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
433
+ r1_sh = r1_sh.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
434
+ r2_sh = r2_sh.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
435
+ new_r1_seq = r1_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
436
+ new_r2_seq = r2_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
437
+ joined_seq = {}
438
+ new_r1_seq.each do |seq_name, seq|
460
439
  next unless seq
461
- next unless new_r2_seq_raw[seq_name]
462
- joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
440
+ next unless new_r2_seq[seq_name]
441
+ joined_seq[seq_name] = seq + new_r2_seq[seq_name]
463
442
  end
464
- joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
465
- end
466
- else
467
- joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
443
+ joined_sh = ViralSeq::SeqHash.new(joined_seq)
444
+
445
+ if export_raw
446
+ r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
447
+ r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
448
+ r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
449
+ r2_sh_raw = r2_sh_raw.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
450
+ new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
451
+ new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
452
+ joined_seq_raw = {}
453
+ new_r1_seq_raw.each do |seq_name, seq|
454
+ next unless seq
455
+ next unless new_r2_seq_raw[seq_name]
456
+ joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
457
+ end
458
+ joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
459
+ end
460
+ else
461
+ joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
468
462
 
469
- if export_raw
470
- joined_sh_raw = joined_sh_raw.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
463
+ if export_raw
464
+ joined_sh_raw = joined_sh_raw.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
465
+ end
471
466
  end
472
- end
473
467
 
474
- log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
475
- summary_json[:combined_tcs_after_qc] = joined_sh.size
476
- if primer[:trim]
477
- trim_start = primer[:trim_ref_start]
478
- trim_end = primer[:trim_ref_end]
479
- trim_ref = primer[:trim_ref].to_sym
480
- joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
481
- if export_raw
482
- joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
468
+ log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
469
+ summary_json[:combined_tcs_after_qc] = joined_sh.size
470
+ if primer[:trim]
471
+ trim_start = primer[:trim_ref_start]
472
+ trim_end = primer[:trim_ref_end]
473
+ trim_ref = primer[:trim_ref].to_sym
474
+ joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
475
+ if export_raw
476
+ joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
477
+ end
483
478
  end
484
479
  end
485
480
 
@@ -487,11 +482,13 @@ begin
487
482
  if export_raw
488
483
  joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
489
484
  end
485
+
490
486
  end
491
487
 
492
488
  File.open(outfile_log, "w") do |f|
493
489
  f.puts JSON.pretty_generate(summary_json)
494
490
  end
491
+
495
492
  end
496
493
 
497
494
  unless options[:keep]
@@ -31,7 +31,7 @@ module ViralSeq
31
31
  def rand
32
32
  if (@compute_next_pair = !@compute_next_pair)
33
33
  theta = 2 * ::Math::PI * @rng.call
34
- scale = @sd * ::Math.sqrt(-2 * Math.log(1 - @rng.call))
34
+ scale = @sd * ::Math.sqrt(-2 * ::Math.log(1 - @rng.call))
35
35
  @g1 = @mean + scale * ::Math.sin(theta)
36
36
  @g0 = @mean + scale * ::Math.cos(theta)
37
37
  else
@@ -116,6 +116,8 @@ module ViralSeq
116
116
 
117
117
  File.open(fastq_file,'r') do |file|
118
118
  file.readlines.collect do |line|
119
+ line.tr!("\u0000","")
120
+ next if line == "\n"
119
121
  count +=1
120
122
  count_m = count % 4
121
123
  if count_m == 1
@@ -397,7 +399,9 @@ module ViralSeq
397
399
  (0..(seq_length - 1)).each do |position|
398
400
  all_base = []
399
401
  seq_array.each do |seq|
400
- all_base << seq[position]
402
+ if seq[position]
403
+ all_base << seq[position]
404
+ end
401
405
  end
402
406
  base_count = all_base.count_freq
403
407
  max_base_list = []
@@ -305,7 +305,8 @@ module ViralSeq
305
305
  end
306
306
 
307
307
  def general_filter(seq)
308
- if seq.size < $platform_sequencing_length
308
+ return false unless seq
309
+ if seq.size < ($platform_sequencing_length - 10)
309
310
  return false
310
311
  elsif seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
311
312
  return false
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.2.5"
6
- TCS_VERSION = "2.3.4"
5
+ VERSION = "1.2.9"
6
+ TCS_VERSION = "2.3.8"
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.5
4
+ version: 1.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-06-23 00:00:00.000000000 Z
12
+ date: 2021-08-02 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler