viral_seq 1.2.5 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +22 -1
- data/bin/tcs +60 -63
- data/lib/viral_seq/math.rb +1 -1
- data/lib/viral_seq/seq_hash.rb +5 -1
- data/lib/viral_seq/tcs_core.rb +2 -1
- data/lib/viral_seq/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 2ccdd3f318deb458540b703ae00afcbdd195a92145c22dfb6c95ad14ac366146
|
4
|
+
data.tar.gz: 4f131cd69fef8548c1a466c2e23be008b08bed50ca78ef12195e4d4b39e052eb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4cb44299c0061794b8c32c23c850cd9c53eb9245d7124ed75d35367225e600e2c630df9853d13a8003a8a1c6c793edea58a188871f328866043132585846e825
|
7
|
+
data.tar.gz: 92594266725b7691c5203cd478e84a0fea7fc6d08124efad6311a4e2bdcc7dbd0d6218a54e5b63d19e3c3a8ad789c49906388bac7ff59611bc5810e1c614d8ef
|
data/README.md
CHANGED
@@ -179,11 +179,32 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
179
179
|
|
180
180
|
## Updates
|
181
181
|
|
182
|
+
### Version 1.2.9-08022021
|
183
|
+
|
184
|
+
1. Fixed a bug when reading input primer sequences given in lowercase.
|
185
|
+
2. Fixed a bug in the method ViralSeq::Math::RandomGaussian.
|
186
|
+
|
187
|
+
### Version 1.2.8-07292021
|
188
|
+
|
189
|
+
1. Fixed an issue when reading .fastq files containing blank lines.
|
190
|
+
|
191
|
+
### Version 1.2.7-07152021
|
192
|
+
|
193
|
+
1. Optimized the workflow of the `tcs` pipeline on raw data with uneven lengths.
|
194
|
+
`tcs` version updated to v2.3.6.
|
195
|
+
|
196
|
+
|
197
|
+
### Version 1.2.6-07122021
|
198
|
+
|
199
|
+
1. Optimized the workflow of the `tcs` pipeline in the "end-join/QC/Trimming" section.
|
200
|
+
`tcs` version updated to v2.3.5.
|
201
|
+
|
202
|
+
|
182
203
|
### Version 1.2.5-06232021
|
183
204
|
|
184
205
|
1. Add error rescue and report in the `tcs` pipeline.
|
185
206
|
error messages are stored in the .tcs_error file. `tcs` pipeline updated to v2.3.4.
|
186
|
-
2. Use simple majority for the consensus cut-off in the default setting of the `tcs -dr` pipeline.
|
207
|
+
2. Use simple majority for the consensus cut-off in the default setting of the `tcs -dr` pipeline.
|
187
208
|
|
188
209
|
### Version 1.2.2-05272021
|
189
210
|
|
data/bin/tcs
CHANGED
@@ -137,7 +137,7 @@ begin
|
|
137
137
|
end
|
138
138
|
|
139
139
|
primers = params[:primer_pairs]
|
140
|
-
if primers.empty?
|
140
|
+
if primers.empty? or primers.nil?
|
141
141
|
ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
|
142
142
|
end
|
143
143
|
|
@@ -152,8 +152,8 @@ begin
|
|
152
152
|
primer[:region] ? region = primer[:region] : region = "region"
|
153
153
|
summary_json[:primer_set_name] = region
|
154
154
|
|
155
|
-
cdna_primer = primer[:cdna]
|
156
|
-
forward_primer = primer[:forward]
|
155
|
+
cdna_primer = primer[:cdna].upcase
|
156
|
+
forward_primer = primer[:forward].upcase
|
157
157
|
|
158
158
|
export_raw = primer[:export_raw]
|
159
159
|
limit_raw = primer[:limit_raw]
|
@@ -353,7 +353,8 @@ begin
|
|
353
353
|
r1_seq_length = consensus_filtered.values[0][0].size
|
354
354
|
r2_seq_length = consensus_filtered.values[0][1].size
|
355
355
|
else
|
356
|
-
|
356
|
+
r1_seq_length = "n/a"
|
357
|
+
r2_seq_length = "n/a"
|
357
358
|
end
|
358
359
|
log.puts Time.now.to_s + "\t" + "R1 sequence #{r1_seq_length} bp"
|
359
360
|
log.puts Time.now.to_s + "\t" + "R1 sequence #{r2_seq_length} bp"
|
@@ -415,71 +416,65 @@ begin
|
|
415
416
|
joined_sh_raw = end_join(out_dir_raw, primer[:end_join_option], primer[:overlap])
|
416
417
|
end
|
417
418
|
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
ref_genome = primer[:ref_genome].to_sym
|
429
|
-
indel = primer[:indel]
|
430
|
-
if ref_start == 0
|
431
|
-
ref_start = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
|
432
|
-
end
|
433
|
-
if ref_end == 0
|
434
|
-
ref_end = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
|
435
|
-
end
|
436
|
-
if primer[:end_join_option] == 1 and primer[:overlap] == 0
|
437
|
-
r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
|
438
|
-
r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
|
439
|
-
r1_sh = r1_sh.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
|
440
|
-
r2_sh = r2_sh.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
|
441
|
-
new_r1_seq = r1_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
|
442
|
-
new_r2_seq = r2_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
|
443
|
-
joined_seq = {}
|
444
|
-
new_r1_seq.each do |seq_name, seq|
|
445
|
-
next unless seq
|
446
|
-
next unless new_r2_seq[seq_name]
|
447
|
-
joined_seq[seq_name] = seq + new_r2_seq[seq_name]
|
419
|
+
if primer[:TCS_QC]
|
420
|
+
ref_start = primer[:ref_start]
|
421
|
+
ref_end = primer[:ref_end]
|
422
|
+
ref_genome = primer[:ref_genome].to_sym
|
423
|
+
indel = primer[:indel]
|
424
|
+
if ref_start == 0
|
425
|
+
ref_start = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
|
426
|
+
end
|
427
|
+
if ref_end == 0
|
428
|
+
ref_end = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
|
448
429
|
end
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
joined_seq_raw = {}
|
459
|
-
new_r1_seq_raw.each do |seq_name, seq|
|
430
|
+
if primer[:end_join_option] == 1
|
431
|
+
r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
|
432
|
+
r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
|
433
|
+
r1_sh = r1_sh.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
|
434
|
+
r2_sh = r2_sh.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
|
435
|
+
new_r1_seq = r1_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
|
436
|
+
new_r2_seq = r2_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
|
437
|
+
joined_seq = {}
|
438
|
+
new_r1_seq.each do |seq_name, seq|
|
460
439
|
next unless seq
|
461
|
-
next unless
|
462
|
-
|
440
|
+
next unless new_r2_seq[seq_name]
|
441
|
+
joined_seq[seq_name] = seq + new_r2_seq[seq_name]
|
463
442
|
end
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
443
|
+
joined_sh = ViralSeq::SeqHash.new(joined_seq)
|
444
|
+
|
445
|
+
if export_raw
|
446
|
+
r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
|
447
|
+
r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
|
448
|
+
r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
|
449
|
+
r2_sh_raw = r2_sh_raw.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
|
450
|
+
new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
|
451
|
+
new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
|
452
|
+
joined_seq_raw = {}
|
453
|
+
new_r1_seq_raw.each do |seq_name, seq|
|
454
|
+
next unless seq
|
455
|
+
next unless new_r2_seq_raw[seq_name]
|
456
|
+
joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
|
457
|
+
end
|
458
|
+
joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
|
459
|
+
end
|
460
|
+
else
|
461
|
+
joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
|
468
462
|
|
469
|
-
|
470
|
-
|
463
|
+
if export_raw
|
464
|
+
joined_sh_raw = joined_sh_raw.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
|
465
|
+
end
|
471
466
|
end
|
472
|
-
end
|
473
467
|
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
468
|
+
log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
|
469
|
+
summary_json[:combined_tcs_after_qc] = joined_sh.size
|
470
|
+
if primer[:trim]
|
471
|
+
trim_start = primer[:trim_ref_start]
|
472
|
+
trim_end = primer[:trim_ref_end]
|
473
|
+
trim_ref = primer[:trim_ref].to_sym
|
474
|
+
joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
|
475
|
+
if export_raw
|
476
|
+
joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
|
477
|
+
end
|
483
478
|
end
|
484
479
|
end
|
485
480
|
|
@@ -487,11 +482,13 @@ begin
|
|
487
482
|
if export_raw
|
488
483
|
joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
|
489
484
|
end
|
485
|
+
|
490
486
|
end
|
491
487
|
|
492
488
|
File.open(outfile_log, "w") do |f|
|
493
489
|
f.puts JSON.pretty_generate(summary_json)
|
494
490
|
end
|
491
|
+
|
495
492
|
end
|
496
493
|
|
497
494
|
unless options[:keep]
|
data/lib/viral_seq/math.rb
CHANGED
@@ -31,7 +31,7 @@ module ViralSeq
|
|
31
31
|
def rand
|
32
32
|
if (@compute_next_pair = !@compute_next_pair)
|
33
33
|
theta = 2 * ::Math::PI * @rng.call
|
34
|
-
scale = @sd * ::Math.sqrt(-2 * Math.log(1 - @rng.call))
|
34
|
+
scale = @sd * ::Math.sqrt(-2 * ::Math.log(1 - @rng.call))
|
35
35
|
@g1 = @mean + scale * ::Math.sin(theta)
|
36
36
|
@g0 = @mean + scale * ::Math.cos(theta)
|
37
37
|
else
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -116,6 +116,8 @@ module ViralSeq
|
|
116
116
|
|
117
117
|
File.open(fastq_file,'r') do |file|
|
118
118
|
file.readlines.collect do |line|
|
119
|
+
line.tr!("\u0000","")
|
120
|
+
next if line == "\n"
|
119
121
|
count +=1
|
120
122
|
count_m = count % 4
|
121
123
|
if count_m == 1
|
@@ -397,7 +399,9 @@ module ViralSeq
|
|
397
399
|
(0..(seq_length - 1)).each do |position|
|
398
400
|
all_base = []
|
399
401
|
seq_array.each do |seq|
|
400
|
-
|
402
|
+
if seq[position]
|
403
|
+
all_base << seq[position]
|
404
|
+
end
|
401
405
|
end
|
402
406
|
base_count = all_base.count_freq
|
403
407
|
max_base_list = []
|
data/lib/viral_seq/tcs_core.rb
CHANGED
@@ -305,7 +305,8 @@ module ViralSeq
|
|
305
305
|
end
|
306
306
|
|
307
307
|
def general_filter(seq)
|
308
|
-
|
308
|
+
return false unless seq
|
309
|
+
if seq.size < ($platform_sequencing_length - 10)
|
309
310
|
return false
|
310
311
|
elsif seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
|
311
312
|
return false
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.
|
4
|
+
version: 1.2.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-08-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|