viral_seq 1.7.0 → 1.7.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +7 -0
- data/bin/tcs +22 -4
- data/bin/tcs_log +1 -1
- data/lib/viral_seq/sequence.rb +5 -5
- data/lib/viral_seq/string.rb +37 -0
- data/lib/viral_seq/tcs_core.rb +4 -4
- data/lib/viral_seq/version.rb +2 -2
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4e6d55ab37ecd3b9c5688c99772fc49792a5319bac853ac768367a8b42c0e0b6
|
4
|
+
data.tar.gz: a69e78c80f22848facb41ad4f9d9fb64e6d4e47ff6e18afa3421d64513ce6558
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ae34ac12bd2b86d4c7fc040765b26b94d41cfe239a206b2e84bf55841988826bcfbf685e788b93224ee78e29b1280454059991d644f81cbf24f1b97fff3f2294
|
7
|
+
data.tar.gz: 254993ea2126ca51d0ad5e2b6be2dca90e1b3ed817266e46b5ca46f91d2a69288c0a87c58906ef3da8ad6465e08788c850d6bc72013e30f1ead13e186ba16dfd
|
data/README.md
CHANGED
@@ -187,6 +187,13 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
187
187
|
|
188
188
|
## Updates
|
189
189
|
|
190
|
+
### Version-1.7.1-05122023
|
191
|
+
|
192
|
+
1. Add a size check for the raw sequences. If the size is smaller than the input params, error messages will be sent to users. If the actual size is greater than the input params, the extra bases will be truncated.
|
193
|
+
2. Now allows mismatches in the primer region sequences. The forward primer region allows 2 nt differences and the cDNA primer region allows 3 nt differences.
|
194
|
+
3. Bug fix.
|
195
|
+
4. TCS version to 2.5.2
|
196
|
+
|
190
197
|
### Version-1.7.0-08242022
|
191
198
|
|
192
199
|
1. Add warnings if the `tcs` pipeline is executed from source instead of being installed from `gem`.
|
data/bin/tcs
CHANGED
@@ -41,7 +41,7 @@ if gem_installed?('viral_seq')
|
|
41
41
|
require 'viral_seq'
|
42
42
|
else
|
43
43
|
printf "\n****************************************************\n"
|
44
|
-
printf "**** THIS PACKAGE CANNOT BE
|
44
|
+
printf "**** THIS PACKAGE CANNOT BE RUN FROM SOURCE ********\n"
|
45
45
|
printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
|
46
46
|
printf "****************************************************\n\n"
|
47
47
|
exit 1
|
@@ -104,7 +104,7 @@ end.parse!
|
|
104
104
|
if options[:json_generator]
|
105
105
|
params = ViralSeq::TcsJson.generate
|
106
106
|
elsif options[:dr]
|
107
|
-
params = ViralSeq::TcsDr::PARAMS
|
107
|
+
params = ViralSeq::TcsDr::PARAMS
|
108
108
|
elsif (options[:params_json] && File.exist?(options[:params_json]))
|
109
109
|
params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
|
110
110
|
else
|
@@ -163,6 +163,24 @@ begin
|
|
163
163
|
$platform_sequencing_length = 300
|
164
164
|
end
|
165
165
|
|
166
|
+
r1_raw_size = r1_fastq_sh.dna_hash.values[0].size
|
167
|
+
r2_raw_size = r2_fastq_sh.dna_hash.values[0].size
|
168
|
+
|
169
|
+
if r1_raw_size >= $platform_sequencing_length
|
170
|
+
r1_size_diff = r1_raw_size - $platform_sequencing_length
|
171
|
+
else
|
172
|
+
raise StandardError.new "R1 size smaller than the input platform format #{$platform_sequencing_length} bp."
|
173
|
+
end
|
174
|
+
|
175
|
+
if r2_raw_size >= $platform_sequencing_length
|
176
|
+
r2_size_diff = r2_raw_size - $platform_sequencing_length
|
177
|
+
else
|
178
|
+
raise StandardError.new "R2 size smaller than the input platform format #{$platform_sequencing_length} bp."
|
179
|
+
end
|
180
|
+
|
181
|
+
r1_truncate_base_number = 2 + r1_size_diff
|
182
|
+
r2_truncate_base_number = 2 + r2_size_diff
|
183
|
+
|
166
184
|
primers = params[:primer_pairs]
|
167
185
|
if primers.empty? or primers.nil?
|
168
186
|
ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
|
@@ -235,8 +253,8 @@ begin
|
|
235
253
|
r2_seq = r2_passed_seq[seqtag]
|
236
254
|
pid = r2_seq[0, pid_length]
|
237
255
|
id[seqtag] = pid
|
238
|
-
bio_r2[seqtag] = r2_seq[filter_r2[:reverse_starting_number]..-
|
239
|
-
bio_r1[seqtag] = r1_seq[filter_r1[:forward_starting_number]..-
|
256
|
+
bio_r2[seqtag] = r2_seq[filter_r2[:reverse_starting_number]..-r2_truncate_base_number]
|
257
|
+
bio_r1[seqtag] = r1_seq[filter_r1[:forward_starting_number]..-r1_truncate_base_number]
|
240
258
|
end
|
241
259
|
|
242
260
|
# TCS cut-off
|
data/bin/tcs_log
CHANGED
data/lib/viral_seq/sequence.rb
CHANGED
@@ -180,7 +180,7 @@ module ViralSeq
|
|
180
180
|
l1 = 0
|
181
181
|
l2 = 0
|
182
182
|
|
183
|
-
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :
|
183
|
+
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :Super5, path_to_muscle)
|
184
184
|
aln_test = aln_seq[1]
|
185
185
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
186
186
|
gap_begin = $1.size
|
@@ -214,7 +214,7 @@ module ViralSeq
|
|
214
214
|
l2 = l2 + (post_aln - b2)
|
215
215
|
end
|
216
216
|
|
217
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, :
|
217
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
|
218
218
|
aln_test = aln_seq[1]
|
219
219
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
220
220
|
gap_begin = $1.size
|
@@ -263,7 +263,7 @@ module ViralSeq
|
|
263
263
|
end
|
264
264
|
|
265
265
|
while repeat == 1
|
266
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, :
|
266
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
|
267
267
|
aln_test = aln_seq[1]
|
268
268
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
269
269
|
gap_begin = $1.size
|
@@ -293,7 +293,7 @@ module ViralSeq
|
|
293
293
|
end
|
294
294
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
295
295
|
|
296
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, :
|
296
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
|
297
297
|
aln_test = aln_seq[1]
|
298
298
|
ref = aln_seq[0]
|
299
299
|
|
@@ -307,7 +307,7 @@ module ViralSeq
|
|
307
307
|
|
308
308
|
if (ori_ref_l - l2 - 1) >= l1
|
309
309
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
310
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, :
|
310
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
|
311
311
|
aln_test = aln_seq[1]
|
312
312
|
ref = aln_seq[0]
|
313
313
|
|
data/lib/viral_seq/string.rb
CHANGED
@@ -56,6 +56,43 @@ class String
|
|
56
56
|
Regexp.new match
|
57
57
|
end
|
58
58
|
|
59
|
+
# parse the nucleotide sequences as an Array of Array
|
60
|
+
# @return [Array] Array of Array at each position
|
61
|
+
# @example parse a sequence with ambiguities to Array of Array
|
62
|
+
# "ATRWCG".nt_to_array
|
63
|
+
# => [["A"], ["T"], ["A", "G"], ["A", "T"], ["C"], ["G"]]
|
64
|
+
|
65
|
+
def nt_to_array
|
66
|
+
return_array = []
|
67
|
+
self.each_char.each do |base|
|
68
|
+
base_array = base.to_list
|
69
|
+
return_array.append base_array
|
70
|
+
end
|
71
|
+
return return_array
|
72
|
+
end
|
73
|
+
|
74
|
+
|
75
|
+
# compare the given nt sequence string with the ref sequence string
|
76
|
+
# @param ref [String] the ref sequence string to compare with
|
77
|
+
# @return [Integer] Number of differences
|
78
|
+
# @example count the differences between a sequence and a reference containing ambiguities
|
79
|
+
# "ATGCGG".nt_diff("ATRWCG")
|
80
|
+
# => 2
|
81
|
+
|
82
|
+
def nt_diff(ref)
|
83
|
+
count_diff = 0
|
84
|
+
self_array = self.split("")
|
85
|
+
ref_array = ref.nt_to_array
|
86
|
+
self_array.each_with_index do |nt, i|
|
87
|
+
ref_nt = ref_array[i]
|
88
|
+
unless ref_nt.include? nt
|
89
|
+
count_diff += 1
|
90
|
+
end
|
91
|
+
end
|
92
|
+
return count_diff
|
93
|
+
end
|
94
|
+
|
95
|
+
|
59
96
|
# parse IUPAC nucleotide ambiguity codes (W S M K R Y B D H V N) as String if String.size == 1
|
60
97
|
# @return [Array] parsed nt bases
|
61
98
|
# @example parse IUPAC `R`
|
data/lib/viral_seq/tcs_core.rb
CHANGED
@@ -223,7 +223,7 @@ module ViralSeq
|
|
223
223
|
end
|
224
224
|
forward_bio_primer_size = forward_bio_primer.size
|
225
225
|
forward_starting_number = forward_n + forward_bio_primer_size
|
226
|
-
forward_primer_ref = forward_bio_primer.nt_parser
|
226
|
+
#forward_primer_ref = forward_bio_primer.nt_parser
|
227
227
|
|
228
228
|
r1_passed_seq = {}
|
229
229
|
r1_raw = r1_sh.dna_hash
|
@@ -232,7 +232,7 @@ module ViralSeq
|
|
232
232
|
seq = r1_raw[name]
|
233
233
|
next unless general_filter seq
|
234
234
|
primer_region_seq = seq[forward_n, forward_bio_primer_size]
|
235
|
-
if primer_region_seq
|
235
|
+
if primer_region_seq.nt_diff(forward_bio_primer) < 3
|
236
236
|
new_name = remove_tag name
|
237
237
|
r1_passed_seq[new_name] = seq
|
238
238
|
end
|
@@ -255,13 +255,13 @@ module ViralSeq
|
|
255
255
|
cdna_bio_primer = $2
|
256
256
|
cdna_bio_primer_size = cdna_bio_primer.size
|
257
257
|
reverse_starting_number = pid_length + cdna_bio_primer_size
|
258
|
-
|
258
|
+
# cdna_primer_ref = cdna_bio_primer.nt_to_array
|
259
259
|
r2_passed_seq = {}
|
260
260
|
proc_filter = proc do |name|
|
261
261
|
seq = r2_raw[name]
|
262
262
|
next unless general_filter seq
|
263
263
|
primer_region_seq = seq[pid_length, cdna_bio_primer_size]
|
264
|
-
if primer_region_seq
|
264
|
+
if primer_region_seq.nt_diff(cdna_bio_primer) < 4
|
265
265
|
new_name = remove_tag name
|
266
266
|
r2_passed_seq[new_name] = seq
|
267
267
|
end
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.7.
|
4
|
+
version: 1.7.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2023-05-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|