viral_seq 1.6.1 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +3 -3
- data/README.md +26 -1
- data/bin/tcs +23 -5
- data/bin/tcs_sdrm +32 -2
- data/docs/dr.json +15 -0
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/muscle.rb +8 -2
- data/lib/viral_seq/seq_hash.rb +19 -11
- data/lib/viral_seq/sequence.rb +12 -13
- data/lib/viral_seq/tcs_dr.rb +20 -5
- data/lib/viral_seq/version.rb +2 -2
- data/viral_seq.gemspec +1 -1
- metadata +4 -4
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
|
|
4
|
+
data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
|
|
7
|
+
data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
|
data/Gemfile.lock
CHANGED
|
@@ -1,10 +1,10 @@
|
|
|
1
1
|
PATH
|
|
2
2
|
remote: .
|
|
3
3
|
specs:
|
|
4
|
-
viral_seq (1.6.
|
|
4
|
+
viral_seq (1.6.5)
|
|
5
5
|
colorize (~> 0.1)
|
|
6
6
|
combine_pdf (~> 1.0, >= 1.0.0)
|
|
7
|
-
muscle_bio (~> 0.
|
|
7
|
+
muscle_bio (~> 0.5)
|
|
8
8
|
prawn (~> 2.3, >= 2.3.0)
|
|
9
9
|
prawn-table (~> 0.2, >= 0.2.0)
|
|
10
10
|
|
|
@@ -15,7 +15,7 @@ GEM
|
|
|
15
15
|
combine_pdf (1.0.21)
|
|
16
16
|
ruby-rc4 (>= 0.1.5)
|
|
17
17
|
diff-lcs (1.3)
|
|
18
|
-
muscle_bio (0.
|
|
18
|
+
muscle_bio (0.5.0)
|
|
19
19
|
pdf-core (0.9.0)
|
|
20
20
|
prawn (2.4.0)
|
|
21
21
|
pdf-core (~> 0.9.0)
|
data/README.md
CHANGED
|
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
|
|
10
10
|
|
|
11
11
|
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
12
12
|
|
|
13
|
+
CLI tools `tcs`, `tcs_sdrm`, `tcs_log` and `locator` are included in the gem.
|
|
14
|
+
|
|
13
15
|
#### tcs web app - https://primer-id.org/
|
|
14
16
|
|
|
15
17
|
## Illustration for the Primer ID Sequencing
|
|
@@ -22,6 +24,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
|
22
24
|
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
|
23
25
|
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
|
24
26
|
|
|
27
|
+
## Requirements
|
|
28
|
+
|
|
29
|
+
Required Ruby Version: >= 2.5
|
|
30
|
+
|
|
31
|
+
Required RubyGems version: >= 1.3.6
|
|
32
|
+
|
|
25
33
|
## Install
|
|
26
34
|
|
|
27
35
|
```bash
|
|
@@ -179,10 +187,27 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
|
179
187
|
|
|
180
188
|
## Updates
|
|
181
189
|
|
|
190
|
+
### Version-1.7.0-08242022
|
|
191
|
+
|
|
192
|
+
1. Added a warning when the `tcs` pipeline is executed from source instead of being installed via `gem`.
|
|
193
|
+
2. Optimized the `ViralSeq::SeqHash#a3g` hypermut algorithm. It now allows an external reference other than the sample reference.
|
|
194
|
+
|
|
195
|
+
### Version-1.6.4-07182022
|
|
196
|
+
|
|
197
|
+
1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
|
|
198
|
+
2. Loosened the locator params for the "V1V3" end region to handle rare alignment issues. The default "V1V3" region now ends at position 7205 to 7210 instead of 7208.
|
|
199
|
+
3. `tcs_sdrm` now analyzes the "P17" region for pairwise diversity.
|
|
200
|
+
|
|
201
|
+
### Version-1.6.3-02052022
|
|
202
|
+
|
|
203
|
+
1. Updated the `ViralSeq::Muscle` module to follow the update of `muscle` from version 3.8.1 to 5.1.
|
|
204
|
+
2. Optimized the `locator` algorithm based on `muscle` v5.1.
|
|
205
|
+
3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
|
|
206
|
+
|
|
182
207
|
### Version-1.6.1-02022022
|
|
183
208
|
|
|
184
209
|
1. Fixed the `nav bar` in tcs_log html file.
|
|
185
|
-
2. Fixed a typo in `tcs`.
|
|
210
|
+
2. Fixed a typo in `tcs`.
|
|
186
211
|
|
|
187
212
|
### Version 1.6.0-01042022
|
|
188
213
|
|
data/bin/tcs
CHANGED
|
@@ -22,20 +22,38 @@
|
|
|
22
22
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23
23
|
# THE SOFTWARE.
|
|
24
24
|
|
|
25
|
+
# Install using `gem install viral_seq`
|
|
25
26
|
# Use JSON file as the run param
|
|
26
27
|
# run `tcs -j` to generate param json file.
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
def gem_installed?(gem_name)
|
|
30
|
+
found_gem = false
|
|
31
|
+
begin
|
|
32
|
+
found_gem = Gem::Specification.find_by_name(gem_name)
|
|
33
|
+
rescue Gem::LoadError
|
|
34
|
+
return false
|
|
35
|
+
else
|
|
36
|
+
return true
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
if gem_installed?('viral_seq')
|
|
41
|
+
require 'viral_seq'
|
|
42
|
+
else
|
|
43
|
+
printf "\n****************************************************\n"
|
|
44
|
+
printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
|
|
45
|
+
printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
|
|
46
|
+
printf "****************************************************\n\n"
|
|
47
|
+
exit 1
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
|
|
29
51
|
require 'json'
|
|
30
52
|
require 'colorize'
|
|
31
53
|
require 'optparse'
|
|
32
54
|
|
|
33
55
|
options = {}
|
|
34
56
|
|
|
35
|
-
# banner = '-'*50 + "\n" +
|
|
36
|
-
# '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |' + "\n" +
|
|
37
|
-
# '-'*50 + "\n"
|
|
38
|
-
|
|
39
57
|
banner = "\n" +
|
|
40
58
|
"████████ ██████ ███████ ██████ ██ ██████ ███████ ██ ██ ███ ██ ███████\n".light_red +
|
|
41
59
|
" ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██\n".light_yellow +
|
data/bin/tcs_sdrm
CHANGED
|
@@ -172,6 +172,25 @@ libs.each do |lib|
|
|
|
172
172
|
linkage_list += sdrm[1]
|
|
173
173
|
aa_report_list += sdrm[2]
|
|
174
174
|
|
|
175
|
+
elsif seq_basename =~/P17/i
|
|
176
|
+
a3g_check = seqs.a3g
|
|
177
|
+
a3g_seqs = a3g_check[:a3g_seq]
|
|
178
|
+
a3g_filtered_seqs = a3g_check[:filtered_seq]
|
|
179
|
+
stop_codon_check = a3g_filtered_seqs.stop_codon(2)
|
|
180
|
+
stop_codon_seqs = stop_codon_check[:with_stop_codon]
|
|
181
|
+
filtered_seqs = stop_codon_check[:without_stop_codon]
|
|
182
|
+
poisson_minority_cutoff = filtered_seqs.pm
|
|
183
|
+
fdr_hash = filtered_seqs.fdr
|
|
184
|
+
summary_hash[:P17] = [
|
|
185
|
+
seqs.size.to_s,
|
|
186
|
+
a3g_seqs.size.to_s,
|
|
187
|
+
stop_codon_seqs.size.to_s,
|
|
188
|
+
filtered_seqs.size.to_s,
|
|
189
|
+
poisson_minority_cutoff.to_s
|
|
190
|
+
].join(',')
|
|
191
|
+
next if filtered_seqs.size < 3
|
|
192
|
+
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
|
193
|
+
|
|
175
194
|
elsif seq_basename =~/RT/i
|
|
176
195
|
rt_seq1 = {}
|
|
177
196
|
rt_seq2 = {}
|
|
@@ -229,7 +248,7 @@ libs.each do |lib|
|
|
|
229
248
|
filtered_seq_files.each do |seq_file|
|
|
230
249
|
filtered_sh = ViralSeq::SeqHash.fa(seq_file)
|
|
231
250
|
next if filtered_sh.size < 3
|
|
232
|
-
aligned_sh = filtered_sh.random_select(1000).align
|
|
251
|
+
aligned_sh = filtered_sh.random_select(1000).align(:Super5)
|
|
233
252
|
aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
|
|
234
253
|
end
|
|
235
254
|
|
|
@@ -249,7 +268,7 @@ libs.each do |lib|
|
|
|
249
268
|
tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
|
|
250
269
|
summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
|
|
251
270
|
end
|
|
252
|
-
[:PR, :RT, :IN, :V1V3].each do |regions|
|
|
271
|
+
[:PR, :RT, :IN, :V1V3, :P17].each do |regions|
|
|
253
272
|
next unless summary_hash[regions]
|
|
254
273
|
seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
|
|
255
274
|
end
|
|
@@ -270,10 +289,13 @@ libs.each do |lib|
|
|
|
270
289
|
tcs_RT = 0
|
|
271
290
|
tcs_IN = 0
|
|
272
291
|
tcs_V1V3 = 0
|
|
292
|
+
tcs_P17 = 0
|
|
273
293
|
pi_RT = 0.0
|
|
274
294
|
pi_V1V3 = 0.0
|
|
295
|
+
pi_P17 = 0.0
|
|
275
296
|
dist20_RT = 0.0
|
|
276
297
|
dist20_V1V3 = 0.0
|
|
298
|
+
dist20_P17 = 0.0
|
|
277
299
|
summary_lines.each do |line|
|
|
278
300
|
data = line.chomp.split(",")
|
|
279
301
|
if data[0] == "PR"
|
|
@@ -288,6 +310,10 @@ libs.each do |lib|
|
|
|
288
310
|
tcs_V1V3 = data[1].to_i
|
|
289
311
|
pi_V1V3 = data[6].to_f
|
|
290
312
|
dist20_V1V3 = data[7].to_f
|
|
313
|
+
elsif data[0] == "P17"
|
|
314
|
+
tcs_P17 = data[4].to_i
|
|
315
|
+
pi_P17 = data[6].to_f
|
|
316
|
+
dist20_P17 = data[7].to_f
|
|
291
317
|
end
|
|
292
318
|
end
|
|
293
319
|
|
|
@@ -323,9 +349,13 @@ libs.each do |lib|
|
|
|
323
349
|
tcs_RT: tcs_RT,
|
|
324
350
|
tcs_IN: tcs_IN,
|
|
325
351
|
tcs_V1V3: tcs_V1V3,
|
|
352
|
+
tcs_P17: tcs_P17,
|
|
326
353
|
pi_RT: pi_RT,
|
|
354
|
+
pi_V1V3: pi_V1V3,
|
|
355
|
+
pi_P17: pi_P17,
|
|
327
356
|
dist20_RT: dist20_RT,
|
|
328
357
|
dist20_V1V3: dist20_V1V3,
|
|
358
|
+
dist20_P17: dist20_P17,
|
|
329
359
|
recency: recency,
|
|
330
360
|
sdrm_PR: sdrm_PR,
|
|
331
361
|
sdrm_RT: sdrm_RT,
|
data/docs/dr.json
CHANGED
|
@@ -62,6 +62,21 @@
|
|
|
62
62
|
"ref_end": 7208,
|
|
63
63
|
"indel": true,
|
|
64
64
|
"trim": false
|
|
65
|
+
},
|
|
66
|
+
{
|
|
67
|
+
"region": "P17",
|
|
68
|
+
"cdna": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
|
69
|
+
"forward": "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
|
|
70
|
+
"majority": 0.5,
|
|
71
|
+
"end_join": true,
|
|
72
|
+
"end_join_option": 1,
|
|
73
|
+
"overlap": 0,
|
|
74
|
+
"TCS_QC": true,
|
|
75
|
+
"ref_genome": "HXB2",
|
|
76
|
+
"ref_start": 1196,
|
|
77
|
+
"ref_end": 1725,
|
|
78
|
+
"indel": true,
|
|
79
|
+
"trim": false
|
|
65
80
|
}
|
|
66
81
|
]
|
|
67
82
|
}
|
data/lib/viral_seq/hivdr.rb
CHANGED
|
@@ -5,7 +5,7 @@ module ViralSeq
|
|
|
5
5
|
# functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
|
|
6
6
|
# works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
|
|
7
7
|
# PR codon 1-99
|
|
8
|
-
# RT codon 34-122 (HXB2
|
|
8
|
+
# RT codon 34-122 (HXB2 2649-2914) and 152-236(3001-3257)
|
|
9
9
|
# IN codon 53-174 (HXB2 4384-4751)
|
|
10
10
|
# @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
|
|
11
11
|
# can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
|
data/lib/viral_seq/muscle.rb
CHANGED
|
@@ -28,6 +28,8 @@ module ViralSeq
|
|
|
28
28
|
# align a sequence with reference sequence Strings
|
|
29
29
|
# @param ref_seq [String] reference sequence
|
|
30
30
|
# @param test_seq [String] test sequence
|
|
31
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
|
32
|
+
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
|
31
33
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
|
32
34
|
# @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
|
|
33
35
|
# if it cannot find the MUSCLE executable
|
|
@@ -37,7 +39,7 @@ module ViralSeq
|
|
|
37
39
|
# aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
|
|
38
40
|
# => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
|
|
39
41
|
|
|
40
|
-
def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
|
|
42
|
+
def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
|
|
41
43
|
temp_dir = Dir.home
|
|
42
44
|
temp_name = "_" + SecureRandom.alphanumeric
|
|
43
45
|
temp_file = File.join(temp_dir, temp_name)
|
|
@@ -56,7 +58,11 @@ module ViralSeq
|
|
|
56
58
|
end
|
|
57
59
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
|
58
60
|
else
|
|
59
|
-
MuscleBio.
|
|
61
|
+
if MuscleBio::VERSION.to_f < 0.5
|
|
62
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
|
63
|
+
else
|
|
64
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
|
65
|
+
end
|
|
60
66
|
end
|
|
61
67
|
aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
|
|
62
68
|
File.unlink(temp_file)
|
data/lib/viral_seq/seq_hash.rb
CHANGED
|
@@ -223,7 +223,7 @@ module ViralSeq
|
|
|
223
223
|
|
|
224
224
|
# check the size range of the DNA sequences of the SeqHash object
|
|
225
225
|
# @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
|
|
226
|
-
|
|
226
|
+
|
|
227
227
|
def check_nt_size
|
|
228
228
|
dna_hash = self.dna_hash
|
|
229
229
|
size_array = []
|
|
@@ -450,7 +450,7 @@ module ViralSeq
|
|
|
450
450
|
# function to determine if the sequences have APOBEC3g/f hypermutation.
|
|
451
451
|
# # APOBEC3G/F pattern: GRD -> ARD
|
|
452
452
|
# # control pattern: G[YN|RC] -> A[YN|RC]
|
|
453
|
-
# # use the sample consensus to determine potential a3g sites
|
|
453
|
+
# # use the sample consensus to determine potential a3g sites (default) or provide external reference sequences as a `String`
|
|
454
454
|
# # Two criteria to identify hypermutation
|
|
455
455
|
# # 1. Fisher's exact test on the frequencies of G to A mutation at A3G positions vs. non-A3G positions
|
|
456
456
|
# # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
|
|
@@ -486,7 +486,7 @@ module ViralSeq
|
|
|
486
486
|
# # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
|
|
487
487
|
# @see https://www.hiv.lanl.gov/content/sequence/HYPERMUT/hypermut.html LANL Hypermut
|
|
488
488
|
|
|
489
|
-
def a3g_hypermut
|
|
489
|
+
def a3g_hypermut(ref = nil)
|
|
490
490
|
# mut_hash number of apobec3g/f mutations per sequence
|
|
491
491
|
mut_hash = {}
|
|
492
492
|
hm_hash = {}
|
|
@@ -495,8 +495,10 @@ module ViralSeq
|
|
|
495
495
|
# total G->A mutations at apobec3g/f positions.
|
|
496
496
|
total = 0
|
|
497
497
|
|
|
498
|
-
|
|
499
|
-
|
|
498
|
+
unless ref
|
|
499
|
+
# make consensus sequence for the input sequence hash
|
|
500
|
+
ref = self.consensus
|
|
501
|
+
end
|
|
500
502
|
|
|
501
503
|
# obtain apobec3g positions and control positions
|
|
502
504
|
apobec = apobec3gf(ref)
|
|
@@ -509,7 +511,6 @@ module ViralSeq
|
|
|
509
511
|
c = 0 # control muts
|
|
510
512
|
d = 0 # potenrial controls
|
|
511
513
|
mut.each do |n|
|
|
512
|
-
next if v[n] == "-"
|
|
513
514
|
if v[n] == "A"
|
|
514
515
|
a += 1
|
|
515
516
|
b += 1
|
|
@@ -521,7 +522,6 @@ module ViralSeq
|
|
|
521
522
|
total += a
|
|
522
523
|
|
|
523
524
|
control.each do |n|
|
|
524
|
-
next if v[n] == "-"
|
|
525
525
|
if v[n] == "A"
|
|
526
526
|
c += 1
|
|
527
527
|
d += 1
|
|
@@ -544,7 +544,7 @@ module ViralSeq
|
|
|
544
544
|
end
|
|
545
545
|
end
|
|
546
546
|
|
|
547
|
-
if self.dna_hash.size >
|
|
547
|
+
if self.dna_hash.size > 200
|
|
548
548
|
rate = total.to_f/(self.dna_hash.size)
|
|
549
549
|
count_mut = mut_hash.values.count_freq
|
|
550
550
|
maxi_count = count_mut.values.max
|
|
@@ -566,10 +566,12 @@ module ViralSeq
|
|
|
566
566
|
end
|
|
567
567
|
end
|
|
568
568
|
end
|
|
569
|
+
|
|
569
570
|
hm_seq_hash = ViralSeq::SeqHash.new
|
|
570
571
|
hm_hash.each do |k,_v|
|
|
571
572
|
hm_seq_hash.dna_hash[k] = self.dna_hash[k]
|
|
572
573
|
end
|
|
574
|
+
|
|
573
575
|
hm_seq_hash.title = self.title + "_hypermut"
|
|
574
576
|
hm_seq_hash.file = self.file
|
|
575
577
|
filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
|
|
@@ -711,10 +713,11 @@ module ViralSeq
|
|
|
711
713
|
|
|
712
714
|
|
|
713
715
|
# align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
|
|
716
|
+
# @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
|
|
714
717
|
# @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
|
|
715
718
|
# @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
|
|
716
719
|
|
|
717
|
-
def align(path_to_muscle = false)
|
|
720
|
+
def align(algorithm = :PPP, path_to_muscle = false)
|
|
718
721
|
seq_hash = self.dna_hash
|
|
719
722
|
if self.file.size > 0
|
|
720
723
|
temp_dir = File.dirname(self.file)
|
|
@@ -732,7 +735,11 @@ module ViralSeq
|
|
|
732
735
|
end
|
|
733
736
|
print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
|
|
734
737
|
else
|
|
735
|
-
MuscleBio.
|
|
738
|
+
if MuscleBio::VERSION.to_f < 0.5
|
|
739
|
+
MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
|
|
740
|
+
else
|
|
741
|
+
MuscleBio.exec(temp_file, temp_aln, algorithm)
|
|
742
|
+
end
|
|
736
743
|
end
|
|
737
744
|
out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
|
|
738
745
|
out_seq_hash.title = self.title + "_aligned"
|
|
@@ -1351,7 +1358,7 @@ module ViralSeq
|
|
|
1351
1358
|
# APOBEC3G/F pattern: GRD -> ARD,
|
|
1352
1359
|
# control pattern: G[YN|RC] -> A[YN|RC],
|
|
1353
1360
|
def apobec3gf(seq = '')
|
|
1354
|
-
seq.tr!("-", "")
|
|
1361
|
+
#seq.tr!("-", "")
|
|
1355
1362
|
seq_length = seq.size
|
|
1356
1363
|
apobec_position = []
|
|
1357
1364
|
control_position = []
|
|
@@ -1363,6 +1370,7 @@ module ViralSeq
|
|
|
1363
1370
|
control_position << n
|
|
1364
1371
|
end
|
|
1365
1372
|
end
|
|
1373
|
+
|
|
1366
1374
|
return [apobec_position,control_position]
|
|
1367
1375
|
end # end of #apobec3gf
|
|
1368
1376
|
|
data/lib/viral_seq/sequence.rb
CHANGED
|
@@ -180,7 +180,7 @@ module ViralSeq
|
|
|
180
180
|
l1 = 0
|
|
181
181
|
l2 = 0
|
|
182
182
|
|
|
183
|
-
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
|
|
183
|
+
aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
|
|
184
184
|
aln_test = aln_seq[1]
|
|
185
185
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
|
186
186
|
gap_begin = $1.size
|
|
@@ -214,7 +214,7 @@ module ViralSeq
|
|
|
214
214
|
l2 = l2 + (post_aln - b2)
|
|
215
215
|
end
|
|
216
216
|
|
|
217
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
|
217
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
|
218
218
|
aln_test = aln_seq[1]
|
|
219
219
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
|
220
220
|
gap_begin = $1.size
|
|
@@ -240,22 +240,22 @@ module ViralSeq
|
|
|
240
240
|
repeat = 0
|
|
241
241
|
|
|
242
242
|
if g1 == g2 and (s1 + g1 + s2) == ref.size
|
|
243
|
-
if s1 > s2 and g2
|
|
243
|
+
if s1 > s2 and g2 >= s2
|
|
244
244
|
ref = ref[0..(-g2-1)]
|
|
245
245
|
repeat = 1
|
|
246
246
|
l2 = l2 + g2
|
|
247
|
-
elsif s1 < s2 and g1
|
|
247
|
+
elsif s1 < s2 and g1 >= s1
|
|
248
248
|
ref = ref[g1..-1]
|
|
249
249
|
repeat = 1
|
|
250
250
|
l1 = l1 + g1
|
|
251
251
|
end
|
|
252
252
|
else
|
|
253
|
-
if g1
|
|
253
|
+
if g1 >= s1
|
|
254
254
|
ref = ref[g1..-1]
|
|
255
255
|
repeat = 1
|
|
256
256
|
l1 = l1 + g1
|
|
257
257
|
end
|
|
258
|
-
if g2
|
|
258
|
+
if g2 >= s2
|
|
259
259
|
ref = ref[0..(-g2 - 1)]
|
|
260
260
|
repeat = 1
|
|
261
261
|
l2 = l2 + g2
|
|
@@ -263,7 +263,7 @@ module ViralSeq
|
|
|
263
263
|
end
|
|
264
264
|
|
|
265
265
|
while repeat == 1
|
|
266
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
|
266
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
|
267
267
|
aln_test = aln_seq[1]
|
|
268
268
|
aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
|
|
269
269
|
gap_begin = $1.size
|
|
@@ -280,12 +280,12 @@ module ViralSeq
|
|
|
280
280
|
l1 = l1 + gap_begin
|
|
281
281
|
l2 = l2 + gap_end
|
|
282
282
|
repeat = 0
|
|
283
|
-
if g1
|
|
283
|
+
if g1 >= s1
|
|
284
284
|
ref = ref[g1..-1]
|
|
285
285
|
repeat = 1
|
|
286
286
|
l1 = l1 + g1
|
|
287
287
|
end
|
|
288
|
-
if g2
|
|
288
|
+
if g2 >= s2
|
|
289
289
|
ref = ref[0..(-g2 - 1)]
|
|
290
290
|
repeat = 1
|
|
291
291
|
l2 = l2 + g2
|
|
@@ -293,8 +293,7 @@ module ViralSeq
|
|
|
293
293
|
end
|
|
294
294
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
|
295
295
|
|
|
296
|
-
|
|
297
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
|
296
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
|
298
297
|
aln_test = aln_seq[1]
|
|
299
298
|
ref = aln_seq[0]
|
|
300
299
|
|
|
@@ -303,12 +302,12 @@ module ViralSeq
|
|
|
303
302
|
if ref =~ /^(\-+)/
|
|
304
303
|
l1 = l1 - $1.size
|
|
305
304
|
elsif ref =~ /(\-+)$/
|
|
306
|
-
l2 = l2
|
|
305
|
+
l2 = l2 - $1.size
|
|
307
306
|
end
|
|
308
307
|
|
|
309
308
|
if (ori_ref_l - l2 - 1) >= l1
|
|
310
309
|
ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
|
|
311
|
-
aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
|
|
310
|
+
aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
|
|
312
311
|
aln_test = aln_seq[1]
|
|
313
312
|
ref = aln_seq[0]
|
|
314
313
|
|
data/lib/viral_seq/tcs_dr.rb
CHANGED
|
@@ -16,7 +16,7 @@ module ViralSeq
|
|
|
16
16
|
:ref_genome=>"HXB2",
|
|
17
17
|
:ref_start=>2648,
|
|
18
18
|
:ref_end=>3257,
|
|
19
|
-
:indel=>
|
|
19
|
+
:indel=>true,
|
|
20
20
|
:trim=>false},
|
|
21
21
|
{:region=>"PR",
|
|
22
22
|
:cdna=>
|
|
@@ -41,7 +41,7 @@ module ViralSeq
|
|
|
41
41
|
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
|
42
42
|
:majority=>0,
|
|
43
43
|
:end_join=>true,
|
|
44
|
-
:end_join_option=>
|
|
44
|
+
:end_join_option=>2,
|
|
45
45
|
:overlap=>171,
|
|
46
46
|
:TCS_QC=>true,
|
|
47
47
|
:ref_genome=>"HXB2",
|
|
@@ -61,11 +61,26 @@ module ViralSeq
|
|
|
61
61
|
:TCS_QC=>true,
|
|
62
62
|
:ref_genome=>"HXB2",
|
|
63
63
|
:ref_start=>6585,
|
|
64
|
-
:ref_end=>
|
|
64
|
+
:ref_end=>7205..7210,
|
|
65
65
|
:indel=>true,
|
|
66
|
-
:trim=>false}
|
|
66
|
+
:trim=>false},
|
|
67
|
+
{:region=>"P17",
|
|
68
|
+
:cdna=>
|
|
69
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
|
70
|
+
:forward=>
|
|
71
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
|
|
72
|
+
:majority=>0,
|
|
73
|
+
:end_join=>true,
|
|
74
|
+
:end_join_option=>1,
|
|
75
|
+
:overlap=>0,
|
|
76
|
+
:TCS_QC=>true,
|
|
77
|
+
:ref_genome=>"HXB2",
|
|
78
|
+
:ref_start=>1196,
|
|
79
|
+
:ref_end=>1725,
|
|
80
|
+
:indel=>true,
|
|
81
|
+
:trim=>false}
|
|
67
82
|
]
|
|
68
|
-
|
|
83
|
+
}
|
|
69
84
|
end
|
|
70
85
|
|
|
71
86
|
end
|
data/lib/viral_seq/version.rb
CHANGED
data/viral_seq.gemspec
CHANGED
|
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
|
|
|
35
35
|
spec.required_rubygems_version = '>= 1.3.6'
|
|
36
36
|
|
|
37
37
|
# muscle_bio gem required
|
|
38
|
-
spec.add_runtime_dependency "muscle_bio", "~> 0.
|
|
38
|
+
spec.add_runtime_dependency "muscle_bio", "~> 0.5"
|
|
39
39
|
|
|
40
40
|
# colorize gem required
|
|
41
41
|
spec.add_runtime_dependency "colorize", "~> 0.1"
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: viral_seq
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.
|
|
4
|
+
version: 1.7.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shuntai Zhou
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2022-
|
|
12
|
+
date: 2022-08-25 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: bundler
|
|
@@ -59,14 +59,14 @@ dependencies:
|
|
|
59
59
|
requirements:
|
|
60
60
|
- - "~>"
|
|
61
61
|
- !ruby/object:Gem::Version
|
|
62
|
-
version: '0.
|
|
62
|
+
version: '0.5'
|
|
63
63
|
type: :runtime
|
|
64
64
|
prerelease: false
|
|
65
65
|
version_requirements: !ruby/object:Gem::Requirement
|
|
66
66
|
requirements:
|
|
67
67
|
- - "~>"
|
|
68
68
|
- !ruby/object:Gem::Version
|
|
69
|
-
version: '0.
|
|
69
|
+
version: '0.5'
|
|
70
70
|
- !ruby/object:Gem::Dependency
|
|
71
71
|
name: colorize
|
|
72
72
|
requirement: !ruby/object:Gem::Requirement
|