viral_seq 1.6.4 → 1.7.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -0
- data/bin/tcs +23 -5
- data/lib/viral_seq/seq_hash.rb +11 -8
- data/lib/viral_seq/version.rb +1 -1
- metadata +2 -3
- data/rc_swans.svc@longleaf.unc.edu +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
|
4
|
+
data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
|
7
|
+
data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
|
10
10
|
|
11
11
|
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
12
12
|
|
13
|
+
CLI tools `tcs`, `tcs_sdrm`, `tcs_log` and `locator` are included in the gem.
|
14
|
+
|
13
15
|
#### tcs web app - https://primer-id.org/
|
14
16
|
|
15
17
|
## Illustration for the Primer ID Sequencing
|
@@ -22,6 +24,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
22
24
|
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
23
25
|
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
24
26
|
|
27
|
+
## Requirements
|
28
|
+
|
29
|
+
Required Ruby Version: >= 2.5
|
30
|
+
|
31
|
+
Required RubyGems version: >= 1.3.6
|
32
|
+
|
25
33
|
## Install
|
26
34
|
|
27
35
|
```bash
|
@@ -179,6 +187,11 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
179
187
|
|
180
188
|
## Updates
|
181
189
|
|
190
|
+
### Version-1.7.0-08242022
|
191
|
+
|
192
|
+
1. Added a warning when the `tcs` pipeline is executed from source instead of being installed via `gem`.
|
193
|
+
2. Optimized the `ViralSeq::SeqHash#a3g_hypermut` algorithm, which now allows an external reference sequence to be used instead of the sample consensus.
|
194
|
+
|
182
195
|
### Version-1.6.4-07182022
|
183
196
|
|
184
197
|
1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
|
data/bin/tcs
CHANGED
@@ -22,20 +22,38 @@
|
|
22
22
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23
23
|
# THE SOFTWARE.
|
24
24
|
|
25
|
+
# Install using `gem install viral_seq`
|
25
26
|
# Use JSON file as the run param
|
26
27
|
# run `tcs -j` to generate param json file.
|
27
28
|
|
28
|
-
|
29
|
+
def gem_installed?(gem_name)
|
30
|
+
found_gem = false
|
31
|
+
begin
|
32
|
+
found_gem = Gem::Specification.find_by_name(gem_name)
|
33
|
+
rescue Gem::LoadError
|
34
|
+
return false
|
35
|
+
else
|
36
|
+
return true
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
if gem_installed?('viral_seq')
|
41
|
+
require 'viral_seq'
|
42
|
+
else
|
43
|
+
printf "\n****************************************************\n"
|
44
|
+
printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
|
45
|
+
printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
|
46
|
+
printf "****************************************************\n\n"
|
47
|
+
exit 1
|
48
|
+
end
|
49
|
+
|
50
|
+
|
29
51
|
require 'json'
|
30
52
|
require 'colorize'
|
31
53
|
require 'optparse'
|
32
54
|
|
33
55
|
options = {}
|
34
56
|
|
35
|
-
# banner = '-'*50 + "\n" +
|
36
|
-
# '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |' + "\n" +
|
37
|
-
# '-'*50 + "\n"
|
38
|
-
|
39
57
|
banner = "\n" +
|
40
58
|
"████████ ██████ ███████ ██████ ██ ██████ ███████ ██ ██ ███ ██ ███████\n".light_red +
|
41
59
|
" ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██\n".light_yellow +
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -450,7 +450,7 @@ module ViralSeq
|
|
450
450
|
# function to determine if the sequences have APOBEC3g/f hypermutation.
|
451
451
|
# # APOBEC3G/F pattern: GRD -> ARD
|
452
452
|
# # control pattern: G[YN|RC] -> A[YN|RC]
|
453
|
-
# # use the sample consensus to determine potential a3g sites
|
453
|
+
# # use the sample consensus to determine potential a3g sites (default) or provide external reference sequences as a `String`
|
454
454
|
# # Two criteria to identify hypermutation
|
455
455
|
# # 1. Fisher's exact test on the frequencies of G to A mutation at A3G positions vs. non-A3G positions
|
456
456
|
# # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
|
@@ -486,7 +486,7 @@ module ViralSeq
|
|
486
486
|
# # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
|
487
487
|
# @see https://www.hiv.lanl.gov/content/sequence/HYPERMUT/hypermut.html LANL Hypermut
|
488
488
|
|
489
|
-
def a3g_hypermut
|
489
|
+
def a3g_hypermut(ref = nil)
|
490
490
|
# mut_hash number of apobec3g/f mutations per sequence
|
491
491
|
mut_hash = {}
|
492
492
|
hm_hash = {}
|
@@ -495,8 +495,10 @@ module ViralSeq
|
|
495
495
|
# total G->A mutations at apobec3g/f positions.
|
496
496
|
total = 0
|
497
497
|
|
498
|
-
|
499
|
-
|
498
|
+
unless ref
|
499
|
+
# make consensus sequence for the input sequence hash
|
500
|
+
ref = self.consensus
|
501
|
+
end
|
500
502
|
|
501
503
|
# obtain apobec3g positions and control positions
|
502
504
|
apobec = apobec3gf(ref)
|
@@ -509,7 +511,6 @@ module ViralSeq
|
|
509
511
|
c = 0 # control muts
|
510
512
|
d = 0 # potenrial controls
|
511
513
|
mut.each do |n|
|
512
|
-
next if v[n] == "-"
|
513
514
|
if v[n] == "A"
|
514
515
|
a += 1
|
515
516
|
b += 1
|
@@ -521,7 +522,6 @@ module ViralSeq
|
|
521
522
|
total += a
|
522
523
|
|
523
524
|
control.each do |n|
|
524
|
-
next if v[n] == "-"
|
525
525
|
if v[n] == "A"
|
526
526
|
c += 1
|
527
527
|
d += 1
|
@@ -544,7 +544,7 @@ module ViralSeq
|
|
544
544
|
end
|
545
545
|
end
|
546
546
|
|
547
|
-
if self.dna_hash.size >
|
547
|
+
if self.dna_hash.size > 200
|
548
548
|
rate = total.to_f/(self.dna_hash.size)
|
549
549
|
count_mut = mut_hash.values.count_freq
|
550
550
|
maxi_count = count_mut.values.max
|
@@ -566,10 +566,12 @@ module ViralSeq
|
|
566
566
|
end
|
567
567
|
end
|
568
568
|
end
|
569
|
+
|
569
570
|
hm_seq_hash = ViralSeq::SeqHash.new
|
570
571
|
hm_hash.each do |k,_v|
|
571
572
|
hm_seq_hash.dna_hash[k] = self.dna_hash[k]
|
572
573
|
end
|
574
|
+
|
573
575
|
hm_seq_hash.title = self.title + "_hypermut"
|
574
576
|
hm_seq_hash.file = self.file
|
575
577
|
filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
|
@@ -1356,7 +1358,7 @@ module ViralSeq
|
|
1356
1358
|
# APOBEC3G/F pattern: GRD -> ARD,
|
1357
1359
|
# control pattern: G[YN|RC] -> A[YN|RC],
|
1358
1360
|
def apobec3gf(seq = '')
|
1359
|
-
seq.tr!("-", "")
|
1361
|
+
#seq.tr!("-", "")
|
1360
1362
|
seq_length = seq.size
|
1361
1363
|
apobec_position = []
|
1362
1364
|
control_position = []
|
@@ -1368,6 +1370,7 @@ module ViralSeq
|
|
1368
1370
|
control_position << n
|
1369
1371
|
end
|
1370
1372
|
end
|
1373
|
+
|
1371
1374
|
return [apobec_position,control_position]
|
1372
1375
|
end # end of #apobec3gf
|
1373
1376
|
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.6.4
|
4
|
+
version: 1.7.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2022-
|
12
|
+
date: 2022-08-25 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -193,7 +193,6 @@ files:
|
|
193
193
|
- lib/viral_seq/tcs_dr.rb
|
194
194
|
- lib/viral_seq/tcs_json.rb
|
195
195
|
- lib/viral_seq/version.rb
|
196
|
-
- rc_swans.svc@longleaf.unc.edu
|
197
196
|
- viral_seq.gemspec
|
198
197
|
homepage: https://github.com/ViralSeq/viral_seq
|
199
198
|
licenses:
|
Binary file
|