viral_seq 1.6.4 → 1.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +13 -0
- data/bin/tcs +23 -5
- data/lib/viral_seq/seq_hash.rb +11 -8
- data/lib/viral_seq/version.rb +1 -1
- metadata +2 -3
- data/rc_swans.svc@longleaf.unc.edu +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
|
|
4
|
+
data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
|
|
7
|
+
data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
|
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
|
|
10
10
|
|
|
11
11
|
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
12
12
|
|
|
13
|
+
The CLI tools `tcs`, `tcs_sdrm`, `tcs_log` and `locator` are included in the gem.
|
|
14
|
+
|
|
13
15
|
#### tcs web app - https://primer-id.org/
|
|
14
16
|
|
|
15
17
|
## Illustration for the Primer ID Sequencing
|
|
@@ -22,6 +24,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
|
22
24
|
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
|
23
25
|
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
|
24
26
|
|
|
27
|
+
## Requirements
|
|
28
|
+
|
|
29
|
+
Required Ruby Version: >= 2.5
|
|
30
|
+
|
|
31
|
+
Required RubyGems version: >= 1.3.6
|
|
32
|
+
|
|
25
33
|
## Install
|
|
26
34
|
|
|
27
35
|
```bash
|
|
@@ -179,6 +187,11 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
|
179
187
|
|
|
180
188
|
## Updates
|
|
181
189
|
|
|
190
|
+
### Version-1.7.0-08242022
|
|
191
|
+
|
|
192
|
+
1. Added a warning if the `tcs` pipeline is executed from source instead of being installed via `gem`.
|
|
193
|
+
2. Optimized the `ViralSeq::SeqHash#a3g` hypermut algorithm. It now allows an external reference sequence to be used instead of the sample consensus.
|
|
194
|
+
|
|
182
195
|
### Version-1.6.4-07182022
|
|
183
196
|
|
|
184
197
|
1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
|
data/bin/tcs
CHANGED
|
@@ -22,20 +22,38 @@
|
|
|
22
22
|
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
23
23
|
# THE SOFTWARE.
|
|
24
24
|
|
|
25
|
+
# Install using `gem install viral_seq`
|
|
25
26
|
# Use JSON file as the run param
|
|
26
27
|
# run `tcs -j` to generate param json file.
|
|
27
28
|
|
|
28
|
-
|
|
29
|
+
def gem_installed?(gem_name)
|
|
30
|
+
found_gem = false
|
|
31
|
+
begin
|
|
32
|
+
found_gem = Gem::Specification.find_by_name(gem_name)
|
|
33
|
+
rescue Gem::LoadError
|
|
34
|
+
return false
|
|
35
|
+
else
|
|
36
|
+
return true
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
if gem_installed?('viral_seq')
|
|
41
|
+
require 'viral_seq'
|
|
42
|
+
else
|
|
43
|
+
printf "\n****************************************************\n"
|
|
44
|
+
printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
|
|
45
|
+
printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
|
|
46
|
+
printf "****************************************************\n\n"
|
|
47
|
+
exit 1
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
|
|
29
51
|
require 'json'
|
|
30
52
|
require 'colorize'
|
|
31
53
|
require 'optparse'
|
|
32
54
|
|
|
33
55
|
options = {}
|
|
34
56
|
|
|
35
|
-
# banner = '-'*50 + "\n" +
|
|
36
|
-
# '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |' + "\n" +
|
|
37
|
-
# '-'*50 + "\n"
|
|
38
|
-
|
|
39
57
|
banner = "\n" +
|
|
40
58
|
"████████ ██████ ███████ ██████ ██ ██████ ███████ ██ ██ ███ ██ ███████\n".light_red +
|
|
41
59
|
" ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ ██\n".light_yellow +
|
data/lib/viral_seq/seq_hash.rb
CHANGED
|
@@ -450,7 +450,7 @@ module ViralSeq
|
|
|
450
450
|
# function to determine if the sequences have APOBEC3g/f hypermutation.
|
|
451
451
|
# # APOBEC3G/F pattern: GRD -> ARD
|
|
452
452
|
# # control pattern: G[YN|RC] -> A[YN|RC]
|
|
453
|
-
# # use the sample consensus to determine potential a3g sites
|
|
453
|
+
# # use the sample consensus to determine potential a3g sites (default) or provide external reference sequences as a `String`
|
|
454
454
|
# # Two criteria to identify hypermutation
|
|
455
455
|
# # 1. Fisher's exact test on the frequencies of G to A mutation at A3G positions vs. non-A3G positions
|
|
456
456
|
# # 2. Poisson distribution of G to A mutations at A3G positions, outliers sequences
|
|
@@ -486,7 +486,7 @@ module ViralSeq
|
|
|
486
486
|
# # but it is still called as hypermutation sequence b/c it's Poisson outlier sequence.
|
|
487
487
|
# @see https://www.hiv.lanl.gov/content/sequence/HYPERMUT/hypermut.html LANL Hypermut
|
|
488
488
|
|
|
489
|
-
def a3g_hypermut
|
|
489
|
+
def a3g_hypermut(ref = nil)
|
|
490
490
|
# mut_hash number of apobec3g/f mutations per sequence
|
|
491
491
|
mut_hash = {}
|
|
492
492
|
hm_hash = {}
|
|
@@ -495,8 +495,10 @@ module ViralSeq
|
|
|
495
495
|
# total G->A mutations at apobec3g/f positions.
|
|
496
496
|
total = 0
|
|
497
497
|
|
|
498
|
-
|
|
499
|
-
|
|
498
|
+
unless ref
|
|
499
|
+
# make consensus sequence for the input sequence hash
|
|
500
|
+
ref = self.consensus
|
|
501
|
+
end
|
|
500
502
|
|
|
501
503
|
# obtain apobec3g positions and control positions
|
|
502
504
|
apobec = apobec3gf(ref)
|
|
@@ -509,7 +511,6 @@ module ViralSeq
|
|
|
509
511
|
c = 0 # control muts
|
|
510
512
|
d = 0 # potential controls
|
|
511
513
|
mut.each do |n|
|
|
512
|
-
next if v[n] == "-"
|
|
513
514
|
if v[n] == "A"
|
|
514
515
|
a += 1
|
|
515
516
|
b += 1
|
|
@@ -521,7 +522,6 @@ module ViralSeq
|
|
|
521
522
|
total += a
|
|
522
523
|
|
|
523
524
|
control.each do |n|
|
|
524
|
-
next if v[n] == "-"
|
|
525
525
|
if v[n] == "A"
|
|
526
526
|
c += 1
|
|
527
527
|
d += 1
|
|
@@ -544,7 +544,7 @@ module ViralSeq
|
|
|
544
544
|
end
|
|
545
545
|
end
|
|
546
546
|
|
|
547
|
-
if self.dna_hash.size >
|
|
547
|
+
if self.dna_hash.size > 200
|
|
548
548
|
rate = total.to_f/(self.dna_hash.size)
|
|
549
549
|
count_mut = mut_hash.values.count_freq
|
|
550
550
|
maxi_count = count_mut.values.max
|
|
@@ -566,10 +566,12 @@ module ViralSeq
|
|
|
566
566
|
end
|
|
567
567
|
end
|
|
568
568
|
end
|
|
569
|
+
|
|
569
570
|
hm_seq_hash = ViralSeq::SeqHash.new
|
|
570
571
|
hm_hash.each do |k,_v|
|
|
571
572
|
hm_seq_hash.dna_hash[k] = self.dna_hash[k]
|
|
572
573
|
end
|
|
574
|
+
|
|
573
575
|
hm_seq_hash.title = self.title + "_hypermut"
|
|
574
576
|
hm_seq_hash.file = self.file
|
|
575
577
|
filtered_seq_hash = self.sub(self.dna_hash.keys - hm_hash.keys)
|
|
@@ -1356,7 +1358,7 @@ module ViralSeq
|
|
|
1356
1358
|
# APOBEC3G/F pattern: GRD -> ARD,
|
|
1357
1359
|
# control pattern: G[YN|RC] -> A[YN|RC],
|
|
1358
1360
|
def apobec3gf(seq = '')
|
|
1359
|
-
seq.tr!("-", "")
|
|
1361
|
+
#seq.tr!("-", "")
|
|
1360
1362
|
seq_length = seq.size
|
|
1361
1363
|
apobec_position = []
|
|
1362
1364
|
control_position = []
|
|
@@ -1368,6 +1370,7 @@ module ViralSeq
|
|
|
1368
1370
|
control_position << n
|
|
1369
1371
|
end
|
|
1370
1372
|
end
|
|
1373
|
+
|
|
1371
1374
|
return [apobec_position,control_position]
|
|
1372
1375
|
end # end of #apobec3gf
|
|
1373
1376
|
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: viral_seq
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 1.6.4
|
|
4
|
+
version: 1.7.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shuntai Zhou
|
|
@@ -9,7 +9,7 @@ authors:
|
|
|
9
9
|
autorequire:
|
|
10
10
|
bindir: bin
|
|
11
11
|
cert_chain: []
|
|
12
|
-
date: 2022-
|
|
12
|
+
date: 2022-08-25 00:00:00.000000000 Z
|
|
13
13
|
dependencies:
|
|
14
14
|
- !ruby/object:Gem::Dependency
|
|
15
15
|
name: bundler
|
|
@@ -193,7 +193,6 @@ files:
|
|
|
193
193
|
- lib/viral_seq/tcs_dr.rb
|
|
194
194
|
- lib/viral_seq/tcs_json.rb
|
|
195
195
|
- lib/viral_seq/version.rb
|
|
196
|
-
- rc_swans.svc@longleaf.unc.edu
|
|
197
196
|
- viral_seq.gemspec
|
|
198
197
|
homepage: https://github.com/ViralSeq/viral_seq
|
|
199
198
|
licenses:
|
|
Binary file
|