viral_seq 1.0.13 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +0 -1
- data/Gemfile.lock +16 -3
- data/README.md +102 -13
- data/bin/tcs +51 -10
- data/bin/tcs_log +102 -0
- data/bin/tcs_sdrm +402 -0
- data/docs/assets/img/cover.jpg +0 -0
- data/docs/dr.json +67 -0
- data/docs/sample_miseq_data/hivdr_control/r1.fastq.gz +0 -0
- data/docs/sample_miseq_data/hivdr_control/r2.fastq.gz +0 -0
- data/lib/viral_seq.rb +5 -1
- data/lib/viral_seq/constant.rb +35 -5
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/muscle.rb +3 -2
- data/lib/viral_seq/recency.rb +52 -0
- data/lib/viral_seq/sdrm.rb +101 -35
- data/lib/viral_seq/seq_hash.rb +25 -5
- data/lib/viral_seq/seq_hash_pair.rb +6 -4
- data/lib/viral_seq/sequence.rb +1 -84
- data/lib/viral_seq/tcs_core.rb +34 -5
- data/lib/viral_seq/tcs_dr.rb +71 -0
- data/lib/viral_seq/tcs_json.rb +41 -10
- data/lib/viral_seq/version.rb +2 -2
- data/viral_seq.gemspec +11 -0
- metadata +74 -4
data/lib/viral_seq/tcs_core.rb
CHANGED
@@ -102,9 +102,9 @@ module ViralSeq
|
|
102
102
|
end
|
103
103
|
|
104
104
|
# sort array of file names to determine if there are potential errors
|
105
|
-
#
|
106
|
-
#
|
107
|
-
|
105
|
+
# @param name_array [Array] array of file names
|
106
|
+
# @return [Hash] name check results
|
107
|
+
|
108
108
|
def validate_file_name(name_array)
|
109
109
|
errors = {
|
110
110
|
file_type_error: [] ,
|
@@ -165,6 +165,13 @@ module ViralSeq
|
|
165
165
|
end
|
166
166
|
end
|
167
167
|
|
168
|
+
file_name_with_lib_name = {}
|
169
|
+
passed_libs.each do |lib_name, files|
|
170
|
+
files.each do |f|
|
171
|
+
file_name_with_lib_name[f] = lib_name
|
172
|
+
end
|
173
|
+
end
|
174
|
+
|
168
175
|
passed_names = []
|
169
176
|
|
170
177
|
passed_libs.values.each { |names| passed_names += names}
|
@@ -175,7 +182,27 @@ module ViralSeq
|
|
175
182
|
pass = true
|
176
183
|
end
|
177
184
|
|
178
|
-
|
185
|
+
file_name_with_error_type = {}
|
186
|
+
|
187
|
+
errors.each do |type, files|
|
188
|
+
files.each do |f|
|
189
|
+
file_name_with_error_type[f] ||= []
|
190
|
+
file_name_with_error_type[f] << type.to_s.tr("_", "\s")
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
file_check = []
|
195
|
+
|
196
|
+
name_array.each do |name|
|
197
|
+
file_check_hash = {}
|
198
|
+
file_check_hash[:fileName] = name
|
199
|
+
file_check_hash[:errors] = file_name_with_error_type[name]
|
200
|
+
file_check_hash[:libName] = file_name_with_lib_name[name]
|
201
|
+
|
202
|
+
file_check << file_check_hash
|
203
|
+
end
|
204
|
+
|
205
|
+
return { allPass: pass, files: file_check }
|
179
206
|
end
|
180
207
|
|
181
208
|
# filter r1 raw sequences for non-specific primers.
|
@@ -278,7 +305,9 @@ module ViralSeq
|
|
278
305
|
end
|
279
306
|
|
280
307
|
def general_filter(seq)
|
281
|
-
if seq
|
308
|
+
if seq.size < $platform_sequencing_length
|
309
|
+
return false
|
310
|
+
elsif seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
|
282
311
|
return false
|
283
312
|
elsif seq =~ /A{11}/ # a string of poly-A indicates adaptor sequence
|
284
313
|
return false
|
@@ -0,0 +1,71 @@
|
|
1
|
+
module ViralSeq
|
2
|
+
|
3
|
+
class TcsDr
|
4
|
+
PARAMS = {:platform_error_rate=>0.02,
|
5
|
+
:primer_pairs=>
|
6
|
+
[{:region=>"RT",
|
7
|
+
:cdna=>
|
8
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCACTATAGGCTGTACTGTCCATTTATC",
|
9
|
+
:forward=>
|
10
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
|
11
|
+
:majority=>0.5,
|
12
|
+
:end_join=>true,
|
13
|
+
:end_join_option=>1,
|
14
|
+
:overlap=>0,
|
15
|
+
:TCS_QC=>true,
|
16
|
+
:ref_genome=>"HXB2",
|
17
|
+
:ref_start=>2648,
|
18
|
+
:ref_end=>3257,
|
19
|
+
:indel=>false,
|
20
|
+
:trim=>false},
|
21
|
+
{:region=>"PR",
|
22
|
+
:cdna=>
|
23
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
|
24
|
+
:forward=>
|
25
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
|
26
|
+
:majority=>0.5,
|
27
|
+
:end_join=>true,
|
28
|
+
:end_join_option=>3,
|
29
|
+
:TCS_QC=>true,
|
30
|
+
:ref_genome=>"HXB2",
|
31
|
+
:ref_start=>0,
|
32
|
+
:ref_end=>2591,
|
33
|
+
:indel=>true,
|
34
|
+
:trim=>true,
|
35
|
+
:trim_ref=>"HXB2",
|
36
|
+
:trim_ref_start=>2253,
|
37
|
+
:trim_ref_end=>2549},
|
38
|
+
{:region=>"IN",
|
39
|
+
:cdna=>
|
40
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNATCGAATACTGCCATTTGTACTGC",
|
41
|
+
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
42
|
+
:majority=>0.5,
|
43
|
+
:end_join=>true,
|
44
|
+
:end_join_option=>3,
|
45
|
+
:overlap=>171,
|
46
|
+
:TCS_QC=>true,
|
47
|
+
:ref_genome=>"HXB2",
|
48
|
+
:ref_start=>4384,
|
49
|
+
:ref_end=>4751,
|
50
|
+
:indel=>false,
|
51
|
+
:trim=>false},
|
52
|
+
{:region=>"V1V3",
|
53
|
+
:cdna=>
|
54
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
|
55
|
+
:forward=>
|
56
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
|
57
|
+
:majority=>0.5,
|
58
|
+
:end_join=>true,
|
59
|
+
:end_join_option=>1,
|
60
|
+
:overlap=>0,
|
61
|
+
:TCS_QC=>true,
|
62
|
+
:ref_genome=>"HXB2",
|
63
|
+
:ref_start=>6585,
|
64
|
+
:ref_end=>7208,
|
65
|
+
:indel=>true,
|
66
|
+
:trim=>false}
|
67
|
+
]
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
data/lib/viral_seq/tcs_json.rb
CHANGED
@@ -13,6 +13,22 @@ module ViralSeq
|
|
13
13
|
print '> '
|
14
14
|
param[:raw_sequence_dir] = gets.chomp.rstrip
|
15
15
|
|
16
|
+
puts "Choose MiSeq Platform (1-3):\n1. 150x7x150\n2. 250x7x250\n3. 300x7x300 (default)"
|
17
|
+
print "> "
|
18
|
+
pf_option = gets.chomp.rstrip
|
19
|
+
# while ![1,2,3].include?(pf_option.to_i)
|
20
|
+
# print "Entered MiSeq Platform #{pf_option.red.bold} not valid (choose 1-3), try again\n> "
|
21
|
+
# pf_option = gets.chomp.rstrip
|
22
|
+
# end
|
23
|
+
case pf_option.to_i
|
24
|
+
when 1
|
25
|
+
param[:platform_format] = 150
|
26
|
+
when 2
|
27
|
+
param[:platform_format] = 250
|
28
|
+
else
|
29
|
+
param[:platform_format] = 300
|
30
|
+
end
|
31
|
+
|
16
32
|
puts 'Enter the estimated platform error rate (for TCS cut-off calculation), default as ' + '0.02'.red.bold
|
17
33
|
print '> '
|
18
34
|
input_error = gets.chomp.rstrip.to_f
|
@@ -52,12 +68,12 @@ module ViralSeq
|
|
52
68
|
if ej =~ /y|yes/i
|
53
69
|
data[:end_join] = true
|
54
70
|
|
55
|
-
|
56
|
-
1: simple join, no overlap
|
57
|
-
2: known overlap
|
58
|
-
3: unknow overlap, use sample consensus to determine overlap, all sequence pairs have same overlap
|
59
|
-
4: unknow overlap, determine overlap by individual sequence pairs, sequence pairs can have different overlap
|
60
|
-
> "
|
71
|
+
puts "End-join option? Choose from (1-4):"
|
72
|
+
puts "1: simple join, no overlap"
|
73
|
+
puts "2: known overlap"
|
74
|
+
puts "3: unknow overlap, use sample consensus to determine overlap, all sequence pairs have same overlap"
|
75
|
+
puts "4: unknow overlap, determine overlap by individual sequence pairs, sequence pairs can have different overlap"
|
76
|
+
print "> "
|
61
77
|
ej_option = gets.chomp.rstrip
|
62
78
|
while ![1,2,3,4].include?(ej_option.to_i)
|
63
79
|
puts "Entered end-join option #{ej_option.red.bold} not valid (choose 1-4), try again"
|
@@ -138,7 +154,12 @@ module ViralSeq
|
|
138
154
|
if save_option =~ /y|yes/i
|
139
155
|
print "Path to save JSON file:\n> "
|
140
156
|
path = gets.chomp.rstrip
|
141
|
-
|
157
|
+
while !validate_path_name(path)
|
158
|
+
print "Entered path no valid, try again.\n".red.bold
|
159
|
+
print "Path to save JSON file:\n> "
|
160
|
+
path = gets.chomp.rstrip
|
161
|
+
end
|
162
|
+
File.open(validate_path_name(path), 'w') {|f| f.puts JSON.pretty_generate(param)}
|
142
163
|
end
|
143
164
|
|
144
165
|
print "\nDo you wish to execute tcs pipeline with the input params now? Y/N \n> "
|
@@ -147,7 +168,7 @@ module ViralSeq
|
|
147
168
|
if rsp =~ /y/i
|
148
169
|
return param
|
149
170
|
else
|
150
|
-
abort "Params json file generated. You can execute tcs pipeline using `tcs -p [params.json]`"
|
171
|
+
abort "Params json file generated. You can execute tcs pipeline using `tcs -p [params.json]`".blue
|
151
172
|
end
|
152
173
|
|
153
174
|
end
|
@@ -172,7 +193,17 @@ module ViralSeq
|
|
172
193
|
when 3
|
173
194
|
:MAC239
|
174
195
|
end
|
175
|
-
end
|
176
|
-
|
196
|
+
end # end of get_ref
|
197
|
+
|
198
|
+
def validate_path_name(path)
|
199
|
+
if path.empty?
|
200
|
+
return false
|
201
|
+
elsif File.directory? path
|
202
|
+
return File.join(path, 'params.json')
|
203
|
+
elsif File.directory?(File.dirname(path))
|
204
|
+
return path
|
205
|
+
end
|
206
|
+
end # end of validate_path_name
|
207
|
+
end # end of class << self
|
177
208
|
end # end TcsJson
|
178
209
|
end # end main module
|
data/lib/viral_seq/version.rb
CHANGED
data/viral_seq.gemspec
CHANGED
@@ -29,11 +29,22 @@ Gem::Specification.new do |spec|
|
|
29
29
|
spec.add_development_dependency "rake", "~> 13.0"
|
30
30
|
spec.add_development_dependency "rspec", "~> 3.0"
|
31
31
|
|
32
|
+
# This gem will work with Ruby version 2.5.0 or greater...
|
33
|
+
spec.required_ruby_version = '>= 2.5'
|
34
|
+
# This gem will work with RubyGems version 1.3.6 or greater...
|
35
|
+
spec.required_rubygems_version = '>= 1.3.6'
|
36
|
+
|
32
37
|
# muscle_bio gem required
|
33
38
|
spec.add_runtime_dependency "muscle_bio", "~> 0.4"
|
34
39
|
|
35
40
|
# colorize gem required
|
36
41
|
spec.add_runtime_dependency "colorize", "~> 0.1"
|
37
42
|
|
43
|
+
spec.add_runtime_dependency "prawn", "~> 2.3", '>= 2.3.0'
|
44
|
+
|
45
|
+
spec.add_runtime_dependency "prawn-table", "~> 0.2", '>= 0.2.0'
|
46
|
+
|
47
|
+
spec.add_runtime_dependency "combine_pdf", "~> 1.0", '>= 1.0.0'
|
48
|
+
|
38
49
|
spec.requirements << 'R required for some functions'
|
39
50
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-05-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -81,6 +81,66 @@ dependencies:
|
|
81
81
|
- - "~>"
|
82
82
|
- !ruby/object:Gem::Version
|
83
83
|
version: '0.1'
|
84
|
+
- !ruby/object:Gem::Dependency
|
85
|
+
name: prawn
|
86
|
+
requirement: !ruby/object:Gem::Requirement
|
87
|
+
requirements:
|
88
|
+
- - "~>"
|
89
|
+
- !ruby/object:Gem::Version
|
90
|
+
version: '2.3'
|
91
|
+
- - ">="
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: 2.3.0
|
94
|
+
type: :runtime
|
95
|
+
prerelease: false
|
96
|
+
version_requirements: !ruby/object:Gem::Requirement
|
97
|
+
requirements:
|
98
|
+
- - "~>"
|
99
|
+
- !ruby/object:Gem::Version
|
100
|
+
version: '2.3'
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: 2.3.0
|
104
|
+
- !ruby/object:Gem::Dependency
|
105
|
+
name: prawn-table
|
106
|
+
requirement: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - "~>"
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0.2'
|
111
|
+
- - ">="
|
112
|
+
- !ruby/object:Gem::Version
|
113
|
+
version: 0.2.0
|
114
|
+
type: :runtime
|
115
|
+
prerelease: false
|
116
|
+
version_requirements: !ruby/object:Gem::Requirement
|
117
|
+
requirements:
|
118
|
+
- - "~>"
|
119
|
+
- !ruby/object:Gem::Version
|
120
|
+
version: '0.2'
|
121
|
+
- - ">="
|
122
|
+
- !ruby/object:Gem::Version
|
123
|
+
version: 0.2.0
|
124
|
+
- !ruby/object:Gem::Dependency
|
125
|
+
name: combine_pdf
|
126
|
+
requirement: !ruby/object:Gem::Requirement
|
127
|
+
requirements:
|
128
|
+
- - "~>"
|
129
|
+
- !ruby/object:Gem::Version
|
130
|
+
version: '1.0'
|
131
|
+
- - ">="
|
132
|
+
- !ruby/object:Gem::Version
|
133
|
+
version: 1.0.0
|
134
|
+
type: :runtime
|
135
|
+
prerelease: false
|
136
|
+
version_requirements: !ruby/object:Gem::Requirement
|
137
|
+
requirements:
|
138
|
+
- - "~>"
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '1.0'
|
141
|
+
- - ">="
|
142
|
+
- !ruby/object:Gem::Version
|
143
|
+
version: 1.0.0
|
84
144
|
description: |-
|
85
145
|
A Ruby Gem with bioinformatics tools for processing viral NGS data.
|
86
146
|
Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
@@ -90,6 +150,8 @@ email:
|
|
90
150
|
executables:
|
91
151
|
- locator
|
92
152
|
- tcs
|
153
|
+
- tcs_log
|
154
|
+
- tcs_sdrm
|
93
155
|
extensions: []
|
94
156
|
extra_rdoc_files: []
|
95
157
|
files:
|
@@ -104,6 +166,12 @@ files:
|
|
104
166
|
- Rakefile
|
105
167
|
- bin/locator
|
106
168
|
- bin/tcs
|
169
|
+
- bin/tcs_log
|
170
|
+
- bin/tcs_sdrm
|
171
|
+
- docs/assets/img/cover.jpg
|
172
|
+
- docs/dr.json
|
173
|
+
- docs/sample_miseq_data/hivdr_control/r1.fastq.gz
|
174
|
+
- docs/sample_miseq_data/hivdr_control/r2.fastq.gz
|
107
175
|
- lib/viral_seq.rb
|
108
176
|
- lib/viral_seq/constant.rb
|
109
177
|
- lib/viral_seq/enumerable.rb
|
@@ -112,6 +180,7 @@ files:
|
|
112
180
|
- lib/viral_seq/math.rb
|
113
181
|
- lib/viral_seq/muscle.rb
|
114
182
|
- lib/viral_seq/pid.rb
|
183
|
+
- lib/viral_seq/recency.rb
|
115
184
|
- lib/viral_seq/ref_seq.rb
|
116
185
|
- lib/viral_seq/rubystats.rb
|
117
186
|
- lib/viral_seq/sdrm.rb
|
@@ -120,6 +189,7 @@ files:
|
|
120
189
|
- lib/viral_seq/sequence.rb
|
121
190
|
- lib/viral_seq/string.rb
|
122
191
|
- lib/viral_seq/tcs_core.rb
|
192
|
+
- lib/viral_seq/tcs_dr.rb
|
123
193
|
- lib/viral_seq/tcs_json.rb
|
124
194
|
- lib/viral_seq/version.rb
|
125
195
|
- viral_seq.gemspec
|
@@ -135,12 +205,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
135
205
|
requirements:
|
136
206
|
- - ">="
|
137
207
|
- !ruby/object:Gem::Version
|
138
|
-
version: '
|
208
|
+
version: '2.5'
|
139
209
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
140
210
|
requirements:
|
141
211
|
- - ">="
|
142
212
|
- !ruby/object:Gem::Version
|
143
|
-
version:
|
213
|
+
version: 1.3.6
|
144
214
|
requirements:
|
145
215
|
- R required for some functions
|
146
216
|
rubygems_version: 3.2.2
|