viral_seq 1.0.9 → 1.0.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +67 -32
- data/bin/tcs +78 -143
- data/lib/viral_seq.rb +3 -0
- data/lib/viral_seq/constant.rb +5 -1
- data/lib/viral_seq/enumerable.rb +0 -10
- data/lib/viral_seq/hivdr.rb +1 -1
- data/lib/viral_seq/math.rb +3 -3
- data/lib/viral_seq/sdrm.rb +43 -0
- data/lib/viral_seq/seq_hash.rb +15 -8
- data/lib/viral_seq/seq_hash_pair.rb +6 -0
- data/lib/viral_seq/tcs_core.rb +332 -0
- data/lib/viral_seq/tcs_json.rb +178 -0
- data/lib/viral_seq/version.rb +2 -2
- metadata +6 -5
- data/bin/tcs_json_generator +0 -166
@@ -0,0 +1,178 @@
|
|
1
|
+
module ViralSeq
  # Interactive command-line wizard that asks the user for the TCS pipeline
  # parameters and renders them as JSON.
  class TcsJson
    class << self

      # Runs the interactive parameter wizard on standard input/output.
      #
      # Asks for the raw sequence directory, the platform error rate and one
      # or more primer pairs, prints the resulting JSON, optionally writes it
      # to a file, and finally asks whether the pipeline should run now.
      #
      # @return [Hash] the collected parameters, returned only when the user
      #   chooses to execute the pipeline right away. Otherwise the process is
      #   terminated through +abort+ (message on stderr, non-zero status).
      def generate
        puts '-'*58
        puts '| JSON Parameter Generator for ' + "TCS #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
        puts '-'*58 + "\n"

        param = {}

        puts 'Enter the path to the directory that contains the MiSeq pair-end R1 and R2 .fastq or .fastq.gz file'
        print '> '
        param[:raw_sequence_dir] = read_answer

        puts 'Enter the estimated platform error rate (for TCS cut-off calculation), default as ' + '0.02'.red.bold
        print '> '
        input_error = read_answer.to_f
        # String#to_f yields 0.0 for blank or non-numeric input, which selects the default.
        param[:platform_error_rate] = input_error == 0.0 ? 0.02 : input_error

        param[:primer_pairs] = []

        loop do
          param[:primer_pairs] << collect_primer_pair
          print "Do you wish to conintue? Y/N \n> "
          break unless yes?(read_answer)
        end

        puts "\nYour JSON string is:"
        puts JSON.pretty_generate(param)

        print "\nDo you wish to save it as a file? Y/N \n> "
        if yes?(read_answer)
          print "Path to save JSON file:\n> "
          path = read_answer
          File.open(path, 'w') { |f| f.puts JSON.pretty_generate(param) }
        end

        print "\nDo you wish to execute tcs pipeline with the input params now? Y/N \n> "
        return param if yes?(read_answer)

        abort "Params json file generated. You can execute tcs pipeline using `tcs -p [params.json]`"
      end

      private

      # Asks which reference genome to use.
      #
      # @return [Symbol] +:HXB2+, +:NL43+ or +:MAC239+
      def get_ref
        puts "Choose reference genome (1-3):"
        puts "1. HIV-1 HXB2".red.bold
        puts "2. HIV-1 NL4-3".blue.bold
        puts "3. SIV MAC239".magenta.bold
        print "> "
        case read_choice("reference genome", 1..3)
        when 1 then :HXB2
        when 2 then :NL43
        when 3 then :MAC239
        end
      end

      # Reads one answer from standard input with the line ending and
      # trailing whitespace removed.
      #
      # Reads from $stdin explicitly: a bare Kernel#gets would read from files
      # named in ARGV when any are left there. Aborts at end of input instead
      # of failing on +nil+ (and instead of spinning in a validation loop).
      #
      # @return [String]
      def read_answer
        line = $stdin.gets
        abort "Input ended before all parameters were entered." if line.nil?
        line.chomp.rstrip
      end

      # @param answer [String] raw user answer
      # @return [Boolean] true when the answer starts with "y"/"Y"
      #   (so "no way" is no longer mistaken for a yes)
      def yes?(answer)
        !(answer =~ /\A\s*y/i).nil?
      end

      # @param answer [String] raw user answer
      # @return [Boolean] true when the answer starts with "n"/"N"
      def no?(answer)
        !(answer =~ /\A\s*n/i).nil?
      end

      # Reads a numbered menu choice, re-prompting until it is valid.
      #
      # The raw text is kept as a String for the error message; only the
      # comparison and the return value use the integer form.
      #
      # @param label [String] what is being chosen, used in the retry message
      # @param choices [Range<Integer>] accepted option numbers
      # @return [Integer] the accepted option
      def read_choice(label, choices)
        answer = read_answer
        until choices.include?(answer.to_i)
          print "Entered #{label} option #{answer.red.bold} not valid (choose #{choices.first}-#{choices.last}), try again\n> "
          answer = read_answer
        end
        answer.to_i
      end

      # Collects the settings of a single primer pair.
      #
      # QC and trimming questions are only asked when end-join is requested,
      # exactly as before.
      #
      # @return [Hash] settings for one sequenced region
      def collect_primer_pair
        data = {}

        puts "Enter the name for the sequenced region: "
        print '> '
        data[:region] = read_answer

        puts "Enter the #{"cDNA".red.bold} primer sequence: "
        print '> '
        data[:cdna] = read_answer

        puts "Enter the #{"forward".blue.bold} primer sequence: "
        print '> '
        data[:forward] = read_answer

        puts "Enter supermajority cut-off (0.5 - 1.0). Default Simple Majority"
        print '> '
        mj = read_answer.to_f
        # 0 is stored when no valid cut-off in 0.5..1.0 was entered.
        data[:majority] = (0.5..1.0).include?(mj) ? mj : 0

        print "Need end-join? Y/N \n> "
        if yes?(read_answer)
          data[:end_join] = true
          add_end_join_params(data)
          add_qc_params(data)
          add_trim_params(data)
        else
          data[:end_join] = false
        end

        data
      end

      # Asks for the end-join strategy and stores it in +data+.
      #
      # Menu choices 1 and 2 are both stored as end-join option 1; they differ
      # only in the stored overlap (0 versus the number of bases entered).
      #
      # @param data [Hash] primer-pair settings, modified in place
      # @return [void]
      def add_end_join_params(data)
        print "End-join option? Choose from (1-4):\n\n" \
              "1: simple join, no overlap\n" \
              "2: known overlap \n\n" \
              "3: unknow overlap, use sample consensus to determine overlap, all sequence pairs have same overlap\n\n" \
              "4: unknow overlap, determine overlap by individual sequence pairs, sequence pairs can have different overlap\n\n" \
              "> "
        case read_choice("end-join", 1..4)
        when 1
          data[:end_join_option] = 1
          data[:overlap] = 0
        when 2
          data[:end_join_option] = 1
          print "overlap bases: \n> "
          data[:overlap] = read_answer.to_i
        when 3
          data[:end_join_option] = 3
        when 4
          data[:end_join_option] = 4
        end
      end

      # Asks whether QC is wanted and, if so, for its reference settings.
      #
      # @param data [Hash] primer-pair settings, modified in place
      # @return [void]
      def add_qc_params(data)
        print "Need QC for TCS? (support for HIV-1 and SIV)? Y/N \n> "
        unless yes?(read_answer)
          data[:TCS_QC] = false
          return
        end

        data[:TCS_QC] = true
        data[:ref_genome] = get_ref

        print "reference 5'end ref position or posiiton range, 0 if no need to match this end \n> "
        data[:ref_start] = read_answer.to_i

        print "reference 3'end ref position or posiiton range: 0 if no need to match this end \n> "
        data[:ref_end] = read_answer.to_i

        print "allow indels? (default as yes) Y/N \n> "
        # Indels stay allowed unless the user explicitly answers no.
        data[:indel] = !no?(read_answer)
      end

      # Asks whether trimming is wanted and, if so, for the trim coordinates.
      #
      # @param data [Hash] primer-pair settings, modified in place
      # @return [void]
      def add_trim_params(data)
        print "Need trimming to a reference genome? Y/N \n> "
        unless yes?(read_answer)
          data[:trim] = false
          return
        end

        data[:trim] = true
        data[:trim_ref] = get_ref

        print "reference 5'end ref position \n> "
        data[:trim_ref_start] = read_answer.to_i

        print "reference 3'end ref position \n> "
        data[:trim_ref_end] = read_answer.to_i
      end
    end
  end # end TcsJson
end # end main module
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.9
|
4
|
+
version: 1.0.14
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date:
|
12
|
+
date: 2021-03-06 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -90,7 +90,6 @@ email:
|
|
90
90
|
executables:
|
91
91
|
- locator
|
92
92
|
- tcs
|
93
|
-
- tcs_json_generator
|
94
93
|
extensions: []
|
95
94
|
extra_rdoc_files: []
|
96
95
|
files:
|
@@ -105,7 +104,6 @@ files:
|
|
105
104
|
- Rakefile
|
106
105
|
- bin/locator
|
107
106
|
- bin/tcs
|
108
|
-
- bin/tcs_json_generator
|
109
107
|
- lib/viral_seq.rb
|
110
108
|
- lib/viral_seq/constant.rb
|
111
109
|
- lib/viral_seq/enumerable.rb
|
@@ -116,10 +114,13 @@ files:
|
|
116
114
|
- lib/viral_seq/pid.rb
|
117
115
|
- lib/viral_seq/ref_seq.rb
|
118
116
|
- lib/viral_seq/rubystats.rb
|
117
|
+
- lib/viral_seq/sdrm.rb
|
119
118
|
- lib/viral_seq/seq_hash.rb
|
120
119
|
- lib/viral_seq/seq_hash_pair.rb
|
121
120
|
- lib/viral_seq/sequence.rb
|
122
121
|
- lib/viral_seq/string.rb
|
122
|
+
- lib/viral_seq/tcs_core.rb
|
123
|
+
- lib/viral_seq/tcs_json.rb
|
123
124
|
- lib/viral_seq/version.rb
|
124
125
|
- viral_seq.gemspec
|
125
126
|
homepage: https://github.com/ViralSeq/viral_seq
|
@@ -142,7 +143,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
142
143
|
version: '0'
|
143
144
|
requirements:
|
144
145
|
- R required for some functions
|
145
|
-
rubygems_version: 3.
|
146
|
+
rubygems_version: 3.2.2
|
146
147
|
signing_key:
|
147
148
|
specification_version: 4
|
148
149
|
summary: A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
data/bin/tcs_json_generator
DELETED
@@ -1,166 +0,0 @@
|
|
1
|
-
#!/usr/bin/env ruby
|
2
|
-
|
3
|
-
# TCS pipeline JSON params generator.
|
4
|
-
|
5
|
-
require 'viral_seq'
|
6
|
-
require 'colorize'
|
7
|
-
require 'json'
|
8
|
-
|
9
|
-
def get_ref
|
10
|
-
puts "Choose reference genome (1-3):"
|
11
|
-
puts "1. HIV-1 HXB2".red.bold
|
12
|
-
puts "2. HIV-1 NL4-3".blue.bold
|
13
|
-
puts "3. SIV MAC239".magenta.bold
|
14
|
-
print "> "
|
15
|
-
ref_option = gets.chomp.rstrip
|
16
|
-
while ![1,2,3].include?(ref_option.to_i)
|
17
|
-
print "Entered end-join option #{ref_option.to_s.red.bold} not valid (choose 1-3), try again\n> "
|
18
|
-
ref_option = gets.chomp.rstrip.to_i
|
19
|
-
end
|
20
|
-
ref = case ref_option.to_i
|
21
|
-
when 1
|
22
|
-
:HXB2
|
23
|
-
when 2
|
24
|
-
:NL43
|
25
|
-
when 3
|
26
|
-
:MAC239
|
27
|
-
end
|
28
|
-
end
|
29
|
-
|
30
|
-
puts "\n" + '-'*58
|
31
|
-
puts '| JSON Parameter Generator for ' + "TCS #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
|
32
|
-
puts '-'*58 + "\n"
|
33
|
-
|
34
|
-
param = {}
|
35
|
-
|
36
|
-
puts 'Enter the path to the directory that contains the MiSeq pair-end R1 and R2 .fastq or .fastq.gz file'
|
37
|
-
print '> '
|
38
|
-
param[:raw_sequence_dir] = gets.chomp.rstrip
|
39
|
-
|
40
|
-
puts 'Enter the estimated platform error rate (for TCS cut-off calculation), default as ' + '0.02'.red.bold
|
41
|
-
print '> '
|
42
|
-
input_error = gets.chomp.rstrip.to_f
|
43
|
-
if input_error == 0.0
|
44
|
-
param[:platform_error_rate] = 0.02
|
45
|
-
else
|
46
|
-
param[:platform_error_rate] = input_error
|
47
|
-
end
|
48
|
-
|
49
|
-
param[:primer_pairs] = []
|
50
|
-
|
51
|
-
loop do
|
52
|
-
data = {}
|
53
|
-
puts "Enter the name for the sequenced region: "
|
54
|
-
print '> '
|
55
|
-
data[:region] = gets.chomp.rstrip
|
56
|
-
|
57
|
-
puts "Enter the #{"cDNA".red.bold} primer sequence: "
|
58
|
-
print '> '
|
59
|
-
data[:cdna] = gets.chomp.rstrip
|
60
|
-
|
61
|
-
puts "Enter the #{"forward".blue.bold} primer sequence: "
|
62
|
-
print '> '
|
63
|
-
data[:forward] = gets.chomp.rstrip
|
64
|
-
|
65
|
-
puts "Enter supermajority cut-off (0.5 - 0.9). Default: " + "0.5".blue.bold + " (simple majority)"
|
66
|
-
print '> '
|
67
|
-
mj = gets.chomp.rstrip.to_f
|
68
|
-
if (0.5..0.9).include?(mj)
|
69
|
-
data[:majority] = mj
|
70
|
-
else
|
71
|
-
data[:majority] = 0.5
|
72
|
-
end
|
73
|
-
|
74
|
-
print "Need end-join? Y/N \n> "
|
75
|
-
ej = gets.chomp.rstrip
|
76
|
-
if ej =~ /y|yes/i
|
77
|
-
data[:end_join] = true
|
78
|
-
|
79
|
-
print "End-join option? Choose from (1-4):\n
|
80
|
-
1: simple join, no overlap
|
81
|
-
2: known overlap \n
|
82
|
-
3: unknow overlap, use sample consensus to determine overlap, all sequence pairs have same overlap\n
|
83
|
-
4: unknow overlap, determine overlap by individual sequence pairs, sequence pairs can have different overlap\n
|
84
|
-
> "
|
85
|
-
ej_option = gets.chomp.rstrip
|
86
|
-
while ![1,2,3,4].include?(ej_option.to_i)
|
87
|
-
puts "Entered end-join option #{ej_option.red.bold} not valid (choose 1-4), try again"
|
88
|
-
ej_option = gets.chomp.rstrip.to_i
|
89
|
-
end
|
90
|
-
case ej_option.to_i
|
91
|
-
when 1
|
92
|
-
data[:end_join_option] = 1
|
93
|
-
data[:overlap] = 0
|
94
|
-
when 2
|
95
|
-
data[:end_join_option] = 1
|
96
|
-
print "overlap bases: \n> "
|
97
|
-
ol = gets.chomp.rstrip.to_i
|
98
|
-
data[:overlap] = ol
|
99
|
-
when 3
|
100
|
-
data[:end_join_option] = 3
|
101
|
-
when 4
|
102
|
-
data[:end_join_option] = 4
|
103
|
-
end
|
104
|
-
|
105
|
-
print "Need QC for TCS? (support for HIV-1 and SIV)? Y/N \n> "
|
106
|
-
qc = gets.chomp.rstrip
|
107
|
-
if qc =~ /y|yes/i
|
108
|
-
data[:TCS_QC] = true
|
109
|
-
|
110
|
-
data[:ref_genome] = get_ref
|
111
|
-
|
112
|
-
print "reference 5'end ref position or posiiton range, 0 if no need to match this end \n> "
|
113
|
-
data[:ref_start] = gets.chomp.rstrip.to_i
|
114
|
-
|
115
|
-
print "reference 3'end ref position or posiiton range: 0 if no need to match this end \n> "
|
116
|
-
data[:ref_end] = gets.chomp.rstrip.to_i
|
117
|
-
|
118
|
-
print "allow indels? (default as yes) Y/N \n> "
|
119
|
-
indel = gets.chomp.rstrip
|
120
|
-
if indel =~ /n|no/i
|
121
|
-
data[:indel] = false
|
122
|
-
else
|
123
|
-
data[:indel] = true
|
124
|
-
end
|
125
|
-
else
|
126
|
-
data[:TCS_QC] = false
|
127
|
-
end
|
128
|
-
|
129
|
-
print "Need trimming to a reference genome? Y/N \n> "
|
130
|
-
trim_option = gets.chomp.rstrip
|
131
|
-
if trim_option =~ /y|yes/i
|
132
|
-
data[:trim] = true
|
133
|
-
data[:trim_ref] = get_ref
|
134
|
-
|
135
|
-
print "reference 5'end ref position \n> "
|
136
|
-
data[:trim_ref_start] = gets.chomp.rstrip.to_i
|
137
|
-
|
138
|
-
print "reference 3'end ref position \n> "
|
139
|
-
data[:trim_ref_end] = gets.chomp.rstrip.to_i
|
140
|
-
|
141
|
-
else
|
142
|
-
data[:trim] = false
|
143
|
-
end
|
144
|
-
|
145
|
-
else
|
146
|
-
data[:end_join] = false
|
147
|
-
end
|
148
|
-
|
149
|
-
param[:primer_pairs] << data
|
150
|
-
print "Do you wish to conintue? Y/N \n> "
|
151
|
-
continue_sig = gets.chomp.rstrip
|
152
|
-
break unless continue_sig =~ /y|yes/i
|
153
|
-
|
154
|
-
end
|
155
|
-
|
156
|
-
puts "\nYour JSON string is:"
|
157
|
-
puts JSON.pretty_generate(param)
|
158
|
-
|
159
|
-
print "\nDo you wish to save it as a file? Y/N \n> "
|
160
|
-
save_option = gets.chomp.rstrip
|
161
|
-
|
162
|
-
if save_option =~ /y|yes/i
|
163
|
-
print "Path to save JSON file:\n> "
|
164
|
-
path = gets.chomp.rstrip
|
165
|
-
File.open(path, 'w') {|f| f.puts JSON.pretty_generate(param)}
|
166
|
-
end
|