viral_seq 1.0.4 → 1.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +6 -4
- data/README.md +95 -26
- data/bin/locator +40 -35
- data/bin/tcs +519 -0
- data/bin/tcs_json_generator +166 -0
- data/lib/viral_seq.rb +1 -1
- data/lib/viral_seq/hash.rb +1 -1
- data/lib/viral_seq/hivdr.rb +2 -0
- data/lib/viral_seq/muscle.rb +2 -2
- data/lib/viral_seq/seq_hash.rb +214 -36
- data/lib/viral_seq/seq_hash_pair.rb +10 -6
- data/lib/viral_seq/version.rb +2 -1
- data/viral_seq.gemspec +5 -1
- metadata +23 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4921d3609d6ffc7fd6fbafd7a4a86e5818d47ed855393addd68b20f28b9d214f
|
4
|
+
data.tar.gz: a9e18c01b287885f8f6238343d9633a52d4ae5ea061347e73bd4f3e86788b2a4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: dd21b57e17751f6c3e475f05b7a565d295ac7592b7c02f8d89ed49192834bee444f08ee9ebf48e41922c8caaf37a03651d5d0c9aa89d97ccc2edb9aad8224d5f
|
7
|
+
data.tar.gz: d1162424ea877d9839c179cacc330c81cd3508fcff07b64a1e753c7c706485d1dcb9a6b60aec9ce02ed33b91bbd4386ed58329c17e247ba086e7d81ed107bfd4
|
data/Gemfile.lock
CHANGED
@@ -1,15 +1,17 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
viral_seq (1.0.
|
4
|
+
viral_seq (1.0.9)
|
5
|
+
colorize (~> 0.1)
|
5
6
|
muscle_bio (~> 0.4)
|
6
7
|
|
7
8
|
GEM
|
8
9
|
remote: https://rubygems.org/
|
9
10
|
specs:
|
11
|
+
colorize (0.8.1)
|
10
12
|
diff-lcs (1.3)
|
11
13
|
muscle_bio (0.4.0)
|
12
|
-
rake (
|
14
|
+
rake (13.0.1)
|
13
15
|
rspec (3.8.0)
|
14
16
|
rspec-core (~> 3.8.0)
|
15
17
|
rspec-expectations (~> 3.8.0)
|
@@ -29,9 +31,9 @@ PLATFORMS
|
|
29
31
|
|
30
32
|
DEPENDENCIES
|
31
33
|
bundler (~> 2.0)
|
32
|
-
rake (~>
|
34
|
+
rake (~> 13.0)
|
33
35
|
rspec (~> 3.0)
|
34
36
|
viral_seq!
|
35
37
|
|
36
38
|
BUNDLED WITH
|
37
|
-
2.
|
39
|
+
2.1.4
|
data/README.md
CHANGED
@@ -10,66 +10,135 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
|
|
10
10
|
|
11
11
|
## Usage
|
12
12
|
|
13
|
-
Load all ViralSeq classes by requiring 'viral_seq.rb'
|
13
|
+
#### Load all ViralSeq classes by requiring 'viral_seq.rb'
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
#!/usr/bin/env ruby
|
17
|
+
require 'viral_seq'
|
18
|
+
```
|
19
|
+
|
20
|
+
#### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
|
21
|
+
|
22
|
+
$ locator -i sequence.fasta -o sequence.fasta.csv
|
23
|
+
|
24
|
+
|
25
|
+
#### Use executable `tcs` pipeline to process Primer ID MiSeq sequencing data. Parameter json file can be generated using `tcs_json_generator` or at https://tcs-dr-dept-tcs.cloudapps.unc.edu/generator.php
|
26
|
+
|
27
|
+
$ tcs params.json
|
28
|
+
|
29
|
+
#### Use executable `tcs_json_generator` to generate params .json file for the `tcs` pipeline.
|
30
|
+
|
31
|
+
$ tcs_json_generator
|
14
32
|
|
15
|
-
#!/usr/bin/env ruby
|
16
|
-
require 'viral_seq'
|
17
33
|
|
18
34
|
## Some Examples
|
19
35
|
|
20
|
-
Load nucleotide sequences from a FASTA format sequence file
|
36
|
+
#### Load nucleotide sequences from a FASTA format sequence file
|
21
37
|
|
22
|
-
|
38
|
+
```ruby
|
39
|
+
my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
|
40
|
+
```
|
23
41
|
|
24
|
-
Make an alignment (using MUSCLE)
|
42
|
+
#### Make an alignment (using MUSCLE)
|
25
43
|
|
26
|
-
|
44
|
+
```ruby
|
45
|
+
aligned_seqhash = my_seqhash.align
|
46
|
+
```
|
27
47
|
|
28
|
-
Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
48
|
+
#### Filter nucleotide sequences with the reference coordinates (HIV Protease)
|
29
49
|
|
30
|
-
|
50
|
+
```ruby
|
51
|
+
qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
52
|
+
```
|
31
53
|
|
32
|
-
Further filter out sequences with Apobec3g/f hypermutations
|
54
|
+
#### Further filter out sequences with Apobec3g/f hypermutations
|
33
55
|
|
34
|
-
|
56
|
+
```ruby
|
57
|
+
qc_seqhash = qc_seqhash.a3g
|
58
|
+
```
|
35
59
|
|
36
|
-
Calculate nucleotide diveristy π
|
60
|
+
#### Calculate nucleotide diversity π
|
37
61
|
|
38
|
-
|
62
|
+
```ruby
|
63
|
+
qc_seqhash.pi
|
64
|
+
```
|
39
65
|
|
40
|
-
Calculate cut-off for minority variants based on Poisson model
|
66
|
+
#### Calculate cut-off for minority variants based on Poisson model
|
41
67
|
|
42
|
-
|
68
|
+
```ruby
|
69
|
+
cut_off = qc_seqhash.pm
|
70
|
+
```
|
43
71
|
|
44
|
-
Examine for drug resistance mutations for HIV PR region
|
72
|
+
#### Examine for drug resistance mutations for HIV PR region
|
45
73
|
|
46
|
-
|
74
|
+
```ruby
|
75
|
+
qc_seqhash.sdrm_hiv_pr(cut_off)
|
76
|
+
```
|
47
77
|
|
48
78
|
## Updates
|
49
79
|
|
80
|
+
Version 1.0.9-07182020:
|
81
|
+
|
82
|
+
1. Change ViralSeq::SeqHash#stop_codon and ViralSeq::SeqHash#a3g_hypermut return value to hash object.
|
83
|
+
|
84
|
+
2. TCS pipeline updated to version 2.0.1. Add optional `export_raw: TRUE/FALSE` in json params. If `export_raw` is `TRUE`, raw sequence reads that pass the quality filters will be exported along with the TCS reads.
|
85
|
+
|
86
|
+
Version 1.0.8-02282020:
|
87
|
+
|
88
|
+
1. TCS pipeline (version 2.0.0) added as executable.
|
89
|
+
tcs - main TCS pipeline script.
|
90
|
+
tcs_json_generator - step-by-step script to generate json file for tcs pipeline.
|
91
|
+
|
92
|
+
2. Methods added:
|
93
|
+
ViralSeq::SeqHash#trim
|
94
|
+
|
95
|
+
3. Bug fixes for several methods.
|
96
|
+
|
97
|
+
Version 1.0.7-01282020:
|
98
|
+
|
99
|
+
1. Several methods added, including
|
100
|
+
ViralSeq::SeqHash#error_table
|
101
|
+
ViralSeq::SeqHash#random_select
|
102
|
+
2. Improved performance for several functions.
|
103
|
+
|
104
|
+
Version 1.0.6-07232019:
|
105
|
+
|
106
|
+
1. Several methods added to ViralSeq::SeqHash, including
|
107
|
+
ViralSeq::SeqHash#size
|
108
|
+
ViralSeq::SeqHash#+
|
109
|
+
ViralSeq::SeqHash#write_nt_fa
|
110
|
+
ViralSeq::SeqHash#mutation
|
111
|
+
2. Update documentation and rspec samples.
|
112
|
+
|
113
|
+
Version 1.0.5-07112019:
|
114
|
+
|
115
|
+
1. Update ViralSeq::SeqHash#sequence_locator.
|
116
|
+
Program will try to determine the direction (`+` or `-`) of the query sequence.
|
117
|
+
2. Update executable `locator` to have a `direction` column in the output .csv file.
|
118
|
+
|
50
119
|
Version 1.0.4-07102019:
|
51
120
|
|
52
|
-
|
53
|
-
|
121
|
+
1. Use home directory (Dir.home) instead of the directory of the script file for temp MUSCLE file.
|
122
|
+
2. Fix bugs in bin `locator`
|
54
123
|
|
55
124
|
Version 1.0.3-07102019:
|
56
125
|
|
57
|
-
|
126
|
+
1. Bug fix.
|
58
127
|
|
59
128
|
Version 1.0.2-07102019:
|
60
129
|
|
61
|
-
|
130
|
+
1. Fixed a gem loading issue.
|
62
131
|
|
63
132
|
Version 1.0.1-07102019:
|
64
133
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
134
|
+
1. Add keyword argument :model to ViralSeq::SeqHashPair#join2.
|
135
|
+
2. Add method ViralSeq::SeqHash#sequence_locator (also: #loc), a function to locate sequences on HIV/SIV reference genomes, as HIV Sequence Locator from LANL.
|
136
|
+
3. Add executable 'locator'. An HIV/SIV sequence locator tool similar to LANL Sequence Locator.
|
137
|
+
4. Update documentation.
|
69
138
|
|
70
139
|
Version 1.0.0-07092019:
|
71
140
|
|
72
|
-
|
141
|
+
1. Rewrote the whole ViralSeq gem, grouping methods into modules and classes under main Module::ViralSeq
|
73
142
|
|
74
143
|
## Development
|
75
144
|
|
data/bin/locator
CHANGED
@@ -1,46 +1,44 @@
|
|
1
1
|
#!/usr/bin/env ruby
|
2
2
|
|
3
|
+
# Copyright (c) 2020 Shuntai Zhou (shuntai.zhou@gmail.com)
|
4
|
+
#
|
5
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
# of this software and associated documentation files (the "Software"), to deal
|
7
|
+
# in the Software without restriction, including without limitation the rights
|
8
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
# copies of the Software, and to permit persons to whom the Software is
|
10
|
+
# furnished to do so, subject to the following conditions:
|
11
|
+
#
|
12
|
+
# The above copyright notice and this permission notice shall be included in
|
13
|
+
# all copies or substantial portions of the Software.
|
14
|
+
#
|
15
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
# THE SOFTWARE.
|
22
|
+
|
3
23
|
require 'viral_seq'
|
4
24
|
require 'csv'
|
5
25
|
require 'optparse'
|
6
|
-
|
7
|
-
module ViralSeq
|
8
|
-
class SeqHash
|
9
|
-
|
10
|
-
def sequence_locator(ref_option = :HXB2)
|
11
|
-
out_array = []
|
12
|
-
dna_seq = self.dna_hash
|
13
|
-
title = self.title
|
14
|
-
|
15
|
-
uniq_dna = dna_seq.uniq_hash
|
16
|
-
|
17
|
-
uniq_dna.each do |seq,names|
|
18
|
-
s = ViralSeq::Sequence.new('',seq)
|
19
|
-
loc = s.locator(ref_option)
|
20
|
-
names.each do |name|
|
21
|
-
out_array << ([title, name, ref_option.to_s] + loc)
|
22
|
-
end
|
23
|
-
end
|
24
|
-
return out_array
|
25
|
-
end # end of locator
|
26
|
-
alias_method :loc, :sequence_locator
|
27
|
-
end
|
28
|
-
end
|
26
|
+
require 'colorize'
|
29
27
|
|
30
28
|
def myparser
|
31
29
|
options = {}
|
32
30
|
OptionParser.new do |opts|
|
33
|
-
opts.banner = "Usage: locator -i [nt_sequence_fasta_file] -o [locator_info_csv_file] -r [reference_genome_option]"
|
31
|
+
opts.banner = "#{"Usage:".red.bold} locator #{"-i".blue.bold} [nt_sequence_fasta_file] #{"-o".blue.bold} [locator_info_csv_file] #{"-r".blue.bold} [reference_genome_option]"
|
34
32
|
|
35
|
-
opts.on('-i', '--infile FASTA_FILE',
|
33
|
+
opts.on('-i', '--infile FASTA_FILE', "#{"nt sequence".blue.bold} file in FASTA format") do |i|
|
36
34
|
options[:infile] = i
|
37
35
|
end
|
38
36
|
|
39
|
-
opts.on('-o', '--outfile CSV_FILE',
|
37
|
+
opts.on('-o', '--outfile CSV_FILE', "output .csv file for locator info, default as \#\{infile\}.csv") do |o|
|
40
38
|
options[:outfile] = o
|
41
39
|
end
|
42
40
|
|
43
|
-
opts.on('-r', '--ref_option OPTION',
|
41
|
+
opts.on('-r', '--ref_option OPTION', "reference genome option, choose from #{"`HXB2` (default), `NL43`, `MAC239`".blue.bold}") do |o|
|
44
42
|
options[:ref_option] = o.to_sym
|
45
43
|
end
|
46
44
|
|
@@ -48,13 +46,19 @@ def myparser
|
|
48
46
|
puts opts
|
49
47
|
exit
|
50
48
|
end
|
49
|
+
|
50
|
+
opts.on("-v", "--version", "Version number of RubyGem::ViralSeq") do
|
51
|
+
puts opts
|
52
|
+
exit
|
53
|
+
end
|
54
|
+
|
51
55
|
end.parse!
|
52
56
|
return options
|
53
57
|
end
|
54
58
|
|
55
|
-
puts "\
|
56
|
-
puts "See details at https://github.com/ViralSeq/viral_seq\n"
|
57
|
-
puts "Resembling Sequence Locator from LANL (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n
|
59
|
+
puts "\n" + "Sequence Locator (RubyGem::ViralSeq Version #{ViralSeq::VERSION})".red.bold + " by " + "Shuntai Zhou".blue.bold
|
60
|
+
puts "See details at " + "https://github.com/ViralSeq/viral_seq\n".blue
|
61
|
+
puts "Resembling" + " Sequence Locator ".magenta.bold + "from LANL" + " (https://www.hiv.lanl.gov/content/sequence/LOCATE/locate.html)\n".blue
|
58
62
|
|
59
63
|
ARGV << '-h' if ARGV.size == 0
|
60
64
|
|
@@ -64,35 +68,36 @@ begin
|
|
64
68
|
if options[:infile]
|
65
69
|
seq_file = options[:infile]
|
66
70
|
else
|
67
|
-
raise StandardError.new("Input file sequence file not found")
|
71
|
+
raise StandardError.new("Input file sequence file not found".red.bold)
|
68
72
|
end
|
69
73
|
|
70
74
|
if options[:outfile]
|
71
75
|
csv_file = options[:outfile]
|
72
76
|
else
|
73
|
-
|
77
|
+
csv_file = seq_file + ".csv"
|
74
78
|
end
|
75
79
|
|
76
80
|
unless File.exist?(seq_file)
|
77
|
-
raise StandardError.new("Input file sequence file not found")
|
81
|
+
raise StandardError.new("Input file sequence file not found".red.bold)
|
78
82
|
end
|
79
83
|
|
80
84
|
seqs = ViralSeq::SeqHash.fa(seq_file)
|
81
85
|
opt = options[:ref_option] ? options[:ref_option] : :HXB2
|
82
86
|
|
83
87
|
unless [:HXB2, :NL43, :MAC239].include? opt
|
84
|
-
puts "Reference option
|
88
|
+
puts "Reference option `#{opt}` not recognized, using `HXB2` as the reference genome.".red.bold
|
85
89
|
opt = :HXB2
|
86
90
|
end
|
87
91
|
|
88
92
|
locs = seqs.loc(opt)
|
89
|
-
head = ["title", "sequence", "ref", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
93
|
+
head = ["title", "sequence", "ref", "direction", "start", "end", "similarity", "indel", "aligned_input", "aligned_ref"]
|
90
94
|
locs.unshift(head)
|
91
95
|
data = CSV.generate do |csv|
|
92
96
|
locs.each {|loc| csv << loc}
|
93
97
|
end
|
94
98
|
|
95
99
|
File.write(csv_file, data)
|
100
|
+
puts "Output file found at #{csv_file.green.bold}"
|
96
101
|
rescue StandardError => e
|
97
102
|
puts e.message
|
98
103
|
puts "\n"
|
data/bin/tcs
ADDED
@@ -0,0 +1,519 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
# TCS pipeline for Primer ID sequencing data analysis.
|
4
|
+
|
5
|
+
# Copyright (c) 2020 Shuntai Zhou (shuntai.zhou@gmail.com)
|
6
|
+
#
|
7
|
+
# Permission is hereby granted, free of charge, to any person obtaining a copy
|
8
|
+
# of this software and associated documentation files (the "Software"), to deal
|
9
|
+
# in the Software without restriction, including without limitation the rights
|
10
|
+
# to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
11
|
+
# copies of the Software, and to permit persons to whom the Software is
|
12
|
+
# furnished to do so, subject to the following conditions:
|
13
|
+
#
|
14
|
+
# The above copyright notice and this permission notice shall be included in
|
15
|
+
# all copies or substantial portions of the Software.
|
16
|
+
#
|
17
|
+
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
18
|
+
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
19
|
+
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
20
|
+
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
21
|
+
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
22
|
+
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
23
|
+
# THE SOFTWARE.
|
24
|
+
|
25
|
+
# Use JSON file as the run param
|
26
|
+
# Run `tcs_json_generator` to generate the param JSON file.
|
27
|
+
|
28
|
+
require 'viral_seq'
|
29
|
+
require 'json'
|
30
|
+
require 'colorize'
|
31
|
+
|
32
|
+
|
33
|
+
# Calculate the consensus cut-off from a Primer ID abundance.
#
# m          - Primer ID abundance (the pipeline passes the mean of the
#              largest abundance classes).
# error_rate - platform error rate (default 0.02). It selects the fitted
#              polynomial: [0.005, 0.015), [0, 0.005), or the default model
#              for every other value.
#
# Returns an Integer cut-off that is never lower than 2.
def calculate_cut_off(m, error_rate = 0.02)
  # All three models use a fixed cut-off of 2 for abundances up to 10.
  return 2 if m <= 10

  fitted =
    if (0.005...0.015) === error_rate
      1.09*10**-26*m**6 + 7.82*10**-22*m**5 - 1.93*10**-16*m**4 + 1.01*10**-11*m**3 - 2.31*10**-7*m**2 + 0.00645*m + 2.872
    elsif (0...0.005) === error_rate
      -9.59*10**-27*m**6 + 3.27*10**-21*m**5 - 3.05*10**-16*m**4 + 1.2*10**-11*m**3 - 2.19*10**-7*m**2 + 0.004044*m + 2.273
    elsif m <= 8500
      -1.24*10**-21*m**6 + 3.53*10**-17*m**5 - 3.90*10**-13*m**4 + 2.12*10**-9*m**3 - 6.06*10**-6*m**2 + 1.80*10**-2*m + 3.15
    else
      # The default model switches to a linear fit above an abundance of 8500.
      0.0079 * m + 9.4869
    end

  rounded = fitted.round
  # Anything that rounds below 3 collapses to the minimum cut-off of 2.
  rounded < 3 ? 2 : rounded
end
|
66
|
+
|
67
|
+
# Print the pipeline banner.
puts "\n" + '-'*50
puts '| The TCS Pipeline ' + "Version #{ViralSeq::TCS_VERSION}".red.bold + " by " + "Shuntai Zhou".blue.bold + ' |'
puts '-'*50 + "\n"

# The only command-line argument is the path of the JSON param file.
unless ARGV[0]
  raise "No JSON param file found. Script terminated."
end

params = JSON.parse(File.read(ARGV[0]), symbolize_names: true)

indir = params[:raw_sequence_dir]

# A missing :raw_sequence_dir key (nil) or a path that is not a directory is
# reported with the pipeline's own message instead of a TypeError from
# File.exist?(nil) or an Errno::ENOTDIR from Dir.chdir below.
unless indir && File.directory?(indir)
  raise "No input sequence directory found. Script terminated."
end

# The directory name doubles as the library name used in consensus names.
libname = File.basename(indir)

# obtain R1 and R2 file path
# Names are collected relative to the input directory.
files = Dir.chdir(indir) { Dir.glob("*") }

if files.empty?
  raise "Input dir does not contain files. Script terminated."
end

# Filled in once the R1 and R2 files have been identified.
r1_f = ""
r2_f = ""
|
97
|
+
|
98
|
+
# Resolve the path of a raw read file inside +indir+, decompressing it with
# `gzip -d` first when its name ends in ".gz".
#
# indir - directory that holds the raw sequence files.
# f     - file name (not a path) inside +indir+.
#
# Returns the path of the file, without the ".gz" suffix when it was compressed.
def unzip_r(indir, f)
  r_file = File.join(indir, f)
  # Only a trailing ".gz" marks a compressed file; a name that merely contains
  # the letters "gz" (e.g. "agz_R1.fastq") must not be handed to gzip.
  return r_file unless f.end_with?(".gz")

  # The argument-list form of `system` bypasses the shell, so paths containing
  # spaces or shell metacharacters reach gzip unchanged.
  system("gzip", "-d", r_file)
  File.join(indir, f.sub(/\.gz\z/, ""))
end
|
108
|
+
# Open the run log inside the input directory and record the versions in use.
runtime_log_file = File.join(indir, "runtime.log")
log = File.open(runtime_log_file, "w")
log.puts "TCS pipeline Version " + ViralSeq::TCS_VERSION.to_s
log.puts "viral_seq Version " + ViralSeq::VERSION.to_s
log.puts Time.now.to_s + "\t" + "Start TCS pipeline..."

# Identify the R1 and R2 files. The part of the file name after the first
# underscore (or the whole name when there is no underscore) is searched for
# "r1"/"r2", case-insensitively; matching files are decompressed if needed.
files.each do |f|
  parts = f.split("_")
  tag = parts.size == 1 ? f : parts[1..-1].join("_")

  if tag =~ /r1/i
    r1_f = unzip_r(indir, f)
  elsif tag =~ /r2/i
    r2_f = unzip_r(indir, f)
  end
end

unless File.exist?(r1_f)
  log.puts "R1 file not found. Script terminated."
  raise "R1 file not found. Script terminated."
end

unless File.exist?(r2_f)
  log.puts "R2 file not found. Script terminated."
  raise "R2 file not found. Script terminated."
end

r1_fastq_sh = ViralSeq::SeqHash.fq(r1_f)
r2_fastq_sh = ViralSeq::SeqHash.fq(r2_f)

# The raw read count is taken from R1 only.
raw_sequence_number = r1_fastq_sh.size
log.puts Time.now.to_s + "\tRaw sequence number: #{raw_sequence_number.to_s}"

# Platform error rate defaults to 0.02 when the params do not provide one.
error_rate = params[:platform_error_rate] || 0.02

primers = params[:primer_pairs]
# A missing :primer_pairs key (nil) is treated like an empty list instead of
# failing with NoMethodError on nil.
if primers.nil? || primers.empty?
  log.puts "No primer information. Script terminated."
  raise "No primer information. Script terminated."
end
|
158
|
+
|
159
|
+
# Join the R1/R2 FASTA pair stored in +dir+.
#
# dir     - directory read by ViralSeq::SeqHashPair.fa.
# option  - end-join option from the JSON params:
#           1 - join1 with its default arguments,
#           2 - join1 with the supplied +overlap+,
#           3 - join2 with its default model,
#           4 - join2 with model: :indiv.
# overlap - overlap length; only used by option 2.
#
# Returns the joined sequences.
# Raises ArgumentError for any other option, instead of returning nil and
# failing later on the first method call made on the result.
def end_join(dir, option, overlap)
  shp = ViralSeq::SeqHashPair.fa(dir)
  case option
  when 1
    shp.join1
  when 2
    shp.join1(overlap)
  when 3
    shp.join2
  when 4
    shp.join2(model: :indiv)
  else
    raise ArgumentError, "Unknown end_join_option `#{option}`; expected 1, 2, 3 or 4."
  end
end

# Process every primer pair (region): filter the raw reads by primer, pair R1
# with R2, build template consensus sequences (TCS) per Primer ID, write the
# FASTA/JSON outputs and optionally end-join, QC and trim the consensus reads.
primers.each do |primer|
  summary_json = {}
  summary_json[:tcs_version] = ViralSeq::TCS_VERSION
  summary_json[:viralseq_version] = ViralSeq::VERSION
  summary_json[:runtime] = Time.now.to_s

  region = primer[:region] || "region"
  summary_json[:primer_set_name] = region

  cdna_primer = primer[:cdna]
  forward_primer = primer[:forward]

  export_raw = primer[:export_raw]

  # A region without both primers cannot be processed: actually skip it, as
  # the log message says, rather than crashing on nil further down.
  unless cdna_primer
    log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
    next
  end
  unless forward_primer
    log.puts Time.now.to_s + "\t" + region + " does not have forward primer sequence. #{region} skipped."
    next
  end
  # NOTE(review): the key is spelled :cdan_primer; kept as-is because it is
  # part of the log.json output that other tools may read — confirm before renaming.
  summary_json[:cdan_primer] = cdna_primer
  summary_json[:forward_primer] = forward_primer

  majority_cut_off = primer[:majority] || 0.5
  summary_json[:majority_cut_off] = majority_cut_off

  summary_json[:total_raw_sequence] = raw_sequence_number

  log.puts Time.now.to_s + "\t" + "Processing #{region}..."

  r1_raw = r1_fastq_sh.dna_hash
  r2_raw = r2_fastq_sh.dna_hash

  log.puts Time.now.to_s + "\t" + "filtering R1..."
  # obtain biological forward primer sequence
  # A leading run of N is counted as a spacer; the rest is the biological primer.
  forward_match = forward_primer.match(/(N+)(\w+)$/)
  if forward_match
    forward_n = forward_match[1].size
    forward_bio_primer = forward_match[2]
  else
    forward_n = 0
    forward_bio_primer = forward_primer
  end
  forward_bio_primer_size = forward_bio_primer.size
  forward_starting_number = forward_n + forward_bio_primer_size

  # filter R1 sequences with forward primers.
  forward_primer_ref = forward_bio_primer.nt_parser
  r1_passed_seq = {}
  r1_raw.each do |name, seq|
    next if seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
    next if seq =~ /A{11}/ # a string of poly-A indicates adaptor sequence
    next if seq =~ /T{11}/ # a string of poly-T indicates adaptor sequence

    primer_region_seq = seq[forward_n, forward_bio_primer_size]
    if primer_region_seq =~ forward_primer_ref
      # Key by the first whitespace-delimited token of the read name so that
      # R1 and R2 of the same read share a key.
      r1_passed_seq[name.split("\s")[0]] = seq
    end
  end
  log.puts Time.now.to_s + "\t" + "R1 filtered: #{r1_passed_seq.size.to_s}"

  summary_json[:r1_filtered_raw] = r1_passed_seq.size

  log.puts Time.now.to_s + "\t" + "filtering R2..."
  # obtain biological reverse primer sequence
  # The run of N in the cDNA primer is the Primer ID; without it there is
  # nothing to build consensus sequences on, so the region is skipped.
  cdna_match = cdna_primer.match(/(N+)(\w+)$/)
  unless cdna_match
    log.puts Time.now.to_s + "\t" + "cDNA primer of #{region} does not contain a Primer ID (N) block. #{region} skipped."
    next
  end
  pid_length = cdna_match[1].size
  cdna_bio_primer = cdna_match[2]
  cdna_bio_primer_size = cdna_bio_primer.size
  reverse_starting_number = pid_length + cdna_bio_primer_size

  # filter R2 sequences with cDNA primers.
  cdna_primer_ref = cdna_bio_primer.nt_parser
  r2_passed_seq = {}
  r2_raw.each do |name, seq|
    next if seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
    next if seq =~ /A{11}/ # a string of poly-A indicates adaptor sequence
    next if seq =~ /T{11}/ # a string of poly-T indicates adaptor sequence

    primer_region_seq = seq[pid_length, cdna_bio_primer_size]
    if primer_region_seq =~ cdna_primer_ref
      r2_passed_seq[name.split("\s")[0]] = seq
    end
  end
  log.puts Time.now.to_s + "\t" + "R2 filtered: #{r2_passed_seq.size.to_s}"
  summary_json[:r2_filtered_raw] = r2_passed_seq.size

  # pair-end
  log.puts Time.now.to_s + "\t" + "Pairing R1 and R2 seqs..."
  id = {} # hash for :sequence_tag => primer_id
  bio_r2 = {} # hash for :sequence_tag => primer_trimmed_r2_sequence
  bio_r1 = {} # hash for :sequence_tag => primer_trimmed_r1_sequence
  common_keys = r1_passed_seq.keys & r2_passed_seq.keys
  paired_seq_number = common_keys.size
  log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
  summary_json[:paired_raw_sequence] = paired_seq_number

  common_keys.each do |seqtag|
    r1_seq = r1_passed_seq[seqtag]
    r2_seq = r2_passed_seq[seqtag]
    pid = r2_seq[0, pid_length]
    id[seqtag] = pid
    # Drop the primer (and Primer ID) prefix and the last base of each read.
    bio_r2[seqtag] = r2_seq[reverse_starting_number..-2]
    bio_r1[seqtag] = r1_seq[forward_starting_number..-2]
  end

  # TCS cut-off
  log.puts Time.now.to_s + "\t" + "Calculate consensus cutoff...."

  primer_id_list = id.values
  primer_id_count = primer_id_list.count_freq
  primer_id_dis = primer_id_count.values.count_freq

  # calculate distinct_to_raw
  distinct_to_raw = (primer_id_count.size/primer_id_list.size.to_f).round(3)
  summary_json[:distinct_to_raw] = distinct_to_raw

  if primer_id_dis.keys.size < 5
    log.puts Time.now.to_s + "\t" + "Less than 5 Primer IDs detected. Region #{region} aborted."
    next
  end

  # The cut-off is derived from the mean of the five largest abundance classes.
  max_id = primer_id_dis.keys.sort[-5..-1].mean
  consensus_cutoff = calculate_cut_off(max_id,error_rate)
  log.puts Time.now.to_s + "\t" + "Consensus cut-off is #{consensus_cutoff.to_s}"
  summary_json[:consensus_cutoff] = consensus_cutoff
  summary_json[:length_of_pid] = pid_length

  log.puts Time.now.to_s + "\t" + "Creating consensus..."

  # Primer ID over the cut-off
  primer_id_count_over_n = []
  primer_id_count.each do |primer_id, count|
    primer_id_count_over_n << primer_id if count > consensus_cutoff
  end
  pid_to_process = primer_id_count_over_n.size
  log.puts Time.now.to_s + "\t" + "Number of consensus to process: #{pid_to_process.to_s}"
  summary_json[:total_tcs_with_ambiguities] = pid_to_process

  # setup output path
  out_dir_set = File.join(indir, region)
  Dir.mkdir(out_dir_set) unless File.directory?(out_dir_set)
  out_dir_consensus = File.join(out_dir_set, "consensus")
  Dir.mkdir(out_dir_consensus) unless File.directory?(out_dir_consensus)

  outfile_r1 = File.join(out_dir_consensus, 'r1.fasta')
  outfile_r2 = File.join(out_dir_consensus, 'r2.fasta')
  outfile_log = File.join(out_dir_set, 'log.json')

  # if export_raw is true, create dir for raw sequence
  if export_raw
    out_dir_raw = File.join(out_dir_set, "raw")
    Dir.mkdir(out_dir_raw) unless File.directory?(out_dir_raw)
    outfile_raw_r1 = File.join(out_dir_raw, 'r1.raw.fasta')
    outfile_raw_r2 = File.join(out_dir_raw, 'r2.raw.fasta')

    # Block form closes both files even if writing raises.
    File.open(outfile_raw_r1, 'w') do |raw_r1_f|
      File.open(outfile_raw_r2, 'w') do |raw_r2_f|
        bio_r1.each_key do |k|
          raw_r1_f.puts k + "_r1"
          raw_r2_f.puts k + "_r2"
          raw_r1_f.puts bio_r1[k]
          raw_r2_f.puts bio_r2[k].rc
        end
      end
    end
  end

  # create TCS

  # Group the sequence tags by their Primer ID.
  pid_seqtag_hash = {}
  id.each do |name, pid|
    (pid_seqtag_hash[pid] ||= []) << name
  end

  consensus = {}
  r1_temp = {}
  r2_temp = {}
  m = 0
  primer_id_count_over_n.each do |primer_id|
    m += 1
    log.puts Time.now.to_s + "\t" + "Now processing number #{m}" if m%100 == 0
    seq_with_same_primer_id = pid_seqtag_hash[primer_id]
    r1_sub_seq = []
    r2_sub_seq = []
    seq_with_same_primer_id.each do |seq_name|
      r1_sub_seq << bio_r1[seq_name]
      r2_sub_seq << bio_r2[seq_name]
    end

    #consensus name including the Primer ID and number of raw sequences of that Primer ID, library name and setname.
    consensus_name = ">" + primer_id + "_" + seq_with_same_primer_id.size.to_s + "_" + libname + "_" + region
    r1_consensus = ViralSeq::SeqHash.array(r1_sub_seq).consensus(majority_cut_off)
    r2_consensus = ViralSeq::SeqHash.array(r2_sub_seq).consensus(majority_cut_off)
    # Consensus sequences containing anything other than A/T/C/G are dropped.
    next if r1_consensus =~ /[^ATCG]/
    next if r2_consensus =~ /[^ATCG]/

    # reverse complement sequence of the R2 region
    r2_consensus = r2_consensus.rc
    consensus[consensus_name] = [r1_consensus, r2_consensus]
    r1_temp[consensus_name] = r1_consensus
    r2_temp[consensus_name] = r2_consensus
  end
  r1_temp_sh = ViralSeq::SeqHash.new(r1_temp)
  r2_temp_sh = ViralSeq::SeqHash.new(r2_temp)

  # filter consensus sequences for residual offspring PIDs
  consensus_filtered = {}
  consensus_number_temp = consensus.size
  max_pid_comb = 4**pid_length
  # The post-TCS filter is only applied when fewer than 0.3% of all possible
  # Primer IDs of this length are in use.
  if consensus_number_temp < 0.003*max_pid_comb
    log.puts Time.now.to_s + "\t" + "Applying PID post TCS filter..."
    r1_consensus_filtered = r1_temp_sh.filter_similar_pid.dna_hash
    r2_consensus_filtered = r2_temp_sh.filter_similar_pid.dna_hash
    common_pid = r1_consensus_filtered.keys & r2_consensus_filtered.keys
    common_pid.each do |pid|
      consensus_filtered[pid] = [r1_consensus_filtered[pid], r2_consensus_filtered[pid]]
    end
  else
    consensus_filtered = consensus
  end
  n_con = consensus_filtered.size
  log.puts Time.now.to_s + "\t" + "Number of consensus sequences: " + n_con.to_s

  # Without any consensus sequence there is nothing to write; abort this
  # region instead of crashing on the first (missing) entry below.
  if consensus_filtered.empty?
    log.puts Time.now.to_s + "\t" + "No consensus sequences created. Region #{region} aborted."
    next
  end

  summary_json[:total_tcs] = n_con
  summary_json[:resampling_param] = (n_con/pid_to_process.to_f).round(3)

  log.puts Time.now.to_s + "\t" + "Writing R1 and R2 files..."
  # r1_file output
  primer_id_in_use = {}
  r1_seq_length = consensus_filtered.values[0][0].size
  r2_seq_length = consensus_filtered.values[0][1].size
  log.puts Time.now.to_s + "\t" + "R1 sequence #{r1_seq_length} bp"
  log.puts Time.now.to_s + "\t" + "R2 sequence #{r2_seq_length} bp"
  File.open(outfile_r1, 'w') do |f1|
    File.open(outfile_r2, 'w') do |f2|
      consensus_filtered.each do |seq_name, seq|
        f1.print seq_name + "_r1\n" + seq[0] + "\n"
        f2.print seq_name + "_r2\n" + seq[1] + "\n"
        # Consensus names start with ">PID_count_..."; record PID => raw count.
        name_fields = seq_name.split("_")
        primer_id_in_use[name_fields[0][1..-1]] = name_fields[1].to_i
      end
    end
  end

  out_pid_json = File.join(out_dir_set, 'primer_id.json')
  pid_json = {}
  pid_json[:primer_id_in_use] = primer_id_in_use.sort_by { |k, v| [-v, k] }.to_h
  pid_json[:primer_id_distribution] = primer_id_dis.sort_by { |k, _v| k }.to_h
  pid_json[:primer_id_frequency] = primer_id_count.sort_by { |k, v| [-v, k] }.to_h
  File.open(out_pid_json, 'w') do |f|
    f.puts JSON.pretty_generate(pid_json)
  end

  # Without end-joining the region is finished: write the summary and move on.
  unless primer[:end_join]
    File.open(outfile_log, "w") do |f|
      f.puts JSON.pretty_generate(summary_json)
    end
    next
  end

  log.puts Time.now.to_s + "\t" + "Start end-pairing for TCS..."
  joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
  log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
  summary_json[:combined_tcs] = joined_sh.size

  if export_raw
    joined_sh_raw = end_join(out_dir_raw, primer[:end_join_option], primer[:overlap])
  end

  if primer[:TCS_QC]
    ref_start = primer[:ref_start]
    ref_end = primer[:ref_end]
    ref_genome = primer[:ref_genome].to_sym
    indel = primer[:indel]
    # A coordinate of 0 in the params is replaced by a range spanning the
    # whole reference genome.
    full_ref_range = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
    ref_start = full_ref_range if ref_start == 0
    ref_end = full_ref_range if ref_end == 0

    if primer[:end_join_option] == 1 && primer[:overlap] == 0
      # Simple join without overlap: QC each end separately (R1 on its start
      # coordinate, R2 on its end coordinate), then concatenate the survivors.
      r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
      r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
      r1_sh = r1_sh.hiv_seq_qc(ref_start, full_ref_range, indel, ref_genome)
      r2_sh = r2_sh.hiv_seq_qc(full_ref_range, ref_end, indel, ref_genome)
      # k[0..-4] strips the "_r1"/"_r2" suffix so both ends share a name.
      new_r1_seq = r1_sh.dna_hash.each_with_object({}) { |(k, v), h| h[k[0..-4]] = v }
      new_r2_seq = r2_sh.dna_hash.each_with_object({}) { |(k, v), h| h[k[0..-4]] = v }
      joined_seq = {}
      new_r1_seq.each do |seq_name, seq|
        next unless seq
        next unless new_r2_seq[seq_name]
        joined_seq[seq_name] = seq + new_r2_seq[seq_name]
      end
      joined_sh = ViralSeq::SeqHash.new(joined_seq)

      if export_raw
        r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
        r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
        r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, full_ref_range, indel, ref_genome)
        r2_sh_raw = r2_sh_raw.hiv_seq_qc(full_ref_range, ref_end, indel, ref_genome)
        new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) { |(k, v), h| h[k[0..-4]] = v }
        new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) { |(k, v), h| h[k[0..-4]] = v }
        joined_seq_raw = {}
        new_r1_seq_raw.each do |seq_name, seq|
          next unless seq
          next unless new_r2_seq_raw[seq_name]
          joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
        end
        joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
      end
    else
      joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)

      if export_raw
        # QC the joined RAW reads; running the QC on joined_sh here would
        # overwrite the raw export with the consensus sequences.
        joined_sh_raw = joined_sh_raw.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
      end
    end
    log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
    summary_json[:combined_tcs_after_qc] = joined_sh.size
    if primer[:trim]
      trim_start = primer[:trim_ref_start]
      trim_end = primer[:trim_ref_end]
      trim_ref = primer[:trim_ref].to_sym
      joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
    end
  end

  # Write the joined reads whether or not QC was requested; otherwise the
  # end-joined sequences computed above would be discarded when TCS_QC is off.
  joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
  if export_raw
    joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.fasta"))
  end

  File.open(outfile_log, "w") do |f|
    f.puts JSON.pretty_generate(summary_json)
  end
end
|
513
|
+
|
514
|
+
# Clean up: the (decompressed) raw R1/R2 files are deleted once every region
# has been processed, then the run log is finalized and closed.
log.puts Time.now.to_s + "\t" + "Removing raw sequence files..."
File.unlink(r1_f)
File.unlink(r2_f)
log.puts Time.now.to_s + "\t" + "TCS pipeline successfully executed."
log.close
puts "DONE!"
|