viral_seq 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +33 -10
- data/bin/tcs +21 -2
- data/bin/tcs_log +20 -1
- data/docs/assets/img/cover.jpg +0 -0
- data/{doc → docs}/dr.json +0 -1
- data/docs/sample_miseq_data/hivdr_control/r1.fastq.gz +0 -0
- data/docs/sample_miseq_data/hivdr_control/r2.fastq.gz +0 -0
- data/lib/viral_seq.rb +1 -1
- data/lib/viral_seq/tcs_dr.rb +71 -0
- data/lib/viral_seq/version.rb +2 -2
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a283f3a09cc5d9807e7622cd1ddf27197919955e85d6472b34fc14b66749c03
|
4
|
+
data.tar.gz: 4f90c5a9c7ea0ec148ba7d45ee88dc441f79da67a97654734194a773499ebb8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 385a94eb93c3d8d9116c16a0d8af56ba714ba6191a454076acf881a036de80d1d598f3fcd1a4de841745ca08a1ad3e8bc028a30db9f96c19f3b217ef4583d652
|
7
|
+
data.tar.gz: 714d035b6f65863746cafb120c9cf6eccb8261f3eac69985bad96e5275351eec71aa3b744ee9b462e2dc3e0e199c2d4112386f6a2d7eef89b5b7824c1ab769be
|
data/README.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# ViralSeq
|
2
2
|
|
3
|
+
[![Gem Version](https://badge.fury.io/rb/viral_seq.svg)](https://rubygems.org/gems/viral_seq)
|
4
|
+
![GitHub](https://img.shields.io/github/license/viralseq/viral_seq)
|
5
|
+
![Gem](https://img.shields.io/gem/dt/viral_seq?color=%23E9967A)
|
6
|
+
![GitHub last commit](https://img.shields.io/github/last-commit/viralseq/viral_seq?color=%2300BFFF)
|
7
|
+
[![Join the chat at https://gitter.im/viral_seq/community](https://badges.gitter.im/viral_seq/community.svg)](https://gitter.im/viral_seq/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
8
|
+
|
3
9
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
10
|
|
5
11
|
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
@@ -7,11 +13,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
7
13
|
## Illustration for the Primer ID Sequencing
|
8
14
|
|
9
15
|
|
10
|
-
![Primer ID Sequencing](
|
16
|
+
![Primer ID Sequencing](./docs/assets/img/cover.jpg)
|
11
17
|
|
12
18
|
### Reference readings on the Primer ID sequencing
|
13
|
-
[Primer ID
|
14
|
-
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
19
|
+
[Explanation of Primer ID sequencing](https://doi.org/10.21769/BioProtoc.3938)
|
20
|
+
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
21
|
+
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
15
22
|
|
16
23
|
## Install
|
17
24
|
|
@@ -29,9 +36,18 @@ Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
|
|
29
36
|
Example commands:
|
30
37
|
```bash
|
31
38
|
$ tcs -p params.json # run TCS pipeline with params.json
|
39
|
+
$ tcs -p params.json -i DIRECTORY
|
40
|
+
# run TCS pipeline with params.json and DIRECTORY
|
41
|
+
# if DIRECTORY is not defined in params.json
|
42
|
+
$ tcs -dr -i DIRECTORY
|
43
|
+
# run tcs-dr (MPID HIV drug resistance sequencing) pipeline
|
44
|
+
# DIRECTORY needs to be given.
|
32
45
|
$ tcs -j # CLI to generate params.json
|
33
46
|
$ tcs -h # print out the help
|
34
47
|
```
|
48
|
+
|
49
|
+
[sample params.json for the tcs-dr pipeline](./docs/dr.json)
|
50
|
+
|
35
51
|
---
|
36
52
|
### `tcs_log`
|
37
53
|
|
@@ -121,15 +137,22 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
121
137
|
|
122
138
|
## Updates
|
123
139
|
|
140
|
+
### Version 1.1.1-04012021
|
141
|
+
|
142
|
+
1. Added warning when paired_raw_sequence is less than 0.1% of total_raw_sequence.
|
143
|
+
2. Added option `-i WORKING_DIRECTORY` to the `tcs` script.
|
144
|
+
If the `params.json` file does not contain the path to the working directory, it will append path to the run params.
|
145
|
+
3. Added option `-dr` to the `tcs` script.
|
146
|
+
|
124
147
|
### Version 1.1.0-03252021
|
125
148
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
149
|
+
1. Optimized the algorithm of end-join.
|
150
|
+
2. Fixed a bug in the `tcs` pipeline where combined tcs files were sometimes not saved.
|
151
|
+
3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
|
152
|
+
4. Added the preset of MPID-HIVDR params file [***dr.json***](./docs/dr.json) in /docs.
|
153
|
+
5. Added `platform_format` option in the json generator of the `tcs` Pipeline.
|
154
|
+
Users can choose from 3 MiSeq platforms for processing their sequencing data.
|
155
|
+
MiSeq 300x7x300 is the default option.
|
133
156
|
|
134
157
|
### Version 1.0.14-03052021
|
135
158
|
|
data/bin/tcs
CHANGED
@@ -46,6 +46,14 @@ OptionParser.new do |opts|
|
|
46
46
|
options[:params_json] = p
|
47
47
|
end
|
48
48
|
|
49
|
+
opts.on("-i", "--input PATH_TO_WORKING_DIRECTORY", "Path to the working directory") do |p|
|
50
|
+
options[:input] = p
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-dr", "--dr_pipeline", "HIV drug resistance MPID pipeline") do |p|
|
54
|
+
options[:dr] = true
|
55
|
+
end
|
56
|
+
|
49
57
|
opts.on("-h", "--help", "Prints this help") do
|
50
58
|
puts opts
|
51
59
|
exit
|
@@ -64,15 +72,21 @@ end.parse!
|
|
64
72
|
|
65
73
|
if options[:json_generator]
|
66
74
|
params = ViralSeq::TcsJson.generate
|
75
|
+
elsif options[:dr]
|
76
|
+
params = ViralSeq::TcsDr::PARAMS
|
67
77
|
elsif (options[:params_json] && File.exist?(options[:params_json]))
|
68
78
|
params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
|
69
79
|
else
|
70
80
|
abort "No params JSON file found. Script terminated.".red
|
71
81
|
end
|
72
82
|
|
73
|
-
|
83
|
+
if options[:input]
|
84
|
+
indir = options[:input]
|
85
|
+
else
|
86
|
+
indir = params[:raw_sequence_dir]
|
87
|
+
end
|
74
88
|
|
75
|
-
unless File.exist?(indir)
|
89
|
+
unless indir and File.exist?(indir)
|
76
90
|
abort "No input sequence directory found. Script terminated.".red.bold
|
77
91
|
end
|
78
92
|
|
@@ -129,6 +143,7 @@ end
|
|
129
143
|
|
130
144
|
primers.each do |primer|
|
131
145
|
summary_json = {}
|
146
|
+
summary_json[:warnings] = []
|
132
147
|
summary_json[:tcs_version] = ViralSeq::TCS_VERSION
|
133
148
|
summary_json[:viralseq_version] = ViralSeq::VERSION
|
134
149
|
summary_json[:runtime] = Time.now.to_s
|
@@ -181,6 +196,10 @@ primers.each do |primer|
|
|
181
196
|
paired_seq_number = common_keys.size
|
182
197
|
log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
|
183
198
|
summary_json[:paired_raw_sequence] = paired_seq_number
|
199
|
+
if paired_seq_number < raw_sequence_number * 0.001
|
200
|
+
summary_json[:warnings] <<
|
201
|
+
"WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
|
202
|
+
end
|
184
203
|
|
185
204
|
common_keys.each do |seqtag|
|
186
205
|
r1_seq = r1_passed_seq[seqtag]
|
data/bin/tcs_log
CHANGED
@@ -37,8 +37,26 @@ Dir.mkdir(outdir4) unless File.directory?(outdir4)
|
|
37
37
|
|
38
38
|
log_file = File.join(tcs_dir,"log.csv")
|
39
39
|
log = File.open(log_file,'w')
|
40
|
-
log.puts "lib name,Region,Raw Sequences per barcode,R1 Raw,R2 Raw,Paired Raw,Cutoff,PID Length,Consensus1,Consensus2,Distinct to Raw,Resampling index,Combined TCS,Combined TCS after QC"
|
41
40
|
|
41
|
+
header = %w{
|
42
|
+
lib_name
|
43
|
+
Region
|
44
|
+
Raw_Sequences_per_barcode
|
45
|
+
R1_Raw
|
46
|
+
R2_Raw
|
47
|
+
Paired_Raw
|
48
|
+
Cutoff
|
49
|
+
PID_Length
|
50
|
+
Consensus1
|
51
|
+
Consensus2
|
52
|
+
Distinct_to_Raw
|
53
|
+
Resampling_index
|
54
|
+
Combined_TCS
|
55
|
+
Combined_TCS_after_QC
|
56
|
+
WARNINGS
|
57
|
+
}
|
58
|
+
|
59
|
+
log.puts header.join(',')
|
42
60
|
libs.each do |lib|
|
43
61
|
Dir.mkdir(File.join(outdir2, lib)) unless File.directory?(File.join(outdir2, lib))
|
44
62
|
fasta_files = []
|
@@ -77,6 +95,7 @@ libs.each do |lib|
|
|
77
95
|
json_log[:resampling_param],
|
78
96
|
json_log[:combined_tcs],
|
79
97
|
json_log[:combined_tcs_after_qc],
|
98
|
+
json_log[:warnings],
|
80
99
|
].join(',') + "\n"
|
81
100
|
end
|
82
101
|
end
|
Binary file
|
data/{doc → docs}/dr.json
RENAMED
Binary file
|
Binary file
|
data/lib/viral_seq.rb
CHANGED
data/lib/viral_seq/tcs_dr.rb
ADDED
@@ -0,0 +1,71 @@
|
|
1
|
+
module ViralSeq
|
2
|
+
|
3
|
+
class TcsDr
|
4
|
+
PARAMS = {:platform_error_rate=>0.02,
|
5
|
+
:primer_pairs=>
|
6
|
+
[{:region=>"RT",
|
7
|
+
:cdna=>
|
8
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCACTATAGGCTGTACTGTCCATTTATC",
|
9
|
+
:forward=>
|
10
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
|
11
|
+
:majority=>0.5,
|
12
|
+
:end_join=>true,
|
13
|
+
:end_join_option=>1,
|
14
|
+
:overlap=>0,
|
15
|
+
:TCS_QC=>true,
|
16
|
+
:ref_genome=>"HXB2",
|
17
|
+
:ref_start=>2648,
|
18
|
+
:ref_end=>3257,
|
19
|
+
:indel=>true,
|
20
|
+
:trim=>false},
|
21
|
+
{:region=>"PR",
|
22
|
+
:cdna=>
|
23
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
|
24
|
+
:forward=>
|
25
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
|
26
|
+
:majority=>0.5,
|
27
|
+
:end_join=>true,
|
28
|
+
:end_join_option=>3,
|
29
|
+
:TCS_QC=>true,
|
30
|
+
:ref_genome=>"HXB2",
|
31
|
+
:ref_start=>0,
|
32
|
+
:ref_end=>2591,
|
33
|
+
:indel=>true,
|
34
|
+
:trim=>true,
|
35
|
+
:trim_ref=>"HXB2",
|
36
|
+
:trim_ref_start=>2253,
|
37
|
+
:trim_ref_end=>2549},
|
38
|
+
{:region=>"IN",
|
39
|
+
:cdna=>
|
40
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNATCGAATACTGCCATTTGTACTGC",
|
41
|
+
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
42
|
+
:majority=>0.5,
|
43
|
+
:end_join=>true,
|
44
|
+
:end_join_option=>3,
|
45
|
+
:overlap=>171,
|
46
|
+
:TCS_QC=>true,
|
47
|
+
:ref_genome=>"HXB2",
|
48
|
+
:ref_start=>4384,
|
49
|
+
:ref_end=>4751,
|
50
|
+
:indel=>false,
|
51
|
+
:trim=>false},
|
52
|
+
{:region=>"V1V3",
|
53
|
+
:cdna=>
|
54
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
|
55
|
+
:forward=>
|
56
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
|
57
|
+
:majority=>0.5,
|
58
|
+
:end_join=>true,
|
59
|
+
:end_join_option=>1,
|
60
|
+
:overlap=>0,
|
61
|
+
:TCS_QC=>true,
|
62
|
+
:ref_genome=>"HXB2",
|
63
|
+
:ref_start=>6585,
|
64
|
+
:ref_end=>7208,
|
65
|
+
:indel=>true,
|
66
|
+
:trim=>false}
|
67
|
+
]
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.0
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-04-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -106,7 +106,10 @@ files:
|
|
106
106
|
- bin/locator
|
107
107
|
- bin/tcs
|
108
108
|
- bin/tcs_log
|
109
|
-
-
|
109
|
+
- docs/assets/img/cover.jpg
|
110
|
+
- docs/dr.json
|
111
|
+
- docs/sample_miseq_data/hivdr_control/r1.fastq.gz
|
112
|
+
- docs/sample_miseq_data/hivdr_control/r2.fastq.gz
|
110
113
|
- lib/viral_seq.rb
|
111
114
|
- lib/viral_seq/constant.rb
|
112
115
|
- lib/viral_seq/enumerable.rb
|
@@ -123,6 +126,7 @@ files:
|
|
123
126
|
- lib/viral_seq/sequence.rb
|
124
127
|
- lib/viral_seq/string.rb
|
125
128
|
- lib/viral_seq/tcs_core.rb
|
129
|
+
- lib/viral_seq/tcs_dr.rb
|
126
130
|
- lib/viral_seq/tcs_json.rb
|
127
131
|
- lib/viral_seq/version.rb
|
128
132
|
- viral_seq.gemspec
|