viral_seq 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +33 -10
- data/bin/tcs +21 -2
- data/bin/tcs_log +20 -1
- data/docs/assets/img/cover.jpg +0 -0
- data/{doc → docs}/dr.json +0 -1
- data/docs/sample_miseq_data/hivdr_control/r1.fastq.gz +0 -0
- data/docs/sample_miseq_data/hivdr_control/r2.fastq.gz +0 -0
- data/lib/viral_seq.rb +1 -1
- data/lib/viral_seq/tcs_dr.rb +71 -0
- data/lib/viral_seq/version.rb +2 -2
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 7a283f3a09cc5d9807e7622cd1ddf27197919955e85d6472b34fc14b66749c03
|
4
|
+
data.tar.gz: 4f90c5a9c7ea0ec148ba7d45ee88dc441f79da67a97654734194a773499ebb8e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 385a94eb93c3d8d9116c16a0d8af56ba714ba6191a454076acf881a036de80d1d598f3fcd1a4de841745ca08a1ad3e8bc028a30db9f96c19f3b217ef4583d652
|
7
|
+
data.tar.gz: 714d035b6f65863746cafb120c9cf6eccb8261f3eac69985bad96e5275351eec71aa3b744ee9b462e2dc3e0e199c2d4112386f6a2d7eef89b5b7824c1ab769be
|
data/README.md
CHANGED
@@ -1,5 +1,11 @@
|
|
1
1
|
# ViralSeq
|
2
2
|
|
3
|
+
[](https://rubygems.org/gems/viral_seq)
|
4
|
+

|
5
|
+

|
6
|
+

|
7
|
+
[](https://gitter.im/viral_seq/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
|
8
|
+
|
3
9
|
A Ruby Gem containing bioinformatics tools for processing viral NGS data.
|
4
10
|
|
5
11
|
Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
@@ -7,11 +13,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
|
|
7
13
|
## Illustration for the Primer ID Sequencing
|
8
14
|
|
9
15
|
|
10
|
-

|
11
17
|
|
12
18
|
### Reference readings on the Primer ID sequencing
|
13
|
-
[Primer ID
|
14
|
-
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
19
|
+
[Explanation of Primer ID sequencing](https://doi.org/10.21769/BioProtoc.3938)
|
20
|
+
[Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
|
21
|
+
[Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
|
15
22
|
|
16
23
|
## Install
|
17
24
|
|
@@ -29,9 +36,18 @@ Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
|
|
29
36
|
Example commands:
|
30
37
|
```bash
|
31
38
|
$ tcs -p params.json # run TCS pipeline with params.json
|
39
|
+
$ tcs -p params.json -i DIRECTORY
|
40
|
+
# run TCS pipeline with params.json and DIRECTORY
|
41
|
+
# if DIRECTORY is not defined in params.json
|
42
|
+
$ tcs -dr -i DIRECTORY
|
43
|
+
# run tcs-dr (MPID HIV drug resistance sequencing) pipeline
|
44
|
+
# DIRECTORY needs to be given.
|
32
45
|
$ tcs -j # CLI to generate params.json
|
33
46
|
$ tcs -h # print out the help
|
34
47
|
```
|
48
|
+
|
49
|
+
[sample params.json for the tcs-dr pipeline](./docs/dr.json)
|
50
|
+
|
35
51
|
---
|
36
52
|
### `tcs_log`
|
37
53
|
|
@@ -121,15 +137,22 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
121
137
|
|
122
138
|
## Updates
|
123
139
|
|
140
|
+
### Version 1.1.1-04012021
|
141
|
+
|
142
|
+
1. Added a warning when paired_raw_sequence is less than 0.1% of total_raw_sequence.
|
143
|
+
2. Added option `-i WORKING_DIRECTORY` to the `tcs` script.
|
144
|
+
If the `params.json` file does not contain the path to the working directory, it will append the path to the run params.
|
145
|
+
3. Added option `-dr` to the `tcs` script.
|
146
|
+
|
124
147
|
### Version 1.1.0-03252021
|
125
148
|
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
149
|
+
1. Optimized the algorithm of end-join.
|
150
|
+
2. Fixed a bug in the `tcs` pipeline where combined tcs files were sometimes not saved.
|
151
|
+
3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
|
152
|
+
4. Added the preset of MPID-HIVDR params file [***dr.json***](./docs/dr.json) in /docs.
|
153
|
+
5. Added the `platform_format` option to the JSON generator of the `tcs` pipeline.
|
154
|
+
Users can choose from 3 MiSeq platforms for processing their sequencing data.
|
155
|
+
MiSeq 300x7x300 is the default option.
|
133
156
|
|
134
157
|
### Version 1.0.14-03052021
|
135
158
|
|
data/bin/tcs
CHANGED
@@ -46,6 +46,14 @@ OptionParser.new do |opts|
|
|
46
46
|
options[:params_json] = p
|
47
47
|
end
|
48
48
|
|
49
|
+
opts.on("-i", "--input PATH_TO_WORKING_DIRECTORY", "Path to the working directory") do |p|
|
50
|
+
options[:input] = p
|
51
|
+
end
|
52
|
+
|
53
|
+
opts.on("-dr", "--dr_pipeline", "HIV drug resistance MPID pipeline") do |p|
|
54
|
+
options[:dr] = true
|
55
|
+
end
|
56
|
+
|
49
57
|
opts.on("-h", "--help", "Prints this help") do
|
50
58
|
puts opts
|
51
59
|
exit
|
@@ -64,15 +72,21 @@ end.parse!
|
|
64
72
|
|
65
73
|
if options[:json_generator]
|
66
74
|
params = ViralSeq::TcsJson.generate
|
75
|
+
elsif options[:dr]
|
76
|
+
params = ViralSeq::TcsDr::PARAMS
|
67
77
|
elsif (options[:params_json] && File.exist?(options[:params_json]))
|
68
78
|
params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
|
69
79
|
else
|
70
80
|
abort "No params JSON file found. Script terminated.".red
|
71
81
|
end
|
72
82
|
|
73
|
-
|
83
|
+
if options[:input]
|
84
|
+
indir = options[:input]
|
85
|
+
else
|
86
|
+
indir = params[:raw_sequence_dir]
|
87
|
+
end
|
74
88
|
|
75
|
-
unless File.exist?(indir)
|
89
|
+
unless indir and File.exist?(indir)
|
76
90
|
abort "No input sequence directory found. Script terminated.".red.bold
|
77
91
|
end
|
78
92
|
|
@@ -129,6 +143,7 @@ end
|
|
129
143
|
|
130
144
|
primers.each do |primer|
|
131
145
|
summary_json = {}
|
146
|
+
summary_json[:warnings] = []
|
132
147
|
summary_json[:tcs_version] = ViralSeq::TCS_VERSION
|
133
148
|
summary_json[:viralseq_version] = ViralSeq::VERSION
|
134
149
|
summary_json[:runtime] = Time.now.to_s
|
@@ -181,6 +196,10 @@ primers.each do |primer|
|
|
181
196
|
paired_seq_number = common_keys.size
|
182
197
|
log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
|
183
198
|
summary_json[:paired_raw_sequence] = paired_seq_number
|
199
|
+
if paired_seq_number < raw_sequence_number * 0.001
|
200
|
+
summary_json[:warnings] <<
|
201
|
+
"WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
|
202
|
+
end
|
184
203
|
|
185
204
|
common_keys.each do |seqtag|
|
186
205
|
r1_seq = r1_passed_seq[seqtag]
|
data/bin/tcs_log
CHANGED
@@ -37,8 +37,26 @@ Dir.mkdir(outdir4) unless File.directory?(outdir4)
|
|
37
37
|
|
38
38
|
log_file = File.join(tcs_dir,"log.csv")
|
39
39
|
log = File.open(log_file,'w')
|
40
|
-
log.puts "lib name,Region,Raw Sequences per barcode,R1 Raw,R2 Raw,Paired Raw,Cutoff,PID Length,Consensus1,Consensus2,Distinct to Raw,Resampling index,Combined TCS,Combined TCS after QC"
|
41
40
|
|
41
|
+
header = %w{
|
42
|
+
lib_name
|
43
|
+
Region
|
44
|
+
Raw_Sequences_per_barcode
|
45
|
+
R1_Raw
|
46
|
+
R2_Raw
|
47
|
+
Paired_Raw
|
48
|
+
Cutoff
|
49
|
+
PID_Length
|
50
|
+
Consensus1
|
51
|
+
Consensus2
|
52
|
+
Distinct_to_Raw
|
53
|
+
Resampling_index
|
54
|
+
Combined_TCS
|
55
|
+
Combined_TCS_after_QC
|
56
|
+
WARNINGS
|
57
|
+
}
|
58
|
+
|
59
|
+
log.puts header.join(',')
|
42
60
|
libs.each do |lib|
|
43
61
|
Dir.mkdir(File.join(outdir2, lib)) unless File.directory?(File.join(outdir2, lib))
|
44
62
|
fasta_files = []
|
@@ -77,6 +95,7 @@ libs.each do |lib|
|
|
77
95
|
json_log[:resampling_param],
|
78
96
|
json_log[:combined_tcs],
|
79
97
|
json_log[:combined_tcs_after_qc],
|
98
|
+
json_log[:warnings],
|
80
99
|
].join(',') + "\n"
|
81
100
|
end
|
82
101
|
end
|
Binary file
|
data/{doc → docs}/dr.json
RENAMED
Binary file
|
Binary file
|
data/lib/viral_seq.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
module ViralSeq
|
2
|
+
|
3
|
+
class TcsDr
|
4
|
+
PARAMS = {:platform_error_rate=>0.02,
|
5
|
+
:primer_pairs=>
|
6
|
+
[{:region=>"RT",
|
7
|
+
:cdna=>
|
8
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCACTATAGGCTGTACTGTCCATTTATC",
|
9
|
+
:forward=>
|
10
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
|
11
|
+
:majority=>0.5,
|
12
|
+
:end_join=>true,
|
13
|
+
:end_join_option=>1,
|
14
|
+
:overlap=>0,
|
15
|
+
:TCS_QC=>true,
|
16
|
+
:ref_genome=>"HXB2",
|
17
|
+
:ref_start=>2648,
|
18
|
+
:ref_end=>3257,
|
19
|
+
:indel=>true,
|
20
|
+
:trim=>false},
|
21
|
+
{:region=>"PR",
|
22
|
+
:cdna=>
|
23
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
|
24
|
+
:forward=>
|
25
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
|
26
|
+
:majority=>0.5,
|
27
|
+
:end_join=>true,
|
28
|
+
:end_join_option=>3,
|
29
|
+
:TCS_QC=>true,
|
30
|
+
:ref_genome=>"HXB2",
|
31
|
+
:ref_start=>0,
|
32
|
+
:ref_end=>2591,
|
33
|
+
:indel=>true,
|
34
|
+
:trim=>true,
|
35
|
+
:trim_ref=>"HXB2",
|
36
|
+
:trim_ref_start=>2253,
|
37
|
+
:trim_ref_end=>2549},
|
38
|
+
{:region=>"IN",
|
39
|
+
:cdna=>
|
40
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNATCGAATACTGCCATTTGTACTGC",
|
41
|
+
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
|
42
|
+
:majority=>0.5,
|
43
|
+
:end_join=>true,
|
44
|
+
:end_join_option=>3,
|
45
|
+
:overlap=>171,
|
46
|
+
:TCS_QC=>true,
|
47
|
+
:ref_genome=>"HXB2",
|
48
|
+
:ref_start=>4384,
|
49
|
+
:ref_end=>4751,
|
50
|
+
:indel=>false,
|
51
|
+
:trim=>false},
|
52
|
+
{:region=>"V1V3",
|
53
|
+
:cdna=>
|
54
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
|
55
|
+
:forward=>
|
56
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
|
57
|
+
:majority=>0.5,
|
58
|
+
:end_join=>true,
|
59
|
+
:end_join_option=>1,
|
60
|
+
:overlap=>0,
|
61
|
+
:TCS_QC=>true,
|
62
|
+
:ref_genome=>"HXB2",
|
63
|
+
:ref_start=>6585,
|
64
|
+
:ref_end=>7208,
|
65
|
+
:indel=>true,
|
66
|
+
:trim=>false}
|
67
|
+
]
|
68
|
+
}
|
69
|
+
end
|
70
|
+
|
71
|
+
end
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.1.
|
4
|
+
version: 1.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-04-01 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -106,7 +106,10 @@ files:
|
|
106
106
|
- bin/locator
|
107
107
|
- bin/tcs
|
108
108
|
- bin/tcs_log
|
109
|
-
-
|
109
|
+
- docs/assets/img/cover.jpg
|
110
|
+
- docs/dr.json
|
111
|
+
- docs/sample_miseq_data/hivdr_control/r1.fastq.gz
|
112
|
+
- docs/sample_miseq_data/hivdr_control/r2.fastq.gz
|
110
113
|
- lib/viral_seq.rb
|
111
114
|
- lib/viral_seq/constant.rb
|
112
115
|
- lib/viral_seq/enumerable.rb
|
@@ -123,6 +126,7 @@ files:
|
|
123
126
|
- lib/viral_seq/sequence.rb
|
124
127
|
- lib/viral_seq/string.rb
|
125
128
|
- lib/viral_seq/tcs_core.rb
|
129
|
+
- lib/viral_seq/tcs_dr.rb
|
126
130
|
- lib/viral_seq/tcs_json.rb
|
127
131
|
- lib/viral_seq/version.rb
|
128
132
|
- viral_seq.gemspec
|