viral_seq 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: ea453e452e6832e942512cdb94462c33af89ffd8295017806c9aa6ff7ec77ad4
4
- data.tar.gz: 2bb89d193e0e84ebe0791882c53e226a0a934ea3b9d1e61f87b8ffff6c22af1b
3
+ metadata.gz: 7a283f3a09cc5d9807e7622cd1ddf27197919955e85d6472b34fc14b66749c03
4
+ data.tar.gz: 4f90c5a9c7ea0ec148ba7d45ee88dc441f79da67a97654734194a773499ebb8e
5
5
  SHA512:
6
- metadata.gz: 9dc0403ecaea119d3aa3e832305a0bd4f038fdb71789dcd036080fa89b0e454ee79001b6042df171364e4207a93b2d4d5747336b2fb7f8fb7d83103f5d641134
7
- data.tar.gz: 510ccfce7d717b56d55e2477ae01124009d1f53f010635759cf2f69afe0132313e08db9abaae1ec6d8d894961beba1c2d70a637eafa9b57b05f0aac3372cd0ca
6
+ metadata.gz: 385a94eb93c3d8d9116c16a0d8af56ba714ba6191a454076acf881a036de80d1d598f3fcd1a4de841745ca08a1ad3e8bc028a30db9f96c19f3b217ef4583d652
7
+ data.tar.gz: 714d035b6f65863746cafb120c9cf6eccb8261f3eac69985bad96e5275351eec71aa3b744ee9b462e2dc3e0e199c2d4112386f6a2d7eef89b5b7824c1ab769be
data/README.md CHANGED
@@ -1,5 +1,11 @@
1
1
  # ViralSeq
2
2
 
3
+ [![Gem Version](https://badge.fury.io/rb/viral_seq.svg)](https://rubygems.org/gems/viral_seq)
4
+ ![GitHub](https://img.shields.io/github/license/viralseq/viral_seq)
5
+ ![Gem](https://img.shields.io/gem/dt/viral_seq?color=%23E9967A)
6
+ ![GitHub last commit](https://img.shields.io/github/last-commit/viralseq/viral_seq?color=%2300BFFF)
7
+ [![Join the chat at https://gitter.im/viral_seq/community](https://badges.gitter.im/viral_seq/community.svg)](https://gitter.im/viral_seq/community?utm_source=badge&utm_medium=badge&utm_campaign=pr-badge&utm_content=badge)
8
+
3
9
  A Ruby Gem containing bioinformatics tools for processing viral NGS data.
4
10
 
5
11
  Specifically for Primer ID sequencing and HIV drug resistance analysis.
@@ -7,11 +13,12 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
7
13
  ## Illustration for the Primer ID Sequencing
8
14
 
9
15
 
10
- ![Primer ID Sequencing](https://storage.googleapis.com/tcs-dr-public/pid.png)
16
+ ![Primer ID Sequencing](./docs/assets/img/cover.jpg)
11
17
 
12
18
  ### Reference readings on the Primer ID sequencing
13
- [Primer ID JID paper](https://doi.org/10.21769/BioProtoc.3938)
14
- [Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
19
+ [Explanation of Primer ID sequencing](https://doi.org/10.21769/BioProtoc.3938)
20
+ [Primer ID MiSeq protocol](https://doi.org/10.1128/JVI.00522-15)
21
+ [Application of Primer ID sequencing in COVID-19 research](https://doi.org/10.1126/scitranslmed.abb5883)
15
22
 
16
23
  ## Install
17
24
 
@@ -29,9 +36,18 @@ Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
29
36
  Example commands:
30
37
  ```bash
31
38
  $ tcs -p params.json # run TCS pipeline with params.json
39
+ $ tcs -p params.json -i DIRECTORY
40
+ # run TCS pipeline with params.json and DIRECTORY
41
+ # if DIRECTORY is not defined in params.json
42
+ $ tcs -dr -i DIRECTORY
43
+ # run tcs-dr (MPID HIV drug resistance sequencing) pipeline
44
+ # DIRECTORY needs to be given.
32
45
  $ tcs -j # CLI to generate params.json
33
46
  $ tcs -h # print out the help
34
47
  ```
48
+
49
+ [sample params.json for the tcs-dr pipeline](./docs/dr.json)
50
+
35
51
  ---
36
52
  ### `tcs_log`
37
53
 
@@ -121,15 +137,22 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
121
137
 
122
138
  ## Updates
123
139
 
140
+ ### Version 1.1.1-04012021
141
+
142
+ 1. Added warning when paired_raw_sequence is less than 0.1% of total_raw_sequence.
143
+ 2. Added option `-i WORKING_DIRECTORY` to the `tcs` script.
144
+ If the `params.json` file does not contain the path to the working directory, it will append the path to the run params.
145
+ 3. Added option `-dr` to the `tcs` script.
146
+
124
147
  ### Version 1.1.0-03252021
125
148
 
126
- 1. Optimized the algorithm of end-join.
127
- 2. Fixed a bug in the `tcs` pipeline that sometimes combined tcs files are not saved.
128
- 3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
129
- 4. Added the preset of MPID-HIVDR params file ***dr.json*** in /doc.
130
- 5. Add `platform_format` option in the json generator of the `tcs` Pipeline.
131
- Users can choose from 3 MiSeq platforms for processing their sequencing data.
132
- MiSeq 300x7x300 is the default option.
149
+ 1. Optimized the algorithm of end-join.
150
+ 2. Fixed a bug in the `tcs` pipeline that sometimes combined tcs files are not saved.
151
+ 3. Added `tcs_log` command to pool run logs and tcs files from one batch of tcs jobs.
152
+ 4. Added the preset of MPID-HIVDR params file [***dr.json***](./docs/dr.json) in /docs.
153
+ 5. Add `platform_format` option in the json generator of the `tcs` Pipeline.
154
+ Users can choose from 3 MiSeq platforms for processing their sequencing data.
155
+ MiSeq 300x7x300 is the default option.
133
156
 
134
157
  ### Version 1.0.14-03052021
135
158
 
data/bin/tcs CHANGED
@@ -46,6 +46,14 @@ OptionParser.new do |opts|
46
46
  options[:params_json] = p
47
47
  end
48
48
 
49
+ opts.on("-i", "--input PATH_TO_WORKING_DIRECTORY", "Path to the working directory") do |p|
50
+ options[:input] = p
51
+ end
52
+
53
+ opts.on("-dr", "--dr_pipeline", "HIV drug resistance MPID pipeline") do |p|
54
+ options[:dr] = true
55
+ end
56
+
49
57
  opts.on("-h", "--help", "Prints this help") do
50
58
  puts opts
51
59
  exit
@@ -64,15 +72,21 @@ end.parse!
64
72
 
65
73
  if options[:json_generator]
66
74
  params = ViralSeq::TcsJson.generate
75
+ elsif options[:dr]
76
+ params = ViralSeq::TcsDr::PARAMS
67
77
  elsif (options[:params_json] && File.exist?(options[:params_json]))
68
78
  params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
69
79
  else
70
80
  abort "No params JSON file found. Script terminated.".red
71
81
  end
72
82
 
73
- indir = params[:raw_sequence_dir]
83
+ if options[:input]
84
+ indir = options[:input]
85
+ else
86
+ indir = params[:raw_sequence_dir]
87
+ end
74
88
 
75
- unless File.exist?(indir)
89
+ unless indir and File.exist?(indir)
76
90
  abort "No input sequence directory found. Script terminated.".red.bold
77
91
  end
78
92
 
@@ -129,6 +143,7 @@ end
129
143
 
130
144
  primers.each do |primer|
131
145
  summary_json = {}
146
+ summary_json[:warnings] = []
132
147
  summary_json[:tcs_version] = ViralSeq::TCS_VERSION
133
148
  summary_json[:viralseq_version] = ViralSeq::VERSION
134
149
  summary_json[:runtime] = Time.now.to_s
@@ -181,6 +196,10 @@ primers.each do |primer|
181
196
  paired_seq_number = common_keys.size
182
197
  log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
183
198
  summary_json[:paired_raw_sequence] = paired_seq_number
199
+ if paired_seq_number < raw_sequence_number * 0.001
200
+ summary_json[:warnings] <<
201
+ "WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
202
+ end
184
203
 
185
204
  common_keys.each do |seqtag|
186
205
  r1_seq = r1_passed_seq[seqtag]
data/bin/tcs_log CHANGED
@@ -37,8 +37,26 @@ Dir.mkdir(outdir4) unless File.directory?(outdir4)
37
37
 
38
38
  log_file = File.join(tcs_dir,"log.csv")
39
39
  log = File.open(log_file,'w')
40
- log.puts "lib name,Region,Raw Sequences per barcode,R1 Raw,R2 Raw,Paired Raw,Cutoff,PID Length,Consensus1,Consensus2,Distinct to Raw,Resampling index,Combined TCS,Combined TCS after QC"
41
40
 
41
+ header = %w{
42
+ lib_name
43
+ Region
44
+ Raw_Sequences_per_barcode
45
+ R1_Raw
46
+ R2_Raw
47
+ Paired_Raw
48
+ Cutoff
49
+ PID_Length
50
+ Consensus1
51
+ Consensus2
52
+ Distinct_to_Raw
53
+ Resampling_index
54
+ Combined_TCS
55
+ Combined_TCS_after_QC
56
+ WARNINGS
57
+ }
58
+
59
+ log.puts header.join(',')
42
60
  libs.each do |lib|
43
61
  Dir.mkdir(File.join(outdir2, lib)) unless File.directory?(File.join(outdir2, lib))
44
62
  fasta_files = []
@@ -77,6 +95,7 @@ libs.each do |lib|
77
95
  json_log[:resampling_param],
78
96
  json_log[:combined_tcs],
79
97
  json_log[:combined_tcs_after_qc],
98
+ json_log[:warnings],
80
99
  ].join(',') + "\n"
81
100
  end
82
101
  end
Binary file
@@ -1,5 +1,4 @@
1
1
  {
2
- "raw_sequence_dir": "MyExampleDir",
3
2
  "platform_error_rate": 0.02,
4
3
  "primer_pairs": [
5
4
  {
data/lib/viral_seq.rb CHANGED
@@ -37,6 +37,6 @@ require_relative "viral_seq/string"
37
37
  require_relative "viral_seq/version"
38
38
  require_relative "viral_seq/tcs_core"
39
39
  require_relative "viral_seq/tcs_json"
40
-
40
+ require_relative "viral_seq/tcs_dr"
41
41
 
42
42
  require "muscle_bio"
@@ -0,0 +1,71 @@
1
+ module ViralSeq
2
+
3
+ class TcsDr
4
+ PARAMS = {:platform_error_rate=>0.02,
5
+ :primer_pairs=>
6
+ [{:region=>"RT",
7
+ :cdna=>
8
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCACTATAGGCTGTACTGTCCATTTATC",
9
+ :forward=>
10
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
11
+ :majority=>0.5,
12
+ :end_join=>true,
13
+ :end_join_option=>1,
14
+ :overlap=>0,
15
+ :TCS_QC=>true,
16
+ :ref_genome=>"HXB2",
17
+ :ref_start=>2648,
18
+ :ref_end=>3257,
19
+ :indel=>true,
20
+ :trim=>false},
21
+ {:region=>"PR",
22
+ :cdna=>
23
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
24
+ :forward=>
25
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
26
+ :majority=>0.5,
27
+ :end_join=>true,
28
+ :end_join_option=>3,
29
+ :TCS_QC=>true,
30
+ :ref_genome=>"HXB2",
31
+ :ref_start=>0,
32
+ :ref_end=>2591,
33
+ :indel=>true,
34
+ :trim=>true,
35
+ :trim_ref=>"HXB2",
36
+ :trim_ref_start=>2253,
37
+ :trim_ref_end=>2549},
38
+ {:region=>"IN",
39
+ :cdna=>
40
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNATCGAATACTGCCATTTGTACTGC",
41
+ :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
+ :majority=>0.5,
43
+ :end_join=>true,
44
+ :end_join_option=>3,
45
+ :overlap=>171,
46
+ :TCS_QC=>true,
47
+ :ref_genome=>"HXB2",
48
+ :ref_start=>4384,
49
+ :ref_end=>4751,
50
+ :indel=>false,
51
+ :trim=>false},
52
+ {:region=>"V1V3",
53
+ :cdna=>
54
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
55
+ :forward=>
56
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
57
+ :majority=>0.5,
58
+ :end_join=>true,
59
+ :end_join_option=>1,
60
+ :overlap=>0,
61
+ :TCS_QC=>true,
62
+ :ref_genome=>"HXB2",
63
+ :ref_start=>6585,
64
+ :ref_end=>7208,
65
+ :indel=>true,
66
+ :trim=>false}
67
+ ]
68
+ }
69
+ end
70
+
71
+ end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.1.0"
6
- TCS_VERSION = "2.2.0"
5
+ VERSION = "1.1.1"
6
+ TCS_VERSION = "2.3.0"
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.0
4
+ version: 1.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-03-26 00:00:00.000000000 Z
12
+ date: 2021-04-01 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -106,7 +106,10 @@ files:
106
106
  - bin/locator
107
107
  - bin/tcs
108
108
  - bin/tcs_log
109
- - doc/dr.json
109
+ - docs/assets/img/cover.jpg
110
+ - docs/dr.json
111
+ - docs/sample_miseq_data/hivdr_control/r1.fastq.gz
112
+ - docs/sample_miseq_data/hivdr_control/r2.fastq.gz
110
113
  - lib/viral_seq.rb
111
114
  - lib/viral_seq/constant.rb
112
115
  - lib/viral_seq/enumerable.rb
@@ -123,6 +126,7 @@ files:
123
126
  - lib/viral_seq/sequence.rb
124
127
  - lib/viral_seq/string.rb
125
128
  - lib/viral_seq/tcs_core.rb
129
+ - lib/viral_seq/tcs_dr.rb
126
130
  - lib/viral_seq/tcs_json.rb
127
131
  - lib/viral_seq/version.rb
128
132
  - viral_seq.gemspec