viral_seq 1.2.1 → 1.2.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f3316ff7e72ca84c6eb2fa861a9fdad14fbcb3ab3c0053ade843ee13cc9ce82e
4
- data.tar.gz: df1035ea5934b794ef8c64a04085f407bcd4dffc0888bf81a569a7ccfba3560a
3
+ metadata.gz: 554845dba339d0e06b84c88bc117258516f391bdf58cce015c2669e7b2c6c0d5
4
+ data.tar.gz: 870280337c90d1f5b9ecbea6e6478d7e2dc22aa70917c6b2ecd94afaa185c1c6
5
5
  SHA512:
6
- metadata.gz: a3ec35b3a40ee9cf66131416a1c20eda38bf8bde818aa41af285c099ddd2b49e4f31fe1d011c95def77fd5c6653d96f4295142fd543444f249242154bb2b671b
7
- data.tar.gz: daa6e694a841cc615cfde850bf2d98ca7467cb1d27502daf398ab0204e55c4d477f34aafce0149c765a931755fc3a3f7dbdf425964904d0199efb0651b9a09a6
6
+ metadata.gz: 54db76e6fd8333ccebb19dee602378ec8dbe5d196ec7bd675e55f65db80cb06ac2ab51ce1f13ab7ea65c0a50ad49978bd3e9581074c497b298f0912858946fa8
7
+ data.tar.gz: 03d02329192465a9f278715c8a85e3a910e5c5c7252026980d29e669df823a5bdb4be323eeb56f7c9804b71fa8f1763c5a526227f3764315d6eb8e208934ce81
data/README.md CHANGED
@@ -10,6 +10,8 @@ A Ruby Gem containing bioinformatics tools for processing viral NGS data.
10
10
 
11
11
  Specifically for Primer ID sequencing and HIV drug resistance analysis.
12
12
 
13
+ #### tcs web app - https://primer-id.org/
14
+
13
15
  ## Illustration for the Primer ID Sequencing
14
16
 
15
17
 
@@ -33,6 +35,8 @@ Specifically for Primer ID sequencing and HIV drug resistance analysis.
33
35
  ### `tcs`
34
36
  Use executable `tcs` pipeline to process **Primer ID MiSeq sequencing** data.
35
37
 
38
+ Web-based `tcs` analysis can be accessed at https://primer-id.org/
39
+
36
40
  Example commands:
37
41
  ```bash
38
42
  $ tcs -p params.json # run TCS pipeline with params.json
@@ -175,6 +179,29 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
175
179
 
176
180
  ## Updates
177
181
 
182
+ ### Version 1.2.7-07152021
183
+
184
+ 1. Optimized the workflow of the `tcs` pipeline on raw data with uneven lengths.
185
+ `tcs` version to v2.3.5.
186
+
187
+
188
+ ### Version 1.2.6-07122021
189
+
190
+ 1. Optimized the workflow of the `tcs` pipeline in the "end-join/QC/Trimming" section.
191
+ `tcs` version to v2.3.5.
192
+
193
+
194
+ ### Version 1.2.5-06232021
195
+
196
+ 1. Added error rescue and reporting in the `tcs` pipeline.
197
+ Error messages are stored in the `.tcs_error` file. `tcs` pipeline updated to v2.3.4.
198
+ 2. Use simple majority for the consensus cut-off in the default setting of the `tcs -dr` pipeline.
199
+
200
+ ### Version 1.2.2-05272021
201
+
202
+ 1. Fixed a bug in the `tcs` pipeline that sometimes causes `SystemStackError`.
203
+ `tcs` pipeline upgraded to v2.3.2
204
+
178
205
  ### Version 1.2.1-05172021
179
206
 
180
207
  1. Added a function in R to check and install missing R packages for `tcs_sdrm` pipeline.
data/bin/tcs CHANGED
@@ -101,395 +101,426 @@ log = File.open(runtime_log_file, "w")
101
101
  log.puts "TSC pipeline Version " + ViralSeq::TCS_VERSION.to_s
102
102
  log.puts "viral_seq Version " + ViralSeq::VERSION.to_s
103
103
  log.puts Time.now.to_s + "\t" + "Start TCS pipeline..."
104
+ File.unlink(File.join(indir, ".tcs_error")) if File.exist?(File.join(indir, ".tcs_error"))
105
+
106
+ begin
107
+ libname = File.basename indir
108
+ seq_files = ViralSeq::TcsCore.r1r2 indir
109
+
110
+ if seq_files[:r1_file].size > 0 and seq_files[:r2_file].size > 0
111
+ r1_f = seq_files[:r1_file]
112
+ r2_f = seq_files[:r2_file]
113
+ elsif seq_files[:r1_file].size > 0 and seq_files[:r2_file].empty?
114
+ raise StandardError.new "Missing R2 file."
115
+ elsif seq_files[:r2_file].size > 0 and seq_files[:r1_file].empty?
116
+ raise StandardError.new "Missing R1 file."
117
+ else
118
+ raise StandardError.new "Cannot determine R1 R2 file in #{indir}."
119
+ end
104
120
 
105
- libname = File.basename indir
121
+ r1_fastq_sh = ViralSeq::SeqHash.fq(r1_f)
122
+ r2_fastq_sh = ViralSeq::SeqHash.fq(r2_f)
106
123
 
107
- seq_files = ViralSeq::TcsCore.r1r2 indir
124
+ raw_sequence_number = r1_fastq_sh.size
125
+ log.puts Time.now.to_s + "\tRaw sequence number: #{raw_sequence_number.to_s}"
108
126
 
109
- if seq_files[:r1_file].size > 0 and seq_files[:r2_file].size > 0
110
- r1_f = seq_files[:r1_file]
111
- r2_f = seq_files[:r2_file]
112
- elsif seq_files[:r1_file].size > 0 and seq_files[:r2_file].empty?
113
- exit_sig = "Missing R2 file. Aborted."
114
- elsif seq_files[:r2_file].size > 0 and seq_files[:r1_file].empty?
115
- exit_sig = "Missing R1 file. Aborted."
116
- else
117
- exit_sig = "Cannot determine R1 R2 file in #{indir}. Aborted."
118
- end
127
+ if params[:platform_error_rate]
128
+ error_rate = params[:platform_error_rate]
129
+ else
130
+ error_rate = 0.02
131
+ end
119
132
 
120
- if exit_sig
121
- ViralSeq::TcsCore.log_and_abort log, exit_sig
122
- end
133
+ if params[:platform_format]
134
+ $platform_sequencing_length = params[:platform_format]
135
+ else
136
+ $platform_sequencing_length = 300
137
+ end
123
138
 
124
- r1_fastq_sh = ViralSeq::SeqHash.fq(r1_f)
125
- r2_fastq_sh = ViralSeq::SeqHash.fq(r2_f)
139
+ primers = params[:primer_pairs]
140
+ if primers.empty? or primers.nil?
141
+ ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
142
+ end
126
143
 
127
- raw_sequence_number = r1_fastq_sh.size
128
- log.puts Time.now.to_s + "\tRaw sequence number: #{raw_sequence_number.to_s}"
129
144
 
130
- if params[:platform_error_rate]
131
- error_rate = params[:platform_error_rate]
132
- else
133
- error_rate = 0.02
134
- end
145
+ primers.each do |primer|
146
+ summary_json = {}
147
+ summary_json[:warnings] = []
148
+ summary_json[:tcs_version] = ViralSeq::TCS_VERSION
149
+ summary_json[:viralseq_version] = ViralSeq::VERSION
150
+ summary_json[:runtime] = Time.now.to_s
135
151
 
136
- if params[:platform_format]
137
- $platform_sequencing_length = params[:platform_format]
138
- else
139
- $platform_sequencing_length = 300
140
- end
152
+ primer[:region] ? region = primer[:region] : region = "region"
153
+ summary_json[:primer_set_name] = region
141
154
 
142
- primers = params[:primer_pairs]
143
- if primers.empty?
144
- ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
145
- end
155
+ cdna_primer = primer[:cdna]
156
+ forward_primer = primer[:forward]
146
157
 
158
+ export_raw = primer[:export_raw]
159
+ limit_raw = primer[:limit_raw]
147
160
 
148
- primers.each do |primer|
149
- summary_json = {}
150
- summary_json[:warnings] = []
151
- summary_json[:tcs_version] = ViralSeq::TCS_VERSION
152
- summary_json[:viralseq_version] = ViralSeq::VERSION
153
- summary_json[:runtime] = Time.now.to_s
161
+ unless cdna_primer
162
+ log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
163
+ end
164
+ unless forward_primer
165
+ log.puts Time.now.to_s + "\t" + region + " does not have forward primer sequence. #{region} skipped."
166
+ end
167
+ summary_json[:cdan_primer] = cdna_primer
168
+ summary_json[:forward_primer] = forward_primer
169
+
170
+ primer[:majority] ? majority_cut_off = primer[:majority] : majority_cut_off = 0
171
+ summary_json[:majority_cut_off] = majority_cut_off
172
+
173
+ summary_json[:total_raw_sequence] = raw_sequence_number
174
+
175
+ log.puts Time.now.to_s + "\t" + "Porcessing #{region}..."
176
+
177
+ # filter R1
178
+ log.puts Time.now.to_s + "\t" + "filtering R1..."
179
+ filter_r1 = ViralSeq::TcsCore.filter_r1(r1_fastq_sh, forward_primer)
180
+ r1_passed_seq = filter_r1[:r1_passed_seq]
181
+ log.puts Time.now.to_s + "\t" + "R1 filtered: #{r1_passed_seq.size.to_s}"
182
+ summary_json[:r1_filtered_raw] = r1_passed_seq.size
183
+
184
+ # filter R2
185
+ log.puts Time.now.to_s + "\t" + "filtering R2..."
186
+ filter_r2 = ViralSeq::TcsCore.filter_r2(r2_fastq_sh, cdna_primer)
187
+ r2_passed_seq = filter_r2[:r2_passed_seq]
188
+ pid_length = filter_r2[:pid_length]
189
+ log.puts Time.now.to_s + "\t" + "R2 filtered: #{r2_passed_seq.size.to_s}"
190
+ summary_json[:r2_filtered_raw] = r2_passed_seq.size
191
+
192
+ # pair-end
193
+ log.puts Time.now.to_s + "\t" + "Pairing R1 and R2 seqs..."
194
+ id = {} # hash for :sequence_tag => primer_id
195
+ bio_r2 = {} # hash for :sequence_tag => primer_trimmed_r2_sequence
196
+ bio_r1 = {} # hash for :sequence_tag => primer_trimmed_r1_sequence
197
+ common_keys = r1_passed_seq.keys & r2_passed_seq.keys
198
+ paired_seq_number = common_keys.size
199
+ log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
200
+ summary_json[:paired_raw_sequence] = paired_seq_number
201
+ if paired_seq_number < raw_sequence_number * 0.001
202
+ summary_json[:warnings] <<
203
+ "WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
204
+ end
154
205
 
155
- primer[:region] ? region = primer[:region] : region = "region"
156
- summary_json[:primer_set_name] = region
206
+ common_keys.each do |seqtag|
207
+ r1_seq = r1_passed_seq[seqtag]
208
+ r2_seq = r2_passed_seq[seqtag]
209
+ pid = r2_seq[0, pid_length]
210
+ id[seqtag] = pid
211
+ bio_r2[seqtag] = r2_seq[filter_r2[:reverse_starting_number]..-2]
212
+ bio_r1[seqtag] = r1_seq[filter_r1[:forward_starting_number]..-2]
213
+ end
157
214
 
158
- cdna_primer = primer[:cdna]
159
- forward_primer = primer[:forward]
215
+ # TCS cut-off
216
+ log.puts Time.now.to_s + "\t" + "Calculate consensus cutoff...."
160
217
 
161
- export_raw = primer[:export_raw]
162
- limit_raw = primer[:limit_raw]
218
+ primer_id_list = id.values
219
+ primer_id_count = primer_id_list.count_freq
220
+ primer_id_dis = primer_id_count.values.count_freq
163
221
 
164
- unless cdna_primer
165
- log.puts Time.now.to_s + "\t" + region + " does not have cDNA primer sequence. #{region} skipped."
166
- end
167
- unless forward_primer
168
- log.puts Time.now.to_s + "\t" + region + " does not have forward primer sequence. #{region} skipped."
169
- end
170
- summary_json[:cdan_primer] = cdna_primer
171
- summary_json[:forward_primer] = forward_primer
172
-
173
- primer[:majority] ? majority_cut_off = primer[:majority] : majority_cut_off = 0
174
- summary_json[:majority_cut_off] = majority_cut_off
175
-
176
- summary_json[:total_raw_sequence] = raw_sequence_number
177
-
178
- log.puts Time.now.to_s + "\t" + "Porcessing #{region}..."
179
-
180
- # filter R1
181
- log.puts Time.now.to_s + "\t" + "filtering R1..."
182
- filter_r1 = ViralSeq::TcsCore.filter_r1(r1_fastq_sh, forward_primer)
183
- r1_passed_seq = filter_r1[:r1_passed_seq]
184
- log.puts Time.now.to_s + "\t" + "R1 filtered: #{r1_passed_seq.size.to_s}"
185
- summary_json[:r1_filtered_raw] = r1_passed_seq.size
186
-
187
- # filter R2
188
- log.puts Time.now.to_s + "\t" + "filtering R2..."
189
- filter_r2 = ViralSeq::TcsCore.filter_r2(r2_fastq_sh, cdna_primer)
190
- r2_passed_seq = filter_r2[:r2_passed_seq]
191
- pid_length = filter_r2[:pid_length]
192
- log.puts Time.now.to_s + "\t" + "R2 filtered: #{r2_passed_seq.size.to_s}"
193
- summary_json[:r2_filtered_raw] = r2_passed_seq.size
194
-
195
- # pair-end
196
- log.puts Time.now.to_s + "\t" + "Pairing R1 and R2 seqs..."
197
- id = {} # hash for :sequence_tag => primer_id
198
- bio_r2 = {} # hash for :sequence_tag => primer_trimmed_r2_sequence
199
- bio_r1 = {} # hash for :sequence_tag => primer_trimmed_r1_sequence
200
- common_keys = r1_passed_seq.keys & r2_passed_seq.keys
201
- paired_seq_number = common_keys.size
202
- log.puts Time.now.to_s + "\t" + "Paired raw sequences are : #{paired_seq_number.to_s}"
203
- summary_json[:paired_raw_sequence] = paired_seq_number
204
- if paired_seq_number < raw_sequence_number * 0.001
205
- summary_json[:warnings] <<
206
- "WARNING: Filtered raw sequneces less than 0.1% of the total raw sequences. Possible contamination."
207
- end
222
+ # calculate distinct_to_raw
223
+ distinct_to_raw = (primer_id_count.size/primer_id_list.size.to_f).round(3)
224
+ summary_json[:distinct_to_raw] = distinct_to_raw
208
225
 
209
- common_keys.each do |seqtag|
210
- r1_seq = r1_passed_seq[seqtag]
211
- r2_seq = r2_passed_seq[seqtag]
212
- pid = r2_seq[0, pid_length]
213
- id[seqtag] = pid
214
- bio_r2[seqtag] = r2_seq[filter_r2[:reverse_starting_number]..-2]
215
- bio_r1[seqtag] = r1_seq[filter_r1[:forward_starting_number]..-2]
216
- end
226
+ if primer_id_dis.keys.size < 5
227
+ log.puts Time.now.to_s + "\t" + "Less than 5 Primer IDs detected. Region #{region} aborted."
228
+ next
229
+ end
217
230
 
218
- # TCS cut-off
219
- log.puts Time.now.to_s + "\t" + "Calculate consensus cutoff...."
231
+ max_id = primer_id_dis.keys.sort[-5..-1].mean
232
+ consensus_cutoff = ViralSeq::TcsCore.calculate_cut_off(max_id,error_rate)
233
+ log.puts Time.now.to_s + "\t" + "Consensus cut-off is #{consensus_cutoff.to_s}"
234
+ summary_json[:consensus_cutoff] = consensus_cutoff
235
+ summary_json[:length_of_pid] = pid_length
236
+ log.puts Time.now.to_s + "\t" + "Creating consensus..."
237
+
238
+ # Primer ID over the cut-off
239
+ primer_id_count_over_n = []
240
+ primer_id_count.each do |primer_id,count|
241
+ primer_id_count_over_n << primer_id if count > consensus_cutoff
242
+ end
243
+ pid_to_process = primer_id_count_over_n.size
244
+ log.puts Time.now.to_s + "\t" + "Number of consensus to process: #{pid_to_process.to_s}"
245
+ summary_json[:total_tcs_with_ambiguities] = pid_to_process
220
246
 
221
- primer_id_list = id.values
222
- primer_id_count = primer_id_list.count_freq
223
- primer_id_dis = primer_id_count.values.count_freq
247
+ # setup output path
248
+ out_dir_set = File.join(indir, region)
249
+ Dir.mkdir(out_dir_set) unless File.directory?(out_dir_set)
250
+ out_dir_consensus = File.join(out_dir_set, "consensus")
251
+ Dir.mkdir(out_dir_consensus) unless File.directory?(out_dir_consensus)
224
252
 
225
- # calculate distinct_to_raw
226
- distinct_to_raw = (primer_id_count.size/primer_id_list.size.to_f).round(3)
227
- summary_json[:distinct_to_raw] = distinct_to_raw
253
+ outfile_r1 = File.join(out_dir_consensus, 'r1.fasta')
254
+ outfile_r2 = File.join(out_dir_consensus, 'r2.fasta')
255
+ outfile_log = File.join(out_dir_set, 'log.json')
228
256
 
229
- if primer_id_dis.keys.size < 5
230
- log.puts Time.now.to_s + "\t" + "Less than 5 Primer IDs detected. Region #{region} aborted."
231
- next
232
- end
257
+ # if export_raw is true, create dir for raw sequence
258
+ if export_raw
259
+ out_dir_raw = File.join(out_dir_set, "raw")
260
+ Dir.mkdir(out_dir_raw) unless File.directory?(out_dir_raw)
261
+ outfile_raw_r1 = File.join(out_dir_raw, 'r1.raw.fasta')
262
+ outfile_raw_r2 = File.join(out_dir_raw, 'r2.raw.fasta')
263
+ raw_r1_f = File.open(outfile_raw_r1, 'w')
264
+ raw_r2_f = File.open(outfile_raw_r2, 'w')
265
+
266
+ if limit_raw
267
+ raw_keys = bio_r1.keys.sample(limit_raw.to_i)
268
+ else
269
+ raw_keys = bio_r1.keys
270
+ end
233
271
 
234
- max_id = primer_id_dis.keys.sort[-5..-1].mean
235
- consensus_cutoff = ViralSeq::TcsCore.calculate_cut_off(max_id,error_rate)
236
- log.puts Time.now.to_s + "\t" + "Consensus cut-off is #{consensus_cutoff.to_s}"
237
- summary_json[:consensus_cutoff] = consensus_cutoff
238
- summary_json[:length_of_pid] = pid_length
239
- log.puts Time.now.to_s + "\t" + "Creating consensus..."
240
-
241
- # Primer ID over the cut-off
242
- primer_id_count_over_n = []
243
- primer_id_count.each do |primer_id,count|
244
- primer_id_count_over_n << primer_id if count > consensus_cutoff
245
- end
246
- pid_to_process = primer_id_count_over_n.size
247
- log.puts Time.now.to_s + "\t" + "Number of consensus to process: #{pid_to_process.to_s}"
248
- summary_json[:total_tcs_with_ambiguities] = pid_to_process
249
-
250
- # setup output path
251
- out_dir_set = File.join(indir, region)
252
- Dir.mkdir(out_dir_set) unless File.directory?(out_dir_set)
253
- out_dir_consensus = File.join(out_dir_set, "consensus")
254
- Dir.mkdir(out_dir_consensus) unless File.directory?(out_dir_consensus)
255
-
256
- outfile_r1 = File.join(out_dir_consensus, 'r1.fasta')
257
- outfile_r2 = File.join(out_dir_consensus, 'r2.fasta')
258
- outfile_log = File.join(out_dir_set, 'log.json')
259
-
260
- # if export_raw is true, create dir for raw sequence
261
- if export_raw
262
- out_dir_raw = File.join(out_dir_set, "raw")
263
- Dir.mkdir(out_dir_raw) unless File.directory?(out_dir_raw)
264
- outfile_raw_r1 = File.join(out_dir_raw, 'r1.raw.fasta')
265
- outfile_raw_r2 = File.join(out_dir_raw, 'r2.raw.fasta')
266
- raw_r1_f = File.open(outfile_raw_r1, 'w')
267
- raw_r2_f = File.open(outfile_raw_r2, 'w')
268
-
269
- if limit_raw
270
- raw_keys = bio_r1.keys.sample(limit_raw.to_i)
271
- else
272
- raw_keys = bio_r1.keys
273
- end
272
+ raw_keys.each do |k|
273
+ raw_r1_f.puts k + "_r1"
274
+ raw_r2_f.puts k + "_r2"
275
+ raw_r1_f.puts bio_r1[k]
276
+ raw_r2_f.puts bio_r2[k].rc
277
+ end
274
278
 
275
- raw_keys.each do |k|
276
- raw_r1_f.puts k + "_r1"
277
- raw_r2_f.puts k + "_r2"
278
- raw_r1_f.puts bio_r1[k]
279
- raw_r2_f.puts bio_r2[k].rc
279
+ raw_r1_f.close
280
+ raw_r2_f.close
280
281
  end
281
282
 
282
- raw_r1_f.close
283
- raw_r2_f.close
284
- end
283
+ # create TCS
285
284
 
286
- # create TCS
285
+ pid_seqtag_hash = {}
286
+ id.each do |name, pid|
287
+ if pid_seqtag_hash[pid]
288
+ pid_seqtag_hash[pid] << name
289
+ else
290
+ pid_seqtag_hash[pid] = []
291
+ pid_seqtag_hash[pid] << name
292
+ end
293
+ end
287
294
 
288
- pid_seqtag_hash = {}
289
- id.each do |name, pid|
290
- if pid_seqtag_hash[pid]
291
- pid_seqtag_hash[pid] << name
295
+ consensus = {}
296
+ r1_temp = {}
297
+ r2_temp = {}
298
+ m = 0
299
+ primer_id_count_over_n.each do |primer_id|
300
+ m += 1
301
+ log.puts Time.now.to_s + "\t" + "Now processing number #{m}" if m%100 == 0
302
+ seq_with_same_primer_id = pid_seqtag_hash[primer_id]
303
+ r1_sub_seq = []
304
+ r2_sub_seq = []
305
+ seq_with_same_primer_id.each do |seq_name|
306
+ r1_sub_seq << bio_r1[seq_name]
307
+ r2_sub_seq << bio_r2[seq_name]
308
+ end
309
+ #consensus name including the Primer ID and number of raw sequences of that Primer ID, library name and setname.
310
+ consensus_name = ">" + primer_id + "_" + seq_with_same_primer_id.size.to_s + "_" + libname + "_" + region
311
+ r1_consensus = ViralSeq::SeqHash.array(r1_sub_seq).consensus(majority_cut_off)
312
+ r2_consensus = ViralSeq::SeqHash.array(r2_sub_seq).consensus(majority_cut_off)
313
+
314
+ # hide the following two lines if allowing sequence to have ambiguities.
315
+ next if r1_consensus =~ /[^ATCG]/
316
+ next if r2_consensus =~ /[^ATCG]/
317
+
318
+ # reverse complement sequence of the R2 region
319
+ r2_consensus = r2_consensus.rc
320
+ consensus[consensus_name] = [r1_consensus, r2_consensus]
321
+ r1_temp[consensus_name] = r1_consensus
322
+ r2_temp[consensus_name] = r2_consensus
323
+ end
324
+ r1_temp_sh = ViralSeq::SeqHash.new(r1_temp)
325
+ r2_temp_sh = ViralSeq::SeqHash.new(r2_temp)
326
+
327
+ # filter consensus sequences for residual offspring PIDs
328
+ consensus_filtered = {}
329
+ consensus_number_temp = consensus.size
330
+ max_pid_comb = 4**pid_length
331
+ if consensus_number_temp < 0.003*max_pid_comb
332
+ log.puts Time.now.to_s + "\t" + "Applying PID post TCS filter..."
333
+ r1_consensus_filtered = r1_temp_sh.filter_similar_pid.dna_hash
334
+ r2_consensus_filtered = r2_temp_sh.filter_similar_pid.dna_hash
335
+ common_pid = r1_consensus_filtered.keys & r2_consensus_filtered.keys
336
+ common_pid.each do |pid|
337
+ consensus_filtered[pid] = [r1_consensus_filtered[pid], r2_consensus_filtered[pid]]
338
+ end
292
339
  else
293
- pid_seqtag_hash[pid] = []
294
- pid_seqtag_hash[pid] << name
340
+ consensus_filtered = consensus
295
341
  end
296
- end
297
-
298
- consensus = {}
299
- r1_temp = {}
300
- r2_temp = {}
301
- m = 0
302
- primer_id_count_over_n.each do |primer_id|
303
- m += 1
304
- log.puts Time.now.to_s + "\t" + "Now processing number #{m}" if m%100 == 0
305
- seq_with_same_primer_id = pid_seqtag_hash[primer_id]
306
- r1_sub_seq = []
307
- r2_sub_seq = []
308
- seq_with_same_primer_id.each do |seq_name|
309
- r1_sub_seq << bio_r1[seq_name]
310
- r2_sub_seq << bio_r2[seq_name]
342
+ n_con = consensus_filtered.size
343
+ log.puts Time.now.to_s + "\t" + "Number of consensus sequences: " + n_con.to_s
344
+ summary_json[:total_tcs] = n_con
345
+ summary_json[:resampling_param] = (n_con/pid_to_process.to_f).round(3)
346
+
347
+ log.puts Time.now.to_s + "\t" + "Writing R1 and R2 files..."
348
+ # r1_file output
349
+ f1 = File.open(outfile_r1, 'w')
350
+ f2 = File.open(outfile_r2, 'w')
351
+ primer_id_in_use = {}
352
+ if n_con > 0
353
+ r1_seq_length = consensus_filtered.values[0][0].size
354
+ r2_seq_length = consensus_filtered.values[0][1].size
355
+ else
356
+ r1_seq_length = "n/a"
357
+ r2_seq_length = "n/a"
311
358
  end
312
- #consensus name including the Primer ID and number of raw sequences of that Primer ID, library name and setname.
313
- consensus_name = ">" + primer_id + "_" + seq_with_same_primer_id.size.to_s + "_" + libname + "_" + region
314
- r1_consensus = ViralSeq::SeqHash.array(r1_sub_seq).consensus(majority_cut_off)
315
- r2_consensus = ViralSeq::SeqHash.array(r2_sub_seq).consensus(majority_cut_off)
316
-
317
- # hide the following two lines if allowing sequence to have ambiguities.
318
- next if r1_consensus =~ /[^ATCG]/
319
- next if r2_consensus =~ /[^ATCG]/
320
-
321
- # reverse complement sequence of the R2 region
322
- r2_consensus = r2_consensus.rc
323
- consensus[consensus_name] = [r1_consensus, r2_consensus]
324
- r1_temp[consensus_name] = r1_consensus
325
- r2_temp[consensus_name] = r2_consensus
326
- end
327
- r1_temp_sh = ViralSeq::SeqHash.new(r1_temp)
328
- r2_temp_sh = ViralSeq::SeqHash.new(r2_temp)
329
-
330
- # filter consensus sequences for residual offspring PIDs
331
- consensus_filtered = {}
332
- consensus_number_temp = consensus.size
333
- max_pid_comb = 4**pid_length
334
- if consensus_number_temp < 0.003*max_pid_comb
335
- log.puts Time.now.to_s + "\t" + "Applying PID post TCS filter..."
336
- r1_consensus_filtered = r1_temp_sh.filter_similar_pid.dna_hash
337
- r2_consensus_filtered = r2_temp_sh.filter_similar_pid.dna_hash
338
- common_pid = r1_consensus_filtered.keys & r2_consensus_filtered.keys
339
- common_pid.each do |pid|
340
- consensus_filtered[pid] = [r1_consensus_filtered[pid], r2_consensus_filtered[pid]]
359
+ log.puts Time.now.to_s + "\t" + "R1 sequence #{r1_seq_length} bp"
360
+ log.puts Time.now.to_s + "\t" + "R1 sequence #{r2_seq_length} bp"
361
+ consensus_filtered.each do |seq_name,seq|
362
+ f1.print seq_name + "_r1\n" + seq[0] + "\n"
363
+ f2.print seq_name + "_r2\n" + seq[1] + "\n"
364
+ primer_id_in_use[seq_name.split("_")[0][1..-1]] = seq_name.split("_")[1].to_i
341
365
  end
342
- else
343
- consensus_filtered = consensus
344
- end
345
- n_con = consensus_filtered.size
346
- log.puts Time.now.to_s + "\t" + "Number of consensus sequences: " + n_con.to_s
347
- summary_json[:total_tcs] = n_con
348
- summary_json[:resampling_param] = (n_con/pid_to_process.to_f).round(3)
349
-
350
- log.puts Time.now.to_s + "\t" + "Writing R1 and R2 files..."
351
- # r1_file output
352
- f1 = File.open(outfile_r1, 'w')
353
- f2 = File.open(outfile_r2, 'w')
354
- primer_id_in_use = {}
355
- if n_con > 0
356
- r1_seq_length = consensus_filtered.values[0][0].size
357
- r2_seq_length = consensus_filtered.values[0][1].size
358
- else
359
- next
360
- end
361
- log.puts Time.now.to_s + "\t" + "R1 sequence #{r1_seq_length} bp"
362
- log.puts Time.now.to_s + "\t" + "R1 sequence #{r2_seq_length} bp"
363
- consensus_filtered.each do |seq_name,seq|
364
- f1.print seq_name + "_r1\n" + seq[0] + "\n"
365
- f2.print seq_name + "_r2\n" + seq[1] + "\n"
366
- primer_id_in_use[seq_name.split("_")[0][1..-1]] = seq_name.split("_")[1].to_i
367
- end
368
- f1.close
369
- f2.close
370
-
371
- # Primer ID distribution in .json file
372
- out_pid_json = File.join(out_dir_set, 'primer_id.json')
373
- pid_json = {}
374
- pid_json[:primer_id_in_use] = Hash[*(primer_id_in_use.sort_by {|k, v| [-v,k]}.flatten)]
375
- pid_json[:primer_id_distribution] = Hash[*(primer_id_dis.sort_by{|k,v| k}.flatten)]
376
- pid_json[:primer_id_frequency] = Hash[*(primer_id_count.sort_by {|k, v| [-v,k]}.flatten)]
377
- File.open(out_pid_json, 'w') do |f|
378
- f.puts JSON.pretty_generate(pid_json)
379
- end
380
-
381
- # start end-join
382
- def end_join(dir, option, overlap)
383
- shp = ViralSeq::SeqHashPair.fa(dir)
384
- case option
385
- when 1
386
- joined_sh = shp.join1()
387
- when 2
388
- joined_sh = shp.join1(overlap)
389
- when 3
390
- joined_sh = shp.join2
391
- when 4
392
- joined_sh = shp.join2(model: :indiv)
366
+ f1.close
367
+ f2.close
368
+
369
+ # Primer ID distribution in .json file
370
+ out_pid_json = File.join(out_dir_set, 'primer_id.json')
371
+ pid_json = {}
372
+ pid_json[:primer_id_in_use] = {}
373
+ primer_id_in_use.sort_by {|k, v| [-v,k]}.each do |k,v|
374
+ pid_json[:primer_id_in_use][k] = v
393
375
  end
394
- return joined_sh
395
- end
396
-
397
- if primer[:end_join]
398
- log.puts Time.now.to_s + "\t" + "Start end-pairing for TCS..."
399
- shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
400
- joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
401
- log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
402
376
 
403
- summary_json[:combined_tcs] = joined_sh.size
404
-
405
- if export_raw
406
- joined_sh_raw = end_join(out_dir_raw, primer[:end_join_option], primer[:overlap])
377
+ pid_json[:primer_id_distribution] = {}
378
+ primer_id_dis.sort_by{|k,v| k}.each do |k,v|
379
+ pid_json[:primer_id_distribution][k] = v
407
380
  end
408
381
 
409
- else
410
- File.open(outfile_log, "w") do |f|
411
- f.puts JSON.pretty_generate(summary_json)
382
+ pid_json[:primer_id_frequency] = {}
383
+ primer_id_count.sort_by {|k,v| [-v,k]}.each do |k,v|
384
+ pid_json[:primer_id_frequency][k] = v
412
385
  end
413
- next
414
- end
415
386
 
416
- if primer[:TCS_QC]
417
- ref_start = primer[:ref_start]
418
- ref_end = primer[:ref_end]
419
- ref_genome = primer[:ref_genome].to_sym
420
- indel = primer[:indel]
421
- if ref_start == 0
422
- ref_start = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
423
- end
424
- if ref_end == 0
425
- ref_end = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
387
+ File.open(out_pid_json, 'w') do |f|
388
+ f.puts JSON.pretty_generate(pid_json)
426
389
  end
427
- if primer[:end_join_option] == 1 and primer[:overlap] == 0
428
- r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
429
- r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
430
- r1_sh = r1_sh.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
431
- r2_sh = r2_sh.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
432
- new_r1_seq = r1_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
433
- new_r2_seq = r2_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
434
- joined_seq = {}
435
- new_r1_seq.each do |seq_name, seq|
436
- next unless seq
437
- next unless new_r2_seq[seq_name]
438
- joined_seq[seq_name] = seq + new_r2_seq[seq_name]
390
+
391
+ # start end-join
392
+ def end_join(dir, option, overlap)
393
+ shp = ViralSeq::SeqHashPair.fa(dir)
394
+ case option
395
+ when 1
396
+ joined_sh = shp.join1()
397
+ when 2
398
+ joined_sh = shp.join1(overlap)
399
+ when 3
400
+ joined_sh = shp.join2
401
+ when 4
402
+ joined_sh = shp.join2(model: :indiv)
439
403
  end
440
- joined_sh = ViralSeq::SeqHash.new(joined_seq)
404
+ return joined_sh
405
+ end
406
+
407
+ if primer[:end_join]
408
+ log.puts Time.now.to_s + "\t" + "Start end-pairing for TCS..."
409
+ shp = ViralSeq::SeqHashPair.fa(out_dir_consensus)
410
+ joined_sh = end_join(out_dir_consensus, primer[:end_join_option], primer[:overlap])
411
+ log.puts Time.now.to_s + "\t" + "Paired TCS number: " + joined_sh.size.to_s
412
+
413
+ summary_json[:combined_tcs] = joined_sh.size
441
414
 
442
415
  if export_raw
443
- r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
444
- r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
445
- r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
446
- r2_sh_raw = r2_sh_raw.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
447
- new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
448
- new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
449
- joined_seq_raw = {}
450
- new_r1_seq_raw.each do |seq_name, seq|
451
- next unless seq
452
- next unless new_r2_seq_raw[seq_name]
453
- joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
454
- end
455
- joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
416
+ joined_sh_raw = end_join(out_dir_raw, primer[:end_join_option], primer[:overlap])
456
417
  end
457
- else
458
- joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
459
418
 
460
- if export_raw
461
- joined_sh_raw = joined_sh_raw.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
419
+ if primer[:TCS_QC]
420
+ ref_start = primer[:ref_start]
421
+ ref_end = primer[:ref_end]
422
+ ref_genome = primer[:ref_genome].to_sym
423
+ indel = primer[:indel]
424
+ if ref_start == 0
425
+ ref_start = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
426
+ end
427
+ if ref_end == 0
428
+ ref_end = 0..(ViralSeq::RefSeq.get(ref_genome).size - 1)
429
+ end
430
+ if primer[:end_join_option] == 1
431
+ r1_sh = ViralSeq::SeqHash.fa(outfile_r1)
432
+ r2_sh = ViralSeq::SeqHash.fa(outfile_r2)
433
+ r1_sh = r1_sh.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
434
+ r2_sh = r2_sh.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
435
+ new_r1_seq = r1_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
436
+ new_r2_seq = r2_sh.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
437
+ joined_seq = {}
438
+ new_r1_seq.each do |seq_name, seq|
439
+ next unless seq
440
+ next unless new_r2_seq[seq_name]
441
+ joined_seq[seq_name] = seq + new_r2_seq[seq_name]
442
+ end
443
+ joined_sh = ViralSeq::SeqHash.new(joined_seq)
444
+
445
+ if export_raw
446
+ r1_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r1)
447
+ r2_sh_raw = ViralSeq::SeqHash.fa(outfile_raw_r2)
448
+ r1_sh_raw = r1_sh_raw.hiv_seq_qc(ref_start, (0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), indel, ref_genome)
449
+ r2_sh_raw = r2_sh_raw.hiv_seq_qc((0..(ViralSeq::RefSeq.get(ref_genome).size - 1)), ref_end, indel, ref_genome)
450
+ new_r1_seq_raw = r1_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
451
+ new_r2_seq_raw = r2_sh_raw.dna_hash.each_with_object({}) {|(k, v), h| h[k[0..-4]] = v}
452
+ joined_seq_raw = {}
453
+ new_r1_seq_raw.each do |seq_name, seq|
454
+ next unless seq
455
+ next unless new_r2_seq_raw[seq_name]
456
+ joined_seq_raw[seq_name] = seq + new_r2_seq_raw[seq_name]
457
+ end
458
+ joined_sh_raw = ViralSeq::SeqHash.new(joined_seq_raw)
459
+ end
460
+ else
461
+ joined_sh = joined_sh.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
462
+
463
+ if export_raw
464
+ joined_sh_raw = joined_sh_raw.hiv_seq_qc(ref_start, ref_end, indel, ref_genome)
465
+ end
466
+ end
467
+
468
+ log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
469
+ summary_json[:combined_tcs_after_qc] = joined_sh.size
470
+ if primer[:trim]
471
+ trim_start = primer[:trim_ref_start]
472
+ trim_end = primer[:trim_ref_end]
473
+ trim_ref = primer[:trim_ref].to_sym
474
+ joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
475
+ if export_raw
476
+ joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
477
+ end
478
+ end
462
479
  end
463
- end
464
480
 
465
- log.puts Time.now.to_s + "\t" + "Paired TCS number after QC based on reference genome: " + joined_sh.size.to_s
466
- summary_json[:combined_tcs_after_qc] = joined_sh.size
467
- if primer[:trim]
468
- trim_start = primer[:trim_ref_start]
469
- trim_end = primer[:trim_ref_end]
470
- trim_ref = primer[:trim_ref].to_sym
471
- joined_sh = joined_sh.trim(trim_start, trim_end, trim_ref)
481
+ joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
472
482
  if export_raw
473
- joined_sh_raw = joined_sh_raw.trim(trim_start, trim_end, trim_ref)
483
+ joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
474
484
  end
485
+
475
486
  end
476
487
 
477
- joined_sh.write_nt_fa(File.join(out_dir_consensus, "combined.fasta"))
478
- if export_raw
479
- joined_sh_raw.write_nt_fa(File.join(out_dir_raw, "combined.raw.fasta"))
488
+ File.open(outfile_log, "w") do |f|
489
+ f.puts JSON.pretty_generate(summary_json)
480
490
  end
481
- end
482
491
 
483
- File.open(outfile_log, "w") do |f|
484
- f.puts JSON.pretty_generate(summary_json)
485
492
  end
486
- end
487
493
 
488
- unless options[:keep]
489
- log.puts Time.now.to_s + "\t" + "Removing raw sequence files..."
490
- File.unlink(r1_f)
491
- File.unlink(r2_f)
494
+ unless options[:keep]
495
+ log.puts Time.now.to_s + "\t" + "Removing raw sequence files..."
496
+ File.unlink(r1_f)
497
+ File.unlink(r2_f)
498
+ end
499
+ log.puts Time.now.to_s + "\t" + "TCS pipeline successfuly executed."
500
+ log.close
501
+ puts "DONE!"
502
+ rescue => e
503
+ puts "`tcs` pipeline run with errors: " + e.message.red
504
+ puts "`tcs` pipeline aborted.".red.bold
505
+ log.puts Time.now.to_s + "\t" + e.full_message
506
+ log.puts Time.now.to_s + "\tAborted."
507
+ log.close
508
+ error_hash = {}
509
+ error_hash[:directory] = indir
510
+ error_hash[:tcs_version] = ViralSeq::TCS_VERSION
511
+ error_hash[:viralSeq_version] = ViralSeq::VERSION
512
+ error_hash[:time] = Time.now
513
+ error_hash[:error] = e.full_message
514
+ File.open(File.join(indir, ".tcs_error"), 'w') do |f|
515
+ f.puts JSON.pretty_generate([error_hash])
516
+ end
517
+ master_error_file = File.join(File.dirname(indir), ".tcs_error")
518
+ master_errors = []
519
+ if File.exist? master_error_file
520
+ master_errors << JSON.parse(File.read(master_error_file), symbolize_names: true)
521
+ end
522
+ master_errors << error_hash
523
+ File.open(master_error_file, 'w') do |f|
524
+ f.puts JSON.pretty_generate(master_errors)
525
+ end
492
526
  end
493
- log.puts Time.now.to_s + "\t" + "TCS pipeline successfuly exercuted."
494
- log.close
495
- puts "DONE!"
@@ -397,7 +397,9 @@ module ViralSeq
397
397
  (0..(seq_length - 1)).each do |position|
398
398
  all_base = []
399
399
  seq_array.each do |seq|
400
- all_base << seq[position]
400
+ if seq[position]
401
+ all_base << seq[position]
402
+ end
401
403
  end
402
404
  base_count = all_base.count_freq
403
405
  max_base_list = []
@@ -305,7 +305,7 @@ module ViralSeq
305
305
  end
306
306
 
307
307
  def general_filter(seq)
308
- if seq.size < $platform_sequencing_length
308
+ if seq.size < ($platform_sequencing_length - 1)
309
309
  return false
310
310
  elsif seq[1..-2] =~ /N/ # sequences with ambiguities except the 1st and last position removed
311
311
  return false
@@ -8,7 +8,7 @@ module ViralSeq
8
8
  "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCACTATAGGCTGTACTGTCCATTTATC",
9
9
  :forward=>
10
10
  "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
11
- :majority=>0.5,
11
+ :majority=>0,
12
12
  :end_join=>true,
13
13
  :end_join_option=>1,
14
14
  :overlap=>0,
@@ -23,7 +23,7 @@ module ViralSeq
23
23
  "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNCAGTTTAACTTTTGGGCCATCCATTCC",
24
24
  :forward=>
25
25
  "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
26
- :majority=>0.5,
26
+ :majority=>0,
27
27
  :end_join=>true,
28
28
  :end_join_option=>3,
29
29
  :TCS_QC=>true,
@@ -39,7 +39,7 @@ module ViralSeq
39
39
  :cdna=>
40
40
  "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNATCGAATACTGCCATTTGTACTGC",
41
41
  :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
- :majority=>0.5,
42
+ :majority=>0,
43
43
  :end_join=>true,
44
44
  :end_join_option=>3,
45
45
  :overlap=>171,
@@ -54,7 +54,7 @@ module ViralSeq
54
54
  "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
55
55
  :forward=>
56
56
  "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
57
- :majority=>0.5,
57
+ :majority=>0,
58
58
  :end_join=>true,
59
59
  :end_join_option=>1,
60
60
  :overlap=>0,
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.2.1"
6
- TCS_VERSION = "2.3.1"
5
+ VERSION = "1.2.7"
6
+ TCS_VERSION = "2.3.6"
7
7
  end
metadata CHANGED
@@ -1,15 +1,15 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.2.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
8
8
  - Michael Clark
9
- autorequire:
9
+ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-05-17 00:00:00.000000000 Z
12
+ date: 2021-07-15 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -214,7 +214,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
214
214
  requirements:
215
215
  - R required for some functions
216
216
  rubygems_version: 3.2.2
217
- signing_key:
217
+ signing_key:
218
218
  specification_version: 4
219
219
  summary: A Ruby Gem containing bioinformatics tools for processing viral NGS data.
220
220
  test_files: []