viral_seq 1.6.0 → 1.6.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6bc062e2f207fd9e33d97885125b656c4b24ae8056221020bb611c191104109b
4
- data.tar.gz: a0086f02da866821c134a9433317b7016a429e37e775de65d434dbae1b7efd23
3
+ metadata.gz: 02d26d720fef0501d70b012d9919f932b23a21b1caabbf508a56fffa162c311b
4
+ data.tar.gz: '0681add2b2fa2ca7dedffeaaf43bd8b0e2b6200dd38633e2eeda58946ced8238'
5
5
  SHA512:
6
- metadata.gz: dc29bb6914196e4aa0fa6d7c2db25d4db1179f40a551dc0784534fe2225c857ef22663815d483412db1a715977574a01ab457a5d4913e38a7a9ac439b38795ab
7
- data.tar.gz: f42559bc07b75b2d9a4023af0313cc40e037697970c0ac4a6c34fd04938b5ab879ece19fd95d05237075292c38a93e315018c7e0c03bd0ee9ab8cf1716b894ed
6
+ metadata.gz: 301c188d736c9812006d30db8995fa7df683cc63443c1370de3d487dae77b88cfc60c8b674abc93b35f7457ced536a6e374433e5fbe0f423e4a7993ea4240ebc
7
+ data.tar.gz: 1118ab7b586da98bb2c3533f81c35a02f0efb1978c0b91e15b56218b05342ad83e73f96bd2df53f75e2e6cdd16c591582ebe864562a02d3dd78997678b706233
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.3.0)
4
+ viral_seq (1.6.2)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
- muscle_bio (~> 0.4)
7
+ muscle_bio (~> 0.5)
8
8
  prawn (~> 2.3, >= 2.3.0)
9
9
  prawn-table (~> 0.2, >= 0.2.0)
10
10
 
@@ -15,7 +15,7 @@ GEM
15
15
  combine_pdf (1.0.21)
16
16
  ruby-rc4 (>= 0.1.5)
17
17
  diff-lcs (1.3)
18
- muscle_bio (0.4.0)
18
+ muscle_bio (0.5.0)
19
19
  pdf-core (0.9.0)
20
20
  prawn (2.4.0)
21
21
  pdf-core (~> 0.9.0)
data/README.md CHANGED
@@ -179,6 +179,23 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
179
179
 
180
180
  ## Updates
181
181
 
182
+ ### Version-1.6.4-07182022
183
+
184
+ 1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
185
+ 2. Loosened the locator params for the end of the "V1V3" region to handle rare alignment issues. The default "V1V3" region now ends at any position from 7205 to 7210 instead of exactly 7208.
186
+ 3. `tcs_sdrm` now analyses the "P17" region for pairwise diversity.
187
+
188
+ ### Version-1.6.3-02052022
189
+
190
+ 1. Updated the `ViralSeq::Muscle` module to match the update of `muscle` from version 3.8.1 to 5.1.
191
+ 2. Optimized the `locator` algorithm based on `muscle` v5.1.
192
+ 3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
193
+
194
+ ### Version-1.6.1-02022022
195
+
196
+ 1. Fixed the `nav bar` in the `tcs_log` HTML file.
197
+ 2. Fixed a typo in `tcs`.
198
+
182
199
  ### Version 1.6.0-01042022
183
200
 
184
201
  1. Update the `ViralSeq::TcsCore::detection_limit` with pre-calculated values to save processing time.
data/bin/tcs CHANGED
@@ -173,7 +173,7 @@ begin
173
173
  unless forward_primer
174
174
  log.puts Time.now.to_s + "\t" + region + " does not have forward primer sequence. #{region} skipped."
175
175
  end
176
- summary_json[:cdan_primer] = cdna_primer
176
+ summary_json[:cdna_primer] = cdna_primer
177
177
  summary_json[:forward_primer] = forward_primer
178
178
 
179
179
  primer[:majority] ? majority_cut_off = primer[:majority] : majority_cut_off = 0
data/bin/tcs_log CHANGED
@@ -613,7 +613,7 @@ html = '
613
613
  <body>
614
614
  <div style="display: flex; flex-direction: column; height: 100vh; width: 100vw; position: fixed; overflow: hidden;">
615
615
  <div style="display: flex; gap: 4px;">
616
- <div id="nav" class="card" style="overflow: auto; min-height: 100vh; text-align: left; margin-top: 0;">
616
+ <div id="nav" class="card" style="overflow: auto; height: 100vh; text-align: left; margin-top: 0;">
617
617
  <a href="https://primer-id.org" target="_BLANK">
618
618
  <h3 style="margin: 24px; font-weight: 600; color: #333 !important">TCS Log</h3>
619
619
  </a>
data/bin/tcs_sdrm CHANGED
@@ -172,6 +172,25 @@ libs.each do |lib|
172
172
  linkage_list += sdrm[1]
173
173
  aa_report_list += sdrm[2]
174
174
 
175
+ elsif seq_basename =~/P17/i
176
+ a3g_check = seqs.a3g
177
+ a3g_seqs = a3g_check[:a3g_seq]
178
+ a3g_filtered_seqs = a3g_check[:filtered_seq]
179
+ stop_codon_check = a3g_filtered_seqs.stop_codon(2)
180
+ stop_codon_seqs = stop_codon_check[:with_stop_codon]
181
+ filtered_seqs = stop_codon_check[:without_stop_codon]
182
+ poisson_minority_cutoff = filtered_seqs.pm
183
+ fdr_hash = filtered_seqs.fdr
184
+ summary_hash[:P17] = [
185
+ seqs.size.to_s,
186
+ a3g_seqs.size.to_s,
187
+ stop_codon_seqs.size.to_s,
188
+ filtered_seqs.size.to_s,
189
+ poisson_minority_cutoff.to_s
190
+ ].join(',')
191
+ next if filtered_seqs.size < 3
192
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
193
+
175
194
  elsif seq_basename =~/RT/i
176
195
  rt_seq1 = {}
177
196
  rt_seq2 = {}
@@ -229,7 +248,7 @@ libs.each do |lib|
229
248
  filtered_seq_files.each do |seq_file|
230
249
  filtered_sh = ViralSeq::SeqHash.fa(seq_file)
231
250
  next if filtered_sh.size < 3
232
- aligned_sh = filtered_sh.random_select(1000).align
251
+ aligned_sh = filtered_sh.random_select(1000).align(:Super5)
233
252
  aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
234
253
  end
235
254
 
@@ -249,7 +268,7 @@ libs.each do |lib|
249
268
  tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
250
269
  summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
251
270
  end
252
- [:PR, :RT, :IN, :V1V3].each do |regions|
271
+ [:PR, :RT, :IN, :V1V3, :P17].each do |regions|
253
272
  next unless summary_hash[regions]
254
273
  seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
255
274
  end
@@ -270,10 +289,13 @@ libs.each do |lib|
270
289
  tcs_RT = 0
271
290
  tcs_IN = 0
272
291
  tcs_V1V3 = 0
292
+ tcs_P17 = 0
273
293
  pi_RT = 0.0
274
294
  pi_V1V3 = 0.0
295
+ pi_P17 = 0.0
275
296
  dist20_RT = 0.0
276
297
  dist20_V1V3 = 0.0
298
+ dist20_P17 = 0.0
277
299
  summary_lines.each do |line|
278
300
  data = line.chomp.split(",")
279
301
  if data[0] == "PR"
@@ -288,6 +310,10 @@ libs.each do |lib|
288
310
  tcs_V1V3 = data[1].to_i
289
311
  pi_V1V3 = data[6].to_f
290
312
  dist20_V1V3 = data[7].to_f
313
+ elsif data[0] == "P17"
314
+ tcs_P17 = data[4].to_i
315
+ pi_P17 = data[6].to_f
316
+ dist20_P17 = data[7].to_f
291
317
  end
292
318
  end
293
319
 
@@ -323,9 +349,13 @@ libs.each do |lib|
323
349
  tcs_RT: tcs_RT,
324
350
  tcs_IN: tcs_IN,
325
351
  tcs_V1V3: tcs_V1V3,
352
+ tcs_P17: tcs_P17,
326
353
  pi_RT: pi_RT,
354
+ pi_V1V3: pi_V1V3,
355
+ pi_P17: pi_P17,
327
356
  dist20_RT: dist20_RT,
328
357
  dist20_V1V3: dist20_V1V3,
358
+ dist20_P17: dist20_P17,
329
359
  recency: recency,
330
360
  sdrm_PR: sdrm_PR,
331
361
  sdrm_RT: sdrm_RT,
data/docs/dr.json CHANGED
@@ -62,6 +62,21 @@
62
62
  "ref_end": 7208,
63
63
  "indel": true,
64
64
  "trim": false
65
+ },
66
+ {
67
+ "region": "P17",
68
+ "cdna": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
69
+ "forward": "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
70
+ "majority": 0.5,
71
+ "end_join": true,
72
+ "end_join_option": 1,
73
+ "overlap": 0,
74
+ "TCS_QC": true,
75
+ "ref_genome": "HXB2",
76
+ "ref_start": 1196,
77
+ "ref_end": 1725,
78
+ "indel": true,
79
+ "trim": false
65
80
  }
66
81
  ]
67
82
  }
@@ -5,7 +5,7 @@ module ViralSeq
5
5
  # functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
6
6
  # works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
7
7
  # PR codon 1-99
8
- # RT codon 34-122 (HXB2 2650-2914) and 152-236(3001-3257)
8
+ # RT codon 34-122 (HXB2 2649-2914) and 152-236(3001-3257)
9
9
  # IN codon 53-174 (HXB2 4384-4751)
10
10
  # @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
11
11
  # can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
@@ -28,6 +28,8 @@ module ViralSeq
28
28
  # align a sequence with reference sequence Strings
29
29
  # @param ref_seq [String] reference sequence
30
30
  # @param test_seq [String] test sequence
31
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
32
+ # @param path_to_muscle [String], path to MUSCLE executable. if not provided (as default), it will use RubyGem::MuscleBio
31
33
  # @param path_to_muscle [String], path to MUSCLE executable. if not provided (as default), it will use RubyGem::MuscleBio
32
34
  # @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
33
35
  # if it cannot find the MUSCLE executable
@@ -37,7 +39,7 @@ module ViralSeq
37
39
  # aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
38
40
  # => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
39
41
 
40
- def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
42
+ def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
41
43
  temp_dir = Dir.home
42
44
  temp_name = "_" + SecureRandom.alphanumeric
43
45
  temp_file = File.join(temp_dir, temp_name)
@@ -56,7 +58,11 @@ module ViralSeq
56
58
  end
57
59
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
58
60
  else
59
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
61
+ if MuscleBio::VERSION.to_f < 0.5
62
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
63
+ else
64
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
65
+ end
60
66
  end
61
67
  aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
62
68
  File.unlink(temp_file)
@@ -223,7 +223,7 @@ module ViralSeq
223
223
 
224
224
  # check the size range of the DNA sequences of the SeqHash object
225
225
  # @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
226
-
226
+
227
227
  def check_nt_size
228
228
  dna_hash = self.dna_hash
229
229
  size_array = []
@@ -711,10 +711,11 @@ module ViralSeq
711
711
 
712
712
 
713
713
  # align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
714
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
714
715
  # @param path_to_muscle [String], path to MUSCLE executable. if not provided (as default), it will use RubyGem::MuscleBio
715
716
  # @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
716
717
 
717
- def align(path_to_muscle = false)
718
+ def align(algorithm = :PPP, path_to_muscle = false)
718
719
  seq_hash = self.dna_hash
719
720
  if self.file.size > 0
720
721
  temp_dir = File.dirname(self.file)
@@ -732,7 +733,11 @@ module ViralSeq
732
733
  end
733
734
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
734
735
  else
735
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
736
+ if MuscleBio::VERSION.to_f < 0.5
737
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
738
+ else
739
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
740
+ end
736
741
  end
737
742
  out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
738
743
  out_seq_hash.title = self.title + "_aligned"
@@ -180,7 +180,7 @@ module ViralSeq
180
180
  l1 = 0
181
181
  l2 = 0
182
182
 
183
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
183
+ aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
184
184
  aln_test = aln_seq[1]
185
185
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
186
186
  gap_begin = $1.size
@@ -214,7 +214,7 @@ module ViralSeq
214
214
  l2 = l2 + (post_aln - b2)
215
215
  end
216
216
 
217
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
217
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
218
218
  aln_test = aln_seq[1]
219
219
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
220
220
  gap_begin = $1.size
@@ -240,22 +240,22 @@ module ViralSeq
240
240
  repeat = 0
241
241
 
242
242
  if g1 == g2 and (s1 + g1 + s2) == ref.size
243
- if s1 > s2 and g2 > 2*s2
243
+ if s1 > s2 and g2 >= s2
244
244
  ref = ref[0..(-g2-1)]
245
245
  repeat = 1
246
246
  l2 = l2 + g2
247
- elsif s1 < s2 and g1 > 2*s1
247
+ elsif s1 < s2 and g1 >= s1
248
248
  ref = ref[g1..-1]
249
249
  repeat = 1
250
250
  l1 = l1 + g1
251
251
  end
252
252
  else
253
- if g1 > 2*s1
253
+ if g1 >= s1
254
254
  ref = ref[g1..-1]
255
255
  repeat = 1
256
256
  l1 = l1 + g1
257
257
  end
258
- if g2 > 2*s2
258
+ if g2 >= s2
259
259
  ref = ref[0..(-g2 - 1)]
260
260
  repeat = 1
261
261
  l2 = l2 + g2
@@ -263,7 +263,7 @@ module ViralSeq
263
263
  end
264
264
 
265
265
  while repeat == 1
266
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
266
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
267
267
  aln_test = aln_seq[1]
268
268
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
269
269
  gap_begin = $1.size
@@ -280,12 +280,12 @@ module ViralSeq
280
280
  l1 = l1 + gap_begin
281
281
  l2 = l2 + gap_end
282
282
  repeat = 0
283
- if g1 > 2*s1
283
+ if g1 >= s1
284
284
  ref = ref[g1..-1]
285
285
  repeat = 1
286
286
  l1 = l1 + g1
287
287
  end
288
- if g2 > 2*s2
288
+ if g2 >= s2
289
289
  ref = ref[0..(-g2 - 1)]
290
290
  repeat = 1
291
291
  l2 = l2 + g2
@@ -293,8 +293,7 @@ module ViralSeq
293
293
  end
294
294
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
295
295
 
296
-
297
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
296
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
298
297
  aln_test = aln_seq[1]
299
298
  ref = aln_seq[0]
300
299
 
@@ -303,12 +302,12 @@ module ViralSeq
303
302
  if ref =~ /^(\-+)/
304
303
  l1 = l1 - $1.size
305
304
  elsif ref =~ /(\-+)$/
306
- l2 = l2 + $1.size
305
+ l2 = l2 - $1.size
307
306
  end
308
307
 
309
308
  if (ori_ref_l - l2 - 1) >= l1
310
309
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
311
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
310
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
312
311
  aln_test = aln_seq[1]
313
312
  ref = aln_seq[0]
314
313
 
@@ -16,7 +16,7 @@ module ViralSeq
16
16
  :ref_genome=>"HXB2",
17
17
  :ref_start=>2648,
18
18
  :ref_end=>3257,
19
- :indel=>false,
19
+ :indel=>true,
20
20
  :trim=>false},
21
21
  {:region=>"PR",
22
22
  :cdna=>
@@ -41,7 +41,7 @@ module ViralSeq
41
41
  :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
42
  :majority=>0,
43
43
  :end_join=>true,
44
- :end_join_option=>3,
44
+ :end_join_option=>2,
45
45
  :overlap=>171,
46
46
  :TCS_QC=>true,
47
47
  :ref_genome=>"HXB2",
@@ -61,11 +61,26 @@ module ViralSeq
61
61
  :TCS_QC=>true,
62
62
  :ref_genome=>"HXB2",
63
63
  :ref_start=>6585,
64
- :ref_end=>7208,
64
+ :ref_end=>7205..7210,
65
65
  :indel=>true,
66
- :trim=>false}
66
+ :trim=>false},
67
+ {:region=>"P17",
68
+ :cdna=>
69
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
70
+ :forward=>
71
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
72
+ :majority=>0,
73
+ :end_join=>true,
74
+ :end_join_option=>1,
75
+ :overlap=>0,
76
+ :TCS_QC=>true,
77
+ :ref_genome=>"HXB2",
78
+ :ref_start=>1196,
79
+ :ref_end=>1725,
80
+ :indel=>true,
81
+ :trim=>false}
67
82
  ]
68
- }
83
+ }
69
84
  end
70
85
 
71
86
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.6.0"
6
- TCS_VERSION = "2.5.0"
5
+ VERSION = "1.6.4"
6
+ TCS_VERSION = "2.5.1"
7
7
  end
Binary file
data/viral_seq.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.required_rubygems_version = '>= 1.3.6'
36
36
 
37
37
  # muscle_bio gem required
38
- spec.add_runtime_dependency "muscle_bio", "~> 0.4"
38
+ spec.add_runtime_dependency "muscle_bio", "~> 0.5"
39
39
 
40
40
  # colorize gem required
41
41
  spec.add_runtime_dependency "colorize", "~> 0.1"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-01-18 00:00:00.000000000 Z
12
+ date: 2022-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0.4'
62
+ version: '0.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0.4'
69
+ version: '0.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: colorize
72
72
  requirement: !ruby/object:Gem::Requirement
@@ -193,6 +193,7 @@ files:
193
193
  - lib/viral_seq/tcs_dr.rb
194
194
  - lib/viral_seq/tcs_json.rb
195
195
  - lib/viral_seq/version.rb
196
+ - rc_swans.svc@longleaf.unc.edu
196
197
  - viral_seq.gemspec
197
198
  homepage: https://github.com/ViralSeq/viral_seq
198
199
  licenses: