viral_seq 1.6.0 → 1.6.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6bc062e2f207fd9e33d97885125b656c4b24ae8056221020bb611c191104109b
4
- data.tar.gz: a0086f02da866821c134a9433317b7016a429e37e775de65d434dbae1b7efd23
3
+ metadata.gz: 02d26d720fef0501d70b012d9919f932b23a21b1caabbf508a56fffa162c311b
4
+ data.tar.gz: '0681add2b2fa2ca7dedffeaaf43bd8b0e2b6200dd38633e2eeda58946ced8238'
5
5
  SHA512:
6
- metadata.gz: dc29bb6914196e4aa0fa6d7c2db25d4db1179f40a551dc0784534fe2225c857ef22663815d483412db1a715977574a01ab457a5d4913e38a7a9ac439b38795ab
7
- data.tar.gz: f42559bc07b75b2d9a4023af0313cc40e037697970c0ac4a6c34fd04938b5ab879ece19fd95d05237075292c38a93e315018c7e0c03bd0ee9ab8cf1716b894ed
6
+ metadata.gz: 301c188d736c9812006d30db8995fa7df683cc63443c1370de3d487dae77b88cfc60c8b674abc93b35f7457ced536a6e374433e5fbe0f423e4a7993ea4240ebc
7
+ data.tar.gz: 1118ab7b586da98bb2c3533f81c35a02f0efb1978c0b91e15b56218b05342ad83e73f96bd2df53f75e2e6cdd16c591582ebe864562a02d3dd78997678b706233
data/Gemfile.lock CHANGED
@@ -1,10 +1,10 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.3.0)
4
+ viral_seq (1.6.2)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
- muscle_bio (~> 0.4)
7
+ muscle_bio (~> 0.5)
8
8
  prawn (~> 2.3, >= 2.3.0)
9
9
  prawn-table (~> 0.2, >= 0.2.0)
10
10
 
@@ -15,7 +15,7 @@ GEM
15
15
  combine_pdf (1.0.21)
16
16
  ruby-rc4 (>= 0.1.5)
17
17
  diff-lcs (1.3)
18
- muscle_bio (0.4.0)
18
+ muscle_bio (0.5.0)
19
19
  pdf-core (0.9.0)
20
20
  prawn (2.4.0)
21
21
  pdf-core (~> 0.9.0)
data/README.md CHANGED
@@ -179,6 +179,23 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
179
179
 
180
180
  ## Updates
181
181
 
182
+ ### Version-1.6.4-07182022
183
+
184
+ 1. Included region "P17" in the default `tcs -d` pipeline setting. `tcs` pipeline updated to version 2.5.1.
185
+ 2. Loosened the locator params for the "V1V3" end region to handle rare alignment issues. The default "V1V3" region now ends anywhere from position 7205 to 7210 instead of only at 7208.
186
+ 3. `tcs_sdrm` now analyses the "P17" region for pairwise diversity.
187
+
188
+ ### Version-1.6.3-02052022
189
+
190
+ 1. Updated the `ViralSeq::Muscle` module to match the update of `muscle` from version 3.8.1 to 5.1.
191
+ 2. Optimized the `locator` algorithm based on `muscle` v5.1.
192
+ 3. Optimized the `tcs_sdrm` pipeline based on `muscle` v5.1.
193
+
194
+ ### Version-1.6.1-02022022
195
+
196
+ 1. Fixed the `nav bar` in tcs_log html file.
197
+ 2. Fixed a typo in `tcs`.
198
+
182
199
  ### Version 1.6.0-01042022
183
200
 
184
201
  1. Update the `ViralSeq::TcsCore::detection_limit` with pre-calculated values to save processing time.
data/bin/tcs CHANGED
@@ -173,7 +173,7 @@ begin
173
173
  unless forward_primer
174
174
  log.puts Time.now.to_s + "\t" + region + " does not have forward primer sequence. #{region} skipped."
175
175
  end
176
- summary_json[:cdan_primer] = cdna_primer
176
+ summary_json[:cdna_primer] = cdna_primer
177
177
  summary_json[:forward_primer] = forward_primer
178
178
 
179
179
  primer[:majority] ? majority_cut_off = primer[:majority] : majority_cut_off = 0
data/bin/tcs_log CHANGED
@@ -613,7 +613,7 @@ html = '
613
613
  <body>
614
614
  <div style="display: flex; flex-direction: column; height: 100vh; width: 100vw; position: fixed; overflow: hidden;">
615
615
  <div style="display: flex; gap: 4px;">
616
- <div id="nav" class="card" style="overflow: auto; min-height: 100vh; text-align: left; margin-top: 0;">
616
+ <div id="nav" class="card" style="overflow: auto; height: 100vh; text-align: left; margin-top: 0;">
617
617
  <a href="https://primer-id.org" target="_BLANK">
618
618
  <h3 style="margin: 24px; font-weight: 600; color: #333 !important">TCS Log</h3>
619
619
  </a>
data/bin/tcs_sdrm CHANGED
@@ -172,6 +172,25 @@ libs.each do |lib|
172
172
  linkage_list += sdrm[1]
173
173
  aa_report_list += sdrm[2]
174
174
 
175
+ elsif seq_basename =~/P17/i
176
+ a3g_check = seqs.a3g
177
+ a3g_seqs = a3g_check[:a3g_seq]
178
+ a3g_filtered_seqs = a3g_check[:filtered_seq]
179
+ stop_codon_check = a3g_filtered_seqs.stop_codon(2)
180
+ stop_codon_seqs = stop_codon_check[:with_stop_codon]
181
+ filtered_seqs = stop_codon_check[:without_stop_codon]
182
+ poisson_minority_cutoff = filtered_seqs.pm
183
+ fdr_hash = filtered_seqs.fdr
184
+ summary_hash[:P17] = [
185
+ seqs.size.to_s,
186
+ a3g_seqs.size.to_s,
187
+ stop_codon_seqs.size.to_s,
188
+ filtered_seqs.size.to_s,
189
+ poisson_minority_cutoff.to_s
190
+ ].join(',')
191
+ next if filtered_seqs.size < 3
192
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
193
+
175
194
  elsif seq_basename =~/RT/i
176
195
  rt_seq1 = {}
177
196
  rt_seq2 = {}
@@ -229,7 +248,7 @@ libs.each do |lib|
229
248
  filtered_seq_files.each do |seq_file|
230
249
  filtered_sh = ViralSeq::SeqHash.fa(seq_file)
231
250
  next if filtered_sh.size < 3
232
- aligned_sh = filtered_sh.random_select(1000).align
251
+ aligned_sh = filtered_sh.random_select(1000).align(:Super5)
233
252
  aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
234
253
  end
235
254
 
@@ -249,7 +268,7 @@ libs.each do |lib|
249
268
  tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
250
269
  summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
251
270
  end
252
- [:PR, :RT, :IN, :V1V3].each do |regions|
271
+ [:PR, :RT, :IN, :V1V3, :P17].each do |regions|
253
272
  next unless summary_hash[regions]
254
273
  seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
255
274
  end
@@ -270,10 +289,13 @@ libs.each do |lib|
270
289
  tcs_RT = 0
271
290
  tcs_IN = 0
272
291
  tcs_V1V3 = 0
292
+ tcs_P17 = 0
273
293
  pi_RT = 0.0
274
294
  pi_V1V3 = 0.0
295
+ pi_P17 = 0.0
275
296
  dist20_RT = 0.0
276
297
  dist20_V1V3 = 0.0
298
+ dist20_P17 = 0.0
277
299
  summary_lines.each do |line|
278
300
  data = line.chomp.split(",")
279
301
  if data[0] == "PR"
@@ -288,6 +310,10 @@ libs.each do |lib|
288
310
  tcs_V1V3 = data[1].to_i
289
311
  pi_V1V3 = data[6].to_f
290
312
  dist20_V1V3 = data[7].to_f
313
+ elsif data[0] == "P17"
314
+ tcs_P17 = data[4].to_i
315
+ pi_P17 = data[6].to_f
316
+ dist20_P17 = data[7].to_f
291
317
  end
292
318
  end
293
319
 
@@ -323,9 +349,13 @@ libs.each do |lib|
323
349
  tcs_RT: tcs_RT,
324
350
  tcs_IN: tcs_IN,
325
351
  tcs_V1V3: tcs_V1V3,
352
+ tcs_P17: tcs_P17,
326
353
  pi_RT: pi_RT,
354
+ pi_V1V3: pi_V1V3,
355
+ pi_P17: pi_P17,
327
356
  dist20_RT: dist20_RT,
328
357
  dist20_V1V3: dist20_V1V3,
358
+ dist20_P17: dist20_P17,
329
359
  recency: recency,
330
360
  sdrm_PR: sdrm_PR,
331
361
  sdrm_RT: sdrm_RT,
data/docs/dr.json CHANGED
@@ -62,6 +62,21 @@
62
62
  "ref_end": 7208,
63
63
  "indel": true,
64
64
  "trim": false
65
+ },
66
+ {
67
+ "region": "P17",
68
+ "cdna": "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
69
+ "forward": "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
70
+ "majority": 0.5,
71
+ "end_join": true,
72
+ "end_join_option": 1,
73
+ "overlap": 0,
74
+ "TCS_QC": true,
75
+ "ref_genome": "HXB2",
76
+ "ref_start": 1196,
77
+ "ref_end": 1725,
78
+ "indel": true,
79
+ "trim": false
65
80
  }
66
81
  ]
67
82
  }
@@ -5,7 +5,7 @@ module ViralSeq
5
5
  # functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
6
6
  # works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
7
7
  # PR codon 1-99
8
- # RT codon 34-122 (HXB2 2650-2914) and 152-236(3001-3257)
8
+ # RT codon 34-122 (HXB2 2649-2914) and 152-236(3001-3257)
9
9
  # IN codon 53-174 (HXB2 4384-4751)
10
10
  # @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
11
11
  # can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
@@ -28,6 +28,8 @@ module ViralSeq
28
28
  # align a sequence with reference sequence Strings
29
29
  # @param ref_seq [String] reference sequence
30
30
  # @param test_seq [String] test sequence
31
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
32
+ # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
31
33
  # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
32
34
  # @return [Array] a pair of [:ref_seq_aligned, :test_seq_aligned] or nil
33
35
  # if it cannot find the MUSCLE executable
@@ -37,7 +39,7 @@ module ViralSeq
37
39
  # aligned_seqs = ViralSeq::Muscle.align(seq1,seq2)
38
40
  # => ["AAGGCGTAGGAC-", "-AAGCTTAGGACG"]
39
41
 
40
- def self.align(ref_seq = "", test_seq = "", path_to_muscle = false)
42
+ def self.align(ref_seq = "", test_seq = "", algorithm = :PPP, path_to_muscle = false)
41
43
  temp_dir = Dir.home
42
44
  temp_name = "_" + SecureRandom.alphanumeric
43
45
  temp_file = File.join(temp_dir, temp_name)
@@ -56,7 +58,11 @@ module ViralSeq
56
58
  end
57
59
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
58
60
  else
59
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
61
+ if MuscleBio::VERSION.to_f < 0.5
62
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
63
+ else
64
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
65
+ end
60
66
  end
61
67
  aln_seq_hash = ViralSeq::SeqHash.fa(temp_aln).dna_hash
62
68
  File.unlink(temp_file)
@@ -223,7 +223,7 @@ module ViralSeq
223
223
 
224
224
  # check the size range of the DNA sequences of the SeqHash object
225
225
  # @return [Hash] Hash of {max: MAX_SIZE, min: MIN_SIZE}
226
-
226
+
227
227
  def check_nt_size
228
228
  dna_hash = self.dna_hash
229
229
  size_array = []
@@ -711,10 +711,11 @@ module ViralSeq
711
711
 
712
712
 
713
713
  # align the @dna_hash sequences, return a new ViralSeq::SeqHash object with aligned @dna_hash using MUSCLE
714
+ # @param algorithm [Symbol], algorithm for MUSCLE5 only. Choose from :PPP or :Super5.
714
715
  # @param path_to_muscle [String], path to MUSCLE executable. If not provided (as default), it will use RubyGem::MuscleBio
715
716
  # @return [SeqHash] new SeqHash object of the aligned @dna_hash, the title has "_aligned"
716
717
 
717
- def align(path_to_muscle = false)
718
+ def align(algorithm = :PPP, path_to_muscle = false)
718
719
  seq_hash = self.dna_hash
719
720
  if self.file.size > 0
720
721
  temp_dir = File.dirname(self.file)
@@ -732,7 +733,11 @@ module ViralSeq
732
733
  end
733
734
  print `#{path_to_muscle} -in #{temp_file} -out #{temp_aln} -quiet`
734
735
  else
735
- MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
736
+ if MuscleBio::VERSION.to_f < 0.5
737
+ MuscleBio.run("muscle -in #{temp_file} -out #{temp_aln} -quiet")
738
+ else
739
+ MuscleBio.exec(temp_file, temp_aln, algorithm)
740
+ end
736
741
  end
737
742
  out_seq_hash = ViralSeq::SeqHash.fa(temp_aln)
738
743
  out_seq_hash.title = self.title + "_aligned"
@@ -180,7 +180,7 @@ module ViralSeq
180
180
  l1 = 0
181
181
  l2 = 0
182
182
 
183
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, path_to_muscle)
183
+ aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
184
184
  aln_test = aln_seq[1]
185
185
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
186
186
  gap_begin = $1.size
@@ -214,7 +214,7 @@ module ViralSeq
214
214
  l2 = l2 + (post_aln - b2)
215
215
  end
216
216
 
217
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
217
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
218
218
  aln_test = aln_seq[1]
219
219
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
220
220
  gap_begin = $1.size
@@ -240,22 +240,22 @@ module ViralSeq
240
240
  repeat = 0
241
241
 
242
242
  if g1 == g2 and (s1 + g1 + s2) == ref.size
243
- if s1 > s2 and g2 > 2*s2
243
+ if s1 > s2 and g2 >= s2
244
244
  ref = ref[0..(-g2-1)]
245
245
  repeat = 1
246
246
  l2 = l2 + g2
247
- elsif s1 < s2 and g1 > 2*s1
247
+ elsif s1 < s2 and g1 >= s1
248
248
  ref = ref[g1..-1]
249
249
  repeat = 1
250
250
  l1 = l1 + g1
251
251
  end
252
252
  else
253
- if g1 > 2*s1
253
+ if g1 >= s1
254
254
  ref = ref[g1..-1]
255
255
  repeat = 1
256
256
  l1 = l1 + g1
257
257
  end
258
- if g2 > 2*s2
258
+ if g2 >= s2
259
259
  ref = ref[0..(-g2 - 1)]
260
260
  repeat = 1
261
261
  l2 = l2 + g2
@@ -263,7 +263,7 @@ module ViralSeq
263
263
  end
264
264
 
265
265
  while repeat == 1
266
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
266
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
267
267
  aln_test = aln_seq[1]
268
268
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
269
269
  gap_begin = $1.size
@@ -280,12 +280,12 @@ module ViralSeq
280
280
  l1 = l1 + gap_begin
281
281
  l2 = l2 + gap_end
282
282
  repeat = 0
283
- if g1 > 2*s1
283
+ if g1 >= s1
284
284
  ref = ref[g1..-1]
285
285
  repeat = 1
286
286
  l1 = l1 + g1
287
287
  end
288
- if g2 > 2*s2
288
+ if g2 >= s2
289
289
  ref = ref[0..(-g2 - 1)]
290
290
  repeat = 1
291
291
  l2 = l2 + g2
@@ -293,8 +293,7 @@ module ViralSeq
293
293
  end
294
294
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
295
295
 
296
-
297
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
296
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
298
297
  aln_test = aln_seq[1]
299
298
  ref = aln_seq[0]
300
299
 
@@ -303,12 +302,12 @@ module ViralSeq
303
302
  if ref =~ /^(\-+)/
304
303
  l1 = l1 - $1.size
305
304
  elsif ref =~ /(\-+)$/
306
- l2 = l2 + $1.size
305
+ l2 = l2 - $1.size
307
306
  end
308
307
 
309
308
  if (ori_ref_l - l2 - 1) >= l1
310
309
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
311
- aln_seq = ViralSeq::Muscle.align(ref, seq, path_to_muscle)
310
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
312
311
  aln_test = aln_seq[1]
313
312
  ref = aln_seq[0]
314
313
 
@@ -16,7 +16,7 @@ module ViralSeq
16
16
  :ref_genome=>"HXB2",
17
17
  :ref_start=>2648,
18
18
  :ref_end=>3257,
19
- :indel=>false,
19
+ :indel=>true,
20
20
  :trim=>false},
21
21
  {:region=>"PR",
22
22
  :cdna=>
@@ -41,7 +41,7 @@ module ViralSeq
41
41
  :forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNAAAAGGAGAAGCCATGCATG",
42
42
  :majority=>0,
43
43
  :end_join=>true,
44
- :end_join_option=>3,
44
+ :end_join_option=>2,
45
45
  :overlap=>171,
46
46
  :TCS_QC=>true,
47
47
  :ref_genome=>"HXB2",
@@ -61,11 +61,26 @@ module ViralSeq
61
61
  :TCS_QC=>true,
62
62
  :ref_genome=>"HXB2",
63
63
  :ref_start=>6585,
64
- :ref_end=>7208,
64
+ :ref_end=>7205..7210,
65
65
  :indel=>true,
66
- :trim=>false}
66
+ :trim=>false},
67
+ {:region=>"P17",
68
+ :cdna=>
69
+ "GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
70
+ :forward=>
71
+ "GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
72
+ :majority=>0,
73
+ :end_join=>true,
74
+ :end_join_option=>1,
75
+ :overlap=>0,
76
+ :TCS_QC=>true,
77
+ :ref_genome=>"HXB2",
78
+ :ref_start=>1196,
79
+ :ref_end=>1725,
80
+ :indel=>true,
81
+ :trim=>false}
67
82
  ]
68
- }
83
+ }
69
84
  end
70
85
 
71
86
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.6.0"
6
- TCS_VERSION = "2.5.0"
5
+ VERSION = "1.6.4"
6
+ TCS_VERSION = "2.5.1"
7
7
  end
Binary file
data/viral_seq.gemspec CHANGED
@@ -35,7 +35,7 @@ Gem::Specification.new do |spec|
35
35
  spec.required_rubygems_version = '>= 1.3.6'
36
36
 
37
37
  # muscle_bio gem required
38
- spec.add_runtime_dependency "muscle_bio", "~> 0.4"
38
+ spec.add_runtime_dependency "muscle_bio", "~> 0.5"
39
39
 
40
40
  # colorize gem required
41
41
  spec.add_runtime_dependency "colorize", "~> 0.1"
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.6.0
4
+ version: 1.6.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-01-18 00:00:00.000000000 Z
12
+ date: 2022-07-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -59,14 +59,14 @@ dependencies:
59
59
  requirements:
60
60
  - - "~>"
61
61
  - !ruby/object:Gem::Version
62
- version: '0.4'
62
+ version: '0.5'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
67
  - - "~>"
68
68
  - !ruby/object:Gem::Version
69
- version: '0.4'
69
+ version: '0.5'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: colorize
72
72
  requirement: !ruby/object:Gem::Requirement
@@ -193,6 +193,7 @@ files:
193
193
  - lib/viral_seq/tcs_dr.rb
194
194
  - lib/viral_seq/tcs_json.rb
195
195
  - lib/viral_seq/version.rb
196
+ - rc_swans.svc@longleaf.unc.edu
196
197
  - viral_seq.gemspec
197
198
  homepage: https://github.com/ViralSeq/viral_seq
198
199
  licenses: