viral_seq 1.7.0 → 1.7.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 8be7a521e58d5335122db011b5f003407cfaab95480062337451377ee2fdfca9
4
- data.tar.gz: 5c437afa58d63d0bde9dc6acf6c98904b8a7b364618fb3ebebd2cb36a44daa2c
3
+ metadata.gz: 4e6d55ab37ecd3b9c5688c99772fc49792a5319bac853ac768367a8b42c0e0b6
4
+ data.tar.gz: a69e78c80f22848facb41ad4f9d9fb64e6d4e47ff6e18afa3421d64513ce6558
5
5
  SHA512:
6
- metadata.gz: 23622009f3f39961e3d2d760bdde3b9f9b831d001aca68b6eee3d44305a77d3e964c48541811fd9dddc26ad9427383716ccdc64436789b01eb11c51f762d2a6b
7
- data.tar.gz: c1a1ac49930c24f61bfa0872f518fea8146e701a5a874de45e373d4d3d20eca50d138bd44f9d59ea1102d525b392dd9b6ed053647b1c25d97ad0244eb4fe15ff
6
+ metadata.gz: ae34ac12bd2b86d4c7fc040765b26b94d41cfe239a206b2e84bf55841988826bcfbf685e788b93224ee78e29b1280454059991d644f81cbf24f1b97fff3f2294
7
+ data.tar.gz: 254993ea2126ca51d0ad5e2b6be2dca90e1b3ed817266e46b5ca46f91d2a69288c0a87c58906ef3da8ad6465e08788c850d6bc72013e30f1ead13e186ba16dfd
data/README.md CHANGED
@@ -187,6 +187,13 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
187
187
 
188
188
  ## Updates
189
189
 
190
+ ### Version-1.7.1-05122023
191
+
192
+ 1. Add a size check for the raw sequences. If the size is smaller than the input params, an error message will be sent to the user. If the actual size is greater than the input params, extra bases will be truncated.
193
+ 2. Now allows mismatches in the primer region sequences. The forward primer region allows up to 2 nt differences and the cDNA primer region allows up to 3 nt differences.
194
+ 3. Bug fix.
195
+ 4. TCS version updated to 2.5.2.
196
+
190
197
  ### Version-1.7.0-08242022
191
198
 
192
199
  1. Add warnings if the `tcs` pipeline is executed from source instead of being installed from `gem`.
data/bin/tcs CHANGED
@@ -41,7 +41,7 @@ if gem_installed?('viral_seq')
41
41
  require 'viral_seq'
42
42
  else
43
43
  printf "\n****************************************************\n"
44
- printf "**** THIS PACKAGE CANNOT BE RAN FROM SOURCE ********\n"
44
+ printf "**** THIS PACKAGE CANNOT BE RUN FROM SOURCE ********\n"
45
45
  printf "**** PLEASE INSTALL USING `gem install viral_seq` **\n"
46
46
  printf "****************************************************\n\n"
47
47
  exit 1
@@ -104,7 +104,7 @@ end.parse!
104
104
  if options[:json_generator]
105
105
  params = ViralSeq::TcsJson.generate
106
106
  elsif options[:dr]
107
- params = ViralSeq::TcsDr::PARAMS
107
+ params = ViralSeq::TcsDr::PARAMS
108
108
  elsif (options[:params_json] && File.exist?(options[:params_json]))
109
109
  params = JSON.parse(File.read(options[:params_json]), symbolize_names: true)
110
110
  else
@@ -163,6 +163,24 @@ begin
163
163
  $platform_sequencing_length = 300
164
164
  end
165
165
 
166
+ r1_raw_size = r1_fastq_sh.dna_hash.values[0].size
167
+ r2_raw_size = r2_fastq_sh.dna_hash.values[0].size
168
+
169
+ if r1_raw_size >= $platform_sequencing_length
170
+ r1_size_diff = r1_raw_size - $platform_sequencing_length
171
+ else
172
+ raise StandardError.new "R1 size smaller than the input platform format #{$platform_sequencing_length} bp."
173
+ end
174
+
175
+ if r2_raw_size >= $platform_sequencing_length
176
+ r2_size_diff = r2_raw_size - $platform_sequencing_length
177
+ else
178
+ raise StandardError.new "R2 size smaller than the input platform format #{$platform_sequencing_length} bp."
179
+ end
180
+
181
+ r1_truncate_base_number = 2 + r1_size_diff
182
+ r2_truncate_base_number = 2 + r2_size_diff
183
+
166
184
  primers = params[:primer_pairs]
167
185
  if primers.empty? or primers.nil?
168
186
  ViralSeq::TcsCore.log_and_abort log, "No primer information. Script terminated."
@@ -235,8 +253,8 @@ begin
235
253
  r2_seq = r2_passed_seq[seqtag]
236
254
  pid = r2_seq[0, pid_length]
237
255
  id[seqtag] = pid
238
- bio_r2[seqtag] = r2_seq[filter_r2[:reverse_starting_number]..-2]
239
- bio_r1[seqtag] = r1_seq[filter_r1[:forward_starting_number]..-2]
256
+ bio_r2[seqtag] = r2_seq[filter_r2[:reverse_starting_number]..-r2_truncate_base_number]
257
+ bio_r1[seqtag] = r1_seq[filter_r1[:forward_starting_number]..-r1_truncate_base_number]
240
258
  end
241
259
 
242
260
  # TCS cut-off
data/bin/tcs_log CHANGED
@@ -155,7 +155,7 @@ region_colors = {"Other" => "#808080"}
155
155
  CSV.foreach(log_file).each_with_index do |row, i|
156
156
  next if i == 0 || row[0] == nil
157
157
 
158
- lib_name = row[0]
158
+ lib_name = row[0].to_s
159
159
  region = row[1]
160
160
  raw_sequences_per_barcode = row[2].to_i
161
161
 
@@ -180,7 +180,7 @@ module ViralSeq
180
180
  l1 = 0
181
181
  l2 = 0
182
182
 
183
- aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :PPP, path_to_muscle)
183
+ aln_seq = ViralSeq::Muscle.align(ori_ref, seq, :Super5, path_to_muscle)
184
184
  aln_test = aln_seq[1]
185
185
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
186
186
  gap_begin = $1.size
@@ -214,7 +214,7 @@ module ViralSeq
214
214
  l2 = l2 + (post_aln - b2)
215
215
  end
216
216
 
217
- aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
217
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
218
218
  aln_test = aln_seq[1]
219
219
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
220
220
  gap_begin = $1.size
@@ -263,7 +263,7 @@ module ViralSeq
263
263
  end
264
264
 
265
265
  while repeat == 1
266
- aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
266
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
267
267
  aln_test = aln_seq[1]
268
268
  aln_test =~ /^(\-*)(\w.*\w)(\-*)$/
269
269
  gap_begin = $1.size
@@ -293,7 +293,7 @@ module ViralSeq
293
293
  end
294
294
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
295
295
 
296
- aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
296
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
297
297
  aln_test = aln_seq[1]
298
298
  ref = aln_seq[0]
299
299
 
@@ -307,7 +307,7 @@ module ViralSeq
307
307
 
308
308
  if (ori_ref_l - l2 - 1) >= l1
309
309
  ref = ori_ref[l1..(ori_ref_l - l2 - 1)]
310
- aln_seq = ViralSeq::Muscle.align(ref, seq, :PPP, path_to_muscle)
310
+ aln_seq = ViralSeq::Muscle.align(ref, seq, :Super5, path_to_muscle)
311
311
  aln_test = aln_seq[1]
312
312
  ref = aln_seq[0]
313
313
 
@@ -56,6 +56,43 @@ class String
56
56
  Regexp.new match
57
57
  end
58
58
 
59
+ # parse the nucleotide sequences as an Array of Array
60
+ # @return [Array] Array of Array at each position
61
+ # @example parse a sequence with ambiguities to Array of Array
62
+ # "ATRWCG".nt_to_array
63
+ # => [["A"], ["T"], ["A", "G"], ["A", "T"], ["C"], ["G"]]
64
+
65
+ def nt_to_array
66
+ return_array = []
67
+ self.each_char.each do |base|
68
+ base_array = base.to_list
69
+ return_array.append base_array
70
+ end
71
+ return return_array
72
+ end
73
+
74
+
75
+ # compare the given nt sequence string with the ref sequence string
76
+ # @param ref [String] the ref sequence string to compare with
77
+ # @return [Integer] Number of differences
78
+ # @example count the differences against a ref sequence with ambiguities
79
+ # "ATCACG".nt_diff("ATRWCG")
80
+ # => 1
81
+
82
+ def nt_diff(ref)
83
+ count_diff = 0
84
+ self_array = self.split("")
85
+ ref_array = ref.nt_to_array
86
+ self_array.each_with_index do |nt, i|
87
+ ref_nt = ref_array[i]
88
+ unless ref_nt.include? nt
89
+ count_diff += 1
90
+ end
91
+ end
92
+ return count_diff
93
+ end
94
+
95
+
59
96
  # parse IUPAC nucleotide ambiguity codes (W S M K R Y B D H V N) as String if String.size == 1
60
97
  # @return [Array] parsed nt bases
61
98
  # @example parse IUPAC `R`
@@ -223,7 +223,7 @@ module ViralSeq
223
223
  end
224
224
  forward_bio_primer_size = forward_bio_primer.size
225
225
  forward_starting_number = forward_n + forward_bio_primer_size
226
- forward_primer_ref = forward_bio_primer.nt_parser
226
+ #forward_primer_ref = forward_bio_primer.nt_parser
227
227
 
228
228
  r1_passed_seq = {}
229
229
  r1_raw = r1_sh.dna_hash
@@ -232,7 +232,7 @@ module ViralSeq
232
232
  seq = r1_raw[name]
233
233
  next unless general_filter seq
234
234
  primer_region_seq = seq[forward_n, forward_bio_primer_size]
235
- if primer_region_seq =~ forward_primer_ref
235
+ if primer_region_seq.nt_diff(forward_bio_primer) < 3
236
236
  new_name = remove_tag name
237
237
  r1_passed_seq[new_name] = seq
238
238
  end
@@ -255,13 +255,13 @@ module ViralSeq
255
255
  cdna_bio_primer = $2
256
256
  cdna_bio_primer_size = cdna_bio_primer.size
257
257
  reverse_starting_number = pid_length + cdna_bio_primer_size
258
- cdna_primer_ref = cdna_bio_primer.nt_parser
258
+ # cdna_primer_ref = cdna_bio_primer.nt_to_array
259
259
  r2_passed_seq = {}
260
260
  proc_filter = proc do |name|
261
261
  seq = r2_raw[name]
262
262
  next unless general_filter seq
263
263
  primer_region_seq = seq[pid_length, cdna_bio_primer_size]
264
- if primer_region_seq =~ cdna_primer_ref
264
+ if primer_region_seq.nt_diff(cdna_bio_primer) < 4
265
265
  new_name = remove_tag name
266
266
  r2_passed_seq[new_name] = seq
267
267
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.7.0"
6
- TCS_VERSION = "2.5.1"
5
+ VERSION = "1.7.1"
6
+ TCS_VERSION = "2.5.2"
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.7.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-08-25 00:00:00.000000000 Z
12
+ date: 2023-05-12 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler