viral_seq 1.8.1.1 → 1.9.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 11381ab74d67a54b5d922e8f1a7085866fe5349f4124a971b9dc64709fa763f1
4
- data.tar.gz: dd578b649b9857cf1c7ff8a23fff62db51f665ef23c74c8000a2b4d7f4500f9e
3
+ metadata.gz: 5817b0c1bb2887e02c101dd1032ad1a5523d7390caa00c785050ad05e4bb77e7
4
+ data.tar.gz: 0e9e8c40625122a932f7e06062b4e129712d433dbf11dcd0b4b51d51ce80b514
5
5
  SHA512:
6
- metadata.gz: add86e088cd1ef50c0ce546eb4f755b7b5725a072ddf1119982ec0cccd20246bc25aab077c51673032e442f098e15f4c40d9a526ff67af8a5514aab580f255a8
7
- data.tar.gz: 4b6cdbe344c2835c3f07c10b86f33613e0d5cdba5a9405da50d9d7f208796da6cdee709993b465f72e2a14a49667ea5ddfed84ddaf8ff21bda4a09448ebdde1b
6
+ metadata.gz: 6abfc477dea09519649614f8300d17be470a2511f79d7ed33b794ba96ab721584a488b71c01040e6c55e92e9e22c5282afedbdf02b6889acfdb7fb6fcddecb0d
7
+ data.tar.gz: 861e6ff9b55be29357b677c270ea7c1cc20ff49960cd8f3429ceb323751c3ddd6a7c9a773a9c383da5abb5e8f99a5172cd74cb2a70d54a674eba2d1406d904ea
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- viral_seq (1.8.0.pre)
4
+ viral_seq (1.9.0)
5
5
  colorize (~> 0.1)
6
6
  combine_pdf (~> 1.0, >= 1.0.0)
7
7
  muscle_bio (= 0.4)
data/README.md CHANGED
@@ -187,6 +187,12 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
187
187
 
188
188
  ## Updates
189
189
 
190
+ ### Version-1.9.0-11132024
191
+
192
+ 1. `ViralSeq::TcsCore::validate_file_name` will not report errors when there is non-sequence data in the folder; instead, these files will be ignored.
193
+ 2. Rewrote the APIs for DRM analysis for HIV. They now use version config files for the sequencing information and DRM list config files for DRM interpretation. The two config files are located in `/lib/viral_seq/util/`.
194
+ 3. `tcs_sdrm` will take a second argument for the DRM config version. Currently supports `["v1", "v2", "v3"]`. Refer to the API documentation for details.
195
+ 4. Next update will use secondary command `tcs sdrm` to replace `tcs_sdrm`, and `tcs log` to replace `tcs_log`.
190
196
 
191
197
  ### Version-1.8.1-06042024
192
198
 
data/bin/tcs CHANGED
@@ -299,11 +299,17 @@ begin
299
299
  distinct_to_raw = (primer_id_count.size/primer_id_list.size.to_f).round(3)
300
300
  summary_json[:distinct_to_raw] = distinct_to_raw
301
301
 
302
- if primer_id_dis.keys.size < 5
302
+ if primer_id_count.keys.size < 5
303
303
  log.puts Time.now.to_s + "\t" + "Less than 5 Primer IDs detected. Region #{region} aborted."
304
304
  next
305
305
  end
306
306
 
307
+ if primer_id_dis.keys.size < 5
308
+ log.puts Time.now.to_s + "\t" + "Maximum frequency of Primer ID less than 5. Primer ID overloaded. Region #{region} aborted."
309
+ next
310
+ end
311
+
312
+
307
313
  max_id = primer_id_dis.keys.sort[-5..-1].mean
308
314
  consensus_cutoff = ViralSeq::TcsCore.calculate_cut_off(max_id,error_rate)
309
315
  log.puts Time.now.to_s + "\t" + "Consensus cut-off is #{consensus_cutoff.to_s}"
data/bin/tcs_sdrm CHANGED
@@ -21,7 +21,6 @@
21
21
  # output data in a new dir as 'libs_dir_SDRM'
22
22
 
23
23
  require 'viral_seq'
24
- require 'json'
25
24
  require 'csv'
26
25
  require 'fileutils'
27
26
  require 'prawn'
@@ -32,6 +31,8 @@ unless ARGV[0] && File.directory?(ARGV[0])
32
31
  abort "No sequence data provided. `tcs_sdrm` pipeline aborted. "
33
32
  end
34
33
 
34
+ dr_version = ARGV[1] ? ARGV[1] : "V1"
35
+
35
36
  r_version = ViralSeq::R.check_R
36
37
  ViralSeq::R.check_R_packages
37
38
 
@@ -49,20 +50,22 @@ log << { time: Time.now }
49
50
  log << { viral_seq_version: ViralSeq::VERSION }
50
51
  log << { tcs_version: ViralSeq::TCS_VERSION }
51
52
  log << { R_version: r_version}
52
- sdrm_list = {}
53
- sdrm_list[:nrti] = ViralSeq::DRMs.sdrm_json(:nrti)
54
- sdrm_list[:nnrti] = ViralSeq::DRMs.sdrm_json(:nnrti)
55
- sdrm_list[:hiv_pr] = ViralSeq::DRMs.sdrm_json(:hiv_pr)
56
- sdrm_list[:hiv_in] = ViralSeq::DRMs.sdrm_json(:hiv_in)
53
+ log << { DR_version: dr_version}
54
+
55
+ version_config = ViralSeq::DrmVersion.config_version(dr_version)
56
+
57
+ sdrm_list = version_config.pull_drm_json
57
58
  log << { sdrm_list: sdrm_list }
58
59
 
60
+ regions = version_config.seq_drm_correlation.keys
61
+
59
62
  # input dir
60
63
  indir = ARGV[0]
61
64
  libs = Dir[indir + "/*"]
62
65
  log << { processed_libs: libs }
63
66
 
64
67
  #output dir
65
- outdir = indir + "_SDRM"
68
+ outdir = indir + "_DRM_analysis"
66
69
  Dir.mkdir(outdir) unless File.directory?(outdir)
67
70
 
68
71
  libs.each do |lib|
@@ -85,7 +88,7 @@ libs.each do |lib|
85
88
 
86
89
  point_mutation_file = File.join(out_lib_dir, (lib_name + "_substitution.csv"))
87
90
  point_mutation_out = File.open(point_mutation_file, "w")
88
- point_mutation_out.puts "region,TCS,AA position,wild type,mutation," +
91
+ point_mutation_out.puts "DRM_Class,TCS,AA position,wild type,mutation," +
89
92
  "number,frequency,95% CI low,95% CI high,fdr,notes"
90
93
 
91
94
  linkage_file = File.join(out_lib_dir, (lib_name + "_linkage.csv"))
@@ -114,118 +117,65 @@ libs.each do |lib|
114
117
  aa_report_list = []
115
118
  summary_hash = {}
116
119
 
120
+ process_config = {}
121
+
117
122
  sub_seq_files.each do |sub_seq|
118
123
  seq_basename = File.basename(sub_seq)
119
- seqs = ViralSeq::SeqHash.fa(sub_seq)
120
- next if seqs.size < 3
124
+
121
125
  if seq_basename =~ /V1V3/i
122
- summary_hash[:V1V3] = "#{seqs.size.to_s},NA,NA,NA,NA"
123
- FileUtils.cp(sub_seq, filtered_seq_dir)
124
- elsif seq_basename =~ /PR/i
125
- a3g_check = seqs.a3g
126
- a3g_seqs = a3g_check[:a3g_seq]
127
- a3g_filtered_seqs = a3g_check[:filtered_seq]
128
- stop_codon_check = a3g_filtered_seqs.stop_codon
129
- stop_codon_seqs = stop_codon_check[:with_stop_codon]
130
- filtered_seqs = stop_codon_check[:without_stop_codon]
131
- poisson_minority_cutoff = filtered_seqs.pm
132
- fdr_hash = filtered_seqs.fdr
133
- summary_hash[:PR] = [
134
- seqs.size.to_s,
135
- a3g_seqs.size.to_s,
136
- stop_codon_seqs.size.to_s,
137
- filtered_seqs.size.to_s,
138
- poisson_minority_cutoff.to_s
139
- ].join(',')
140
- next if filtered_seqs.size < 3
141
- filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
142
-
143
- sdrm = filtered_seqs.sdrm_hiv_pr(poisson_minority_cutoff, fdr_hash)
144
- point_mutation_list += sdrm[0]
145
- linkage_list += sdrm[1]
146
- aa_report_list += sdrm[2]
147
-
148
- elsif seq_basename =~/IN/i
149
- a3g_check = seqs.a3g
150
- a3g_seqs = a3g_check[:a3g_seq]
151
- a3g_filtered_seqs = a3g_check[:filtered_seq]
152
- stop_codon_check = a3g_filtered_seqs.stop_codon(2)
153
- stop_codon_seqs = stop_codon_check[:with_stop_codon]
154
- filtered_seqs = stop_codon_check[:without_stop_codon]
155
- poisson_minority_cutoff = filtered_seqs.pm
156
- fdr_hash = filtered_seqs.fdr
157
- summary_hash[:IN] = [
158
- seqs.size.to_s,
159
- a3g_seqs.size.to_s,
160
- stop_codon_seqs.size.to_s,
161
- filtered_seqs.size.to_s,
162
- poisson_minority_cutoff.to_s
163
- ].join(',')
164
- next if filtered_seqs.size < 3
165
- filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
166
-
167
- sdrm = filtered_seqs.sdrm_hiv_in(poisson_minority_cutoff, fdr_hash)
168
- point_mutation_list += sdrm[0]
169
- linkage_list += sdrm[1]
170
- aa_report_list += sdrm[2]
171
-
172
- elsif seq_basename =~/P17/i
173
- a3g_check = seqs.a3g
174
- a3g_seqs = a3g_check[:a3g_seq]
175
- a3g_filtered_seqs = a3g_check[:filtered_seq]
176
- stop_codon_check = a3g_filtered_seqs.stop_codon(2)
177
- stop_codon_seqs = stop_codon_check[:with_stop_codon]
178
- filtered_seqs = stop_codon_check[:without_stop_codon]
179
- poisson_minority_cutoff = filtered_seqs.pm
180
- fdr_hash = filtered_seqs.fdr
181
- summary_hash[:P17] = [
182
- seqs.size.to_s,
183
- a3g_seqs.size.to_s,
184
- stop_codon_seqs.size.to_s,
185
- filtered_seqs.size.to_s,
186
- poisson_minority_cutoff.to_s
187
- ].join(',')
188
- next if filtered_seqs.size < 3
189
- filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
190
-
191
- elsif seq_basename =~/RT/i
192
- rt_seq1 = {}
193
- rt_seq2 = {}
194
- seqs.dna_hash.each do |k,v|
195
- rt_seq1[k] = v[0,267]
196
- rt_seq2[k] = v[267..-1]
126
+ seqs = ViralSeq::SeqHash.fa(sub_seq)
127
+ if seqs.size > 3
128
+ summary_hash["V1V3"] = "#{seqs.size.to_s},NA,NA,NA,NA"
129
+ FileUtils.cp(sub_seq, filtered_seq_dir)
130
+ end
131
+ end
132
+
133
+ seq_basename.gsub!(/\_P17/i, "_CA")
134
+ regions.each do |region|
135
+ if seq_basename =~ /\_#{region}/i
136
+ process_config[region] = sub_seq
197
137
  end
198
- rt1 = ViralSeq::SeqHash.new(rt_seq1)
199
- rt2 = ViralSeq::SeqHash.new(rt_seq2)
200
- rt1_a3g = rt1.a3g
201
- rt2_a3g = rt2.a3g
202
- hypermut_seq_rt1 = rt1_a3g[:a3g_seq]
203
- hypermut_seq_rt2 = rt2_a3g[:a3g_seq]
204
- rt1_stop_codon = rt1.stop_codon(1)[:with_stop_codon]
205
- rt2_stop_codon = rt2.stop_codon(2)[:with_stop_codon]
206
- hypermut_seq_keys = (hypermut_seq_rt1.dna_hash.keys | hypermut_seq_rt2.dna_hash.keys)
207
- stop_codon_seq_keys = (rt1_stop_codon.dna_hash.keys | rt2_stop_codon.dna_hash.keys)
208
- reject_keys = (hypermut_seq_keys | stop_codon_seq_keys)
209
- filtered_seqs = ViralSeq::SeqHash.new(seqs.dna_hash.reject {|k,v| reject_keys.include?(k) })
210
- poisson_minority_cutoff = filtered_seqs.pm
211
- fdr_hash = filtered_seqs.fdr
212
- summary_hash[:RT] = [
213
- seqs.size.to_s,
214
- hypermut_seq_keys.size.to_s,
215
- stop_codon_seq_keys.size.to_s,
216
- filtered_seqs.size.to_s,
217
- poisson_minority_cutoff.to_s
218
- ].join(',')
219
- next if filtered_seqs.size < 3
220
- filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
221
-
222
- sdrm = filtered_seqs.sdrm_hiv_rt(poisson_minority_cutoff, fdr_hash)
223
- point_mutation_list += sdrm[0]
224
- linkage_list += sdrm[1]
225
- aa_report_list += sdrm[2]
226
138
  end
227
139
  end
228
140
 
141
+ process_config.each do |region_name, path_to_file|
142
+ seq_basename = File.basename(path_to_file)
143
+ seq_basename.gsub!(/\_P17/i, "_CA")
144
+ region = version_config.query_region(region_name.to_s)
145
+
146
+ puts "prcessing region: " + region.region
147
+
148
+ sh = ViralSeq::SeqHash.fa(path_to_file)
149
+
150
+ if sh.size < 3
151
+ puts "Less than 3 TCS for #{region.region}. Skipped."
152
+ next
153
+ end
154
+
155
+ run_filtering = sh.filter_for_drm(region)
156
+
157
+ summary_hash[region.region] = [
158
+ sh.size,
159
+ run_filtering[:a3g_seq].size,
160
+ run_filtering[:stop_codon_seq].size,
161
+ run_filtering[:filtered_seq].size,
162
+ run_filtering[:filtered_seq].pm
163
+ ].join(',')
164
+
165
+ if run_filtering[:filtered_seq].size < 1
166
+ puts "Warning: NO TCS passed the filter for #{region.region}. DRM analysis skipped."
167
+ next
168
+ end
169
+
170
+ run_filtering[:filtered_seq].write_nt_fa(File.join(filtered_seq_dir,seq_basename))
171
+
172
+ drm_results = run_filtering[:filtered_seq].drm(region)
173
+ point_mutation_list += drm_results[0]
174
+ linkage_list += drm_results[1]
175
+ aa_report_list += drm_results[2]
176
+
177
+ end
178
+
229
179
  point_mutation_list.each do |record|
230
180
  point_mutation_out.puts record.join(",")
231
181
  end
@@ -262,12 +212,13 @@ libs.each do |lib|
262
212
  pi_csv.each do |line|
263
213
  line.chomp!
264
214
  data = line.split(",")
265
- tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
215
+ tag = data[0].split("_")[-1].gsub(/\W/,"")
266
216
  summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
267
217
  end
268
- [:PR, :RT, :IN, :V1V3, :P17].each do |regions|
269
- next unless summary_hash[regions]
270
- seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
218
+ regions << "V1V3"
219
+ regions.each do |region|
220
+ next unless summary_hash[region]
221
+ seq_summary_out.puts region.to_s + "," + summary_hash[region]
271
222
  end
272
223
  File.unlink(out_r_csv)
273
224
  end
@@ -286,13 +237,13 @@ libs.each do |lib|
286
237
  tcs_RT = 0
287
238
  tcs_IN = 0
288
239
  tcs_V1V3 = 0
289
- tcs_P17 = 0
240
+ tcs_CA = 0
290
241
  pi_RT = "NA"
291
242
  pi_V1V3 = "NA"
292
- pi_P17 = "NA"
243
+ pi_CA = "NA"
293
244
  dist20_RT = "NA"
294
245
  dist20_V1V3 = "NA"
295
- dist20_P17 = "NA"
246
+ dist20_CA = "NA"
296
247
  summary_lines.each do |line|
297
248
  data = line.chomp.split(",")
298
249
  if data[0] == "PR"
@@ -307,10 +258,10 @@ libs.each do |lib|
307
258
  tcs_V1V3 = data[1].to_i
308
259
  pi_V1V3 = data[6].to_f
309
260
  dist20_V1V3 = data[7].to_f
310
- elsif data[0] == "P17"
311
- tcs_P17 = data[4].to_i
312
- pi_P17 = data[6].to_f
313
- dist20_P17 = data[7].to_f
261
+ elsif data[0] == "CA"
262
+ tcs_CA = data[4].to_i
263
+ pi_CA = data[6].to_f
264
+ dist20_CA = data[7].to_f
314
265
  end
315
266
  end
316
267
 
@@ -334,6 +285,7 @@ libs.each do |lib|
334
285
  sdrm_PR = ""
335
286
  sdrm_RT = ""
336
287
  sdrm_IN = ""
288
+ sdrm_CA = ""
337
289
  sdrm_lines.each do |line|
338
290
  data = line.chomp.split(",")
339
291
  next if data[-1] == "*"
@@ -343,6 +295,8 @@ libs.each do |lib|
343
295
  sdrm_RT += abstract_line(data)
344
296
  elsif data[0] == "IN"
345
297
  sdrm_IN += abstract_line(data)
298
+ elsif data[0] == "CA"
299
+ sdrm_CA += abstract_line(data)
346
300
  end
347
301
  end
348
302
 
@@ -352,13 +306,13 @@ libs.each do |lib|
352
306
  tcs_RT: tcs_RT,
353
307
  tcs_IN: tcs_IN,
354
308
  tcs_V1V3: tcs_V1V3,
355
- tcs_P17: tcs_P17,
309
+ tcs_CA: tcs_CA,
356
310
  pi_RT: pi_RT,
357
311
  pi_V1V3: pi_V1V3,
358
- pi_P17: pi_P17,
312
+ pi_CA: pi_CA,
359
313
  dist20_RT: dist20_RT,
360
314
  dist20_V1V3: dist20_V1V3,
361
- dist20_P17: dist20_P17,
315
+ dist20_CA: dist20_CA,
362
316
  recency: recency,
363
317
  dpi: dpi,
364
318
  dpi_lwr: dpi_lwr,
@@ -366,7 +320,8 @@ libs.each do |lib|
366
320
  possible_dual_infection: possible_dual_infection,
367
321
  sdrm_PR: sdrm_PR,
368
322
  sdrm_RT: sdrm_RT,
369
- sdrm_IN: sdrm_IN
323
+ sdrm_IN: sdrm_IN,
324
+ sdrm_CA: sdrm_CA
370
325
  ]
371
326
 
372
327
  summary_json_out.puts JSON.pretty_generate(summary_json)
@@ -389,7 +344,7 @@ libs.each do |lib|
389
344
  file: point_mutation_file,
390
345
  newPDF: "",
391
346
  table_width: [60,50,70,65,65,60,75,70,70,70,45],
392
- extra_text: "* Mutation below Poisson cut-off for minority mutations"
347
+ extra_text: "* Mutation below FDR significance cut-off for minority mutations"
393
348
  },
394
349
  {
395
350
  name: "linkage",
@@ -397,7 +352,6 @@ libs.each do |lib|
397
352
  file: linkage_file,
398
353
  newPDF: "",
399
354
  table_width: [55,50,250,60,80,80,80,45],
400
- extra_text: "* Mutation below Poisson cut-off for minority mutations"
401
355
  }
402
356
  ]
403
357
 
@@ -433,7 +387,7 @@ libs.each do |lib|
433
387
  pdf << CombinePDF.load(out_r_pdf) if File.exist?(out_r_pdf)
434
388
 
435
389
  pdf.number_pages location: [:bottom_right],
436
- number_format: "Swanstrom\'s lab HIV SDRM Pipeline, version #{$sdrm_version_number} by S.Z. and M.U.C. Page %s",
390
+ number_format: "Swanstrom\'s lab HIV SDRM Pipeline, TCS version #{ViralSeq::TCS_VERSION}, DRM config version #{version_config.drm_version}, by S.Z. and M.U.C. Page %s",
437
391
  font_size: 6,
438
392
  opacity: 0.5
439
393
 
@@ -448,4 +402,6 @@ log_file = File.join(File.dirname(indir), "sdrm_log.json")
448
402
 
449
403
  File.open(log_file, 'w') { |f| f.puts JSON.pretty_generate(log) }
450
404
 
405
+ puts "TCS SDRM pipeline done. Exit 0."
406
+
451
407
  FileUtils.touch(File.join(outdir, ".done"))
@@ -0,0 +1,83 @@
module ViralSeq

  # DRM configuration for a single sequenced region.
  # Bundles the DRM classes, their position ranges and mutation lists, the
  # sequence coordinates and the reference information for that region.

  class DrmRegionConfig

    # initialize DRM region configuration
    # @param drm_version [String] version of the instance of DrmVersion
    # @param region [String] name of the region
    # @param drm_class [Array] classes of DRMs at this region
    # @param drm_range [Hash] DRM range for each class of DRMs at this region
    # @param drm_list [Hash] list of detailed DRM mutations for each DRM class at this region;
    #   each class entry is read by #drm_json as position => [wild type, mutations]
    # @param seq_coord [Hash] sequence coordinates of this region, read with the keys
    #   "minimum", "maximum" and (optionally) "gap" => {"minimum", "maximum"}
    # @param ref_info [Hash] information of the reference genome; "ref_coord" is read as an
    #   Array whose first element is the reference start coordinate
    def initialize(drm_version, region, drm_class, drm_range, drm_list, seq_coord, ref_info)
      @drm_version = drm_version
      @region = region
      @drm_class = drm_class
      @drm_range = drm_range
      @drm_list = drm_list
      @seq_coord = seq_coord
      @ref_info = ref_info
    end

    attr_accessor :drm_version, :region, :drm_class, :drm_range, :drm_list, :seq_coord, :ref_info

    # summarize the DRM information for the output as JSON for the specific version
    # @return [Hash] DRM class => Array of
    #   {position:, wildtypeCodon:, mutationCodons:} hashes, one per listed position
    def drm_json
      drm_list.each_with_object({}) do |(drm_class, drms), json_hash|
        json_hash[drm_class] = drms.map do |pos, muts|
          { position: pos, wildtypeCodon: muts[0], mutationCodons: muts[1] }
        end
      end
    end

    # calculate the length of R1 and R2 based on the sequence coordinates
    # @return [Hash, nil] {r1_length: [Integer], r2_length: [Integer]},
    #   or nil when the region has no "gap" entry in its sequence coordinates
    def r1_r2_length
      gap = seq_coord["gap"]
      return nil unless gap

      {
        r1_length: gap["minimum"] - seq_coord["minimum"],
        r2_length: seq_coord["maximum"] - gap["maximum"]
      }
    end #end of #r1_r2_length


    # determine the reading frame number(s) based on the sequence coordinates
    # @return [Array<Integer>] one reading frame (0, 1 or 2) for the start of the region,
    #   followed by a second one for the position right after the gap when a gap is defined
    def get_reading_frame_number
      frames = [frame_offset(seq_coord["minimum"])]

      gap = seq_coord["gap"]
      # the second read starts one position after the end of the gap
      frames << frame_offset(gap["maximum"] + 1) if gap

      frames
    end #end get_reading_frame_number

    private

    # number of bases (0, 1 or 2) to skip from `position` to reach the next
    # codon boundary, counted from the reference start coordinate
    def frame_offset(position)
      (3 - ((position - ref_info["ref_coord"][0]) % 3)) % 3
    end

  end
end
@@ -0,0 +1,120 @@
module ViralSeq

  # DRM version configuration.
  # Version definitions are read from `viral_seq/util/drm_versions_config.json`
  # under `ViralSeq.root` (see .config_version).

  class DrmVersion

    # initialize a ViralSeq::DrmVersion instance
    # @param drm_version [String] version of the instance of DrmVersion
    # @param drm_range [Hash] region/class of DRM and the range of amino acid positions included in this version.
    # @param seq_coord [Hash] region and its amplicon positions on HXB2 reference
    # @param seq_drm_correlation [Hash] correlation of sequenced region and DRM class
    # @param ref_info [Hash] information of the reference genome, including sequence coordinates on HXB2
    def initialize(drm_version, drm_range, seq_coord, seq_drm_correlation, ref_info)
      @drm_version = drm_version
      @drm_range = drm_range
      @seq_coord = seq_coord
      @seq_drm_correlation = seq_drm_correlation
      @ref_info = ref_info
    end

    attr_accessor :drm_version, :drm_range, :seq_coord, :seq_drm_correlation, :ref_info

    # construct an instance of ViralSeq::DrmVersion
    # @param version_config_hash [Hash] json hash of stored version configurations, read with
    #   the keys "version", "DRM_range", "seq_coord", "seq_drm_correlation" and "ref_info"
    # @return [ViralSeq::DrmVersion] an instance of constructed DrmVersion

    def self.construct(version_config_hash)
      ViralSeq::DrmVersion.new(
        version_config_hash["version"],
        version_config_hash["DRM_range"],
        version_config_hash["seq_coord"],
        version_config_hash["seq_drm_correlation"],
        version_config_hash["ref_info"]
      )
    end

    # construct a specific version of ViralSeq::DrmVersion
    # Aborts the program when the requested version is not in the config file.
    # @param v [String] version string, matched case-insensitively
    # @return [ViralSeq::DrmVersion] an instance of constructed DrmVersion

    def self.config_version(v="v1")
      # to_s so that a nil/Symbol argument reaches the "not found" abort below
      # instead of raising NoMethodError
      v = v.to_s.downcase
      # NOTE(review): "v2" is answered with the "v1" entry even if the config
      # file defines its own "v2" -- confirm this aliasing is intentional.
      v = "v1" if v == "v2"

      config_path = File.join(ViralSeq.root, 'viral_seq', 'util', 'drm_versions_config.json')

      drm_versions = JSON.parse(File.read(config_path)).each_with_object({}) do |config, versions|
        versions[config["version"]] = ViralSeq::DrmVersion.construct(config)
      end

      return drm_versions[v] if drm_versions[v]

      abort(
        "Version '#{v}' config not found. Program aborted. \nCurrent supported versions '#{drm_versions.keys.sort.join(", ")}'\nCheck documentations for details".red
      )
    end

    # construct a ViralSeq::DrmRegionConfig instance from a specific version
    # Aborts the program when the region is not listed in `seq_drm_correlation`.
    # @param region [String] name of the region, matched after upcasing
    # @return [ViralSeq::DrmRegionConfig] an instance of DrmRegionConfig

    def query_region(region)
      region = region.to_s.upcase
      drm_classes = seq_drm_correlation[region]

      if drm_classes.nil?
        abort "Region not recognized by the specific DRM config version. Program aborted."
      end

      region_range = {}
      region_list = {}

      drm_classes.each do |drm_class|
        class_range = drm_range[drm_class]
        region_range[drm_class] = class_range
        # keep only the listed mutations whose position is part of this version's range
        region_list[drm_class] = ViralSeq::DRMs.sdrm_hash(drm_class).select do |pos, _muts|
          class_range.include?(pos)
        end
      end

      region_ref_info = {
        "ref_type" => ref_info["ref_type"],
        "ref_coord" => ref_info["ref_coord"][region]
      }

      ViralSeq::DrmRegionConfig.new(
        drm_version, region, drm_classes, region_range, region_list, seq_coord[region], region_ref_info
      )
    end

    # summarize the DRM information for the output as JSON
    # @return [Hash] merged #drm_json output of every region in `seq_drm_correlation`;
    #   when two regions share a DRM class, the later region's entry wins

    def pull_drm_json
      seq_drm_correlation.keys.each_with_object({}) do |region, summary_json_hash|
        summary_json_hash.merge!(query_region(region).drm_json)
      end
    end

  end # end of class

end