viral_seq 1.8.1.1 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +6 -0
- data/bin/tcs +7 -1
- data/bin/tcs_sdrm +87 -131
- data/lib/viral_seq/drm_region_config.rb +83 -0
- data/lib/viral_seq/drm_version.rb +120 -0
- data/lib/viral_seq/hivdr.rb +206 -2
- data/lib/viral_seq/sdrm.rb +34 -99
- data/lib/viral_seq/sequence.rb +26 -1
- data/lib/viral_seq/tcs_core.rb +1 -1
- data/lib/viral_seq/tcs_dr.rb +87 -5
- data/lib/viral_seq/util/drm_list.json +684 -0
- data/lib/viral_seq/util/drm_versions_config.json +114 -0
- data/lib/viral_seq/version.rb +2 -2
- data/lib/viral_seq.rb +3 -0
- metadata +7 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5817b0c1bb2887e02c101dd1032ad1a5523d7390caa00c785050ad05e4bb77e7
|
4
|
+
data.tar.gz: 0e9e8c40625122a932f7e06062b4e129712d433dbf11dcd0b4b51d51ce80b514
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6abfc477dea09519649614f8300d17be470a2511f79d7ed33b794ba96ab721584a488b71c01040e6c55e92e9e22c5282afedbdf02b6889acfdb7fb6fcddecb0d
|
7
|
+
data.tar.gz: 861e6ff9b55be29357b677c270ea7c1cc20ff49960cd8f3429ceb323751c3ddd6a7c9a773a9c383da5abb5e8f99a5172cd74cb2a70d54a674eba2d1406d904ea
|
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -187,6 +187,12 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
187
187
|
|
188
188
|
## Updates
|
189
189
|
|
190
|
+
### Version-1.9.0-11132024
|
191
|
+
|
192
|
+
1. `ViralSeq::TcsCore::validate_file_name` no longer reports errors when non-sequence files are present in the folder; these files are ignored instead.
|
193
|
+
2. Rewrote the APIs for HIV DRM analysis. They now use a version config file for the sequencing information and a DRM list config file for DRM interpretation. The two config files are located in `/lib/viral_seq/util/`.
|
194
|
+
3. `tcs_sdrm` now takes a second argument for the DRM config version. Currently supported versions are `["v1", "v2", "v3"]`. Refer to the API documentation for details.
|
195
|
+
4. Next update will use secondary command `tcs sdrm` to replace `tcs_sdrm`, and `tcs log` to replace `tcs_log`.
|
190
196
|
|
191
197
|
### Version-1.8.1-06042024
|
192
198
|
|
data/bin/tcs
CHANGED
@@ -299,11 +299,17 @@ begin
|
|
299
299
|
distinct_to_raw = (primer_id_count.size/primer_id_list.size.to_f).round(3)
|
300
300
|
summary_json[:distinct_to_raw] = distinct_to_raw
|
301
301
|
|
302
|
-
if
|
302
|
+
if primer_id_count.keys.size < 5
|
303
303
|
log.puts Time.now.to_s + "\t" + "Less than 5 Primer IDs detected. Region #{region} aborted."
|
304
304
|
next
|
305
305
|
end
|
306
306
|
|
307
|
+
if primer_id_dis.keys.size < 5
|
308
|
+
log.puts Time.now.to_s + "\t" + "Maximum frequency of Primer ID less than 5. Primer ID overloaded. Region #{region} aborted."
|
309
|
+
next
|
310
|
+
end
|
311
|
+
|
312
|
+
|
307
313
|
max_id = primer_id_dis.keys.sort[-5..-1].mean
|
308
314
|
consensus_cutoff = ViralSeq::TcsCore.calculate_cut_off(max_id,error_rate)
|
309
315
|
log.puts Time.now.to_s + "\t" + "Consensus cut-off is #{consensus_cutoff.to_s}"
|
data/bin/tcs_sdrm
CHANGED
@@ -21,7 +21,6 @@
|
|
21
21
|
# output data in a new dir as 'libs_dir_SDRM'
|
22
22
|
|
23
23
|
require 'viral_seq'
|
24
|
-
require 'json'
|
25
24
|
require 'csv'
|
26
25
|
require 'fileutils'
|
27
26
|
require 'prawn'
|
@@ -32,6 +31,8 @@ unless ARGV[0] && File.directory?(ARGV[0])
|
|
32
31
|
abort "No sequence data provided. `tcs_sdrm` pipeline aborted. "
|
33
32
|
end
|
34
33
|
|
34
|
+
dr_version = ARGV[1] ? ARGV[1] : "V1"
|
35
|
+
|
35
36
|
r_version = ViralSeq::R.check_R
|
36
37
|
ViralSeq::R.check_R_packages
|
37
38
|
|
@@ -49,20 +50,22 @@ log << { time: Time.now }
|
|
49
50
|
log << { viral_seq_version: ViralSeq::VERSION }
|
50
51
|
log << { tcs_version: ViralSeq::TCS_VERSION }
|
51
52
|
log << { R_version: r_version}
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
sdrm_list
|
53
|
+
log << { DR_version: dr_version}
|
54
|
+
|
55
|
+
version_config = ViralSeq::DrmVersion.config_version(dr_version)
|
56
|
+
|
57
|
+
sdrm_list = version_config.pull_drm_json
|
57
58
|
log << { sdrm_list: sdrm_list }
|
58
59
|
|
60
|
+
regions = version_config.seq_drm_correlation.keys
|
61
|
+
|
59
62
|
# input dir
|
60
63
|
indir = ARGV[0]
|
61
64
|
libs = Dir[indir + "/*"]
|
62
65
|
log << { processed_libs: libs }
|
63
66
|
|
64
67
|
#output dir
|
65
|
-
outdir = indir + "
|
68
|
+
outdir = indir + "_DRM_analysis"
|
66
69
|
Dir.mkdir(outdir) unless File.directory?(outdir)
|
67
70
|
|
68
71
|
libs.each do |lib|
|
@@ -85,7 +88,7 @@ libs.each do |lib|
|
|
85
88
|
|
86
89
|
point_mutation_file = File.join(out_lib_dir, (lib_name + "_substitution.csv"))
|
87
90
|
point_mutation_out = File.open(point_mutation_file, "w")
|
88
|
-
point_mutation_out.puts "
|
91
|
+
point_mutation_out.puts "DRM_Class,TCS,AA position,wild type,mutation," +
|
89
92
|
"number,frequency,95% CI low,95% CI high,fdr,notes"
|
90
93
|
|
91
94
|
linkage_file = File.join(out_lib_dir, (lib_name + "_linkage.csv"))
|
@@ -114,118 +117,65 @@ libs.each do |lib|
|
|
114
117
|
aa_report_list = []
|
115
118
|
summary_hash = {}
|
116
119
|
|
120
|
+
process_config = {}
|
121
|
+
|
117
122
|
sub_seq_files.each do |sub_seq|
|
118
123
|
seq_basename = File.basename(sub_seq)
|
119
|
-
|
120
|
-
next if seqs.size < 3
|
124
|
+
|
121
125
|
if seq_basename =~ /V1V3/i
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
summary_hash[:PR] = [
|
134
|
-
seqs.size.to_s,
|
135
|
-
a3g_seqs.size.to_s,
|
136
|
-
stop_codon_seqs.size.to_s,
|
137
|
-
filtered_seqs.size.to_s,
|
138
|
-
poisson_minority_cutoff.to_s
|
139
|
-
].join(',')
|
140
|
-
next if filtered_seqs.size < 3
|
141
|
-
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
142
|
-
|
143
|
-
sdrm = filtered_seqs.sdrm_hiv_pr(poisson_minority_cutoff, fdr_hash)
|
144
|
-
point_mutation_list += sdrm[0]
|
145
|
-
linkage_list += sdrm[1]
|
146
|
-
aa_report_list += sdrm[2]
|
147
|
-
|
148
|
-
elsif seq_basename =~/IN/i
|
149
|
-
a3g_check = seqs.a3g
|
150
|
-
a3g_seqs = a3g_check[:a3g_seq]
|
151
|
-
a3g_filtered_seqs = a3g_check[:filtered_seq]
|
152
|
-
stop_codon_check = a3g_filtered_seqs.stop_codon(2)
|
153
|
-
stop_codon_seqs = stop_codon_check[:with_stop_codon]
|
154
|
-
filtered_seqs = stop_codon_check[:without_stop_codon]
|
155
|
-
poisson_minority_cutoff = filtered_seqs.pm
|
156
|
-
fdr_hash = filtered_seqs.fdr
|
157
|
-
summary_hash[:IN] = [
|
158
|
-
seqs.size.to_s,
|
159
|
-
a3g_seqs.size.to_s,
|
160
|
-
stop_codon_seqs.size.to_s,
|
161
|
-
filtered_seqs.size.to_s,
|
162
|
-
poisson_minority_cutoff.to_s
|
163
|
-
].join(',')
|
164
|
-
next if filtered_seqs.size < 3
|
165
|
-
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
166
|
-
|
167
|
-
sdrm = filtered_seqs.sdrm_hiv_in(poisson_minority_cutoff, fdr_hash)
|
168
|
-
point_mutation_list += sdrm[0]
|
169
|
-
linkage_list += sdrm[1]
|
170
|
-
aa_report_list += sdrm[2]
|
171
|
-
|
172
|
-
elsif seq_basename =~/P17/i
|
173
|
-
a3g_check = seqs.a3g
|
174
|
-
a3g_seqs = a3g_check[:a3g_seq]
|
175
|
-
a3g_filtered_seqs = a3g_check[:filtered_seq]
|
176
|
-
stop_codon_check = a3g_filtered_seqs.stop_codon(2)
|
177
|
-
stop_codon_seqs = stop_codon_check[:with_stop_codon]
|
178
|
-
filtered_seqs = stop_codon_check[:without_stop_codon]
|
179
|
-
poisson_minority_cutoff = filtered_seqs.pm
|
180
|
-
fdr_hash = filtered_seqs.fdr
|
181
|
-
summary_hash[:P17] = [
|
182
|
-
seqs.size.to_s,
|
183
|
-
a3g_seqs.size.to_s,
|
184
|
-
stop_codon_seqs.size.to_s,
|
185
|
-
filtered_seqs.size.to_s,
|
186
|
-
poisson_minority_cutoff.to_s
|
187
|
-
].join(',')
|
188
|
-
next if filtered_seqs.size < 3
|
189
|
-
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
190
|
-
|
191
|
-
elsif seq_basename =~/RT/i
|
192
|
-
rt_seq1 = {}
|
193
|
-
rt_seq2 = {}
|
194
|
-
seqs.dna_hash.each do |k,v|
|
195
|
-
rt_seq1[k] = v[0,267]
|
196
|
-
rt_seq2[k] = v[267..-1]
|
126
|
+
seqs = ViralSeq::SeqHash.fa(sub_seq)
|
127
|
+
if seqs.size > 3
|
128
|
+
summary_hash["V1V3"] = "#{seqs.size.to_s},NA,NA,NA,NA"
|
129
|
+
FileUtils.cp(sub_seq, filtered_seq_dir)
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
seq_basename.gsub!(/\_P17/i, "_CA")
|
134
|
+
regions.each do |region|
|
135
|
+
if seq_basename =~ /\_#{region}/i
|
136
|
+
process_config[region] = sub_seq
|
197
137
|
end
|
198
|
-
rt1 = ViralSeq::SeqHash.new(rt_seq1)
|
199
|
-
rt2 = ViralSeq::SeqHash.new(rt_seq2)
|
200
|
-
rt1_a3g = rt1.a3g
|
201
|
-
rt2_a3g = rt2.a3g
|
202
|
-
hypermut_seq_rt1 = rt1_a3g[:a3g_seq]
|
203
|
-
hypermut_seq_rt2 = rt2_a3g[:a3g_seq]
|
204
|
-
rt1_stop_codon = rt1.stop_codon(1)[:with_stop_codon]
|
205
|
-
rt2_stop_codon = rt2.stop_codon(2)[:with_stop_codon]
|
206
|
-
hypermut_seq_keys = (hypermut_seq_rt1.dna_hash.keys | hypermut_seq_rt2.dna_hash.keys)
|
207
|
-
stop_codon_seq_keys = (rt1_stop_codon.dna_hash.keys | rt2_stop_codon.dna_hash.keys)
|
208
|
-
reject_keys = (hypermut_seq_keys | stop_codon_seq_keys)
|
209
|
-
filtered_seqs = ViralSeq::SeqHash.new(seqs.dna_hash.reject {|k,v| reject_keys.include?(k) })
|
210
|
-
poisson_minority_cutoff = filtered_seqs.pm
|
211
|
-
fdr_hash = filtered_seqs.fdr
|
212
|
-
summary_hash[:RT] = [
|
213
|
-
seqs.size.to_s,
|
214
|
-
hypermut_seq_keys.size.to_s,
|
215
|
-
stop_codon_seq_keys.size.to_s,
|
216
|
-
filtered_seqs.size.to_s,
|
217
|
-
poisson_minority_cutoff.to_s
|
218
|
-
].join(',')
|
219
|
-
next if filtered_seqs.size < 3
|
220
|
-
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
221
|
-
|
222
|
-
sdrm = filtered_seqs.sdrm_hiv_rt(poisson_minority_cutoff, fdr_hash)
|
223
|
-
point_mutation_list += sdrm[0]
|
224
|
-
linkage_list += sdrm[1]
|
225
|
-
aa_report_list += sdrm[2]
|
226
138
|
end
|
227
139
|
end
|
228
140
|
|
141
|
+
process_config.each do |region_name, path_to_file|
|
142
|
+
seq_basename = File.basename(path_to_file)
|
143
|
+
seq_basename.gsub!(/\_P17/i, "_CA")
|
144
|
+
region = version_config.query_region(region_name.to_s)
|
145
|
+
|
146
|
+
puts "prcessing region: " + region.region
|
147
|
+
|
148
|
+
sh = ViralSeq::SeqHash.fa(path_to_file)
|
149
|
+
|
150
|
+
if sh.size < 3
|
151
|
+
puts "Less than 3 TCS for #{region.region}. Skipped."
|
152
|
+
next
|
153
|
+
end
|
154
|
+
|
155
|
+
run_filtering = sh.filter_for_drm(region)
|
156
|
+
|
157
|
+
summary_hash[region.region] = [
|
158
|
+
sh.size,
|
159
|
+
run_filtering[:a3g_seq].size,
|
160
|
+
run_filtering[:stop_codon_seq].size,
|
161
|
+
run_filtering[:filtered_seq].size,
|
162
|
+
run_filtering[:filtered_seq].pm
|
163
|
+
].join(',')
|
164
|
+
|
165
|
+
if run_filtering[:filtered_seq].size < 1
|
166
|
+
puts "Warning: NO TCS passed the filter for #{region.region}. DRM analysis skipped."
|
167
|
+
next
|
168
|
+
end
|
169
|
+
|
170
|
+
run_filtering[:filtered_seq].write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
171
|
+
|
172
|
+
drm_results = run_filtering[:filtered_seq].drm(region)
|
173
|
+
point_mutation_list += drm_results[0]
|
174
|
+
linkage_list += drm_results[1]
|
175
|
+
aa_report_list += drm_results[2]
|
176
|
+
|
177
|
+
end
|
178
|
+
|
229
179
|
point_mutation_list.each do |record|
|
230
180
|
point_mutation_out.puts record.join(",")
|
231
181
|
end
|
@@ -262,12 +212,13 @@ libs.each do |lib|
|
|
262
212
|
pi_csv.each do |line|
|
263
213
|
line.chomp!
|
264
214
|
data = line.split(",")
|
265
|
-
tag = data[0].split("_")[-1].gsub(/\W/,"")
|
215
|
+
tag = data[0].split("_")[-1].gsub(/\W/,"")
|
266
216
|
summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
|
267
217
|
end
|
268
|
-
|
269
|
-
|
270
|
-
|
218
|
+
regions << "V1V3"
|
219
|
+
regions.each do |region|
|
220
|
+
next unless summary_hash[region]
|
221
|
+
seq_summary_out.puts region.to_s + "," + summary_hash[region]
|
271
222
|
end
|
272
223
|
File.unlink(out_r_csv)
|
273
224
|
end
|
@@ -286,13 +237,13 @@ libs.each do |lib|
|
|
286
237
|
tcs_RT = 0
|
287
238
|
tcs_IN = 0
|
288
239
|
tcs_V1V3 = 0
|
289
|
-
|
240
|
+
tcs_CA = 0
|
290
241
|
pi_RT = "NA"
|
291
242
|
pi_V1V3 = "NA"
|
292
|
-
|
243
|
+
pi_CA = "NA"
|
293
244
|
dist20_RT = "NA"
|
294
245
|
dist20_V1V3 = "NA"
|
295
|
-
|
246
|
+
dist20_CA = "NA"
|
296
247
|
summary_lines.each do |line|
|
297
248
|
data = line.chomp.split(",")
|
298
249
|
if data[0] == "PR"
|
@@ -307,10 +258,10 @@ libs.each do |lib|
|
|
307
258
|
tcs_V1V3 = data[1].to_i
|
308
259
|
pi_V1V3 = data[6].to_f
|
309
260
|
dist20_V1V3 = data[7].to_f
|
310
|
-
elsif data[0] == "
|
311
|
-
|
312
|
-
|
313
|
-
|
261
|
+
elsif data[0] == "CA"
|
262
|
+
tcs_CA = data[4].to_i
|
263
|
+
pi_CA = data[6].to_f
|
264
|
+
dist20_CA = data[7].to_f
|
314
265
|
end
|
315
266
|
end
|
316
267
|
|
@@ -334,6 +285,7 @@ libs.each do |lib|
|
|
334
285
|
sdrm_PR = ""
|
335
286
|
sdrm_RT = ""
|
336
287
|
sdrm_IN = ""
|
288
|
+
sdrm_CA = ""
|
337
289
|
sdrm_lines.each do |line|
|
338
290
|
data = line.chomp.split(",")
|
339
291
|
next if data[-1] == "*"
|
@@ -343,6 +295,8 @@ libs.each do |lib|
|
|
343
295
|
sdrm_RT += abstract_line(data)
|
344
296
|
elsif data[0] == "IN"
|
345
297
|
sdrm_IN += abstract_line(data)
|
298
|
+
elsif data[0] == "CA"
|
299
|
+
sdrm_CA += abstract_line(data)
|
346
300
|
end
|
347
301
|
end
|
348
302
|
|
@@ -352,13 +306,13 @@ libs.each do |lib|
|
|
352
306
|
tcs_RT: tcs_RT,
|
353
307
|
tcs_IN: tcs_IN,
|
354
308
|
tcs_V1V3: tcs_V1V3,
|
355
|
-
|
309
|
+
tcs_CA: tcs_CA,
|
356
310
|
pi_RT: pi_RT,
|
357
311
|
pi_V1V3: pi_V1V3,
|
358
|
-
|
312
|
+
pi_CA: pi_CA,
|
359
313
|
dist20_RT: dist20_RT,
|
360
314
|
dist20_V1V3: dist20_V1V3,
|
361
|
-
|
315
|
+
dist20_CA: dist20_CA,
|
362
316
|
recency: recency,
|
363
317
|
dpi: dpi,
|
364
318
|
dpi_lwr: dpi_lwr,
|
@@ -366,7 +320,8 @@ libs.each do |lib|
|
|
366
320
|
possible_dual_infection: possible_dual_infection,
|
367
321
|
sdrm_PR: sdrm_PR,
|
368
322
|
sdrm_RT: sdrm_RT,
|
369
|
-
sdrm_IN: sdrm_IN
|
323
|
+
sdrm_IN: sdrm_IN,
|
324
|
+
sdrm_CA: sdrm_CA
|
370
325
|
]
|
371
326
|
|
372
327
|
summary_json_out.puts JSON.pretty_generate(summary_json)
|
@@ -389,7 +344,7 @@ libs.each do |lib|
|
|
389
344
|
file: point_mutation_file,
|
390
345
|
newPDF: "",
|
391
346
|
table_width: [60,50,70,65,65,60,75,70,70,70,45],
|
392
|
-
extra_text: "* Mutation below
|
347
|
+
extra_text: "* Mutation below FDR significance cut-off for minority mutations"
|
393
348
|
},
|
394
349
|
{
|
395
350
|
name: "linkage",
|
@@ -397,7 +352,6 @@ libs.each do |lib|
|
|
397
352
|
file: linkage_file,
|
398
353
|
newPDF: "",
|
399
354
|
table_width: [55,50,250,60,80,80,80,45],
|
400
|
-
extra_text: "* Mutation below Poisson cut-off for minority mutations"
|
401
355
|
}
|
402
356
|
]
|
403
357
|
|
@@ -433,7 +387,7 @@ libs.each do |lib|
|
|
433
387
|
pdf << CombinePDF.load(out_r_pdf) if File.exist?(out_r_pdf)
|
434
388
|
|
435
389
|
pdf.number_pages location: [:bottom_right],
|
436
|
-
number_format: "Swanstrom\'s lab HIV SDRM Pipeline, version #{
|
390
|
+
number_format: "Swanstrom\'s lab HIV SDRM Pipeline, TCS version #{ViralSeq::TCS_VERSION}, DRM config version #{version_config.drm_version}, by S.Z. and M.U.C. Page %s",
|
437
391
|
font_size: 6,
|
438
392
|
opacity: 0.5
|
439
393
|
|
@@ -448,4 +402,6 @@ log_file = File.join(File.dirname(indir), "sdrm_log.json")
|
|
448
402
|
|
449
403
|
File.open(log_file, 'w') { |f| f.puts JSON.pretty_generate(log) }
|
450
404
|
|
405
|
+
puts "TCS SDRM pipeline done. Exit 0."
|
406
|
+
|
451
407
|
FileUtils.touch(File.join(outdir, ".done"))
|
@@ -0,0 +1,83 @@
|
|
1
|
+
|
2
|
+
module ViralSeq
|
3
|
+
|
4
|
+
# DRM configuration for each region
|
5
|
+
|
6
|
+
class DrmRegionConfig
|
7
|
+
|
8
|
+
# initialize DRM region configuration
|
9
|
+
# @param drm_version [String] version of the instance of DrmVersion
|
10
|
+
# @param region [String] name of the region
|
11
|
+
# @param drm_class [Array] classes of DRMs at this region
|
12
|
+
# @param drm_range [Hash] DRM range for each class of DRMs at this region
|
13
|
+
# @param drm_list [Hash] List of detailed DRM mutations for each DRM classes at this region
|
14
|
+
# @param seq_coord [Hash] sequence coordinates of this region ("minimum", "maximum", and an optional "gap")
|
15
|
+
# @param ref_info [Hash] information of the reference genome, including sequence coordinates on HXB2
|
16
|
+
def initialize(drm_version, region, drm_class, drm_range, drm_list, seq_coord, ref_info)
|
17
|
+
@drm_version = drm_version
|
18
|
+
@region = region
|
19
|
+
@drm_class = drm_class
|
20
|
+
@drm_range = drm_range
|
21
|
+
@drm_list = drm_list
|
22
|
+
@seq_coord = seq_coord
|
23
|
+
@ref_info = ref_info
|
24
|
+
end
|
25
|
+
|
26
|
+
attr_accessor :drm_version, :region, :drm_class, :drm_range, :drm_list, :seq_coord, :ref_info
|
27
|
+
|
28
|
+
# summarize the DRM information for the output as JSON for the specific version
|
29
|
+
# @return [Hash] JSON hash of DRM information for each position
|
30
|
+
def drm_json
|
31
|
+
sdrm = self.drm_list
|
32
|
+
json_hash = {}
|
33
|
+
sdrm.each do |drm_class, drms|
|
34
|
+
json_hash[drm_class] = []
|
35
|
+
drms.each do |pos, muts|
|
36
|
+
mutation = {}
|
37
|
+
mutation[:position] = pos
|
38
|
+
mutation[:wildtypeCodon] = muts[0]
|
39
|
+
mutation[:mutationCodons] = muts[1]
|
40
|
+
json_hash[drm_class] << mutation
|
41
|
+
end
|
42
|
+
end
|
43
|
+
return json_hash
|
44
|
+
end
|
45
|
+
|
46
|
+
# calculate the length of R1 and R2 based on the sequence coordinates
|
47
|
+
# @return [Hash, nil] {r1_length: [Integer], r2_length: [Integer]}, or nil when the sequence coordinates have no "gap"
|
48
|
+
def r1_r2_length
|
49
|
+
seq_coord = self.seq_coord
|
50
|
+
return nil unless seq_coord["gap"]
|
51
|
+
|
52
|
+
r1_length = seq_coord["gap"]["minimum"] - seq_coord["minimum"]
|
53
|
+
r2_length = seq_coord["maximum"] - seq_coord["gap"]["maximum"]
|
54
|
+
|
55
|
+
return {r1_length: r1_length, r2_length: r2_length}
|
56
|
+
end #end of #r1_r2_length
|
57
|
+
|
58
|
+
|
59
|
+
# determine the reading frame number based on the sequence coordinates
|
60
|
+
# @return [Array<Integer>] reading frame numbers (each 0, 1 or 2): [n1] when there is no gap, [n1, n2] when there is a gap
|
61
|
+
def get_reading_frame_number
|
62
|
+
m1 = (self.seq_coord["minimum"] - self.ref_info["ref_coord"][0]) % 3
|
63
|
+
if m1.zero?
|
64
|
+
n1 = 0
|
65
|
+
else
|
66
|
+
n1 = 3 - m1
|
67
|
+
end
|
68
|
+
|
69
|
+
if seq_coord["gap"]
|
70
|
+
m2 = (self.seq_coord["gap"]["maximum"] + 1 - self.ref_info["ref_coord"][0]) % 3
|
71
|
+
if m2.zero?
|
72
|
+
n2 = 0
|
73
|
+
else
|
74
|
+
n2 = 3 - m2
|
75
|
+
end
|
76
|
+
return [n1, n2]
|
77
|
+
else
|
78
|
+
return [n1]
|
79
|
+
end
|
80
|
+
end #end get_reading_frame_number
|
81
|
+
|
82
|
+
end
|
83
|
+
end
|
@@ -0,0 +1,120 @@
|
|
1
|
+
|
2
|
+
module ViralSeq
|
3
|
+
|
4
|
+
# DRM version configuration.
|
5
|
+
# The configuration file is located at `lib/viral_seq/util/drm_versions_config.json`
|
6
|
+
|
7
|
+
class DrmVersion
|
8
|
+
|
9
|
+
# initialize a ViralSeq::DrmVersion instance
|
10
|
+
# @param drm_version [String] version of the instance of DrmVersion
|
11
|
+
# @param drm_range [Hash] region/class of DRM and the range of amino acid positions included in this version.
|
12
|
+
# @param seq_coord [Hash] region and its amplicon positions on HXB2 reference
|
13
|
+
# @param seq_drm_correlation [Hash] correlation of sequenced region and DRM class
|
14
|
+
# @param ref_info [Hash] information of the reference genome, including sequence coordinates on HXB2
|
15
|
+
def initialize(drm_version, drm_range, seq_coord, seq_drm_correlation, ref_info)
|
16
|
+
@drm_version = drm_version
|
17
|
+
@drm_range = drm_range
|
18
|
+
@seq_coord = seq_coord
|
19
|
+
@seq_drm_correlation = seq_drm_correlation
|
20
|
+
@ref_info = ref_info
|
21
|
+
end
|
22
|
+
|
23
|
+
attr_accessor :drm_version, :drm_range, :seq_coord, :seq_drm_correlation, :ref_info
|
24
|
+
|
25
|
+
# construct an instance of ViralSeq::DrmVersion
|
26
|
+
# @param version_config_hash [Hash] json hash of stored version configurations.
|
27
|
+
# @return [ViralSeq::DrmVersion] an instance of constructed DrmVersion
|
28
|
+
|
29
|
+
def self.construct(version_config_hash)
|
30
|
+
drm_version = version_config_hash["version"]
|
31
|
+
drm_range = version_config_hash["DRM_range"]
|
32
|
+
seq_coord = version_config_hash["seq_coord"]
|
33
|
+
seq_drm_correlation = version_config_hash["seq_drm_correlation"]
|
34
|
+
ref_info = version_config_hash["ref_info"]
|
35
|
+
ViralSeq::DrmVersion.new(drm_version, drm_range, seq_coord, seq_drm_correlation, ref_info)
|
36
|
+
end
|
37
|
+
|
38
|
+
# construct a specific version of ViralSeq::DrmVersion
|
39
|
+
# @param v [String] version string
|
40
|
+
# @return [ViralSeq::DrmVersion] an instance of constructed DrmVersion
|
41
|
+
|
42
|
+
def self.config_version(v="v1")
|
43
|
+
v = v.downcase
|
44
|
+
v = "v1" if v == "v2"
|
45
|
+
|
46
|
+
drm_config = JSON.parse(
|
47
|
+
File.read(
|
48
|
+
File.join( ViralSeq.root, 'viral_seq', 'util', 'drm_versions_config.json')
|
49
|
+
)
|
50
|
+
)
|
51
|
+
|
52
|
+
drm_versions = {}
|
53
|
+
|
54
|
+
drm_config.each do |config|
|
55
|
+
drm_versions[config["version"]] = ViralSeq::DrmVersion.construct(config)
|
56
|
+
end
|
57
|
+
|
58
|
+
if drm_versions[v]
|
59
|
+
drm_versions[v]
|
60
|
+
else
|
61
|
+
abort (
|
62
|
+
"Version '#{v}' config not found. Program aborted. \nCurrent supported versions '#{drm_versions.keys.sort.join(", ")}'\nCheck documentations for details".red
|
63
|
+
)
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
# construct a ViralSeq::DrmRegionConfig instance from a specific version
|
68
|
+
# @param region [String] name of the region
|
69
|
+
# @return [ViralSeq::DrmRegionConfig] an instance of DrmRegionConfig
|
70
|
+
|
71
|
+
def query_region(region)
|
72
|
+
region = region.to_s.upcase
|
73
|
+
drm_classes = self.seq_drm_correlation[region]
|
74
|
+
|
75
|
+
if drm_classes.nil?
|
76
|
+
abort "Region not recognized by the specific DRM config version. Program aborted."
|
77
|
+
end
|
78
|
+
|
79
|
+
drm_range = {}
|
80
|
+
drm_list = {}
|
81
|
+
|
82
|
+
drm_classes.each do |drm_class|
|
83
|
+
drm_range[drm_class] = self.drm_range[drm_class]
|
84
|
+
drm_list_single_class = ViralSeq::DRMs.sdrm_hash(drm_class)
|
85
|
+
|
86
|
+
drm_list[drm_class] = drm_list_single_class.select { |k, _v| drm_range[drm_class].include? k }
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
|
91
|
+
seq_coord = self.seq_coord[region]
|
92
|
+
|
93
|
+
ref_info = {}
|
94
|
+
ref_info["ref_type"] = self.ref_info["ref_type"]
|
95
|
+
ref_info["ref_coord"] = self.ref_info["ref_coord"][region]
|
96
|
+
|
97
|
+
|
98
|
+
ViralSeq::DrmRegionConfig.new(
|
99
|
+
self.drm_version, region, drm_classes, drm_range, drm_list, seq_coord, ref_info
|
100
|
+
)
|
101
|
+
end
|
102
|
+
|
103
|
+
# summarize the DRM information for the output as JSON
|
104
|
+
# @return [Hash] JSON hash of DRM information for each position
|
105
|
+
|
106
|
+
def pull_drm_json
|
107
|
+
|
108
|
+
summary_json_hash = {}
|
109
|
+
|
110
|
+
self.seq_drm_correlation.keys.each do |region|
|
111
|
+
summary_json_hash = summary_json_hash.merge query_region(region).drm_json
|
112
|
+
end
|
113
|
+
|
114
|
+
summary_json_hash
|
115
|
+
|
116
|
+
end
|
117
|
+
|
118
|
+
end # end of class
|
119
|
+
|
120
|
+
end
|