viral_seq 1.1.2 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +13 -2
- data/bin/tcs_sdrm +402 -0
- data/lib/viral_seq.rb +1 -0
- data/lib/viral_seq/recency.rb +52 -0
- data/lib/viral_seq/sdrm.rb +2 -2
- data/lib/viral_seq/seq_hash.rb +24 -4
- data/lib/viral_seq/version.rb +1 -1
- metadata +5 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: dbcddd0679b712b28592875aa18e38269ccbac5b85113f53873d4cedc5572b04
|
4
|
+
data.tar.gz: 7268e596a2c40f7cdd2c815ccf5cdb40663c096c709aba6ee2e0dc4bc9a07542
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: a689ed94201b19ee258fb07f73dd89ed2c8fd297b9580ba720d85ef2a16c5a38fdfed326dbdcc987f0913b4c9ab2aa060683a770df48baa4b1d657d63de35152
|
7
|
+
data.tar.gz: 0b8065ae813f66b88fda3d7788c20718aa0db1a4f723d6831e948157b682a81fd1ae44a1d9043ebfb046df91c072bbc16db41ddd42e272d3f6c74a13fa473836
|
data/README.md
CHANGED
@@ -109,7 +109,7 @@ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
|
|
109
109
|
Further filter out sequences with Apobec3g/f hypermutations
|
110
110
|
|
111
111
|
```ruby
|
112
|
-
qc_seqhash = qc_seqhash.a3g
|
112
|
+
qc_seqhash = qc_seqhash.a3g[:filtered_seq]
|
113
113
|
```
|
114
114
|
|
115
115
|
Calculate nucleotide diversity π
|
@@ -137,11 +137,22 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
|
|
137
137
|
|
138
138
|
## Updates
|
139
139
|
|
140
|
+
### Version 1.2.0-05102021
|
141
|
+
|
142
|
+
1. Added `tcs_sdrm` pipeline as an executable.
|
143
|
+
`tcs_sdrm` processes `tcs`-processed HIV MPID-NGS data for drug resistance mutations, recency and phylogenetic analysis.
|
144
|
+
|
145
|
+
2. Added function ViralSeq::SeqHash#sample.
|
146
|
+
|
147
|
+
3. Added recency determining function `ViralSeq::Recency::define`
|
148
|
+
|
149
|
+
4. Fixed a few bugs related to `tcs_sdrm`.
|
150
|
+
|
140
151
|
### Version 1.1.2-04262021
|
141
152
|
|
142
153
|
1. Added function `ViralSeq::DRMs.sdrm_json` to export SDRM as json object.
|
143
154
|
2. Added a random string to the temp file names for `muscle_bio` to avoid issues when running scripts in parallel.
|
144
|
-
3. Added `--keep-original` flag to the `tcs` pipeline.
|
155
|
+
3. Added `--keep-original` flag to the `tcs` pipeline.
|
145
156
|
|
146
157
|
### Version 1.1.1-04012021
|
147
158
|
|
data/bin/tcs_sdrm
ADDED
@@ -0,0 +1,402 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# tcs/sdrm pipeline for HIV-1 drug resistance mutation and recency
|
3
|
+
#
|
4
|
+
# command example:
|
5
|
+
# $ tcs_sdrm libs_dir
|
6
|
+
#
|
7
|
+
# libs_dir file structure:
|
8
|
+
# libs_dir
|
9
|
+
# ├── lib1
|
10
|
+
# ├── lib1_RT
|
11
|
+
# ├── lib1_PR
|
12
|
+
# ├── lib1_IN
|
13
|
+
# ├── lib1_V1V3
|
14
|
+
# ├── lib2
|
15
|
+
# ├── lib2_RT
|
16
|
+
# ├── lib2_PR
|
17
|
+
# ├── lib2_IN
|
18
|
+
# ├── lib2_V1V3
|
19
|
+
# ├── ...
|
20
|
+
#
|
21
|
+
# output data in a new dir as 'libs_dir_SDRM'
|
22
|
+
|
23
|
+
require 'viral_seq'
|
24
|
+
require 'json'
|
25
|
+
require 'csv'
|
26
|
+
require 'fileutils'
|
27
|
+
require 'prawn'
|
28
|
+
require 'prawn/table'
|
29
|
+
require 'combine_pdf'
|
30
|
+
|
31
|
+
unless ARGV[0] && File.directory?(ARGV[0])
|
32
|
+
abort "No sequence data provided. `tcs_sdrm` pipeline aborted. "
|
33
|
+
end
|
34
|
+
|
35
|
+
# Condense one record of the substitution CSV into a short summary string.
#
# Reads the wild type (index 3), AA position (index 2) and mutation (index 4)
# to build the mutation label, then the percentage and its 95% CI bounds
# (indexes 6, 7, 8), each converted from a fraction to a percent rounded to
# two decimals.
#
# @param data [Array<String>] fields of one CSV line, already split on ","
# @return [String] formatted as "<wt><pos><mut>:<pct>(<low>-<high>); "
def abstract_line(data)
  mutation = data[3] + data[2] + data[4]
  # values_at yields nil for missing fields, and nil.to_f is 0.0
  pct, ci_low, ci_high = data.values_at(6, 7, 8).map { |fraction| (fraction.to_f * 100).round(2) }
  "#{mutation}:#{pct}(#{ci_low}-#{ci_high}); "
end
|
41
|
+
|
42
|
+
# run params
|
43
|
+
log = []
|
44
|
+
|
45
|
+
log << { time: Time.now }
|
46
|
+
log << { viral_seq_version: ViralSeq::VERSION }
|
47
|
+
log << { tcs_version: ViralSeq::TCS_VERSION }
|
48
|
+
r_version = `R --version`.split("\n")[0]
|
49
|
+
log << { R_version: r_version}
|
50
|
+
sdrm_list = {}
|
51
|
+
sdrm_list[:nrti] = ViralSeq::DRMs.sdrm_json(:nrti)
|
52
|
+
sdrm_list[:nnrti] = ViralSeq::DRMs.sdrm_json(:nnrti)
|
53
|
+
sdrm_list[:hiv_pr] = ViralSeq::DRMs.sdrm_json(:hiv_pr)
|
54
|
+
sdrm_list[:hiv_in] = ViralSeq::DRMs.sdrm_json(:hiv_in)
|
55
|
+
log << { sdrm_list: sdrm_list }
|
56
|
+
|
57
|
+
# input dir
|
58
|
+
indir = ARGV[0]
|
59
|
+
libs = Dir[indir + "/*"]
|
60
|
+
log << { processed_libs: libs }
|
61
|
+
|
62
|
+
#output dir
|
63
|
+
outdir = indir + "_SDRM"
|
64
|
+
Dir.mkdir(outdir) unless File.directory?(outdir)
|
65
|
+
|
66
|
+
libs.each do |lib|
|
67
|
+
|
68
|
+
r_script = ViralSeq::R_SCRIPT.dup
|
69
|
+
|
70
|
+
next unless File.directory?(lib)
|
71
|
+
|
72
|
+
lib_name = File.basename(lib)
|
73
|
+
out_lib_dir = File.join(outdir, lib_name)
|
74
|
+
Dir.mkdir(out_lib_dir) unless File.directory?(out_lib_dir)
|
75
|
+
|
76
|
+
sub_seq_files = Dir[lib + "/*"]
|
77
|
+
|
78
|
+
seq_summary_file = File.join(out_lib_dir, (lib_name + "_summary.csv"))
|
79
|
+
seq_summary_out = File.open(seq_summary_file, "w")
|
80
|
+
seq_summary_out.puts 'Region,TCS,TCS with A3G/F hypermutation,TCS with stop codon,' +
|
81
|
+
'TCS w/o hypermutation and stop codon,' +
|
82
|
+
'Poisson cutoff for minority mutation (>=),Pi,Dist20'
|
83
|
+
|
84
|
+
point_mutation_file = File.join(out_lib_dir, (lib_name + "_substitution.csv"))
|
85
|
+
point_mutation_out = File.open(point_mutation_file, "w")
|
86
|
+
point_mutation_out.puts "region,TCS,AA position,wild type,mutation," +
|
87
|
+
"number,percentage,95% CI low, 95% CI high, notes"
|
88
|
+
|
89
|
+
linkage_file = File.join(out_lib_dir, (lib_name + "_linkage.csv"))
|
90
|
+
linkage_out = File.open(linkage_file, "w")
|
91
|
+
linkage_out.puts "region,TCS,mutation linkage,number," +
|
92
|
+
"percentage,95% CI low, 95% CI high, notes"
|
93
|
+
|
94
|
+
aa_report_file = File.join(out_lib_dir, (lib_name + "_aa.csv"))
|
95
|
+
aa_report_out = File.open(aa_report_file, "w")
|
96
|
+
aa_report_out.puts "region,ref.aa.positions,TCS.number," +
|
97
|
+
ViralSeq::AMINO_ACID_LIST.join(",")
|
98
|
+
|
99
|
+
summary_json_file = File.join(out_lib_dir, (lib_name + "_summary.json"))
|
100
|
+
summary_json_out = File.open(summary_json_file,"w")
|
101
|
+
|
102
|
+
filtered_seq_dir = File.join(out_lib_dir, (lib_name + "_filtered_seq"))
|
103
|
+
Dir.mkdir(filtered_seq_dir) unless File.directory?(filtered_seq_dir)
|
104
|
+
|
105
|
+
aln_seq_dir = File.join(out_lib_dir, (lib_name + "_aln_seq"))
|
106
|
+
Dir.mkdir(aln_seq_dir) unless File.directory?(aln_seq_dir)
|
107
|
+
|
108
|
+
point_mutation_list = []
|
109
|
+
linkage_list = []
|
110
|
+
aa_report_list = []
|
111
|
+
summary_hash = {}
|
112
|
+
|
113
|
+
sub_seq_files.each do |sub_seq|
|
114
|
+
seq_basename = File.basename(sub_seq)
|
115
|
+
seqs = ViralSeq::SeqHash.fa(sub_seq)
|
116
|
+
next if seqs.size < 3
|
117
|
+
if seq_basename =~ /V1V3/i
|
118
|
+
summary_hash[:V1V3] = "#{seqs.size.to_s},NA,NA,NA,NA"
|
119
|
+
FileUtils.cp(sub_seq, filtered_seq_dir)
|
120
|
+
elsif seq_basename =~ /PR/i
|
121
|
+
a3g_check = seqs.a3g
|
122
|
+
a3g_seqs = a3g_check[:a3g_seq]
|
123
|
+
a3g_filtered_seqs = a3g_check[:filtered_seq]
|
124
|
+
stop_codon_check = a3g_filtered_seqs.stop_codon
|
125
|
+
stop_codon_seqs = stop_codon_check[:with_stop_codon]
|
126
|
+
filtered_seqs = stop_codon_check[:without_stop_codon]
|
127
|
+
poisson_minority_cutoff = filtered_seqs.pm
|
128
|
+
summary_hash[:PR] = [
|
129
|
+
seqs.size.to_s,
|
130
|
+
a3g_seqs.size.to_s,
|
131
|
+
stop_codon_seqs.size.to_s,
|
132
|
+
filtered_seqs.size.to_s,
|
133
|
+
poisson_minority_cutoff.to_s
|
134
|
+
].join(',')
|
135
|
+
next if filtered_seqs.size < 3
|
136
|
+
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
137
|
+
|
138
|
+
sdrm = filtered_seqs.sdrm_hiv_pr(poisson_minority_cutoff)
|
139
|
+
point_mutation_list += sdrm[0]
|
140
|
+
linkage_list += sdrm[1]
|
141
|
+
aa_report_list += sdrm[2]
|
142
|
+
|
143
|
+
elsif seq_basename =~/IN/i
|
144
|
+
a3g_check = seqs.a3g
|
145
|
+
a3g_seqs = a3g_check[:a3g_seq]
|
146
|
+
a3g_filtered_seqs = a3g_check[:filtered_seq]
|
147
|
+
stop_codon_check = a3g_filtered_seqs.stop_codon(2)
|
148
|
+
stop_codon_seqs = stop_codon_check[:with_stop_codon]
|
149
|
+
filtered_seqs = stop_codon_check[:without_stop_codon]
|
150
|
+
poisson_minority_cutoff = filtered_seqs.pm
|
151
|
+
summary_hash[:IN] = [
|
152
|
+
seqs.size.to_s,
|
153
|
+
a3g_seqs.size.to_s,
|
154
|
+
stop_codon_seqs.size.to_s,
|
155
|
+
filtered_seqs.size.to_s,
|
156
|
+
poisson_minority_cutoff.to_s
|
157
|
+
].join(',')
|
158
|
+
next if filtered_seqs.size < 3
|
159
|
+
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
160
|
+
|
161
|
+
sdrm = filtered_seqs.sdrm_hiv_in(poisson_minority_cutoff)
|
162
|
+
point_mutation_list += sdrm[0]
|
163
|
+
linkage_list += sdrm[1]
|
164
|
+
aa_report_list += sdrm[2]
|
165
|
+
|
166
|
+
elsif seq_basename =~/RT/i
|
167
|
+
rt_seq1 = {}
|
168
|
+
rt_seq2 = {}
|
169
|
+
seqs.dna_hash.each do |k,v|
|
170
|
+
rt_seq1[k] = v[0,267]
|
171
|
+
rt_seq2[k] = v[267..-1]
|
172
|
+
end
|
173
|
+
rt1 = ViralSeq::SeqHash.new(rt_seq1)
|
174
|
+
rt2 = ViralSeq::SeqHash.new(rt_seq2)
|
175
|
+
rt1_a3g = rt1.a3g
|
176
|
+
rt2_a3g = rt2.a3g
|
177
|
+
hypermut_seq_rt1 = rt1_a3g[:a3g_seq]
|
178
|
+
hypermut_seq_rt2 = rt2_a3g[:a3g_seq]
|
179
|
+
rt1_stop_codon = rt1.stop_codon(1)[:with_stop_codon]
|
180
|
+
rt2_stop_codon = rt2.stop_codon(2)[:with_stop_codon]
|
181
|
+
hypermut_seq_keys = (hypermut_seq_rt1.dna_hash.keys | hypermut_seq_rt2.dna_hash.keys)
|
182
|
+
stop_codon_seq_keys = (rt1_stop_codon.dna_hash.keys | rt2_stop_codon.dna_hash.keys)
|
183
|
+
reject_keys = (hypermut_seq_keys | stop_codon_seq_keys)
|
184
|
+
filtered_seqs = ViralSeq::SeqHash.new(seqs.dna_hash.reject {|k,v| reject_keys.include?(k) })
|
185
|
+
poisson_minority_cutoff = filtered_seqs.pm
|
186
|
+
summary_hash[:RT] = [
|
187
|
+
seqs.size.to_s,
|
188
|
+
hypermut_seq_keys.size.to_s,
|
189
|
+
stop_codon_seq_keys.size.to_s,
|
190
|
+
filtered_seqs.size.to_s,
|
191
|
+
poisson_minority_cutoff.to_s
|
192
|
+
].join(',')
|
193
|
+
next if filtered_seqs.size < 3
|
194
|
+
filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
|
195
|
+
|
196
|
+
sdrm = filtered_seqs.sdrm_hiv_rt(poisson_minority_cutoff)
|
197
|
+
point_mutation_list += sdrm[0]
|
198
|
+
linkage_list += sdrm[1]
|
199
|
+
aa_report_list += sdrm[2]
|
200
|
+
end
|
201
|
+
end
|
202
|
+
|
203
|
+
point_mutation_list.each do |record|
|
204
|
+
point_mutation_out.puts record.join(",")
|
205
|
+
end
|
206
|
+
linkage_list.each do |record|
|
207
|
+
linkage_out.puts record.join(",")
|
208
|
+
end
|
209
|
+
aa_report_list.each do |record|
|
210
|
+
aa_report_out.puts record.join(",")
|
211
|
+
end
|
212
|
+
|
213
|
+
filtered_seq_files = Dir[filtered_seq_dir + "/*"]
|
214
|
+
|
215
|
+
out_r_csv = File.join(out_lib_dir, (lib_name + "_pi.csv"))
|
216
|
+
out_r_pdf = File.join(out_lib_dir, (lib_name + "_pi.pdf"))
|
217
|
+
|
218
|
+
if filtered_seq_files.size > 0
|
219
|
+
filtered_seq_files.each do |seq_file|
|
220
|
+
filtered_sh = ViralSeq::SeqHash.fa(seq_file)
|
221
|
+
next if filtered_sh.size < 3
|
222
|
+
aligned_sh = filtered_sh.random_select(1000).align
|
223
|
+
aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
|
224
|
+
end
|
225
|
+
|
226
|
+
r_script.gsub!(/PATH_TO_FASTA/,aln_seq_dir)
|
227
|
+
File.unlink(out_r_csv) if File.exist?(out_r_csv)
|
228
|
+
File.unlink(out_r_pdf) if File.exist?(out_r_pdf)
|
229
|
+
r_script.gsub!(/OUTPUT_CSV/,out_r_csv)
|
230
|
+
r_script.gsub!(/OUTPUT_PDF/,out_r_pdf)
|
231
|
+
r_script_file = File.join(out_lib_dir, "/pi.R")
|
232
|
+
File.open(r_script_file,"w") {|line| line.puts r_script}
|
233
|
+
print `Rscript #{r_script_file} 1> /dev/null 2> /dev/null`
|
234
|
+
if File.exist?(out_r_csv)
|
235
|
+
pi_csv = File.readlines(out_r_csv)
|
236
|
+
pi_csv.each do |line|
|
237
|
+
line.chomp!
|
238
|
+
data = line.split(",")
|
239
|
+
tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
|
240
|
+
summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
|
241
|
+
end
|
242
|
+
[:PR, :RT, :IN, :V1V3].each do |regions|
|
243
|
+
next unless summary_hash[regions]
|
244
|
+
seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
|
245
|
+
end
|
246
|
+
File.unlink(out_r_csv)
|
247
|
+
end
|
248
|
+
File.unlink(r_script_file)
|
249
|
+
end
|
250
|
+
|
251
|
+
seq_summary_out.close
|
252
|
+
point_mutation_out.close
|
253
|
+
linkage_out.close
|
254
|
+
aa_report_out.close
|
255
|
+
|
256
|
+
summary_lines = File.readlines(seq_summary_file)
|
257
|
+
summary_lines.shift
|
258
|
+
|
259
|
+
tcs_PR = 0
|
260
|
+
tcs_RT = 0
|
261
|
+
tcs_IN = 0
|
262
|
+
tcs_V1V3 = 0
|
263
|
+
pi_RT = 0.0
|
264
|
+
pi_V1V3 = 0.0
|
265
|
+
dist20_RT = 0.0
|
266
|
+
dist20_V1V3 = 0.0
|
267
|
+
summary_lines.each do |line|
|
268
|
+
data = line.chomp.split(",")
|
269
|
+
if data[0] == "PR"
|
270
|
+
tcs_PR = data[4].to_i
|
271
|
+
elsif data[0] == "RT"
|
272
|
+
tcs_RT = data[4].to_i
|
273
|
+
pi_RT = data[6].to_f
|
274
|
+
dist20_RT = data[7].to_f
|
275
|
+
elsif data[0] == "IN"
|
276
|
+
tcs_IN = data[4].to_i
|
277
|
+
elsif data[0] == "V1V3"
|
278
|
+
tcs_V1V3 = data[1].to_i
|
279
|
+
pi_V1V3 = data[6].to_f
|
280
|
+
dist20_V1V3 = data[7].to_f
|
281
|
+
end
|
282
|
+
end
|
283
|
+
|
284
|
+
recency = ViralSeq::Recency.define(
|
285
|
+
tcs_RT: tcs_RT,
|
286
|
+
tcs_V1V3: tcs_V1V3,
|
287
|
+
pi_RT: pi_RT,
|
288
|
+
dist20_RT: dist20_RT,
|
289
|
+
pi_V1V3: pi_V1V3,
|
290
|
+
dist20_V1V3: dist20_V1V3
|
291
|
+
)
|
292
|
+
|
293
|
+
sdrm_lines = File.readlines(point_mutation_file)
|
294
|
+
sdrm_lines.shift
|
295
|
+
sdrm_PR = ""
|
296
|
+
sdrm_RT = ""
|
297
|
+
sdrm_IN = ""
|
298
|
+
sdrm_lines.each do |line|
|
299
|
+
data = line.chomp.split(",")
|
300
|
+
next if data[-1] == "*"
|
301
|
+
if data[0] == "PR"
|
302
|
+
sdrm_PR += abstract_line(data)
|
303
|
+
elsif data[0] =~ /NRTI/
|
304
|
+
sdrm_RT += abstract_line(data)
|
305
|
+
elsif data[0] == "IN"
|
306
|
+
sdrm_IN += abstract_line(data)
|
307
|
+
end
|
308
|
+
end
|
309
|
+
|
310
|
+
# Per-library summary written to <lib_name>_summary.json as a one-element
# array holding a single hash. pi_V1V3 is exported alongside pi_RT,
# dist20_RT and dist20_V1V3 so that every diversity metric passed to
# ViralSeq::Recency.define is also available to consumers of the JSON.
summary_json = [
  sample_id: lib_name,
  tcs_PR: tcs_PR,
  tcs_RT: tcs_RT,
  tcs_IN: tcs_IN,
  tcs_V1V3: tcs_V1V3,
  pi_RT: pi_RT,
  pi_V1V3: pi_V1V3,
  dist20_RT: dist20_RT,
  dist20_V1V3: dist20_V1V3,
  recency: recency,
  sdrm_PR: sdrm_PR,
  sdrm_RT: sdrm_RT,
  sdrm_IN: sdrm_IN
]
|
324
|
+
|
325
|
+
summary_json_out.puts JSON.pretty_generate(summary_json)
|
326
|
+
summary_json_out.close
|
327
|
+
|
328
|
+
csvs = [
|
329
|
+
{
|
330
|
+
name: "summary",
|
331
|
+
title: "Summary",
|
332
|
+
file: seq_summary_file,
|
333
|
+
newPDF: "",
|
334
|
+
table_width: [65,55,110,110,110,110,60,60],
|
335
|
+
extra_text: ""
|
336
|
+
},
|
337
|
+
{
|
338
|
+
name: "substitution",
|
339
|
+
title: "Surveillance Drug Resistance Mutations",
|
340
|
+
file: point_mutation_file,
|
341
|
+
newPDF: "",
|
342
|
+
table_width: [65,55,85,80,60,65,85,85,85,45],
|
343
|
+
extra_text: "* Mutation below Poisson cut-off for minority mutations"
|
344
|
+
},
|
345
|
+
{
|
346
|
+
name: "linkage",
|
347
|
+
title: "Mutation Linkage",
|
348
|
+
file: linkage_file,
|
349
|
+
newPDF: "",
|
350
|
+
table_width: [55,50,250,60,80,80,80,45],
|
351
|
+
extra_text: "* Mutation below Poisson cut-off for minority mutations"
|
352
|
+
}
|
353
|
+
]
|
354
|
+
|
355
|
+
csvs.each do |csv|
|
356
|
+
file_name = File.join(out_lib_dir, (csv[:name] + ".pdf"))
|
357
|
+
next unless File.exist? csv[:file]
|
358
|
+
Prawn::Document.generate(file_name, :page_layout => :landscape) do |pdf|
|
359
|
+
pdf.text((File.basename(lib, ".*") + ': ' + csv[:title]),
|
360
|
+
:size => 20,
|
361
|
+
:align => :center,
|
362
|
+
:style => :bold)
|
363
|
+
pdf.move_down 20
|
364
|
+
table_data = CSV.open(csv[:file]).to_a
|
365
|
+
header = table_data.first
|
366
|
+
pdf.table(table_data,
|
367
|
+
:header => header,
|
368
|
+
:position => :center,
|
369
|
+
:column_widths => csv[:table_width],
|
370
|
+
:row_colors => ["B6B6B6", "FFFFFF"],
|
371
|
+
:cell_style => {:align => :center, :size => 10}) do |table|
|
372
|
+
table.row(0).style :font_style => :bold, :size => 12 #, :background_color => 'ff00ff'
|
373
|
+
end
|
374
|
+
pdf.move_down 5
|
375
|
+
pdf.text(csv[:extra_text], :size => 8, :align => :justify,)
|
376
|
+
end
|
377
|
+
csv[:newPDF] = file_name
|
378
|
+
end
|
379
|
+
|
380
|
+
pdf = CombinePDF.new
|
381
|
+
csvs.each do |csv|
|
382
|
+
pdf << CombinePDF.load(csv[:newPDF]) if File.exist?(csv[:newPDF])
|
383
|
+
end
|
384
|
+
pdf << CombinePDF.load(out_r_pdf) if File.exist?(out_r_pdf)
|
385
|
+
|
386
|
+
# Stamp a footer with the pipeline name, version and page number on every
# page of the combined report. The version comes from ViralSeq::VERSION
# (the same constant recorded in the run log); the global
# $sdrm_version_number used previously is not assigned anywhere in this
# script, so it interpolated as an empty string.
pdf.number_pages location: [:bottom_right],
  number_format: "Swanstrom\'s lab HIV SDRM Pipeline, version #{ViralSeq::VERSION} by S.Z. and M.U.C. Page %s",
  font_size: 6,
  opacity: 0.5
|
390
|
+
|
391
|
+
pdf.save File.join(out_lib_dir, (lib_name + ".pdf"))
|
392
|
+
|
393
|
+
csvs.each do |csv|
|
394
|
+
File.unlink csv[:newPDF]
|
395
|
+
end
|
396
|
+
end
|
397
|
+
|
398
|
+
log_file = indir + "_sdrm_log.json"
|
399
|
+
|
400
|
+
File.open(log_file, 'w') { |f| f.puts JSON.pretty_generate(log) }
|
401
|
+
|
402
|
+
FileUtils.touch(File.join(outdir, ".done"))
|
data/lib/viral_seq.rb
CHANGED
@@ -0,0 +1,52 @@
|
|
1
|
+
module ViralSeq

  # recency prediction function based on HIV MPID-NGS
  # @see https://pubmed.ncbi.nlm.nih.gov/32663847 Ref: Zhou et al. J Infect Dis. 2021

  module Recency

    # Classify an infection from the TCS counts, pairwise diversity (pi) and
    # dist20 values of the RT and V1V3 regions.
    #
    # A region is used only when it has at least 3 TCS and a pi value.
    # * Both regions usable: thresholds apply to the RT + V1V3 sums
    #   (pi 0.0103, dist20 0.006).
    # * RT usable and fewer than 3 V1V3 TCS: RT-only thresholds
    #   (pi 0.0021, dist20 0.001).
    # * V1V3 usable only: can be called "chronic" (pi >= 0.0103 and
    #   dist20 >= 0.006), otherwise "insufficient data".
    #
    # The dist20 values are read only when the corresponding pi value is at
    # or above its threshold; they must be numeric in that case.
    #
    # @param tcs_RT [Integer] number of TCS at the RT region
    # @param tcs_V1V3 [Integer] number of TCS at the V1V3 region
    # @param pi_RT [Float] pairwise diversity at the RT region
    # @param pi_V1V3 [Float] pairwise diversity at the V1V3 region
    # @param dist20_RT [Float] dist20 at the RT region
    # @param dist20_V1V3 [Float] dist20 at the V1V3 region
    # @return [String] determination of the recency: "recent", "chronic",
    #   "indeterminant" or "insufficient data"

    def self.define(tcs_RT: nil,
                    tcs_V1V3: nil,
                    pi_RT: nil,
                    dist20_RT: nil,
                    pi_V1V3: nil,
                    dist20_V1V3: nil)
      tcs_RT ||= 0
      tcs_V1V3 ||= 0
      rt_usable = tcs_RT >= 3 && pi_RT
      v1v3_usable = tcs_V1V3 >= 3 && pi_V1V3

      if rt_usable && v1v3_usable
        pi_sum = pi_RT + pi_V1V3
        if pi_sum < 0.0103
          "recent"
        elsif pi_sum >= 0.0103 && (dist20_RT + dist20_V1V3) >= 0.006
          "chronic"
        else
          "indeterminant"
        end
      elsif rt_usable && tcs_V1V3 < 3
        if pi_RT < 0.0021
          "recent"
        elsif pi_RT >= 0.0021 && dist20_RT >= 0.001
          "chronic"
        else
          "indeterminant"
        end
      elsif v1v3_usable
        # V1V3 alone can only confirm a chronic infection.
        if pi_V1V3 >= 0.0103 && dist20_V1V3 >= 0.006
          "chronic"
        else
          "insufficient data"
        end
      else
        "insufficient data"
      end
    end
  end
end
|
data/lib/viral_seq/sdrm.rb
CHANGED
@@ -90,7 +90,7 @@ module ViralSeq
|
|
90
90
|
|
91
91
|
# function to export SDRM positions as json object
|
92
92
|
# @param (see #sdrm_hash)
|
93
|
-
# @return [
|
93
|
+
# @return [Array] json Array of SDRM positions
|
94
94
|
|
95
95
|
def sdrm_json(options)
|
96
96
|
sdrm = ViralSeq::DRMs.sdrm_hash(options)
|
@@ -102,7 +102,7 @@ module ViralSeq
|
|
102
102
|
mutation[:mutationCodons] = muts[1]
|
103
103
|
json_array << mutation
|
104
104
|
end
|
105
|
-
|
105
|
+
return json_array
|
106
106
|
end
|
107
107
|
end
|
108
108
|
end
|
data/lib/viral_seq/seq_hash.rb
CHANGED
@@ -11,7 +11,7 @@ module ViralSeq
|
|
11
11
|
# # filter nt sequences with the reference coordinates
|
12
12
|
# filtered_seqhash = aligned_pr_seqhash.stop_codon[:without_stop_codon]
|
13
13
|
# # return a new ViralSeq::SeqHash object without stop codons
|
14
|
-
# filtered_seqhash = filtered_seqhash.a3g[
|
14
|
+
# filtered_seqhash = filtered_seqhash.a3g[:filtered_seq]
|
15
15
|
# # further filter out sequences with A3G hypermutations
|
16
16
|
# filtered_seqhash.pi
|
17
17
|
# # return pairwise diversity π
|
@@ -187,6 +187,25 @@ module ViralSeq
|
|
187
187
|
return new_seqhash
|
188
188
|
end
|
189
189
|
|
190
|
+
# Randomly sample sequences from a SeqHash object without replacement.
# The nt, aa and qc entries of each sampled sequence name are carried over,
# the source file is kept, and "_sampled_<n>" is appended to the title.
# @param n [Integer] number of sequences to sample
# @return [ViralSeq::SeqHash] sampled SeqHash

def sample(n = 1)
  picked_names = dna_hash.keys.sample(n)
  # Build a sub-hash restricted to the sampled names; names absent from the
  # source hash map to nil.
  subset_of = ->(source) { picked_names.map { |name| [name, source[name]] }.to_h }
  new_title = title + "_sampled_" + n.to_s
  ViralSeq::SeqHash.new(
    subset_of.call(dna_hash),
    subset_of.call(aa_hash),
    subset_of.call(qc_hash),
    new_title,
    file
  )
end
|
208
|
+
|
190
209
|
# write the nt sequences to a FASTA format file
|
191
210
|
# @param file [String] path to the FASTA output file
|
192
211
|
# @return [NilClass]
|
@@ -582,8 +601,8 @@ module ViralSeq
|
|
582
601
|
temp_dir=File.dirname($0)
|
583
602
|
end
|
584
603
|
|
585
|
-
temp_file = temp_dir
|
586
|
-
temp_aln = temp_dir
|
604
|
+
temp_file = File.join(temp_dir, "_temp_muscle_in")
|
605
|
+
temp_aln = File.join(temp_dir, "_temp_muscle_aln")
|
587
606
|
File.open(temp_file, 'w'){|f| seq_hash.each {|k,v| f.puts k; f.puts v}}
|
588
607
|
if path_to_muscle
|
589
608
|
unless ViralSeq.check_muscle?(path_to_muscle)
|
@@ -808,7 +827,7 @@ module ViralSeq
|
|
808
827
|
end # end of locator
|
809
828
|
alias_method :loc, :sequence_locator
|
810
829
|
|
811
|
-
# Remove
|
830
|
+
# Remove sequences with residual offspring Primer IDs.
|
812
831
|
# Compare PID with sequences which have identical sequences.
|
813
832
|
# PIDs differ by 1 base will be recognized. If PID1 is x time (cutoff) greater than PID2, PID2 will be discarded.
|
814
833
|
# each sequence tag starting with ">" and the Primer ID sequence
|
@@ -1155,6 +1174,7 @@ module ViralSeq
|
|
1155
1174
|
new_sh.aa_hash[k] = aa_hash[k]
|
1156
1175
|
new_sh.qc_hash[k] = qc_hash[k]
|
1157
1176
|
end
|
1177
|
+
new_sh.file = self.file
|
1158
1178
|
new_sh.title = self.title + "_" + n.to_s
|
1159
1179
|
return new_sh
|
1160
1180
|
end
|
data/lib/viral_seq/version.rb
CHANGED
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: viral_seq
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.2.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Shuntai Zhou
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2021-
|
12
|
+
date: 2021-05-11 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -151,6 +151,7 @@ executables:
|
|
151
151
|
- locator
|
152
152
|
- tcs
|
153
153
|
- tcs_log
|
154
|
+
- tcs_sdrm
|
154
155
|
extensions: []
|
155
156
|
extra_rdoc_files: []
|
156
157
|
files:
|
@@ -166,6 +167,7 @@ files:
|
|
166
167
|
- bin/locator
|
167
168
|
- bin/tcs
|
168
169
|
- bin/tcs_log
|
170
|
+
- bin/tcs_sdrm
|
169
171
|
- docs/assets/img/cover.jpg
|
170
172
|
- docs/dr.json
|
171
173
|
- docs/sample_miseq_data/hivdr_control/r1.fastq.gz
|
@@ -178,6 +180,7 @@ files:
|
|
178
180
|
- lib/viral_seq/math.rb
|
179
181
|
- lib/viral_seq/muscle.rb
|
180
182
|
- lib/viral_seq/pid.rb
|
183
|
+
- lib/viral_seq/recency.rb
|
181
184
|
- lib/viral_seq/ref_seq.rb
|
182
185
|
- lib/viral_seq/rubystats.rb
|
183
186
|
- lib/viral_seq/sdrm.rb
|