viral_seq 1.1.2 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '093a8d1d70e251b0748b7035c829eb512991437ffa78fd67387318412e54acf5'
4
- data.tar.gz: 1b9d6f6b2cb2ffa8d9cc588b8df096e7ac3840c694bfb241fcf970b738899328
3
+ metadata.gz: dbcddd0679b712b28592875aa18e38269ccbac5b85113f53873d4cedc5572b04
4
+ data.tar.gz: 7268e596a2c40f7cdd2c815ccf5cdb40663c096c709aba6ee2e0dc4bc9a07542
5
5
  SHA512:
6
- metadata.gz: 3853dbfa3f6604d907ec3d77b8c86ec8d885fedcc854c40ca6822ec72e8b2cfe9413bc188aa722a14e4e4f6c9503eca1b36d7f8e0963a5a997c9f0ca8b54fc86
7
- data.tar.gz: e5b056cddcf7b87cc30e52c878879cea82d865ea7fc867535767918c30c699d58d6f426518aad02be49916c49f38d9603b0ab27ca6f3625f7a5102ae86863023
6
+ metadata.gz: a689ed94201b19ee258fb07f73dd89ed2c8fd297b9580ba720d85ef2a16c5a38fdfed326dbdcc987f0913b4c9ab2aa060683a770df48baa4b1d657d63de35152
7
+ data.tar.gz: 0b8065ae813f66b88fda3d7788c20718aa0db1a4f723d6831e948157b682a81fd1ae44a1d9043ebfb046df91c072bbc16db41ddd42e272d3f6c74a13fa473836
data/README.md CHANGED
@@ -109,7 +109,7 @@ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
109
109
  Further filter out sequences with Apobec3g/f hypermutations
110
110
 
111
111
  ```ruby
112
- qc_seqhash = qc_seqhash.a3g
112
+ qc_seqhash = qc_seqhash.a3g[:filtered_seq]
113
113
  ```
114
114
 
115
115
  Calculate nucleotide diversity π
@@ -137,11 +137,22 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
137
137
 
138
138
  ## Updates
139
139
 
140
+ ### Version 1.2.0-05102021
141
+
142
+ 1. Added `tcs_sdrm` pipeline as an executable.
143
+ `tcs_sdrm` processes `tcs`-processed HIV MPID-NGS data for drug resistance mutations, recency and phylogenetic analysis.
144
+
145
+ 2. Added function ViralSeq::SeqHash#sample.
146
+
147
+ 3. Added recency determining function `ViralSeq::Recency::define`
148
+
149
+ 4. Fixed a few bugs related to `tcs_sdrm`.
150
+
140
151
  ### Version 1.1.2-04262021
141
152
 
142
153
  1. Added function `ViralSeq::DRMs.sdrm_json` to export SDRM as json object.
143
154
  2. Added a random string to the temp file names for `muscle_bio` to avoid issues when running scripts in parallel.
144
- 3. Added `--keep-original` flag to the `tcs` pipeline.
155
+ 3. Added `--keep-original` flag to the `tcs` pipeline.
145
156
 
146
157
  ### Version 1.1.1-04012021
147
158
 
data/bin/tcs_sdrm ADDED
@@ -0,0 +1,402 @@
1
+ #!/usr/bin/env ruby
2
+ # tcs/sdrm pipeline for HIV-1 drug resistance mutation and recency
3
+ #
4
+ # command example:
5
+ # $ tcs_sdrm libs_dir
6
+ #
7
+ # libs_dir file structure:
8
+ # libs_dir
9
+ # ├── lib1
10
+ # ├── lib1_RT
11
+ # ├── lib1_PR
12
+ # ├── lib1_IN
13
+ # ├── lib1_V1V3
14
+ # ├── lib2
15
+ # ├── lib2_RT
16
+ # ├── lib2_PR
17
+ # ├── lib2_IN
18
+ # ├── lib2_V1V3
19
+ # ├── ...
20
+ #
21
+ # output data in a new dir as 'libs_dir_SDRM'
22
+
23
+ require 'viral_seq'
24
+ require 'json'
25
+ require 'csv'
26
+ require 'fileutils'
27
+ require 'prawn'
28
+ require 'prawn/table'
29
+ require 'combine_pdf'
30
+
31
# Stop immediately unless the first command-line argument names an existing directory.
input_path = ARGV[0]
abort "No sequence data provided. `tcs_sdrm` pipeline aborted. " if input_path.nil? || !File.directory?(input_path)
34
+
35
# Condense one row of the substitution report into a short abstract entry.
#
# The row layout follows the `_substitution.csv` header written by this script:
# index 2 = AA position, 3 = wild type, 4 = mutation, 6 = percentage (as a fraction),
# 7 / 8 = lower / upper bound of the 95% CI (as fractions).
#
# @param data [Array<String>] comma-split fields of one substitution record
# @return [String] entry such as "D30N:5.0(2.0-10.0); " (percentages rounded to 2 decimals)
def abstract_line(data)
  as_percent = ->(fraction) { (fraction.to_f * 100).round(2) }
  mutation = data[3] + data[2] + data[4]
  "#{mutation}:#{as_percent.call(data[6])}(#{as_percent.call(data[7])}-#{as_percent.call(data[8])}); "
end
41
+
42
# run params
# Everything appended to `log` is written to `<indir>_sdrm_log.json` when the run finishes.
log = []

log << { time: Time.now }
log << { viral_seq_version: ViralSeq::VERSION }
log << { tcs_version: ViralSeq::TCS_VERSION }
r_version = `R --version`.split("\n")[0]
log << { R_version: r_version }
sdrm_list = {}
sdrm_list[:nrti] = ViralSeq::DRMs.sdrm_json(:nrti)
sdrm_list[:nnrti] = ViralSeq::DRMs.sdrm_json(:nnrti)
sdrm_list[:hiv_pr] = ViralSeq::DRMs.sdrm_json(:hiv_pr)
sdrm_list[:hiv_in] = ViralSeq::DRMs.sdrm_json(:hiv_in)
log << { sdrm_list: sdrm_list }

# input dir
# Trailing slashes are stripped so that "libs_dir/" yields "libs_dir_SDRM" next to the
# input directory rather than "libs_dir/_SDRM" inside it (same for the log file).
indir = ARGV[0].sub(%r{(?<=.)/+\z}, "")
libs = Dir[indir + "/*"]
log << { processed_libs: libs }

# output dir
outdir = indir + "_SDRM"
Dir.mkdir(outdir) unless File.directory?(outdir)

libs.each do |lib|

  r_script = ViralSeq::R_SCRIPT.dup

  next unless File.directory?(lib)

  lib_name = File.basename(lib)
  out_lib_dir = File.join(outdir, lib_name)
  Dir.mkdir(out_lib_dir) unless File.directory?(out_lib_dir)

  sub_seq_files = Dir[lib + "/*"]

  seq_summary_file = File.join(out_lib_dir, (lib_name + "_summary.csv"))
  seq_summary_header = 'Region,TCS,TCS with A3G/F hypermutation,TCS with stop codon,' +
                       'TCS w/o hypermutation and stop codon,' +
                       'Poisson cutoff for minority mutation (>=),Pi,Dist20'

  point_mutation_file = File.join(out_lib_dir, (lib_name + "_substitution.csv"))
  point_mutation_header = "region,TCS,AA position,wild type,mutation," +
                          "number,percentage,95% CI low, 95% CI high, notes"

  linkage_file = File.join(out_lib_dir, (lib_name + "_linkage.csv"))
  linkage_header = "region,TCS,mutation linkage,number," +
                   "percentage,95% CI low, 95% CI high, notes"

  aa_report_file = File.join(out_lib_dir, (lib_name + "_aa.csv"))
  aa_report_header = "region,ref.aa.positions,TCS.number," +
                     ViralSeq::AMINO_ACID_LIST.join(",")

  summary_json_file = File.join(out_lib_dir, (lib_name + "_summary.json"))

  filtered_seq_dir = File.join(out_lib_dir, (lib_name + "_filtered_seq"))
  Dir.mkdir(filtered_seq_dir) unless File.directory?(filtered_seq_dir)

  aln_seq_dir = File.join(out_lib_dir, (lib_name + "_aln_seq"))
  Dir.mkdir(aln_seq_dir) unless File.directory?(aln_seq_dir)

  point_mutation_list = []
  linkage_list = []
  aa_report_list = []
  summary_hash = {}

  # Screen every region file of the library; the region is recognised from the file name.
  sub_seq_files.each do |sub_seq|
    seq_basename = File.basename(sub_seq)
    seqs = ViralSeq::SeqHash.fa(sub_seq)
    next if seqs.size < 3
    if seq_basename =~ /V1V3/i
      # V1V3 is only counted and copied; no hypermutation / stop codon / SDRM screening.
      summary_hash[:V1V3] = "#{seqs.size.to_s},NA,NA,NA,NA"
      FileUtils.cp(sub_seq, filtered_seq_dir)
    elsif seq_basename =~ /PR/i
      a3g_check = seqs.a3g
      a3g_seqs = a3g_check[:a3g_seq]
      a3g_filtered_seqs = a3g_check[:filtered_seq]
      stop_codon_check = a3g_filtered_seqs.stop_codon
      stop_codon_seqs = stop_codon_check[:with_stop_codon]
      filtered_seqs = stop_codon_check[:without_stop_codon]
      poisson_minority_cutoff = filtered_seqs.pm
      summary_hash[:PR] = [
        seqs.size.to_s,
        a3g_seqs.size.to_s,
        stop_codon_seqs.size.to_s,
        filtered_seqs.size.to_s,
        poisson_minority_cutoff.to_s
      ].join(',')
      next if filtered_seqs.size < 3
      filtered_seqs.write_nt_fa(File.join(filtered_seq_dir, seq_basename))

      sdrm = filtered_seqs.sdrm_hiv_pr(poisson_minority_cutoff)
      point_mutation_list += sdrm[0]
      linkage_list += sdrm[1]
      aa_report_list += sdrm[2]

    elsif seq_basename =~ /IN/i
      a3g_check = seqs.a3g
      a3g_seqs = a3g_check[:a3g_seq]
      a3g_filtered_seqs = a3g_check[:filtered_seq]
      stop_codon_check = a3g_filtered_seqs.stop_codon(2)
      stop_codon_seqs = stop_codon_check[:with_stop_codon]
      filtered_seqs = stop_codon_check[:without_stop_codon]
      poisson_minority_cutoff = filtered_seqs.pm
      summary_hash[:IN] = [
        seqs.size.to_s,
        a3g_seqs.size.to_s,
        stop_codon_seqs.size.to_s,
        filtered_seqs.size.to_s,
        poisson_minority_cutoff.to_s
      ].join(',')
      next if filtered_seqs.size < 3
      filtered_seqs.write_nt_fa(File.join(filtered_seq_dir, seq_basename))

      sdrm = filtered_seqs.sdrm_hiv_in(poisson_minority_cutoff)
      point_mutation_list += sdrm[0]
      linkage_list += sdrm[1]
      aa_report_list += sdrm[2]

    elsif seq_basename =~ /RT/i
      # The RT sequence is split after nt 267 and the two halves are screened separately
      # (stop codons are checked with arguments 1 and 2 respectively); a sequence is
      # rejected when either half is flagged.
      rt_seq1 = {}
      rt_seq2 = {}
      seqs.dna_hash.each do |k, v|
        rt_seq1[k] = v[0, 267]
        rt_seq2[k] = v[267..-1]
      end
      rt1 = ViralSeq::SeqHash.new(rt_seq1)
      rt2 = ViralSeq::SeqHash.new(rt_seq2)
      rt1_a3g = rt1.a3g
      rt2_a3g = rt2.a3g
      hypermut_seq_rt1 = rt1_a3g[:a3g_seq]
      hypermut_seq_rt2 = rt2_a3g[:a3g_seq]
      rt1_stop_codon = rt1.stop_codon(1)[:with_stop_codon]
      rt2_stop_codon = rt2.stop_codon(2)[:with_stop_codon]
      hypermut_seq_keys = (hypermut_seq_rt1.dna_hash.keys | hypermut_seq_rt2.dna_hash.keys)
      stop_codon_seq_keys = (rt1_stop_codon.dna_hash.keys | rt2_stop_codon.dna_hash.keys)
      reject_keys = (hypermut_seq_keys | stop_codon_seq_keys)
      filtered_seqs = ViralSeq::SeqHash.new(seqs.dna_hash.reject { |k, _v| reject_keys.include?(k) })
      poisson_minority_cutoff = filtered_seqs.pm
      summary_hash[:RT] = [
        seqs.size.to_s,
        hypermut_seq_keys.size.to_s,
        stop_codon_seq_keys.size.to_s,
        filtered_seqs.size.to_s,
        poisson_minority_cutoff.to_s
      ].join(',')
      next if filtered_seqs.size < 3
      filtered_seqs.write_nt_fa(File.join(filtered_seq_dir, seq_basename))

      sdrm = filtered_seqs.sdrm_hiv_rt(poisson_minority_cutoff)
      point_mutation_list += sdrm[0]
      linkage_list += sdrm[1]
      aa_report_list += sdrm[2]
    end
  end

  # Write the three SDRM reports. Block-form File.open closes each handle even on error.
  point_mutation_rows = point_mutation_list.map { |record| record.join(",") }
  File.open(point_mutation_file, "w") do |out|
    out.puts point_mutation_header
    point_mutation_rows.each { |row| out.puts row }
  end
  File.open(linkage_file, "w") do |out|
    out.puts linkage_header
    linkage_list.each { |record| out.puts record.join(",") }
  end
  File.open(aa_report_file, "w") do |out|
    out.puts aa_report_header
    aa_report_list.each { |record| out.puts record.join(",") }
  end

  filtered_seq_files = Dir[filtered_seq_dir + "/*"]

  out_r_csv = File.join(out_lib_dir, (lib_name + "_pi.csv"))
  out_r_pdf = File.join(out_lib_dir, (lib_name + "_pi.pdf"))

  # Data rows of the summary CSV; they are only produced when the R step yields its CSV.
  summary_rows = []

  if filtered_seq_files.size > 0
    filtered_seq_files.each do |seq_file|
      filtered_sh = ViralSeq::SeqHash.fa(seq_file)
      next if filtered_sh.size < 3
      aligned_sh = filtered_sh.random_select(1000).align
      aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
    end

    # Block-form gsub! inserts the paths literally (no backslash-escape interpretation).
    r_script.gsub!(/PATH_TO_FASTA/) { aln_seq_dir }
    File.unlink(out_r_csv) if File.exist?(out_r_csv)
    File.unlink(out_r_pdf) if File.exist?(out_r_pdf)
    r_script.gsub!(/OUTPUT_CSV/) { out_r_csv }
    r_script.gsub!(/OUTPUT_PDF/) { out_r_pdf }
    r_script_file = File.join(out_lib_dir, "pi.R")
    File.open(r_script_file, "w") { |f| f.puts r_script }
    # Argument-list form: no shell is involved, so paths with spaces or shell
    # metacharacters are passed to Rscript verbatim. Output is discarded as before.
    system("Rscript", r_script_file, out: File::NULL, err: File::NULL)
    if File.exist?(out_r_csv)
      File.readlines(out_r_csv).each do |line|
        data = line.chomp.split(",")
        next if data.empty?
        # The region tag is the last "_"-separated token of the first column.
        tag = data[0].split("_")[-1].gsub(/\W/, "").to_sym
        # Skip rows that do not belong to a region summarised above (e.g. a header row).
        next unless summary_hash[tag]
        summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
      end
      [:PR, :RT, :IN, :V1V3].each do |region|
        next unless summary_hash[region]
        summary_rows << region.to_s + "," + summary_hash[region]
      end
      File.unlink(out_r_csv)
    end
    File.unlink(r_script_file)
  end

  File.open(seq_summary_file, "w") do |out|
    out.puts seq_summary_header
    summary_rows.each { |row| out.puts row }
  end

  # Pull the per-region numbers needed for the recency call out of the summary rows.
  tcs_PR = 0
  tcs_RT = 0
  tcs_IN = 0
  tcs_V1V3 = 0
  pi_RT = 0.0
  pi_V1V3 = 0.0
  dist20_RT = 0.0
  dist20_V1V3 = 0.0
  summary_rows.each do |row|
    data = row.split(",")
    case data[0]
    when "PR"
      tcs_PR = data[4].to_i
    when "RT"
      tcs_RT = data[4].to_i
      pi_RT = data[6].to_f
      dist20_RT = data[7].to_f
    when "IN"
      tcs_IN = data[4].to_i
    when "V1V3"
      tcs_V1V3 = data[1].to_i
      pi_V1V3 = data[6].to_f
      dist20_V1V3 = data[7].to_f
    end
  end

  recency = ViralSeq::Recency.define(
    tcs_RT: tcs_RT,
    tcs_V1V3: tcs_V1V3,
    pi_RT: pi_RT,
    dist20_RT: dist20_RT,
    pi_V1V3: pi_V1V3,
    dist20_V1V3: dist20_V1V3
  )

  # Abstract the SDRM calls per region, skipping records flagged "*" in the last field
  # (mutations below the Poisson cut-off).
  sdrm_PR = ""
  sdrm_RT = ""
  sdrm_IN = ""
  point_mutation_rows.each do |row|
    data = row.split(",")
    next if data[-1] == "*"
    case data[0]
    when "PR"
      sdrm_PR += abstract_line(data)
    when /NRTI/
      sdrm_RT += abstract_line(data)
    when "IN"
      sdrm_IN += abstract_line(data)
    end
  end

  # Kept as a one-element array wrapping the hash, as in the original output format.
  summary_json = [
    sample_id: lib_name,
    tcs_PR: tcs_PR,
    tcs_RT: tcs_RT,
    tcs_IN: tcs_IN,
    tcs_V1V3: tcs_V1V3,
    pi_RT: pi_RT,
    pi_V1V3: pi_V1V3,
    dist20_RT: dist20_RT,
    dist20_V1V3: dist20_V1V3,
    recency: recency,
    sdrm_PR: sdrm_PR,
    sdrm_RT: sdrm_RT,
    sdrm_IN: sdrm_IN
  ]

  File.open(summary_json_file, "w") { |out| out.puts JSON.pretty_generate(summary_json) }

  csvs = [
    {
      name: "summary",
      title: "Summary",
      file: seq_summary_file,
      newPDF: "",
      table_width: [65,55,110,110,110,110,60,60],
      extra_text: ""
    },
    {
      name: "substitution",
      title: "Surveillance Drug Resistance Mutations",
      file: point_mutation_file,
      newPDF: "",
      table_width: [65,55,85,80,60,65,85,85,85,45],
      extra_text: "* Mutation below Poisson cut-off for minority mutations"
    },
    {
      name: "linkage",
      title: "Mutation Linkage",
      file: linkage_file,
      newPDF: "",
      table_width: [55,50,250,60,80,80,80,45],
      extra_text: "* Mutation below Poisson cut-off for minority mutations"
    }
  ]

  # Render each CSV report as a one-table landscape PDF.
  csvs.each do |csv|
    file_name = File.join(out_lib_dir, (csv[:name] + ".pdf"))
    next unless File.exist? csv[:file]
    Prawn::Document.generate(file_name, :page_layout => :landscape) do |pdf|
      pdf.text((File.basename(lib, ".*") + ': ' + csv[:title]),
        :size => 20,
        :align => :center,
        :style => :bold)
      pdf.move_down 20
      table_data = CSV.read(csv[:file])
      header = table_data.first
      pdf.table(table_data,
        :header => header,
        :position => :center,
        :column_widths => csv[:table_width],
        :row_colors => ["B6B6B6", "FFFFFF"],
        :cell_style => {:align => :center, :size => 10}) do |table|
        table.row(0).style :font_style => :bold, :size => 12 #, :background_color => 'ff00ff'
      end
      pdf.move_down 5
      pdf.text(csv[:extra_text], :size => 8, :align => :justify,)
    end
    csv[:newPDF] = file_name
  end

  # Merge the per-report PDFs (plus the R plot, if any) into one library report.
  pdf = CombinePDF.new
  csvs.each do |csv|
    pdf << CombinePDF.load(csv[:newPDF]) if File.exist?(csv[:newPDF])
  end
  pdf << CombinePDF.load(out_r_pdf) if File.exist?(out_r_pdf)

  # The footer reports the viral_seq gem version.
  pdf.number_pages location: [:bottom_right],
    number_format: "Swanstrom\'s lab HIV SDRM Pipeline, version #{ViralSeq::VERSION} by S.Z. and M.U.C. Page %s",
    font_size: 6,
    opacity: 0.5

  pdf.save File.join(out_lib_dir, (lib_name + ".pdf"))

  # Remove the intermediate per-report PDFs.
  csvs.each do |csv|
    File.unlink(csv[:newPDF]) if File.exist?(csv[:newPDF])
  end
end

log_file = indir + "_sdrm_log.json"

File.open(log_file, 'w') { |f| f.puts JSON.pretty_generate(log) }

# Marker file signalling that the whole run completed.
FileUtils.touch(File.join(outdir, ".done"))
data/lib/viral_seq.rb CHANGED
@@ -39,6 +39,7 @@ require_relative "viral_seq/tcs_core"
39
39
  require_relative "viral_seq/tcs_json"
40
40
  require_relative "viral_seq/tcs_dr"
41
41
  require_relative "viral_seq/sdrm"
42
+ require_relative "viral_seq/recency"
42
43
 
43
44
  require "muscle_bio"
44
45
  require "json"
@@ -0,0 +1,52 @@
1
module ViralSeq

  # recency prediction function based on HIV MPID-NGS
  # @see https://pubmed.ncbi.nlm.nih.gov/32663847 Ref: Zhou et al. J Infect Dis. 2021

  module Recency

    # Classify an infection from TCS counts, pairwise diversity (pi) and dist20 values
    # of the RT and V1V3 regions. A region is only used when it has at least 3 TCS and
    # a pi value. A missing (nil) dist20 value is treated as 0.0 instead of raising.
    #
    # @param tcs_RT [Integer, nil] number of TCS at the RT region (nil counts as 0)
    # @param tcs_V1V3 [Integer, nil] number of TCS at the V1V3 region (nil counts as 0)
    # @param pi_RT [Float, nil] pairwise diversity at the RT region
    # @param pi_V1V3 [Float, nil] pairwise diversity at the V1V3 region
    # @param dist20_RT [Float, nil] dist20 at the RT region
    # @param dist20_V1V3 [Float, nil] dist20 at the V1V3 region
    # @return [String] determination of the recency: "recent", "chronic",
    #   "indeterminant" or "insufficient data"

    def self.define(tcs_RT: nil,
                    tcs_V1V3: nil,
                    pi_RT: nil,
                    dist20_RT: nil,
                    pi_V1V3: nil,
                    dist20_V1V3: nil)
      tcs_RT ||= 0
      tcs_V1V3 ||= 0
      # nil.to_f is 0.0, so an omitted dist20 can never satisfy a "chronic" threshold.
      dist20_RT = dist20_RT.to_f
      dist20_V1V3 = dist20_V1V3.to_f

      rt_usable = tcs_RT >= 3 && pi_RT
      v1v3_usable = tcs_V1V3 >= 3 && pi_V1V3

      if rt_usable && v1v3_usable
        # Both regions available: thresholds apply to the summed values.
        combined_pi = pi_RT + pi_V1V3
        if combined_pi < 0.0103
          "recent"
        elsif combined_pi >= 0.0103 && (dist20_RT + dist20_V1V3) >= 0.006
          "chronic"
        else
          "indeterminant"
        end
      elsif rt_usable && tcs_V1V3 < 3
        # RT only.
        if pi_RT < 0.0021
          "recent"
        elsif pi_RT >= 0.0021 && dist20_RT >= 0.001
          "chronic"
        else
          "indeterminant"
        end
      elsif v1v3_usable
        # V1V3 only: can establish "chronic", never "recent".
        if pi_V1V3 >= 0.0103 && dist20_V1V3 >= 0.006
          "chronic"
        else
          "insufficient data"
        end
      else
        "insufficient data"
      end
    end
  end
end
@@ -90,7 +90,7 @@ module ViralSeq
90
90
 
91
91
  # function to export SDRM positions as json object
92
92
  # @param (see #sdrm_hash)
93
- # @return [String] json String of SDRM positions
93
+ # @return [Array] json Array of SDRM positions
94
94
 
95
95
  def sdrm_json(options)
96
96
  sdrm = ViralSeq::DRMs.sdrm_hash(options)
@@ -102,7 +102,7 @@ module ViralSeq
102
102
  mutation[:mutationCodons] = muts[1]
103
103
  json_array << mutation
104
104
  end
105
- JSON.pretty_generate(json_array)
105
+ return json_array
106
106
  end
107
107
  end
108
108
  end
@@ -11,7 +11,7 @@ module ViralSeq
11
11
  # # filter nt sequences with the reference coordinates
12
12
  # filtered_seqhash = aligned_pr_seqhash.stop_codon[:without_stop_codon]
13
13
  # # return a new ViralSeq::SeqHash object without stop codons
14
- # filtered_seqhash = filtered_seqhash.a3g[1]
14
+ # filtered_seqhash = filtered_seqhash.a3g[:filtered_seq]
15
15
  # # further filter out sequences with A3G hypermutations
16
16
  # filtered_seqhash.pi
17
17
  # # return pairwise diversity π
@@ -187,6 +187,25 @@ module ViralSeq
187
187
  return new_seqhash
188
188
  end
189
189
 
190
+ # sample a certain number of sequences from a SeqHash object
191
+ # @param n [Integer] number of sequences to sample
192
+ # @return [ViralSeq::SeqHash] sampled SeqHash
193
+
194
# Draw a random subset of sequences from this SeqHash.
# The nt, aa and qc entries of each drawn sequence name are carried over together;
# the new title is the current title suffixed with "_sampled_<n>".
# @param n [Integer] number of sequences to sample
# @return [ViralSeq::SeqHash] sampled SeqHash

def sample(n = 1)
  picked = dna_hash.keys.sample(n)
  new_title = title + "_sampled_" + n.to_s
  # Build a sub-hash of `source` restricted to the picked sequence names.
  subset_of = ->(source) { picked.each_with_object({}) { |name, acc| acc[name] = source[name] } }
  ViralSeq::SeqHash.new(subset_of.call(dna_hash),
                        subset_of.call(aa_hash),
                        subset_of.call(qc_hash),
                        new_title,
                        file)
end
208
+
190
209
  # write the nt sequences to a FASTA format file
191
210
  # @param file [String] path to the FASTA output file
192
211
  # @return [NilClass]
@@ -582,8 +601,8 @@ module ViralSeq
582
601
  temp_dir=File.dirname($0)
583
602
  end
584
603
 
585
- temp_file = temp_dir + "/_temp_muscle_in"
586
- temp_aln = temp_dir + "/_temp_muscle_aln"
604
+ temp_file = File.join(temp_dir, "_temp_muscle_in")
605
+ temp_aln = File.join(temp_dir, "_temp_muscle_aln")
587
606
  File.open(temp_file, 'w'){|f| seq_hash.each {|k,v| f.puts k; f.puts v}}
588
607
  if path_to_muscle
589
608
  unless ViralSeq.check_muscle?(path_to_muscle)
@@ -808,7 +827,7 @@ module ViralSeq
808
827
  end # end of locator
809
828
  alias_method :loc, :sequence_locator
810
829
 
811
- # Remove squences with residual offspring Primer IDs.
830
+ # Remove sequences with residual offspring Primer IDs.
812
831
  # Compare PID with sequences which have identical sequences.
813
832
  # PIDs that differ by 1 base will be recognized. If PID1 is x times (cutoff) greater than PID2, PID2 will be discarded.
814
833
  # each sequence tag starting with ">" and the Primer ID sequence
@@ -1155,6 +1174,7 @@ module ViralSeq
1155
1174
  new_sh.aa_hash[k] = aa_hash[k]
1156
1175
  new_sh.qc_hash[k] = qc_hash[k]
1157
1176
  end
1177
+ new_sh.file = self.file
1158
1178
  new_sh.title = self.title + "_" + n.to_s
1159
1179
  return new_sh
1160
1180
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.1.2"
5
+ VERSION = "1.2.0"
6
6
  TCS_VERSION = "2.3.1"
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-27 00:00:00.000000000 Z
12
+ date: 2021-05-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -151,6 +151,7 @@ executables:
151
151
  - locator
152
152
  - tcs
153
153
  - tcs_log
154
+ - tcs_sdrm
154
155
  extensions: []
155
156
  extra_rdoc_files: []
156
157
  files:
@@ -166,6 +167,7 @@ files:
166
167
  - bin/locator
167
168
  - bin/tcs
168
169
  - bin/tcs_log
170
+ - bin/tcs_sdrm
169
171
  - docs/assets/img/cover.jpg
170
172
  - docs/dr.json
171
173
  - docs/sample_miseq_data/hivdr_control/r1.fastq.gz
@@ -178,6 +180,7 @@ files:
178
180
  - lib/viral_seq/math.rb
179
181
  - lib/viral_seq/muscle.rb
180
182
  - lib/viral_seq/pid.rb
183
+ - lib/viral_seq/recency.rb
181
184
  - lib/viral_seq/ref_seq.rb
182
185
  - lib/viral_seq/rubystats.rb
183
186
  - lib/viral_seq/sdrm.rb