viral_seq 1.1.2 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: '093a8d1d70e251b0748b7035c829eb512991437ffa78fd67387318412e54acf5'
4
- data.tar.gz: 1b9d6f6b2cb2ffa8d9cc588b8df096e7ac3840c694bfb241fcf970b738899328
3
+ metadata.gz: dbcddd0679b712b28592875aa18e38269ccbac5b85113f53873d4cedc5572b04
4
+ data.tar.gz: 7268e596a2c40f7cdd2c815ccf5cdb40663c096c709aba6ee2e0dc4bc9a07542
5
5
  SHA512:
6
- metadata.gz: 3853dbfa3f6604d907ec3d77b8c86ec8d885fedcc854c40ca6822ec72e8b2cfe9413bc188aa722a14e4e4f6c9503eca1b36d7f8e0963a5a997c9f0ca8b54fc86
7
- data.tar.gz: e5b056cddcf7b87cc30e52c878879cea82d865ea7fc867535767918c30c699d58d6f426518aad02be49916c49f38d9603b0ab27ca6f3625f7a5102ae86863023
6
+ metadata.gz: a689ed94201b19ee258fb07f73dd89ed2c8fd297b9580ba720d85ef2a16c5a38fdfed326dbdcc987f0913b4c9ab2aa060683a770df48baa4b1d657d63de35152
7
+ data.tar.gz: 0b8065ae813f66b88fda3d7788c20718aa0db1a4f723d6831e948157b682a81fd1ae44a1d9043ebfb046df91c072bbc16db41ddd42e272d3f6c74a13fa473836
data/README.md CHANGED
@@ -109,7 +109,7 @@ qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
109
109
  Further filter out sequences with Apobec3g/f hypermutations
110
110
 
111
111
  ```ruby
112
- qc_seqhash = qc_seqhash.a3g
112
+ qc_seqhash = qc_seqhash.a3g[:filtered_seq]
113
113
  ```
114
114
 
115
115
  Calculate nucleotide diversity π
@@ -137,11 +137,22 @@ qc_seqhash.sdrm_hiv_pr(cut_off)
137
137
 
138
138
  ## Updates
139
139
 
140
+ ### Version 1.2.0-05102021
141
+
142
+ 1. Added `tcs_sdrm` pipeline as an executable.
143
+ `tcs_sdrm` processes `tcs`-processed HIV MPID-NGS data for drug resistance mutations, recency and phylogenetic analysis.
144
+
145
+ 2. Added function ViralSeq::SeqHash#sample.
146
+
147
+ 3. Added recency determining function `ViralSeq::Recency::define`
148
+
149
+ 4. Fixed a few bugs related to `tcs_sdrm`.
150
+
140
151
  ### Version 1.1.2-04262021
141
152
 
142
153
  1. Added function `ViralSeq::DRMs.sdrm_json` to export SDRM as json object.
143
154
  2. Added a random string to the temp file names for `muscle_bio` to avoid issues when running scripts in parallel.
144
- 3. Added `--keep-original` flag to the `tcs` pipeline.
155
+ 3. Added `--keep-original` flag to the `tcs` pipeline.
145
156
 
146
157
  ### Version 1.1.1-04012021
147
158
 
data/bin/tcs_sdrm ADDED
@@ -0,0 +1,402 @@
1
+ #!/usr/bin/env ruby
2
+ # tcs/sdrm pipeline for HIV-1 drug resistance mutation and recency
3
+ #
4
+ # command example:
5
+ # $ tcs_sdrm libs_dir
6
+ #
7
+ # libs_dir file structure:
8
+ # libs_dir
9
+ # ├── lib1
10
+ # ├── lib1_RT
11
+ # ├── lib1_PR
12
+ # ├── lib1_IN
13
+ # ├── lib1_V1V3
14
+ # ├── lib2
15
+ # ├── lib2_RT
16
+ # ├── lib2_PR
17
+ # ├── lib2_IN
18
+ # ├── lib2_V1V3
19
+ # ├── ...
20
+ #
21
+ # output data in a new dir as 'libs_dir_SDRM'
22
+
23
+ require 'viral_seq'
24
+ require 'json'
25
+ require 'csv'
26
+ require 'fileutils'
27
+ require 'prawn'
28
+ require 'prawn/table'
29
+ require 'combine_pdf'
30
+
31
+ unless ARGV[0] && File.directory?(ARGV[0])
32
+ abort "No sequence data provided. `tcs_sdrm` pipeline aborted. "
33
+ end
34
+
35
+ def abstract_line(data)
36
+ return_data = data[3] + data[2] + data[4] + ":" +
37
+ (data[6].to_f * 100).round(2).to_s + "(" +
38
+ (data[7].to_f * 100).round(2).to_s + "-" +
39
+ (data[8].to_f * 100).round(2).to_s + "); "
40
+ end
41
+
42
+ # run params
43
+ log = []
44
+
45
+ log << { time: Time.now }
46
+ log << { viral_seq_version: ViralSeq::VERSION }
47
+ log << { tcs_version: ViralSeq::TCS_VERSION }
48
+ r_version = `R --version`.split("\n")[0]
49
+ log << { R_version: r_version}
50
+ sdrm_list = {}
51
+ sdrm_list[:nrti] = ViralSeq::DRMs.sdrm_json(:nrti)
52
+ sdrm_list[:nnrti] = ViralSeq::DRMs.sdrm_json(:nnrti)
53
+ sdrm_list[:hiv_pr] = ViralSeq::DRMs.sdrm_json(:hiv_pr)
54
+ sdrm_list[:hiv_in] = ViralSeq::DRMs.sdrm_json(:hiv_in)
55
+ log << { sdrm_list: sdrm_list }
56
+
57
+ # input dir
58
+ indir = ARGV[0]
59
+ libs = Dir[indir + "/*"]
60
+ log << { processed_libs: libs }
61
+
62
+ #output dir
63
+ outdir = indir + "_SDRM"
64
+ Dir.mkdir(outdir) unless File.directory?(outdir)
65
+
66
+ libs.each do |lib|
67
+
68
+ r_script = ViralSeq::R_SCRIPT.dup
69
+
70
+ next unless File.directory?(lib)
71
+
72
+ lib_name = File.basename(lib)
73
+ out_lib_dir = File.join(outdir, lib_name)
74
+ Dir.mkdir(out_lib_dir) unless File.directory?(out_lib_dir)
75
+
76
+ sub_seq_files = Dir[lib + "/*"]
77
+
78
+ seq_summary_file = File.join(out_lib_dir, (lib_name + "_summary.csv"))
79
+ seq_summary_out = File.open(seq_summary_file, "w")
80
+ seq_summary_out.puts 'Region,TCS,TCS with A3G/F hypermutation,TCS with stop codon,' +
81
+ 'TCS w/o hypermutation and stop codon,' +
82
+ 'Poisson cutoff for minority mutation (>=),Pi,Dist20'
83
+
84
+ point_mutation_file = File.join(out_lib_dir, (lib_name + "_substitution.csv"))
85
+ point_mutation_out = File.open(point_mutation_file, "w")
86
+ point_mutation_out.puts "region,TCS,AA position,wild type,mutation," +
87
+ "number,percentage,95% CI low, 95% CI high, notes"
88
+
89
+ linkage_file = File.join(out_lib_dir, (lib_name + "_linkage.csv"))
90
+ linkage_out = File.open(linkage_file, "w")
91
+ linkage_out.puts "region,TCS,mutation linkage,number," +
92
+ "percentage,95% CI low, 95% CI high, notes"
93
+
94
+ aa_report_file = File.join(out_lib_dir, (lib_name + "_aa.csv"))
95
+ aa_report_out = File.open(aa_report_file, "w")
96
+ aa_report_out.puts "region,ref.aa.positions,TCS.number," +
97
+ ViralSeq::AMINO_ACID_LIST.join(",")
98
+
99
+ summary_json_file = File.join(out_lib_dir, (lib_name + "_summary.json"))
100
+ summary_json_out = File.open(summary_json_file,"w")
101
+
102
+ filtered_seq_dir = File.join(out_lib_dir, (lib_name + "_filtered_seq"))
103
+ Dir.mkdir(filtered_seq_dir) unless File.directory?(filtered_seq_dir)
104
+
105
+ aln_seq_dir = File.join(out_lib_dir, (lib_name + "_aln_seq"))
106
+ Dir.mkdir(aln_seq_dir) unless File.directory?(aln_seq_dir)
107
+
108
+ point_mutation_list = []
109
+ linkage_list = []
110
+ aa_report_list = []
111
+ summary_hash = {}
112
+
113
+ sub_seq_files.each do |sub_seq|
114
+ seq_basename = File.basename(sub_seq)
115
+ seqs = ViralSeq::SeqHash.fa(sub_seq)
116
+ next if seqs.size < 3
117
+ if seq_basename =~ /V1V3/i
118
+ summary_hash[:V1V3] = "#{seqs.size.to_s},NA,NA,NA,NA"
119
+ FileUtils.cp(sub_seq, filtered_seq_dir)
120
+ elsif seq_basename =~ /PR/i
121
+ a3g_check = seqs.a3g
122
+ a3g_seqs = a3g_check[:a3g_seq]
123
+ a3g_filtered_seqs = a3g_check[:filtered_seq]
124
+ stop_codon_check = a3g_filtered_seqs.stop_codon
125
+ stop_codon_seqs = stop_codon_check[:with_stop_codon]
126
+ filtered_seqs = stop_codon_check[:without_stop_codon]
127
+ poisson_minority_cutoff = filtered_seqs.pm
128
+ summary_hash[:PR] = [
129
+ seqs.size.to_s,
130
+ a3g_seqs.size.to_s,
131
+ stop_codon_seqs.size.to_s,
132
+ filtered_seqs.size.to_s,
133
+ poisson_minority_cutoff.to_s
134
+ ].join(',')
135
+ next if filtered_seqs.size < 3
136
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
137
+
138
+ sdrm = filtered_seqs.sdrm_hiv_pr(poisson_minority_cutoff)
139
+ point_mutation_list += sdrm[0]
140
+ linkage_list += sdrm[1]
141
+ aa_report_list += sdrm[2]
142
+
143
+ elsif seq_basename =~/IN/i
144
+ a3g_check = seqs.a3g
145
+ a3g_seqs = a3g_check[:a3g_seq]
146
+ a3g_filtered_seqs = a3g_check[:filtered_seq]
147
+ stop_codon_check = a3g_filtered_seqs.stop_codon(2)
148
+ stop_codon_seqs = stop_codon_check[:with_stop_codon]
149
+ filtered_seqs = stop_codon_check[:without_stop_codon]
150
+ poisson_minority_cutoff = filtered_seqs.pm
151
+ summary_hash[:IN] = [
152
+ seqs.size.to_s,
153
+ a3g_seqs.size.to_s,
154
+ stop_codon_seqs.size.to_s,
155
+ filtered_seqs.size.to_s,
156
+ poisson_minority_cutoff.to_s
157
+ ].join(',')
158
+ next if filtered_seqs.size < 3
159
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
160
+
161
+ sdrm = filtered_seqs.sdrm_hiv_in(poisson_minority_cutoff)
162
+ point_mutation_list += sdrm[0]
163
+ linkage_list += sdrm[1]
164
+ aa_report_list += sdrm[2]
165
+
166
+ elsif seq_basename =~/RT/i
167
+ rt_seq1 = {}
168
+ rt_seq2 = {}
169
+ seqs.dna_hash.each do |k,v|
170
+ rt_seq1[k] = v[0,267]
171
+ rt_seq2[k] = v[267..-1]
172
+ end
173
+ rt1 = ViralSeq::SeqHash.new(rt_seq1)
174
+ rt2 = ViralSeq::SeqHash.new(rt_seq2)
175
+ rt1_a3g = rt1.a3g
176
+ rt2_a3g = rt2.a3g
177
+ hypermut_seq_rt1 = rt1_a3g[:a3g_seq]
178
+ hypermut_seq_rt2 = rt2_a3g[:a3g_seq]
179
+ rt1_stop_codon = rt1.stop_codon(1)[:with_stop_codon]
180
+ rt2_stop_codon = rt2.stop_codon(2)[:with_stop_codon]
181
+ hypermut_seq_keys = (hypermut_seq_rt1.dna_hash.keys | hypermut_seq_rt2.dna_hash.keys)
182
+ stop_codon_seq_keys = (rt1_stop_codon.dna_hash.keys | rt2_stop_codon.dna_hash.keys)
183
+ reject_keys = (hypermut_seq_keys | stop_codon_seq_keys)
184
+ filtered_seqs = ViralSeq::SeqHash.new(seqs.dna_hash.reject {|k,v| reject_keys.include?(k) })
185
+ poisson_minority_cutoff = filtered_seqs.pm
186
+ summary_hash[:RT] = [
187
+ seqs.size.to_s,
188
+ hypermut_seq_keys.size.to_s,
189
+ stop_codon_seq_keys.size.to_s,
190
+ filtered_seqs.size.to_s,
191
+ poisson_minority_cutoff.to_s
192
+ ].join(',')
193
+ next if filtered_seqs.size < 3
194
+ filtered_seqs.write_nt_fa(File.join(filtered_seq_dir,seq_basename))
195
+
196
+ sdrm = filtered_seqs.sdrm_hiv_rt(poisson_minority_cutoff)
197
+ point_mutation_list += sdrm[0]
198
+ linkage_list += sdrm[1]
199
+ aa_report_list += sdrm[2]
200
+ end
201
+ end
202
+
203
+ point_mutation_list.each do |record|
204
+ point_mutation_out.puts record.join(",")
205
+ end
206
+ linkage_list.each do |record|
207
+ linkage_out.puts record.join(",")
208
+ end
209
+ aa_report_list.each do |record|
210
+ aa_report_out.puts record.join(",")
211
+ end
212
+
213
+ filtered_seq_files = Dir[filtered_seq_dir + "/*"]
214
+
215
+ out_r_csv = File.join(out_lib_dir, (lib_name + "_pi.csv"))
216
+ out_r_pdf = File.join(out_lib_dir, (lib_name + "_pi.pdf"))
217
+
218
+ if filtered_seq_files.size > 0
219
+ filtered_seq_files.each do |seq_file|
220
+ filtered_sh = ViralSeq::SeqHash.fa(seq_file)
221
+ next if filtered_sh.size < 3
222
+ aligned_sh = filtered_sh.random_select(1000).align
223
+ aligned_sh.write_nt_fa(File.join(aln_seq_dir, File.basename(seq_file)))
224
+ end
225
+
226
+ r_script.gsub!(/PATH_TO_FASTA/,aln_seq_dir)
227
+ File.unlink(out_r_csv) if File.exist?(out_r_csv)
228
+ File.unlink(out_r_pdf) if File.exist?(out_r_pdf)
229
+ r_script.gsub!(/OUTPUT_CSV/,out_r_csv)
230
+ r_script.gsub!(/OUTPUT_PDF/,out_r_pdf)
231
+ r_script_file = File.join(out_lib_dir, "/pi.R")
232
+ File.open(r_script_file,"w") {|line| line.puts r_script}
233
+ print `Rscript #{r_script_file} 1> /dev/null 2> /dev/null`
234
+ if File.exist?(out_r_csv)
235
+ pi_csv = File.readlines(out_r_csv)
236
+ pi_csv.each do |line|
237
+ line.chomp!
238
+ data = line.split(",")
239
+ tag = data[0].split("_")[-1].gsub(/\W/,"").to_sym
240
+ summary_hash[tag] += "," + data[1].to_f.round(4).to_s + "," + data[2].to_f.round(4).to_s
241
+ end
242
+ [:PR, :RT, :IN, :V1V3].each do |regions|
243
+ next unless summary_hash[regions]
244
+ seq_summary_out.puts regions.to_s + "," + summary_hash[regions]
245
+ end
246
+ File.unlink(out_r_csv)
247
+ end
248
+ File.unlink(r_script_file)
249
+ end
250
+
251
+ seq_summary_out.close
252
+ point_mutation_out.close
253
+ linkage_out.close
254
+ aa_report_out.close
255
+
256
+ summary_lines = File.readlines(seq_summary_file)
257
+ summary_lines.shift
258
+
259
+ tcs_PR = 0
260
+ tcs_RT = 0
261
+ tcs_IN = 0
262
+ tcs_V1V3 = 0
263
+ pi_RT = 0.0
264
+ pi_V1V3 = 0.0
265
+ dist20_RT = 0.0
266
+ dist20_V1V3 = 0.0
267
+ summary_lines.each do |line|
268
+ data = line.chomp.split(",")
269
+ if data[0] == "PR"
270
+ tcs_PR = data[4].to_i
271
+ elsif data[0] == "RT"
272
+ tcs_RT = data[4].to_i
273
+ pi_RT = data[6].to_f
274
+ dist20_RT = data[7].to_f
275
+ elsif data[0] == "IN"
276
+ tcs_IN = data[4].to_i
277
+ elsif data[0] == "V1V3"
278
+ tcs_V1V3 = data[1].to_i
279
+ pi_V1V3 = data[6].to_f
280
+ dist20_V1V3 = data[7].to_f
281
+ end
282
+ end
283
+
284
+ recency = ViralSeq::Recency.define(
285
+ tcs_RT: tcs_RT,
286
+ tcs_V1V3: tcs_V1V3,
287
+ pi_RT: pi_RT,
288
+ dist20_RT: dist20_RT,
289
+ pi_V1V3: pi_V1V3,
290
+ dist20_V1V3: dist20_V1V3
291
+ )
292
+
293
+ sdrm_lines = File.readlines(point_mutation_file)
294
+ sdrm_lines.shift
295
+ sdrm_PR = ""
296
+ sdrm_RT = ""
297
+ sdrm_IN = ""
298
+ sdrm_lines.each do |line|
299
+ data = line.chomp.split(",")
300
+ next if data[-1] == "*"
301
+ if data[0] == "PR"
302
+ sdrm_PR += abstract_line(data)
303
+ elsif data[0] =~ /NRTI/
304
+ sdrm_RT += abstract_line(data)
305
+ elsif data[0] == "IN"
306
+ sdrm_IN += abstract_line(data)
307
+ end
308
+ end
309
+
310
+ summary_json = [
311
+ sample_id: lib_name,
312
+ tcs_PR: tcs_PR,
313
+ tcs_RT: tcs_RT,
314
+ tcs_IN: tcs_IN,
315
+ tcs_V1V3: tcs_V1V3,
316
+ pi_RT: pi_RT,
317
+ dist20_RT: dist20_RT,
318
+ dist20_V1V3: dist20_V1V3,
319
+ recency: recency,
320
+ sdrm_PR: sdrm_PR,
321
+ sdrm_RT: sdrm_RT,
322
+ sdrm_IN: sdrm_IN
323
+ ]
324
+
325
+ summary_json_out.puts JSON.pretty_generate(summary_json)
326
+ summary_json_out.close
327
+
328
+ csvs = [
329
+ {
330
+ name: "summary",
331
+ title: "Summary",
332
+ file: seq_summary_file,
333
+ newPDF: "",
334
+ table_width: [65,55,110,110,110,110,60,60],
335
+ extra_text: ""
336
+ },
337
+ {
338
+ name: "substitution",
339
+ title: "Surveillance Drug Resistance Mutations",
340
+ file: point_mutation_file,
341
+ newPDF: "",
342
+ table_width: [65,55,85,80,60,65,85,85,85,45],
343
+ extra_text: "* Mutation below Poisson cut-off for minority mutations"
344
+ },
345
+ {
346
+ name: "linkage",
347
+ title: "Mutation Linkage",
348
+ file: linkage_file,
349
+ newPDF: "",
350
+ table_width: [55,50,250,60,80,80,80,45],
351
+ extra_text: "* Mutation below Poisson cut-off for minority mutations"
352
+ }
353
+ ]
354
+
355
+ csvs.each do |csv|
356
+ file_name = File.join(out_lib_dir, (csv[:name] + ".pdf"))
357
+ next unless File.exist? csv[:file]
358
+ Prawn::Document.generate(file_name, :page_layout => :landscape) do |pdf|
359
+ pdf.text((File.basename(lib, ".*") + ': ' + csv[:title]),
360
+ :size => 20,
361
+ :align => :center,
362
+ :style => :bold)
363
+ pdf.move_down 20
364
+ table_data = CSV.open(csv[:file]).to_a
365
+ header = table_data.first
366
+ pdf.table(table_data,
367
+ :header => header,
368
+ :position => :center,
369
+ :column_widths => csv[:table_width],
370
+ :row_colors => ["B6B6B6", "FFFFFF"],
371
+ :cell_style => {:align => :center, :size => 10}) do |table|
372
+ table.row(0).style :font_style => :bold, :size => 12 #, :background_color => 'ff00ff'
373
+ end
374
+ pdf.move_down 5
375
+ pdf.text(csv[:extra_text], :size => 8, :align => :justify,)
376
+ end
377
+ csv[:newPDF] = file_name
378
+ end
379
+
380
+ pdf = CombinePDF.new
381
+ csvs.each do |csv|
382
+ pdf << CombinePDF.load(csv[:newPDF]) if File.exist?(csv[:newPDF])
383
+ end
384
+ pdf << CombinePDF.load(out_r_pdf) if File.exist?(out_r_pdf)
385
+
386
+ pdf.number_pages location: [:bottom_right],
387
+ number_format: "Swanstrom\'s lab HIV SDRM Pipeline, version #{$sdrm_version_number} by S.Z. and M.U.C. Page %s",
388
+ font_size: 6,
389
+ opacity: 0.5
390
+
391
+ pdf.save File.join(out_lib_dir, (lib_name + ".pdf"))
392
+
393
+ csvs.each do |csv|
394
+ File.unlink csv[:newPDF]
395
+ end
396
+ end
397
+
398
+ log_file = indir + "_sdrm_log.json"
399
+
400
+ File.open(log_file, 'w') { |f| f.puts JSON.pretty_generate(log) }
401
+
402
+ FileUtils.touch(File.join(outdir, ".done"))
data/lib/viral_seq.rb CHANGED
@@ -39,6 +39,7 @@ require_relative "viral_seq/tcs_core"
39
39
  require_relative "viral_seq/tcs_json"
40
40
  require_relative "viral_seq/tcs_dr"
41
41
  require_relative "viral_seq/sdrm"
42
+ require_relative "viral_seq/recency"
42
43
 
43
44
  require "muscle_bio"
44
45
  require "json"
@@ -0,0 +1,52 @@
1
+ module ViralSeq
2
+
3
+ # recency prediction function based on HIV MPID-NGS
4
+ # @see https://pubmed.ncbi.nlm.nih.gov/32663847 Ref: Zhou et al. J Infect Dis. 2021
5
+
6
+ module Recency
7
+
8
+ # @param tcs_RT [Integer] number of TCS at the RT region
9
+ # @param tcs_V1V3 [Integer] number of TCS at the V1V3 region
10
+ # @param pi_RT [Float] pairwise diversity at the RT region
11
+ # @param pi_V1V3 [Float] pairwise diversity at the V1V3 region
12
+ # @param dist20_RT [Float] dist20 at the RT region
13
+ # @param dist20_V1V3 [Float] dist20 at the V1V3 region
14
+ # @return [String] determination of the recency
15
+
16
+ def self.define(tcs_RT: nil,
17
+ tcs_V1V3: nil,
18
+ pi_RT: nil,
19
+ dist20_RT: nil,
20
+ pi_V1V3: nil,
21
+ dist20_V1V3: nil)
22
+ tcs_RT ||= 0
23
+ tcs_V1V3 ||= 0
24
+ if (tcs_RT >= 3 && pi_RT) and (tcs_V1V3 >= 3 && pi_V1V3)
25
+ if (pi_RT + pi_V1V3) < 0.0103
26
+ recency = "recent"
27
+ elsif (pi_RT + pi_V1V3) >= 0.0103 and (dist20_RT + dist20_V1V3) >= 0.006
28
+ recency = "chronic"
29
+ else
30
+ recency = "indeterminant"
31
+ end
32
+ elsif (tcs_RT >= 3 && pi_RT) and tcs_V1V3 < 3
33
+ if pi_RT < 0.0021
34
+ recency = "recent"
35
+ elsif pi_RT >= 0.0021 and dist20_RT >= 0.001
36
+ recency = "chronic"
37
+ else
38
+ recency = "indeterminant"
39
+ end
40
+ elsif (tcs_V1V3 >= 3 && pi_V1V3)
41
+ if pi_V1V3 >= 0.0103 and dist20_V1V3 >= 0.006
42
+ recency = "chronic"
43
+ else
44
+ recency = "insufficient data"
45
+ end
46
+ else
47
+ recency = "insufficient data"
48
+ end
49
+ return recency
50
+ end
51
+ end
52
+ end
@@ -90,7 +90,7 @@ module ViralSeq
90
90
 
91
91
  # function to export SDRM positions as json object
92
92
  # @param (see #sdrm_hash)
93
- # @return [String] json String of SDRM positions
93
+ # @return [Array] json Array of SDRM positions
94
94
 
95
95
  def sdrm_json(options)
96
96
  sdrm = ViralSeq::DRMs.sdrm_hash(options)
@@ -102,7 +102,7 @@ module ViralSeq
102
102
  mutation[:mutationCodons] = muts[1]
103
103
  json_array << mutation
104
104
  end
105
- JSON.pretty_generate(json_array)
105
+ return json_array
106
106
  end
107
107
  end
108
108
  end
@@ -11,7 +11,7 @@ module ViralSeq
11
11
  # # filter nt sequences with the reference coordinates
12
12
  # filtered_seqhash = aligned_pr_seqhash.stop_codon[:without_stop_codon]
13
13
  # # return a new ViralSeq::SeqHash object without stop codons
14
- # filtered_seqhash = filtered_seqhash.a3g[1]
14
+ # filtered_seqhash = filtered_seqhash.a3g[:filtered_seq]
15
15
  # # further filter out sequences with A3G hypermutations
16
16
  # filtered_seqhash.pi
17
17
  # # return pairwise diversity π
@@ -187,6 +187,25 @@ module ViralSeq
187
187
  return new_seqhash
188
188
  end
189
189
 
190
+ # sample a certain number of sequences from a SeqHash object
191
+ # @param n [Integer] number of sequences to sample
192
+ # @return [ViralSeq::SeqHash] sampled SeqHash
193
+
194
+ def sample(n = 1)
195
+ keys = self.dna_hash.keys
196
+ sampled_keys = keys.sample(n)
197
+ sampled_nt = {}
198
+ sampled_aa = {}
199
+ sampled_qc = {}
200
+ sampled_title = self.title + "_sampled_" + n.to_s
201
+ sampled_keys.each do |k|
202
+ sampled_nt[k] = self.dna_hash[k]
203
+ sampled_aa[k] = self.aa_hash[k]
204
+ sampled_qc[k] = self.qc_hash[k]
205
+ end
206
+ return ViralSeq::SeqHash.new(sampled_nt, sampled_aa, sampled_qc, sampled_title, self.file)
207
+ end
208
+
190
209
  # write the nt sequences to a FASTA format file
191
210
  # @param file [String] path to the FASTA output file
192
211
  # @return [NilClass]
@@ -582,8 +601,8 @@ module ViralSeq
582
601
  temp_dir=File.dirname($0)
583
602
  end
584
603
 
585
- temp_file = temp_dir + "/_temp_muscle_in"
586
- temp_aln = temp_dir + "/_temp_muscle_aln"
604
+ temp_file = File.join(temp_dir, "_temp_muscle_in")
605
+ temp_aln = File.join(temp_dir, "_temp_muscle_aln")
587
606
  File.open(temp_file, 'w'){|f| seq_hash.each {|k,v| f.puts k; f.puts v}}
588
607
  if path_to_muscle
589
608
  unless ViralSeq.check_muscle?(path_to_muscle)
@@ -808,7 +827,7 @@ module ViralSeq
808
827
  end # end of locator
809
828
  alias_method :loc, :sequence_locator
810
829
 
811
- # Remove squences with residual offspring Primer IDs.
830
+ # Remove sequences with residual offspring Primer IDs.
812
831
  # Compare PID with sequences which have identical sequences.
813
832
  # PIDs that differ by 1 base will be recognized. If PID1 is x times (cutoff) greater than PID2, PID2 will be discarded.
814
833
  # each sequence tag starting with ">" and the Primer ID sequence
@@ -1155,6 +1174,7 @@ module ViralSeq
1155
1174
  new_sh.aa_hash[k] = aa_hash[k]
1156
1175
  new_sh.qc_hash[k] = qc_hash[k]
1157
1176
  end
1177
+ new_sh.file = self.file
1158
1178
  new_sh.title = self.title + "_" + n.to_s
1159
1179
  return new_sh
1160
1180
  end
@@ -2,6 +2,6 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.1.2"
5
+ VERSION = "1.2.0"
6
6
  TCS_VERSION = "2.3.1"
7
7
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ version: 1.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2021-04-27 00:00:00.000000000 Z
12
+ date: 2021-05-11 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -151,6 +151,7 @@ executables:
151
151
  - locator
152
152
  - tcs
153
153
  - tcs_log
154
+ - tcs_sdrm
154
155
  extensions: []
155
156
  extra_rdoc_files: []
156
157
  files:
@@ -166,6 +167,7 @@ files:
166
167
  - bin/locator
167
168
  - bin/tcs
168
169
  - bin/tcs_log
170
+ - bin/tcs_sdrm
169
171
  - docs/assets/img/cover.jpg
170
172
  - docs/dr.json
171
173
  - docs/sample_miseq_data/hivdr_control/r1.fastq.gz
@@ -178,6 +180,7 @@ files:
178
180
  - lib/viral_seq/math.rb
179
181
  - lib/viral_seq/muscle.rb
180
182
  - lib/viral_seq/pid.rb
183
+ - lib/viral_seq/recency.rb
181
184
  - lib/viral_seq/ref_seq.rb
182
185
  - lib/viral_seq/rubystats.rb
183
186
  - lib/viral_seq/sdrm.rb