viral_seq 1.8.1.1 → 1.9.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/README.md +6 -0
- data/bin/tcs +7 -1
- data/bin/tcs_sdrm +87 -131
- data/lib/viral_seq/drm_region_config.rb +83 -0
- data/lib/viral_seq/drm_version.rb +120 -0
- data/lib/viral_seq/hivdr.rb +206 -2
- data/lib/viral_seq/sdrm.rb +34 -99
- data/lib/viral_seq/sequence.rb +26 -1
- data/lib/viral_seq/tcs_core.rb +1 -1
- data/lib/viral_seq/tcs_dr.rb +87 -5
- data/lib/viral_seq/util/drm_list.json +684 -0
- data/lib/viral_seq/util/drm_versions_config.json +114 -0
- data/lib/viral_seq/version.rb +2 -2
- data/lib/viral_seq.rb +3 -0
- metadata +7 -3
data/lib/viral_seq/hivdr.rb
CHANGED
@@ -154,7 +154,7 @@ module ViralSeq
|
|
154
154
|
s = ViralSeq::Sequence.new(name,seq)
|
155
155
|
s.translate(rf_label)
|
156
156
|
aa[name] = s.aa_string
|
157
|
-
record = s.sdrm(:
|
157
|
+
record = s.sdrm(:PI)
|
158
158
|
mut_com << record
|
159
159
|
record.each do |position,mutation|
|
160
160
|
if mut[position]
|
@@ -386,7 +386,7 @@ module ViralSeq
|
|
386
386
|
s = ViralSeq::Sequence.new(name,seq)
|
387
387
|
s.translate(rf_label)
|
388
388
|
aa[name] = s.aa_string
|
389
|
-
record = s.sdrm(:
|
389
|
+
record = s.sdrm(:INSTI, start_codon_number)
|
390
390
|
mut_com << record
|
391
391
|
record.each do |position,mutation|
|
392
392
|
if mut[position]
|
@@ -460,5 +460,209 @@ module ViralSeq
|
|
460
460
|
return [point_mutation_list, linkage_list, report_list]
|
461
461
|
end
|
462
462
|
|
463
|
+
|
464
|
+
# wrapper function for #a3g_hypermut and #stop_codon with ViralSeq::DrmRegionConfig as a param.
|
465
|
+
|
466
|
+
# Run the #a3g check and the #stop_codon check on the sequences, using the
# layout described by a region configuration.
#
# Without a gap in the region, #a3g is run on the whole sequences and
# #stop_codon is run only on the sequences that passed #a3g, so a sequence is
# reported in at most one of :a3g_seq / :stop_codon_seq.
#
# With a gap, every sequence is split into an R1 part (the first
# `r1_length` bases) and an R2 part (the following `r2_length` bases). Both
# checks are run on both parts independently, each part with its own reading
# frame, and a sequence is rejected when either part fails either check. In
# this branch a sequence can appear in both :a3g_seq and :stop_codon_seq.
#
# @param region_config [ViralSeq::DrmRegionConfig] must respond to #seq_coord,
#   #get_reading_frame_number and, for gapped regions, #r1_r2_length
# @return [Hash] with keys :filtered_seq, :a3g_seq and :stop_codon_seq; in the
#   gapped branch each value is a ViralSeq::SeqHash holding the full-length
#   sequences, in the ungapped branch they are whatever #a3g / #stop_codon return
def filter_for_drm(region_config)
  seq_coord = region_config.seq_coord
  reading_frames = region_config.get_reading_frame_number

  unless seq_coord["gap"]
    a3g_check = a3g
    stop_codon_check = a3g_check[:filtered_seq].stop_codon(reading_frames[0])

    return {
      filtered_seq: stop_codon_check[:without_stop_codon],
      a3g_seq: a3g_check[:a3g_seq],
      stop_codon_seq: stop_codon_check[:with_stop_codon]
    }
  end

  # relies on #r1_r2_length listing the R1 length before the R2 length
  r1_length, r2_length = region_config.r1_r2_length.values

  r1_seqs = {}
  r2_seqs = {}
  dna_hash.each do |name, seq|
    r1_seqs[name] = seq[0, r1_length]
    r2_seqs[name] = seq[r1_length, r2_length]
  end

  r1 = ViralSeq::SeqHash.new(r1_seqs)
  r2 = ViralSeq::SeqHash.new(r2_seqs)

  a3g_r1 = r1.a3g[:a3g_seq]
  a3g_r2 = r2.a3g[:a3g_seq]
  stop_r1 = r1.stop_codon(reading_frames[0])[:with_stop_codon]
  stop_r2 = r2.stop_codon(reading_frames[1])[:with_stop_codon]

  # Hash-based lookups keep the membership tests below O(1) per sequence.
  a3g_hit = {}
  (a3g_r1.dna_hash.keys | a3g_r2.dna_hash.keys).each { |name| a3g_hit[name] = true }
  stop_hit = {}
  (stop_r1.dna_hash.keys | stop_r2.dna_hash.keys).each { |name| stop_hit[name] = true }

  # The report always carries the original, unsplit sequences.
  {
    filtered_seq: ViralSeq::SeqHash.new(
      dna_hash.reject { |name, _seq| a3g_hit.key?(name) || stop_hit.key?(name) }
    ),
    a3g_seq: ViralSeq::SeqHash.new(dna_hash.select { |name, _seq| a3g_hit.key?(name) }),
    stop_codon_seq: ViralSeq::SeqHash.new(dna_hash.select { |name, _seq| stop_hit.key?(name) })
  }
end # end of #filter_for_drm
|
526
|
+
|
527
|
+
|
528
|
+
# insert the partial genome into the whole gene for HIV resistance analysis
|
529
|
+
|
530
|
+
|
531
|
+
# Embed each partial sequence into the reference gene so that the result spans
# the full `ref_coord` range of the region configuration.
#
# The reference is fetched with ViralSeq::RefSeq.get and sliced with 1-based
# coordinates taken from the configuration:
# * `ref_info["ref_coord"]` - [first, last] position of the complete gene
# * `seq_coord["minimum"]` / `seq_coord["maximum"]` - positions covered by the
#   sequences in this object
# * `seq_coord["gap"]` (optional) - "minimum"/"maximum" of an unsequenced
#   stretch inside the covered range; it is filled from the reference
#
# @param region_config [ViralSeq::DrmRegionConfig] must respond to #seq_coord
#   and #ref_info
# @return [ViralSeq::SeqHash] new SeqHash with the same names and completed sequences
def complete_with_ref(region_config)
  seq_coord = region_config.seq_coord
  ref_info = region_config.ref_info

  ref = ViralSeq::RefSeq.get(ref_info["ref_type"].to_sym)
  ref_first = ref_info["ref_coord"][0]
  ref_last = ref_info["ref_coord"][1]
  seq_first = seq_coord["minimum"]
  seq_last = seq_coord["maximum"]
  gap = seq_coord["gap"]

  # Exclusive ranges keep the slice empty when the sequence starts at the very
  # first reference position; an inclusive `..(seq_first - 2)` would turn into
  # `..-1` there and return the whole reference.
  upstream = ref[(ref_first - 1)...(seq_first - 1)]
  downstream = ref[seq_last...ref_last]

  complete_seqs = {}

  if gap
    gap_fill = ref[(gap["minimum"] - 1)...gap["maximum"]]
    # number of bases in front of the gap (the R1 part of every sequence)
    r1_length = gap["minimum"] - seq_first

    dna_hash.each do |name, seq|
      complete_seqs[name] = upstream + seq[0, r1_length] + gap_fill + seq[r1_length..-1] + downstream
    end
  else
    dna_hash.each do |name, seq|
      complete_seqs[name] = upstream + seq + downstream
    end
  end

  ViralSeq::SeqHash.new(complete_seqs)
end # end of #complete_with_ref
|
556
|
+
|
557
|
+
|
558
|
+
# function to interpret HIV drms with ViralSeq::DrmRegionConfig as a param.
|
559
|
+
|
560
|
+
# Interpret drug resistance mutations for the region described by a region
# configuration.
#
# The sequences are first completed against the reference (#complete_with_ref),
# translated, and then checked against every DRM class in
# `region_config.drm_list` with ViralSeq::Sequence#check_drm.
#
# @param region_config [ViralSeq::DrmRegionConfig] must respond to #region and
#   #drm_list, plus whatever #complete_with_ref reads
# @return [Array] three lists:
#   1. point mutations, sorted by position, one row per (class, position, mutation):
#      [drm_class, n_seq, position, wild_type, mutation, count,
#       ci_mean, ci_lower, ci_upper, fdr, label] where label is "*" when fdr >= 0.05
#   2. linkage patterns, most frequent first:
#      [region, n_seq, pattern, count, ci_mean, ci_lower, ci_upper, ""]
#      where pattern is "WT" or mutations joined by "+"
#   3. per-position amino acid report:
#      [region, aa_position, n_seq, percentage for each entry of ViralSeq::AMINO_ACID_LIST...]
#   All three lists are empty when there are no sequences.
def drm(region_config)
  region = region_config.region
  fdr_hash = self.fdr # must run fdr before the completion of the sequences

  sequences = self.complete_with_ref(region_config).dna_hash
  n_seq = sequences.size
  drm_list = region_config.drm_list

  aa = {}      # name => translated amino acid string
  mut = {}     # drm_class => { position => [wild_type, [mutation, ...]] }
  mut_com = [] # one { position => [wild_type, mutation] } hash per sequence

  sequences.each do |name, seq|
    s = ViralSeq::Sequence.new(name, seq)
    s.translate
    aa[name] = s.aa_string

    records_per_seq = {}

    drm_list.each do |drm_class, list|
      mut[drm_class] ||= {}

      record = s.check_drm(list)
      records_per_seq = records_per_seq.merge(record)

      record.each do |position, mutation|
        mut[drm_class][position] ||= [mutation[0], []]
        mut[drm_class][position][1] << mutation[1]
      end
    end

    mut_com << records_per_seq.sort.to_h
  end

  point_mutation_list = []
  mut.each do |drm_class, mutations|
    mutations.each do |position, mutation|
      wt = mutation[0]
      mutation[1].count_freq.each do |m, number|
        ci = ViralSeq::Math::BinomCI.new(number, n_seq)
        fdr = fdr_hash[number].round(5)
        label = fdr >= 0.05 ? "*" : ""
        point_mutation_list << [drm_class, n_seq, position, wt, m, number, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), fdr, label]
      end
    end
  end
  point_mutation_list.sort_by! { |record| record[2] }

  # Collapse identical per-sequence mutation sets into named linkage patterns.
  link2 = {}
  mut_com.count_freq.each do |k, v|
    pattern = k.empty? ? ['WT'] : k.map { |p, m| m[0] + p.to_s + m[1] }
    link2[pattern.join("+")] = v
  end

  linkage_list = []
  link2.sort_by { |_key, value| value }.reverse.to_h.each do |k, v|
    ci = ViralSeq::Math::BinomCI.new(v, n_seq)
    linkage_list << [region, n_seq, k, v, ci.mean.round(5), ci.lower.round(5), ci.upper.round(5), ""]
  end

  report_list = []
  aa_strings = aa.values

  # With no sequences there is no first translation to take the length from.
  unless aa_strings.empty?
    # The length of the first translation defines how many positions are reported.
    aa_strings[0].size.times do |index|
      counts = aa_strings.map { |aa_string| aa_string[index] }.count_freq.to_h

      record = [region, index + 1, n_seq]
      ViralSeq::AMINO_ACID_LIST.each do |amino_acid|
        # amino acids that never occur count as 0 (nil.to_f is 0.0)
        record << (counts[amino_acid].to_f / n_seq * 100).round(4)
      end
      report_list << record
    end
  end

  [point_mutation_list, linkage_list, report_list]
end
|
665
|
+
|
463
666
|
end # end of ViralSeq::SeqHash
|
667
|
+
|
464
668
|
end # end of ViralSeq
|
data/lib/viral_seq/sdrm.rb
CHANGED
@@ -1,109 +1,44 @@
|
|
1
1
|
module ViralSeq
  class DRMs

    # Retrieve the surveillance DRM positions of one DRM list as a Hash.
    # The lists are stored at `lib/viral_seq/util/drm_list.json`, which is read
    # and parsed on every call.
    #
    # @param options [Symbol, String] name of the DRM list; it is converted to an
    #   upper-case String and looked up as a top-level key of `drm_list.json`
    # @return [Hash] each record's "position" => [ 'wild-type', ['mutations'] ]
    # @note terminates the program via Kernel#abort (SystemExit) when the
    #   requested list is not present in `drm_list.json`

    def self.sdrm_hash(options)
      options = options.to_s.upcase
      drm_data = JSON.parse(
        File.read(
          File.join(ViralSeq.root, 'viral_seq', 'util', 'drm_list.json')
        )
      )

      unless drm_data[options]
        abort "Input option `#{options}` for ViralSeq::DRMs.sdrm_hash not supported. Program aborted.\nSupported type of mutations for '#{drm_data.keys.join(", ")}' only."
      end

      drm_data[options].each_with_object({}) do |record, sdrm|
        sdrm[record["position"]] = [record["wild-type"], record["mutations"]]
      end
    end # end of #sdrm_hash

    # Export the SDRM positions of one DRM list as an Array of Hashes that is
    # ready to be serialized to JSON.
    #
    # @param (see #sdrm_hash)
    # @return [Array<Hash>] one Hash per position with the keys :position,
    #   :wildtypeCodon and :mutationCodons

    def self.sdrm_json(options)
      ViralSeq::DRMs.sdrm_hash(options).map do |pos, muts|
        {
          position: pos,
          wildtypeCodon: muts[0],
          mutationCodons: muts[1]
        }
      end
    end # end of #sdrm_json
  end
end
|
data/lib/viral_seq/sequence.rb
CHANGED
@@ -136,7 +136,32 @@ module ViralSeq
|
|
136
136
|
end
|
137
137
|
end
|
138
138
|
return out_hash
|
139
|
-
end # end of #
|
139
|
+
end # end of #sdrm
|
140
|
+
|
141
|
+
# Similar to #sdrm, but takes a DRM list as a parameter instead of a DRM class name.
|
142
|
+
|
143
|
+
# Check the translated sequence against one DRM list.
#
# Every listed position is looked up (1-based) in #aa_array. The entry at
# that position is compared character by character with the listed mutations,
# so an entry holding more than one amino acid letter is reported with every
# listed mutation it contains. Positions beyond the end of #aa_array are skipped.
#
# @param drm_list_single_type [Hash] position => [ 'wild-type', ['mutations'] ]
# @return [Hash] position => [ 'wild-type', 'detected mutation letter(s)' ] for
#   the positions where a listed mutation was found
def check_drm(drm_list_single_type)
  residues = aa_array

  drm_list_single_type.each_with_object({}) do |(position, mut), out_hash|
    wt_aa = mut[0]
    mut_aas = mut[1]
    test_aa = residues[position - 1]

    # the sequence does not cover this position
    next if test_aa.nil?

    mut_detected = test_aa.chars & mut_aas
    out_hash[position] = [wt_aa, mut_detected.join] unless mut_detected.empty?
  end
end
|
140
165
|
|
141
166
|
# HIV sequence locator function, resembling HIV Sequence Locator from LANL
|
142
167
|
# # current version only supports nucleotide sequence, not for amino acid sequence.
|
data/lib/viral_seq/tcs_core.rb
CHANGED
@@ -126,7 +126,7 @@ module ViralSeq
|
|
126
126
|
name_array.each do |name|
|
127
127
|
tag = parser_file_name(name)[:tag]
|
128
128
|
if name !~ /\.fastq\Z|\.fastq\.gz\Z/
|
129
|
-
|
129
|
+
name_array.delete(name)
|
130
130
|
elsif tag.count("R1") == 0 and tag.count("R2") == 0
|
131
131
|
errors[:no_region_tag] << name
|
132
132
|
elsif tag.count("R1") > 0 and tag.count("R2") > 0
|
data/lib/viral_seq/tcs_dr.rb
CHANGED
@@ -5,7 +5,7 @@ module ViralSeq
|
|
5
5
|
# run `tcs --dr_params [VERSION]` to pull the params json string for each version of DR.
|
6
6
|
module TcsDr
|
7
7
|
PARAMS = {
|
8
|
-
"v1" => {:platform_error_rate=>0.
|
8
|
+
"v1" => {:platform_error_rate=>0.01,
|
9
9
|
:primer_pairs=>
|
10
10
|
[{:region=>"RT",
|
11
11
|
:cdna=>
|
@@ -68,7 +68,7 @@ module ViralSeq
|
|
68
68
|
:ref_end=>7205..7210,
|
69
69
|
:indel=>true,
|
70
70
|
:trim=>false},
|
71
|
-
{:region=>"
|
71
|
+
{:region=>"CA",
|
72
72
|
:cdna=>
|
73
73
|
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
74
74
|
:forward=>
|
@@ -86,7 +86,7 @@ module ViralSeq
|
|
86
86
|
]
|
87
87
|
},
|
88
88
|
|
89
|
-
"v2" => {:platform_error_rate=>0.
|
89
|
+
"v2" => {:platform_error_rate=>0.01,
|
90
90
|
:primer_pairs=>
|
91
91
|
[{:region=>"RT",
|
92
92
|
:cdna=>
|
@@ -149,7 +149,7 @@ module ViralSeq
|
|
149
149
|
:ref_end=>7205..7210,
|
150
150
|
:indel=>true,
|
151
151
|
:trim=>false},
|
152
|
-
{:region=>"
|
152
|
+
{:region=>"CA",
|
153
153
|
:cdna=>
|
154
154
|
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
155
155
|
:forward=>
|
@@ -165,7 +165,89 @@ module ViralSeq
|
|
165
165
|
:indel=>true,
|
166
166
|
:trim=>false}
|
167
167
|
]
|
168
|
-
}
|
168
|
+
},
|
169
|
+
|
170
|
+
"v3" => {:platform_error_rate=>0.01,
|
171
|
+
:primer_pairs=>
|
172
|
+
[{:region=>"RT",
|
173
|
+
:cdna=>
|
174
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTAAGGAATGGAGGTTCTTTCTGATG",
|
175
|
+
:forward=>
|
176
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGGCCATTGACAGAAGAAAAAATAAAAGC",
|
177
|
+
:majority=>0,
|
178
|
+
:end_join=>true,
|
179
|
+
:end_join_option=>1,
|
180
|
+
:overlap=>0,
|
181
|
+
:TCS_QC=>true,
|
182
|
+
:ref_genome=>"HXB2",
|
183
|
+
:ref_start=>2648,
|
184
|
+
:ref_end=>3209,
|
185
|
+
:indel=>true,
|
186
|
+
:trim=>false},
|
187
|
+
{:region=>"PR",
|
188
|
+
:cdna=>
|
189
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNTTAACCTTTGGGCCATCCATTCC",
|
190
|
+
:forward=>
|
191
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTCAGAGCAGACCAGAGCCAACAGCCCCA",
|
192
|
+
:majority=>0,
|
193
|
+
:end_join=>true,
|
194
|
+
:end_join_option=>3,
|
195
|
+
:TCS_QC=>true,
|
196
|
+
:ref_genome=>"HXB2",
|
197
|
+
:ref_start=>0,
|
198
|
+
:ref_end=>2591,
|
199
|
+
:indel=>true,
|
200
|
+
:trim=>true,
|
201
|
+
:trim_ref=>"HXB2",
|
202
|
+
:trim_ref_start=>2253,
|
203
|
+
:trim_ref_end=>2549},
|
204
|
+
{:region=>"IN",
|
205
|
+
:cdna=>
|
206
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCACAATCAKCACCTGCCATCTG",
|
207
|
+
:forward=>"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGCAGAAGTTATYCCAGCAGAAACA",
|
208
|
+
:majority=>0,
|
209
|
+
:end_join=>true,
|
210
|
+
:end_join_option=>3,
|
211
|
+
:TCS_QC=>true,
|
212
|
+
:ref_genome=>"HXB2",
|
213
|
+
:ref_start=>4509,
|
214
|
+
:ref_end=>5048,
|
215
|
+
:indel=>true,
|
216
|
+
:trim=>false},
|
217
|
+
{:region=>"V1V3",
|
218
|
+
:cdna=>
|
219
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCCATTTTGCTYTAYTRABVTTACAATRTGC",
|
220
|
+
:forward=>
|
221
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNTTATGGGATCAAAGCCTAAAGCCATGTGTA",
|
222
|
+
:majority=>0,
|
223
|
+
:end_join=>true,
|
224
|
+
:end_join_option=>1,
|
225
|
+
:overlap=>0,
|
226
|
+
:TCS_QC=>true,
|
227
|
+
:ref_genome=>"HXB2",
|
228
|
+
:ref_start=>6585,
|
229
|
+
:ref_end=>7205..7210,
|
230
|
+
:indel=>true,
|
231
|
+
:trim=>false},
|
232
|
+
{:region=>"CA",
|
233
|
+
:cdna=>
|
234
|
+
"GTGACTGGAGTTCAGACGTGTGCTCTTCCGATCTNNNNNNNNNNNCAGTCAACAAGGTTTCTGTCATCCAATTTTTTAC",
|
235
|
+
:forward=>
|
236
|
+
"GCCTCCCTCGCGCCATCAGAGATGTGTATAAGAGACAGNNNNGTCAGCCAAAATTACCCTATAGTGC",
|
237
|
+
:majority=>0,
|
238
|
+
:end_join=>true,
|
239
|
+
:end_join_option=>1,
|
240
|
+
:overlap=>0,
|
241
|
+
:TCS_QC=>true,
|
242
|
+
:ref_genome=>"HXB2",
|
243
|
+
:ref_start=>1196,
|
244
|
+
:ref_end=>1725,
|
245
|
+
:indel=>true,
|
246
|
+
:trim=>false}
|
247
|
+
]
|
248
|
+
},
|
249
|
+
|
250
|
+
|
169
251
|
}
|
170
252
|
|
171
253
|
end
|