viral_seq 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a13ba7912ee87511c2ecf19f07256d3a129661c6d7e180d57ecd1e34978386e6
4
- data.tar.gz: 61e5ed6b423f0b64c53a6bb8e8ec3801bf7e093e4d0741bd71bf9fbfa24f1b55
3
+ metadata.gz: df8d50f2dfbf0f2e7e1efcf66c8a91c286c7b5029533b725a4a33219882748bb
4
+ data.tar.gz: 4061c3875d4629025d1ccc216a54fdb7a011d397408a3ecb15125475e9f262e9
5
5
  SHA512:
6
- metadata.gz: f18d03220190bf1479ed29bd4d4b83777ffe5216951d38a91dd2afdc6c07b516883a8694291106b6fee2693a246b8a3c6a824786527cd03730f28f6777fa3231
7
- data.tar.gz: 7fe146b081a7b633de963ed632bdcb548c71d1f401e227109d8745d23ad770d2099a2aa50bc4553a9450b260b7206892ed2a898d9154764aebe4094f38faeb44
6
+ metadata.gz: a52087ced9fe258ef5bab4449b90e964ff9a557292dc1ce679aae03a56bd2570fdf1221e7026fec2b1ccb49ad2a9ff076338a397982e47c46877e2cdfb4e6d2e
7
+ data.tar.gz: 792cb9424fd46d536d0b95cfc90914a8548ee5ea6d1c3efe45cccd1d01c6dbd6b7a7ee0ba1be010bd6cf7a3ea201f4850803c28f16889b5286e1e458a774c8f1
data/README.md CHANGED
@@ -10,46 +10,55 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
10
10
 
11
11
  ## Usage
12
12
 
13
- Load all ViralSeq classes by requiring 'viral_seq.rb'
13
+ #### Load all ViralSeq classes by requiring 'viral_seq.rb'
14
14
 
15
15
  #!/usr/bin/env ruby
16
16
  require 'viral_seq'
17
+
18
+ #### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
19
+
20
+ $ locator -i sequence.fasta -o sequence.fasta.csv
17
21
 
18
22
  ## Some Examples
19
23
 
20
- ### Load nucleotide sequences from a FASTA format sequence file
24
+ #### Load nucleotide sequences from a FASTA format sequence file
21
25
 
22
26
  my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
23
27
 
24
- ### Make an alignment (using MUSCLE)
28
+ #### Make an alignment (using MUSCLE)
25
29
 
26
30
  aligned_seqhash = my_seqhash.align
27
31
 
28
- ### Filter nucleotide sequences with the reference coordinates (HIV Protease)
32
+ #### Filter nucleotide sequences with the reference coordinates (HIV Protease)
29
33
 
30
34
  qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
31
35
 
32
- ### Further filter out sequences with Apobec3g/f hypermutations
36
+ #### Further filter out sequences with Apobec3g/f hypermutations
33
37
 
34
38
  qc_seqhash = qc_seqhash.a3g
35
39
 
36
- ### Calculate nucleotide diversity π
40
+ #### Calculate nucleotide diversity π
37
41
 
38
42
  qc_seqhash.pi
39
43
 
40
- ### Calculate cut-off for minority variants based on Poisson model
44
+ #### Calculate cut-off for minority variants based on Poisson model
41
45
 
42
46
  cut_off = qc_seqhash.pm
43
47
 
44
- ### Examine for drug resistance mutations for HIV PR region
48
+ #### Examine for drug resistance mutations for HIV PR region
45
49
 
46
50
  qc_seqhash.sdrm_hiv_pr(cut_off)
47
51
 
48
- ### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
52
+ ## Updates
49
53
 
50
- $ locator -i sequence.fasta -o sequence.fasta.csv
54
+ Version 1.0.6-07232019:
51
55
 
52
- ## Updates
56
+ 1. Several methods added to ViralSeq::SeqHash, including
57
+ ViralSeq::SeqHash#size
58
+ ViralSeq::SeqHash#+
59
+ ViralSeq::SeqHash#write_nt_fa
60
+ ViralSeq::SeqHash#mutation
61
+ 2. Updated documentation and rspec samples.
53
62
 
54
63
  Version 1.0.5-07112019:
55
64
 
@@ -5,6 +5,8 @@ module ViralSeq
5
5
  # functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
6
6
  # works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
7
7
  # PR codon 1-99
8
+ # RT codon 34-122 (HXB2 2650-2914) and 152-236 (HXB2 3001-3257)
9
+ # IN codon 53-174 (HXB2 4384-4751)
8
10
  # @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
9
11
  # can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
10
12
  # @return [Array] three elements `[point_mutation_list, linkage_list, report_list]`
@@ -166,6 +166,39 @@ module ViralSeq
166
166
  alias_method :array, :new_from_array
167
167
  end
168
168
 
169
+ # the size of nt sequence hash of the SeqHash object
170
+ # @return [Integer] size of nt sequence hash of the SeqHash object
171
+
172
+ def size
173
+ self.dna_hash.size
174
+ end
175
+
176
+ # combine SeqHash objects
177
+ # @param sh2 [ViralSeq::SeqHash] another SeqHash
178
+ # @return [ViralSeq::SeqHash] combined SeqHash
179
+
180
+ def +(sh2)
181
+ new_seqhash = ViralSeq::SeqHash.new
182
+ new_seqhash.dna_hash = self.dna_hash.merge(sh2.dna_hash)
183
+ new_seqhash.aa_hash = self.aa_hash.merge(sh2.aa_hash)
184
+ new_seqhash.title = self.title + "_with_" + sh2.title
185
+ new_seqhash.file = self.file + "," + sh2.file
186
+ return new_seqhash
187
+ end
188
+
189
+ # write the nt sequences to a FASTA format file
190
+ # @param file [String] path to the FASTA output file
191
+ # @return [NilClass]
192
+
193
+ def write_nt_fa(file)
194
+ File.open(file, 'w') do |f|
195
+ self.dna_hash.each do |k,v|
196
+ f.puts k
197
+ f.puts v
198
+ end
199
+ end
200
+ end
201
+
169
202
  # generate sequences in relaxed sequential phylip format from a ViralSeq::SeqHash object
170
203
  # @return [String] relaxed sequential phylip format in a String object
171
204
  # @example convert fasta format to relaxed sequential phylip format
@@ -748,7 +781,7 @@ module ViralSeq
748
781
  s.rc!
749
782
  loc2 = s.locator(ref_option)
750
783
  loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
751
-
784
+
752
785
  names.each do |name|
753
786
  out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
754
787
  end
@@ -999,6 +1032,21 @@ module ViralSeq
999
1032
  end
1000
1033
 
1001
1034
 
1035
+ # mutate @dna_hash based on the error_rate
1036
+ # @param error_rate [Float] error rate used to mutate sequences.
1037
+ # @return [ViralSeq::SeqHash] new SeqHash object of mutated sequences.
1038
+
1039
+ def mutation(error_rate = 0.01)
1040
+ new_seqhash = ViralSeq::SeqHash.new
1041
+ dna = {}
1042
+ self.dna_hash.each do |name, seq|
1043
+ dna[name + '_mut-' + error_rate.to_s] = seq.mutation(error_rate)
1044
+ end
1045
+ new_seqhash.dna_hash = dna
1046
+ new_seqhash.title = self.title + "_mut-" + error_rate.to_s
1047
+ new_seqhash.file = self.file
1048
+ return new_seqhash
1049
+ end
1002
1050
 
1003
1051
 
1004
1052
 
@@ -2,5 +2,5 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.0.5"
5
+ VERSION = "1.0.6"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-07-11 00:00:00.000000000 Z
12
+ date: 2019-07-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler