viral_seq 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a13ba7912ee87511c2ecf19f07256d3a129661c6d7e180d57ecd1e34978386e6
4
- data.tar.gz: 61e5ed6b423f0b64c53a6bb8e8ec3801bf7e093e4d0741bd71bf9fbfa24f1b55
3
+ metadata.gz: df8d50f2dfbf0f2e7e1efcf66c8a91c286c7b5029533b725a4a33219882748bb
4
+ data.tar.gz: 4061c3875d4629025d1ccc216a54fdb7a011d397408a3ecb15125475e9f262e9
5
5
  SHA512:
6
- metadata.gz: f18d03220190bf1479ed29bd4d4b83777ffe5216951d38a91dd2afdc6c07b516883a8694291106b6fee2693a246b8a3c6a824786527cd03730f28f6777fa3231
7
- data.tar.gz: 7fe146b081a7b633de963ed632bdcb548c71d1f401e227109d8745d23ad770d2099a2aa50bc4553a9450b260b7206892ed2a898d9154764aebe4094f38faeb44
6
+ metadata.gz: a52087ced9fe258ef5bab4449b90e964ff9a557292dc1ce679aae03a56bd2570fdf1221e7026fec2b1ccb49ad2a9ff076338a397982e47c46877e2cdfb4e6d2e
7
+ data.tar.gz: 792cb9424fd46d536d0b95cfc90914a8548ee5ea6d1c3efe45cccd1d01c6dbd6b7a7ee0ba1be010bd6cf7a3ea201f4850803c28f16889b5286e1e458a774c8f1
data/README.md CHANGED
@@ -10,46 +10,55 @@ Specifically for Primer-ID sequencing and HIV drug resistance analysis.
10
10
 
11
11
  ## Usage
12
12
 
13
- Load all ViralSeq classes by requiring 'viral_seq.rb'
13
+ #### Load all ViralSeq classes by requiring 'viral_seq.rb'
14
14
 
15
15
  #!/usr/bin/env ruby
16
16
  require 'viral_seq'
17
+
18
+ #### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
19
+
20
+ $ locator -i sequence.fasta -o sequence.fasta.csv
17
21
 
18
22
  ## Some Examples
19
23
 
20
- ### Load nucleotide sequences from a FASTA format sequence file
24
+ #### Load nucleotide sequences from a FASTA format sequence file
21
25
 
22
26
  my_seqhash = ViralSeq::SeqHash.fa('my_seq_file.fasta')
23
27
 
24
- ### Make an alignment (using MUSCLE)
28
+ #### Make an alignment (using MUSCLE)
25
29
 
26
30
  aligned_seqhash = my_seqhash.align
27
31
 
28
- ### Filter nucleotide sequences with the reference coordinates (HIV Protease)
32
+ #### Filter nucleotide sequences with the reference coordinates (HIV Protease)
29
33
 
30
34
  qc_seqhash = aligned_seqhash.hiv_seq_qc(2253, 2549, false, :HXB2)
31
35
 
32
- ### Further filter out sequences with Apobec3g/f hypermutations
36
+ #### Further filter out sequences with Apobec3g/f hypermutations
33
37
 
34
38
  qc_seqhash = qc_seqhash.a3g
35
39
 
36
- ### Calculate nucleotide diversity π
40
+ #### Calculate nucleotide diversity π
37
41
 
38
42
  qc_seqhash.pi
39
43
 
40
- ### Calculate cut-off for minority variants based on Poisson model
44
+ #### Calculate cut-off for minority variants based on Poisson model
41
45
 
42
46
  cut_off = qc_seqhash.pm
43
47
 
44
- ### Examine for drug resistance mutations for HIV PR region
48
+ #### Examine for drug resistance mutations for HIV PR region
45
49
 
46
50
  qc_seqhash.sdrm_hiv_pr(cut_off)
47
51
 
48
- ### Use executable `locator` to get the coordinates of the sequences on HIV/SIV reference genome from a FASTA file through a terminal
52
+ ## Updates
49
53
 
50
- $ locator -i sequence.fasta -o sequence.fasta.csv
54
+ Version 1.0.6-07232019:
51
55
 
52
- ## Updates
56
+ 1. Several methods added to ViralSeq::SeqHash, including
57
+ ViralSeq::SeqHash#size
58
+ ViralSeq::SeqHash#+
59
+ ViralSeq::SeqHash#write_nt_fa
60
+ ViralSeq::SeqHash#mutation
61
+ 2. Updated documentation and rspec samples.
53
62
 
54
63
  Version 1.0.5-07112019:
55
64
 
@@ -5,6 +5,8 @@ module ViralSeq
5
5
  # functions to identify SDRMs from a ViralSeq::SeqHash object at HIV PR region.
6
6
  # works for MPID-DR protocol (dx.doi.org/10.17504/protocols.io.useewbe)
7
7
  # PR codon 1-99
8
+ # RT codon 34-122 (HXB2 2650-2914) and 152-236 (3001-3257)
9
+ # IN codon 53-174 (HXB2 4384-4751)
8
10
  # @param cutoff [Integer] cut-off for minimal abundance of a mutation to be called as valid mutation,
9
11
  # can be obtained using ViralSeq::SeqHash#poisson_minority_cutoff function
10
12
  # @return [Array] three elements `[point_mutation_list, linkage_list, report_list]`
@@ -166,6 +166,39 @@ module ViralSeq
166
166
  alias_method :array, :new_from_array
167
167
  end
168
168
 
169
+ # the size of nt sequence hash of the SeqHash object
170
+ # @return [Integer] size of nt sequence hash of the SeqHash object
171
+
172
+ def size
173
+ self.dna_hash.size
174
+ end
175
+
176
+ # combine SeqHash objects
177
+ # @param sh2 [ViralSeq::SeqHash] another SeqHash
178
+ # @return [ViralSeq::SeqHash] combined SeqHash
179
+
180
+ def +(sh2)
181
+ new_seqhash = ViralSeq::SeqHash.new
182
+ new_seqhash.dna_hash = self.dna_hash.merge(sh2.dna_hash)
183
+ new_seqhash.aa_hash = self.aa_hash.merge(sh2.aa_hash)
184
+ new_seqhash.title = self.title + "_with_" + sh2.title
185
+ new_seqhash.file = self.file + "," + sh2.file
186
+ return new_seqhash
187
+ end
188
+
189
+ # write the nt sequences to a FASTA format file
190
+ # @param file [String] path to the FASTA output file
191
+ # @return [NilClass]
192
+
193
+ def write_nt_fa(file)
194
+ File.open(file, 'w') do |f|
195
+ self.dna_hash.each do |k,v|
196
+ f.puts k
197
+ f.puts v
198
+ end
199
+ end
200
+ end
201
+
169
202
  # generate sequences in relaxed sequential phylip format from a ViralSeq::SeqHash object
170
203
  # @return [String] relaxed sequential phylip format in a String object
171
204
  # @example convert fasta format to relaxed sequential phylip format
@@ -748,7 +781,7 @@ module ViralSeq
748
781
  s.rc!
749
782
  loc2 = s.locator(ref_option)
750
783
  loc1[2] >= loc2[2] ? (direction = :+; loc = loc1): (direction = :-; loc = loc2)
751
-
784
+
752
785
  names.each do |name|
753
786
  out_array << ([title, name, ref_option.to_s, direction.to_s] + loc)
754
787
  end
@@ -999,6 +1032,21 @@ module ViralSeq
999
1032
  end
1000
1033
 
1001
1034
 
1035
+ # mutate @dna_hash based on the error_rate
1036
+ # @param error_rate [Float] error rate used to mutate sequences.
1037
+ # @return [ViralSeq::SeqHash] new SeqHash object of mutated sequences.
1038
+
1039
+ def mutation(error_rate = 0.01)
1040
+ new_seqhash = ViralSeq::SeqHash.new
1041
+ dna = {}
1042
+ self.dna_hash.each do |name, seq|
1043
+ dna[name + '_mut-' + error_rate.to_s] = seq.mutation(error_rate)
1044
+ end
1045
+ new_seqhash.dna_hash = dna
1046
+ new_seqhash.title = self.title + "_mut-" + error_rate.to_s
1047
+ new_seqhash.file = self.file
1048
+ return new_seqhash
1049
+ end
1002
1050
 
1003
1051
 
1004
1052
 
@@ -2,5 +2,5 @@
2
2
  # version info and history
3
3
 
4
4
  module ViralSeq
5
- VERSION = "1.0.5"
5
+ VERSION = "1.0.6"
6
6
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: viral_seq
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Shuntai Zhou
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2019-07-11 00:00:00.000000000 Z
12
+ date: 2019-07-23 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler