most_frequent_seq 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +7 -0
  2. data/lib/most_frequent_seq.rb +110 -0
  3. metadata +48 -0
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 75fa8faad532a7542135ec0564a69d8d9a8d78001b2bb82dd365f9d020e8b54f
4
+ data.tar.gz: a2867365be614d19bba1227fd74b737a61c571cdd708369230f95f9f5860373c
5
+ SHA512:
6
+ metadata.gz: 9787d33933b14263996017f39be246e4fec356688b5d31fe509f784a2772000fbe3690840e16e2e7477531a4a11b01e5b746eb5985d9c55d0fdbbba1c1ebee08
7
+ data.tar.gz: fbc1963bcffb51c1694efad0d71713b49251b6e5f74d6062b98dad6b4c0818c989367ab298afb7f48dff4e11349b185038e0364368b40b0e17590d531143346b
@@ -0,0 +1,110 @@
1
+ # SEQUENCE FREQUENCY ANALYSIS - DNA most_frequent_seq - Consensus Reference Builder
2
+
3
+ ##############################################################
4
+ # MIT License
5
+
6
+ # Copyright (c) [2020] [ZACHARY L. DWIGHT]
7
+
8
+ # Permission is hereby granted, free of charge, to any person obtaining a copy
9
+ # of this software and associated documentation files (the "Software"), to deal
10
+ # in the Software without restriction, including without limitation the rights
11
+ # to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12
+ # copies of the Software, and to permit persons to whom the Software is
13
+ # furnished to do so, subject to the following conditions:
14
+
15
+ # The above copyright notice and this permission notice shall be included in all
16
+ # copies or substantial portions of the Software.
17
+
18
+ # THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19
+ # IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20
+ # FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21
+ # AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22
+ # LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23
+ # OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
24
+ # SOFTWARE.
25
+ ##############################################################
26
+ #
27
+ #
28
+ # Use for creating a consensus (reference) sequence from an array of ALIGNED sequences
29
+
30
+ #EXAMPLE
31
+ #require 'most_frequent_seq'
32
+ #seqs = ['tgactgactgatcgatcgatcgnaaa','tgactgactgatcgatcgatcgaagg','tgactgactgatcgatcgatcgaggg','tgactggctgatcgatcgatcgaccc','tgactg-ctgatcgctcgatcgaccg','agactgactgancgctcgatcgccct']
33
+ #puts CONSENSUS::Sequences.calcConsensus(seqs)
34
+ # >> TGACTGACTGATCGATCGATCGACCG
35
+
36
+
37
module CONSENSUS
  # Builds a consensus (most frequent symbol per column) sequence from a set
  # of sequences that are compared position by position.
  class Sequences
    # Symbols that are tallied at each position. The order matters: when two
    # symbols are equally frequent, the one LATER in this list wins, which is
    # the same tie-break the original `each_with_index.max` produced.
    SYMBOLS = %w[A G C T N - U].freeze

    # Emitted for a column in which no sequence contributes a symbol from
    # SYMBOLS (for example only IUPAC ambiguity codes, or every sequence is
    # too short). Previously such a column silently came out as 'U'.
    UNKNOWN = 'N'.freeze

    # Computes the consensus of +seqs+.
    #
    # Matching is case-insensitive; the result is always upper-case. The
    # number of columns examined is the length of the FIRST sequence.
    # Sequences shorter than that simply contribute nothing to the columns
    # they do not reach, and characters outside SYMBOLS are ignored.
    #
    # @param seqs [Array<String>] sequences to compare column by column
    # @return [String] a new string with one symbol per column of seqs[0];
    #   empty when +seqs+ is nil or empty
    def self.calcConsensus(seqs)
      # Guard: the original dereferenced seqs[0] unconditionally and raised
      # NoMethodError for an empty array.
      return String.new if seqs.nil? || seqs.empty?

      # Upcase each sequence once instead of once per column.
      upper = seqs.map(&:upcase)
      width = upper.first.length

      (0...width).map do |col|
        counts = Array.new(SYMBOLS.length, 0)

        upper.each do |seq|
          slot = SYMBOLS.index(seq[col]) # nil for unknown chars / past the end
          counts[slot] += 1 if slot
        end

        best = counts.max
        if best.zero?
          UNKNOWN
        else
          # rindex => last symbol holding the maximum, preserving the
          # established "later symbol wins a tie" behaviour.
          SYMBOLS[counts.rindex(best)]
        end
      end.join
    end
  end
end
metadata ADDED
@@ -0,0 +1,48 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: most_frequent_seq
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Zachary L. Dwight
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2020-11-20 00:00:00.000000000 Z
12
+ dependencies: []
13
+ description: A Ruby Gem used for calculating a consensus (most frequent) DNA sequence
14
+ from an array of ALIGNED sequences. Useful for bioinformatics pipelines to create
15
+ a reference sequence when using another sequence aligning utility. Also, helpful
16
+ in finding consensus regions for primer design or viral genotyping.
17
+ email: zach.dwight@path.utah.edu
18
+ executables: []
19
+ extensions: []
20
+ extra_rdoc_files: []
21
+ files:
22
+ - lib/most_frequent_seq.rb
23
+ homepage: https://dna-utah.org
24
+ licenses:
25
+ - MIT
26
+ metadata: {}
27
+ post_install_message:
28
+ rdoc_options: []
29
+ require_paths:
30
+ - lib
31
+ required_ruby_version: !ruby/object:Gem::Requirement
32
+ requirements:
33
+ - - ">="
34
+ - !ruby/object:Gem::Version
35
+ version: '0'
36
+ required_rubygems_version: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: '0'
41
+ requirements: []
42
+ rubyforge_project:
43
+ rubygems_version: 2.7.6
44
+ signing_key:
45
+ specification_version: 4
46
+ summary: Calculate the most frequent (consensus) DNA sequence from an array of ALIGNED
47
+ sequences
48
+ test_files: []