bio-jaspar 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 835fa6794d4fa377301992ede3fc0fc43d0013c0
4
+ data.tar.gz: 98b718e4a029fec3389078213fd7ab5613f89c38
5
+ SHA512:
6
+ metadata.gz: 29c60f81959d0880c2b690df72537953816cd5b571c635c835b8207d699cd372d429e01c7f17c69e02c146bc12679940dcb0f1b0fe5663e9a951fb78389a4ae5
7
+ data.tar.gz: a69f3fca22da7ccbc8c1e2138c809102828a208e3118f944c110986d5491ef7e1b2e33c5dc09d46547e5a2333b7161e50a942f251a32209bb79a9b8f7d8bee87
@@ -0,0 +1,5 @@
1
+ lib/**/*.rb
2
+ bin/*
3
+ -
4
+ features/**/*.feature
5
+ LICENSE.txt
@@ -0,0 +1,13 @@
1
+ language: ruby
2
+ rvm:
3
+ - 1.9.2
4
+ - 1.9.3
5
+ - jruby-19mode # JRuby in 1.9 mode
6
+
7
+ # - rbx-19mode
8
+ # - 1.8.7
9
+ # - jruby-18mode # JRuby in 1.8 mode
10
+ # - rbx-18mode
11
+
12
+ # uncomment this line if your project needs to run something other than `rake`:
13
+ # script: bundle exec rspec spec
data/Gemfile ADDED
@@ -0,0 +1,17 @@
1
# Fetch gems over HTTPS so downloads cannot be tampered with in transit.
source "https://rubygems.org"

# Add dependencies required to use your gem here.
# Example:
#   gem "activesupport", ">= 2.3.5"
gem "bio", ">= 1.4.2"
gem "mysql2", "~> 0.3.19"

# Add dependencies to develop your gem here.
# Include everything needed to run rake, tests, features, etc.
group :development do
  gem "shoulda", ">= 0"
  gem "rake", "~> 0.9.3"
  gem "rdoc", "~> 3.12"
  gem 'test-unit'
  gem "jeweler", "~> 2.0.1", :git => "https://github.com/technicalpickles/jeweler.git"
  gem "bundler", ">= 1.0.21"
end
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2015 Wasserman Lab
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -0,0 +1,147 @@
1
+ # bio-jaspar
2
+
3
+ ## Tools for JASPAR motif analysis
4
+
5
+ This gem provides methods for:
6
+
7
+ 1. Reading and writing sequence motifs in JASPAR format
8
+ 2. Accessing a JASPAR5 formatted database
9
+ 3. Comparing, searching, and analyzing motifs in sequences
10
+
11
+ <sup>*</sup> **Note:** The JASPAR motif analysis tools consist of several modules that are directly imported from the Bio.motifs package in BioPython. Namely, those modules/submodules are: Bio.motifs, Bio.motifs.matrix, Bio.motifs.thresholds, Bio.motifs.jaspar. The functionality of this gem will be identical to the aforementioned modules/submodules.
12
+
13
+
14
+ ## Installation
15
+
16
+ ```sh
17
+ gem install bio-jaspar
18
+ ```
19
+
20
+ ## Usage
21
+
22
+ ### Loading the gem
23
+
24
+ ```ruby
25
+ require 'bio-jaspar'
26
+ ```
27
+
28
+ ### Loading a motif/motifs from a JASPAR database
29
+
30
+ A connection to the JASPAR database is made by creating a JASPAR5 instance.
31
+
32
+ ```ruby
33
+ # Substitute the database credentials!
34
+ db = Bio::Jaspar::JASPAR5.new(
35
+ :host => <db_host.org>,
36
+ :name => <db_name>,
37
+ :user => <db_user>,
38
+ :password => <db_password>
39
+ )
40
+ ```
41
+
42
+ Now, a motif can be retrieved by the matrix_id
43
+
44
+ ```ruby
45
+ m = db.fetch_motif_by_id("MA0049")
46
+ puts m.to_s
47
+ ```
48
+
49
+ Or multiple motifs can be retrieved by various criteria
50
+
51
+ ```ruby
52
+ motifs = db.fetch_motifs(
53
+ :collection => "CORE",
54
+ :tax_group => ["fungi", "vertebrate"],
55
+ :tf_class => "Helix-Turn-Helix",
56
+ :min_ic => 2
57
+ )
58
+ motifs.each { |m| puts m.to_s } # do something with each motif
59
+ ```
60
+
61
+ ### Motif analysis
62
+
63
+ Many methods are available for motif analysis. Here are some examples:
64
+
65
+ ```ruby
66
+ m = db.fetch_motif_by_id("MA0049")
67
+
68
+ # Consensus sequence
69
+ m.consensus # BioRuby Sequence object
70
+ puts m.consensus
71
+
72
+ # Anticonsensus sequence
73
+ m.anticonsensus # BioRuby Sequence object
74
+ puts m.anticonsensus
75
+
76
+ # Reverse complement motif
77
+ m.reverse_complement # Bio::Motif::Motifs object
78
+
79
+ # Pseudocounts
80
+ m.pseudocounts
81
+
82
+ # Background
83
+ m.background
84
+
85
+ # Position weight matrix
86
+ m.pwm
87
+
88
+ # Position specific scoring matrix
89
+ m.pssm
90
+ ```
91
+
92
+ Matrix methods are also available. Here are some examples:
93
+
94
+ ```ruby
95
+ m = db.fetch_motif_by_id("MA0049")
96
+
97
+ # Maximum possible score for the given motif
98
+ m.pssm.max
99
+
100
+ # Minimum possible score for the given motif
101
+ m.pssm.min
102
+
103
+ # Expected value of the motif score
104
+ m.pssm.mean
105
+
106
+ # Standard deviation of the given motif score
107
+ m.pssm.std
108
+
109
+ # Find hits with the PWM score above given threshold
110
+ m.pssm.search(Bio::Sequence.auto("ACCTGCCTAAAAAA"), threshold = 0.5)
111
+ ```
112
+
113
+ ### Read/write Jaspar file
114
+
115
+ Already downloaded pfm, jaspar, sites files can be loaded/written using the Jaspar module
116
+
117
+ ```ruby
118
+ # Read a pfm file
119
+ f = File.open("test.pfm", "r")
120
+ Bio::Jaspar.read(f, "pfm")
121
+ f.close
122
+
123
+ # Write motifs into a jaspar file
124
+ motifs = db.fetch_motifs(
125
+ :collection => "CORE",
126
+ :tax_group => ["fungi", "vertebrate"],
127
+ :tf_class => "Helix-Turn-Helix",
128
+ :min_ic => 2
129
+ )
130
+ File.open("test.jaspar", "w") do |f|
131
+ f.write(Bio::Jaspar.write(motifs, "jaspar"))
132
+ end
133
+ ```
134
+
135
+ Please refer to the rdoc for full information on all available methods & classes.
136
+
137
+ ## Project home page
138
+
139
+ For information on the source tree, documentation, examples, issues and
140
+ how to contribute, see
141
+
142
+ http://github.com/wassermanlab/jaspar-bioruby
143
+
144
+ ## Copyright
145
+
146
+ See LICENSE.txt for further details.
147
+
@@ -0,0 +1,126 @@
1
+ = bio-jaspar
2
+
3
+ == Tools for JASPAR motif analysis
4
+
5
+ This gem provides methods for:
6
+
7
+ 1. Reading and writing sequence motifs in JASPAR format
8
+ 2. Accessing a JASPAR5 formatted database
9
+ 3. Comparing, searching, and analyzing motifs in sequences
10
+
11
+ \* *Note:* The JASPAR motif analysis tools consist of several modules that are directly imported from the Bio.motifs package in BioPython. Namely, those modules/submodules are: Bio.motifs, Bio.motifs.matrix, Bio.motifs.thresholds, Bio.motifs.jaspar. The functionality of this gem will be identical to the aforementioned modules/submodules.
12
+
13
+ == Installation
14
+
15
+ gem install bio-jaspar
16
+
17
+ == Usage
18
+
19
+ === Loading the gem
20
+
21
+ require 'bio-jaspar'
22
+
23
+ === Loading a motif/motifs from a JASPAR database
24
+
25
+ A connection to the JASPAR database is made by creating a JASPAR5 instance.
26
+
27
+ # Substitute the database credentials!
28
+ db = Bio::Jaspar::JASPAR5.new(
29
+ :host => <db_host.org>,
30
+ :name => <db_name>,
31
+ :user => <db_user>,
32
+ :password => <db_password>
33
+ )
34
+
35
+ Now, a motif can be retrieved by the matrix_id
36
+
37
+ m = db.fetch_motif_by_id("MA0049")
38
+ puts m.to_s
39
+
40
+ Or multiple motifs can be retrieved by various criteria
41
+
42
+ motifs = db.fetch_motifs(
43
+ :collection => "CORE",
44
+ :tax_group => ["fungi", "vertebrate"],
45
+ :tf_class => "Helix-Turn-Helix",
46
+ :min_ic => 2
47
+ )
48
+
49
+ === Motif analysis
50
+
51
+ Many methods are available for motif analysis. Here are some examples:
52
+
53
+ m = db.fetch_motif_by_id("MA0049")
54
+
55
+ # Consensus sequence
56
+ m.consensus # BioRuby Sequence object
57
+ puts m.consensus
58
+
59
+ # Anticonsensus sequence
60
+ m.anticonsensus # BioRuby Sequence object
61
+ puts m.anticonsensus
62
+
63
+ # Reverse complement motif
64
+ m.reverse_complement # Bio::Motif::Motifs object
65
+
66
+ # Pseudocounts
67
+ m.pseudocounts
68
+
69
+ # Background
70
+ m.background
71
+
72
+ # Position weight matrix
73
+ m.pwm
74
+
75
+ # Position specific scoring matrix
76
+ m.pssm
77
+
78
+ Matrix methods are also available. Here are some examples:
79
+
80
+ m = db.fetch_motif_by_id("MA0049")
81
+
82
+ # Maximum possible score for the given motif
83
+ m.pssm.max
84
+
85
+ # Minimum possible score for the given motif
86
+ m.pssm.min
87
+
88
+ # Expected value of the motif score
89
+ m.pssm.mean
90
+
91
+ # Standard deviation of the given motif score
92
+ m.pssm.std
93
+
94
+ # Find hits with the PWM score above given threshold
95
+ m.pssm.search(Bio::Sequence.auto("ACCTGCCTAAAAAA"), threshold = 0.5)
96
+
97
+ === Read/write Jaspar file
98
+
99
+ Already downloaded pfm, jaspar, sites files can be loaded/written using the Jaspar module
100
+
101
+ # Read a pfm file
102
+ f = File.open("test.pfm", "r")
103
+ Bio::Jaspar.read(f, "pfm")
104
+ f.close
105
+
106
+ # Write motifs into a jaspar file
107
+ motifs = db.fetch_motifs(
108
+ :collection => "CORE",
109
+ :tax_group => ["fungi", "vertebrate"],
110
+ :tf_class => "Helix-Turn-Helix",
111
+ :min_ic => 2
112
+ )
113
+ File.open("test.jaspar", "w") do |f|
114
+ f.write(Bio::Jaspar.write(motifs, "jaspar"))
115
+ end
116
+
117
+ == Project home page
118
+
119
+ For information on the source tree, documentation, issues and how to contribute, see
120
+
121
+ http://github.com/wassermanlab/jaspar-bioruby
122
+
123
+ == Copyright
124
+
125
+ See LICENSE.txt for further details.
126
+
@@ -0,0 +1,45 @@
1
# encoding: utf-8

# Rake tasks for the bio-jaspar gem: packaging (jeweler), tests and rdoc.

require 'rubygems'
require 'bundler'

# Activate the gems declared in the Gemfile before anything else is required;
# on failure, print the Bundler message and exit with Bundler's status code.
begin
  Bundler.setup(:default, :development)
rescue Bundler::BundlerError => e
  $stderr.puts e.message
  $stderr.puts "Run `bundle install` to install missing gems"
  exit e.status_code
end
require 'rake'

require 'jeweler'
Jeweler::Tasks.new do |gem|
  # gem is a Gem::Specification... see http://guides.rubygems.org/specification-reference/ for more options
  gem.name = "bio-jaspar"
  gem.homepage = "http://github.com/wassermanlab/jaspar-bioruby"
  gem.license = "MIT"
  gem.summary = %Q{Tools for JASPAR motifs in BioRuby}
  gem.description = %Q{Basic tools for parsing, searching, and comparing JASPAR motifs; Based on Bio.motifs module in Biopython}
  gem.authors = ["Jessica Lee", "Wasserman Lab"]
  # dependencies defined in Gemfile
end
Jeweler::RubygemsDotOrgTasks.new

# `rake test` runs every test/**/test_*.rb file with lib/ and test/ on the
# load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

task :default => :test

# `rake rdoc` builds the API documentation from the README files and lib/.
require 'rdoc/task'
RDoc::Task.new do |rdoc|
  # Strip the newline a VERSION file normally ends with so that it does not
  # end up inside the documentation title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "bio-jaspar #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
  rdoc.main = "README.rdoc"
end
@@ -0,0 +1,15 @@
1
+ # Please require your code below, respecting the naming conventions in the
2
+ # bioruby directory tree.
3
+ #
4
+ # For example, say you have a plugin named bio-plugin, the only uncommented
5
+ # line in this file would be
6
+ #
7
+ # require 'bio/bio-plugin/plugin'
8
+ #
9
+ # In this file only require other files. Avoid other source code.
10
+
11
+ require 'bio-jaspar/motifs.rb'
12
+ require 'bio-jaspar/matrix.rb'
13
+ require 'bio-jaspar/thresholds.rb'
14
+ require 'bio-jaspar/jaspar.rb'
15
+ require 'bio-jaspar/db.rb'
@@ -0,0 +1,432 @@
1
+ #--
2
+ # = bio-jaspar/jaspar.rb
3
+ #
4
+ # Copyright:: (C) 2015-2015 Wasserman Lab
5
+ # License:: Ruby License
6
+ #
7
+ # JASPAR 2014 module
8
+ #
9
+ # A direct import of Bio.motifs.jaspar module in Biopython
10
+ #++
11
+
12
+ require 'bio'
13
+
14
+ module Bio # :nodoc:
15
+ # == JASPAR 2014 module
16
+ #
17
+ # Provides read access to a JASPAR5 formatted database.
18
+ #
19
+ # This module is a direct import of Bio.motifs.jaspar module in Biopython.
20
+ # The following document contains excerpts from Bio.motifs.jaspar module
21
+ # in Biopython.
22
+ module Jaspar
23
+
24
+ # Unambiguous DNA bases
25
+ DNA = Bio::Motifs::Alphabet.new.IUPAC_unambiguous_dna
26
+
27
# JASPAR OUTPUT specific DNA bases, in the row order used for printouts.
# Frozen so that the shared constant cannot be modified by accident.
JASPAR_ORDERED_DNA_LETTERS = ["A", "C", "G", "T"].freeze
29
+
30
# A subclass of Bio::Motifs::Motif used to represent a JASPAR profile.
#
# Additional metadata information are stored if available. The metadata
# availability depends on the source of the JASPAR motif (a 'pfm' format
# file, a 'jaspar' format file or a JASPAR database).
#
# <i>A direct import of Bio.motifs.jaspar module in Biopython</i>
class Motif < Bio::Motifs::Motif
  attr_accessor :matrix_id, :collection, :tf_class, :tf_family, :species,
                :tax_group, :acc, :data_type, :medline, :pazar_id, :comment

  # Optional metadata printed by #to_s, as [label, instance variable] pairs
  # in output order.
  METADATA_FIELDS = [
    ["Collection",      :@collection],
    ["TF class",        :@tf_class],
    ["TF family",       :@tf_family],
    ["Species",         :@species],
    ["Taxonomic group", :@tax_group],
    ["Accession",       :@acc],
    ["Data type used",  :@data_type],
    ["Medline",         :@medline],
    ["PAZAR ID",        :@pazar_id],
    ["Comments",        :@comment]
  ].freeze

  # Construct a JASPAR Motif instance
  #
  # * +matrix_id+ - JASPAR matrix ID (may be nil)
  # * +name+      - motif name (may be nil)
  # * +opts+      - optional settings; :alphabet (defaults to DNA),
  #   :instances and :counts are passed on to the parent constructor, the
  #   remaining keys (:collection, :tf_class, :tf_family, :species,
  #   :tax_group, :acc, :data_type, :medline, :pazar_id, :comment) are
  #   stored as metadata and default to nil
  def initialize(matrix_id, name, opts = {})
    opts = {
      :alphabet => DNA,
      :instances => nil,
      :counts => nil,
      :collection => nil,
      :tf_class => nil,
      :tf_family => nil,
      :species => nil,
      :tax_group => nil,
      :acc => nil,
      :data_type => nil,
      :medline => nil,
      :pazar_id => nil,
      :comment => nil
    }.merge(opts)

    super(opts[:alphabet], opts[:instances], opts[:counts])

    @name = name
    @matrix_id = matrix_id
    @collection = opts[:collection]
    @tf_class = opts[:tf_class]
    @tf_family = opts[:tf_family]
    @species = opts[:species]
    @tax_group = opts[:tax_group]
    @acc = opts[:acc]
    @data_type = opts[:data_type]
    @medline = opts[:medline]
    @pazar_id = opts[:pazar_id]
    @comment = opts[:comment]
  end

  # Return the JASPAR base matrix ID, or nil when the motif has no
  # matrix ID.
  def base_id
    return nil if @matrix_id.nil?

    Jaspar.split_jaspar_id(@matrix_id).first
  end

  # Return the JASPAR matrix version, or nil when the motif has no matrix
  # ID or the ID carries no version part.
  def version
    return nil if @matrix_id.nil?

    Jaspar.split_jaspar_id(@matrix_id).last
  end

  # Return a string representation of the JASPAR profile.
  #
  # We choose to provide only the filled metadata information.
  def to_s
    text = "TF name\t#{@name}\nMatrix ID\t#{@matrix_id}\n"

    METADATA_FIELDS.each do |label, ivar|
      value = instance_variable_get(ivar)
      next unless value

      # Species is printed comma-separated; Array() also accepts a single
      # species given as a plain String.
      value = Array(value).join(",") if ivar == :@species
      text << "#{label}\t#{value}\n"
    end

    text << "Matrix:\n#{counts}\n\n"
  end

  # Return the hash key corresponding to the JASPAR profile
  #
  # Note: We assume the unicity of matrix IDs
  def hash
    @matrix_id.hash
  end

  # Compare two JASPAR motifs for equality. Two motifs are equal if their
  # matrix_ids match. Objects that do not respond to +matrix_id+ are never
  # equal to a motif.
  def ==(other)
    other.respond_to?(:matrix_id) && @matrix_id == other.matrix_id
  end

  # Hash lookup uses #eql? together with #hash, so both must agree with #==
  # for motifs to work as Hash keys or with Array#uniq.
  alias_method :eql?, :==
end
155
+
156
# Represent a list of JASPAR motifs.
#
# <i>A direct import of Bio.motifs.jaspar module in Biopython</i>
#
# ==== Attributes
#
# * +version+ - The JASPAR version used
class Record < Array
  # The JASPAR version used; nil until assigned.
  attr_accessor :version

  # Construct an empty record instance with no version set.
  def initialize
    super()
    @version = nil
  end

  # Return a string of all JASPAR motifs in the list, one motif string per
  # entry, joined with newlines.
  def to_s
    map(&:to_s).join("\n")
  end

  # Return the list of matrices as a hash (ruby equivalent of dict) mapping
  # each motif's matrix_id to the motif itself. When several motifs share a
  # matrix_id, the last one wins.
  def to_h
    each_with_object({}) { |motif, by_id| by_id[motif.matrix_id] = motif }
  end
end
185
+
186
# Read motif(s) from a file in one of several different JASPAR formats.
#
# +format+ is matched case-insensitively against "pfm", "sites" and
# "jaspar"; the matching private reader is called with +handle+ and its
# record is returned. Any other format raises ArgumentError.
def Jaspar.read(handle, format)
  format = format.downcase

  case format
  when "pfm"    then return _read_pfm(handle)
  when "sites"  then return _read_sites(handle)
  when "jaspar" then return _read_jaspar(handle)
  end

  raise ArgumentError, "Unknown JASPAR format #{format}"
end
206
+
207
# Return the representation of motifs in "pfm" or "jaspar" format.
#
# * +motifs+ - list of motifs; each must respond to +counts+ (indexable by
#   the letters A, C, G, T) and, for "jaspar", to +matrix_id+ and +name+
# * +format+ - "pfm" or "jaspar", matched case-insensitively (as in
#   Jaspar.read)
#
# For "pfm" only the first motif is written, as four rows of counts. For
# "jaspar" every motif is written as a ">id name" header followed by four
# "LETTER [counts]" rows. Returns the text as a String and raises
# ArgumentError for any other format.
def self.write(motifs, format)
  letters = JASPAR_ORDERED_DNA_LETTERS
  # Formats one row of counts as fixed-width, space-separated numbers.
  format_row = lambda { |values| values.map { |value| "%6.2f" % value }.join(" ") }

  case format.to_s.downcase
  when "pfm"
    counts = motifs[0].counts
    letters.map { |letter| "#{format_row.call(counts[letter])}\n" }.join("")
  when "jaspar"
    motifs.map do |m|
      counts = m.counts
      rows = letters.map { |letter| "#{letter} [#{format_row.call(counts[letter])}]\n" }
      ">#{m.matrix_id} #{m.name}\n" + rows.join("")
    end.join("")
  else
    raise ArgumentError, "Unknown JASPAR format #{format}"
  end
end
238
+
239
# Return pseudocounts of a given JASPAR motif
#
# * +motif+ - object responding to +alphabet+ (with +letters+),
#   +background+, +length+ and +counts+ (indexable by letter, then by
#   position)
#
# The pseudocount of a letter is the square root of the average number of
# counts per position, multiplied by the letter's normalized background
# weight. A nil or empty background means a uniform background. The motif's
# own background is not modified.
#
# Returns a Hash mapping each alphabet letter to its pseudocount.
def self.calculate_pseudocounts(motif)
  letters = motif.alphabet.letters
  counts = motif.counts

  total_counts = (0...motif.length).inject(0) do |sum, position|
    sum + letters.map { |letter| counts[letter][position].to_f }.inject(:+)
  end
  sq_nb_instances = Math.sqrt(total_counts / motif.length)

  background = motif.background
  weights =
    if background && !background.empty?
      Hash[background]
    else
      Hash[letters.sort.map { |letter| [letter, 1.0] }]
    end

  # Normalize in floating point: with Integer weights, integer division
  # would truncate every normalized weight to 0.
  weight_sum = weights.values.inject(:+).to_f

  letters.each_with_object({}) do |letter, pseudocounts|
    pseudocounts[letter] = sq_nb_instances * (weights[letter] / weight_sum)
  end
end
268
+
269
# Utility function to split a JASPAR matrix ID into its components.
#
# Components are base ID and version, both returned as Strings, e.g.
# 'MA0047.2' is returned as ['MA0047', '2']. An ID that does not consist
# of exactly two dot-separated parts is returned whole with a nil version,
# e.g. 'MA0047' gives ['MA0047', nil]. A nil ID gives [nil, nil].
def self.split_jaspar_id(id)
  return [nil, nil] if id.nil?

  parts = id.split(".")
  parts.length == 2 ? parts : [id, nil]
end
288
+
289
+ # Private methods
290
+ private
291
+
292
# Read the motif from a JASPAR .pfm file (PRIVATE).
#
# The first four lines of +handle+ are taken as the count rows for A, C, G
# and T, in that order. A row may start with its own letter, which is
# dropped before the remaining fields are converted to floats.
#
# Returns a Record holding the single motif that was read.
def Jaspar._read_pfm(handle)
  rows = JASPAR_ORDERED_DNA_LETTERS.zip(handle)

  counts = rows.each_with_object({}) do |(base, row), table|
    fields = row.split
    fields = fields[1..-1] if fields[0] == base
    table[base] = fields.map(&:to_f)
  end

  motif = Motif.new(nil, nil, :alphabet => DNA, :counts => counts)
  motif.mask = "*" * motif.length

  Record.new << motif
end
313
+
314
# Read the motif from JASPAR .sites file (PRIVATE).
#
# Lines are consumed in pairs: a header line starting with ">" followed by
# a sequence line. Reading stops at the first pair whose header does not
# start with ">". From each sequence line only the characters that equal
# their own upcased form are kept as the instance.
#
# Lines are paired with each_slice, so +handle+ may be any object that
# supports +each+ (an IO, a StringIO, an Array of lines, ...) rather than
# only one whose iterators share a read cursor.
#
# Returns a Record holding the single motif built from the instances.
# Raises ArgumentError when a header has no sequence line after it.
def Jaspar._read_sites(handle)
  alphabet = DNA
  instances = []

  handle.to_enum(:each).each_slice(2) do |header, site_line|
    break unless header.start_with?(">")

    if site_line.nil?
      raise ArgumentError, "Missing sequence line after header #{header.strip.inspect}"
    end

    site = site_line.strip.each_char.select { |c| c == c.upcase }.join
    instances << Bio::Sequence.auto(site)
  end

  instances = Bio::Motifs::Instances.new(instances, alphabet)
  motif = Motif.new(nil, nil, :alphabet => alphabet, :instances => instances)
  motif.mask = "*" * motif.length
  record = Record.new
  record << motif

  record
end
345
+
346
# Read motifs from a JASPAR formatted file (PRIVATE).
#
# The input is one or more records, each a ">" header line followed by
# four rows of counts. Two row layouts are recognised:
#
# - JASPAR 2010 matrix_only format (letter and bracketed counts)::
#
#     >MA0001.1 AGL3
#     A [ 0 3 79 40 66 48 65 11 65 0 ]
#     C [94 75 4 3 1 2 5 2 3 3 ]
#     G [ 1 0 3 4 1 0 5 3 28 88 ]
#     T [ 2 19 11 50 29 47 22 81 1 6 ]
#
# - JASPAR 2010-2014 PFMs format (bare counts, rows taken as A, C, G, T)::
#
#     >MA0001.1 AGL3
#     0 3 79 40 66 48 65 11 65 0
#     94 75 4 3 1 2 5 2 3 3
#     1 0 3 4 1 0 5 3 28 88
#     2 19 11 50 29 47 22 81 1 6
#
# A header without a name uses the identifier as the name. A motif is
# added to the record each time four rows have been collected. Blank lines
# are ignored.
#
# Returns a Record with the motifs in file order.
def Jaspar._read_jaspar(handle)
  alphabet = DNA
  record = Record.new

  head_pat = /^>\s*(\S+)(\s+(\S+))?/
  row_pat_long = /\s*([ACGT])\s*\[\s*(.*)\s*\]/
  row_pat_short = /\s*(.+)\s*/
  nucleotides = ["A","C","G","T"]

  identifier = nil
  name = nil
  counts = {}
  row_count = 0

  handle.each do |raw_line|
    text = raw_line.strip

    if (header = head_pat.match(text))
      identifier = header[1]
      name = header[3] || identifier
      next
    end

    # Work out which letter the row belongs to and where its numbers are.
    if (long_row = row_pat_long.match(text))
      letter = long_row[1]
      numbers = long_row[2]
    elsif (short_row = row_pat_short.match(text))
      letter = nucleotides[row_count]
      numbers = short_row[1]
    else
      next
    end

    counts[letter] = numbers.split.map(&:to_f)
    row_count += 1
    next unless row_count == 4

    # Four rows collected: emit the motif and start over for the next one.
    record << Motif.new(identifier,
                        name,
                        :alphabet => alphabet,
                        :counts => counts)
    identifier = nil
    name = nil
    counts = {}
    row_count = 0
  end

  record
end
428
+
429
+ private_class_method :_read_pfm, :_read_sites, :_read_jaspar
430
+
431
+ end
432
+ end
@@ -0,0 +1,18 @@
1
+ require 'rubygems'
2
+ require 'bundler'
3
+ begin
4
+ Bundler.setup(:default, :development)
5
+ rescue Bundler::BundlerError => e
6
+ $stderr.puts e.message
7
+ $stderr.puts "Run `bundle install` to install missing gems"
8
+ exit e.status_code
9
+ end
10
+ require 'test/unit'
11
+ require 'shoulda'
12
+
13
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
14
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
15
+ require 'bio-jaspar'
16
+
17
+ class Test::Unit::TestCase
18
+ end
@@ -0,0 +1,265 @@
1
+ require 'helper'
2
+ require 'bio-jaspar'
3
+ require 'bio'
4
+
5
+ class TestBioJaspar < Test::Unit::TestCase
6
+ context 'JASPAR module' do
7
+ should "correctly read jaspar formatted file" do
8
+ f = File.open('test/data/jaspar-test.jaspar', "r")
9
+ motifs = Bio::Motifs.parse(f, "jaspar")
10
+ f.close
11
+
12
+ # Test first motif in the set
13
+ corr_motifs_beg_counts = {
14
+ "A" => [3.0, 21.0, 25.0, 0.0, 0.0, 24.0, 1.0, 0.0],
15
+ "C" => [13.0, 1.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0],
16
+ "T" => [5.0, 3.0, 0.0, 25.0, 20.0, 0.0, 24.0, 23.0],
17
+ "G" => [4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0]
18
+ }
19
+ assert_equal corr_motifs_beg_counts, motifs[0].counts
20
+ assert_equal 8, motifs[0].length
21
+ assert_equal "HAT5", motifs[0].name
22
+ assert_equal "MA0008.1", motifs[0].matrix_id
23
+
24
+ # Test the last motif in the set
25
+ corr_motifs_end_counts = {
26
+ "A" => [4.0, 5.0, 5.0, 3.0, 0.0, 0.0, 25.0, 26.0, 0.0, 0.0, 26.0, 0.0, 0.0, 17.0, 0.0, 5.0, 2.0, 0.0, 0.0],
27
+ "C" => [2.0, 3.0, 4.0, 8.0, 1.0, 6.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 25.0, 1.0, 6.0, 3.0, 5.0],
28
+ "T" => [2.0, 3.0, 5.0, 5.0, 0.0, 20.0, 0.0, 0.0, 26.0, 0.0, 0.0, 26.0, 23.0, 0.0, 0.0, 7.0, 4.0, 3.0, 0.0],
29
+ "G" => [1.0, 3.0, 2.0, 2.0, 25.0, 0.0, 1.0, 0.0, 0.0, 26.0, 0.0, 0.0, 3.0, 9.0, 1.0, 4.0, 0.0, 4.0, 3.0]
30
+ }
31
+ assert_equal corr_motifs_end_counts, motifs[-1].counts
32
+ assert_equal 19, motifs[-1].length
33
+ assert_equal "ATHB9", motifs[-1].name
34
+ assert_equal "MA0573.1", motifs[-1].matrix_id
35
+ end
36
+
37
+ should "correctly read pfm formatted file" do
38
+ f = File.open('test/data/jaspar-test.pfm', "r")
39
+ motif = Bio::Motifs.parse(f, "pfm")
40
+ f.close
41
+
42
+ corr_counts = {
43
+ "A" => [3.0, 21.0, 25.0, 0.0, 0.0, 24.0, 1.0, 0.0],
44
+ "C" => [13.0, 1.0, 0.0, 0.0, 5.0, 0.0, 0.0, 0.0],
45
+ "T" => [5.0, 3.0, 0.0, 25.0, 20.0, 0.0, 24.0, 23.0],
46
+ "G" => [4.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 2.0]
47
+ }
48
+ assert_equal 1, motif.length # should only read one motif
49
+ assert_equal 8, motif[0].length
50
+ assert_equal corr_counts, motif[0].counts
51
+ end
52
+
53
+ should "correctly read sites formatted file" do
54
+ f = File.open('test/data/jaspar-test.sites', "r")
55
+ motif = Bio::Motifs.parse(f, "sites")
56
+ f.close
57
+
58
+ corr_counts = {
59
+ "A" => [15, 4, 41, 36, 7, 19, 3],
60
+ "C" => [11, 35, 1, 2, 29, 14, 22],
61
+ "T" => [7, 2, 0, 1, 1, 3, 3],
62
+ "G" => [10, 2, 1, 4, 6, 7, 15]
63
+ }
64
+ assert_equal 7, motif[0].length
65
+ assert_equal corr_counts, motif[0].counts
66
+
67
+ # Check the first and last sequence motifs
68
+ assert_equal "ccaaccc", motif[0].instances[0].to_s
69
+ assert_equal "gtatctc", motif[0].instances[-1].to_s
70
+ end
71
+
72
+ end
73
+
74
+ # Once the reads pass the test, load the test files and setup the test
75
+ setup do
76
+ f = File.open('test/data/jaspar-test.jaspar', "r")
77
+ @motifs = Bio::Motifs.parse(f, "jaspar")
78
+ @motif = @motifs.first
79
+ f.close
80
+ end
81
+
82
+ context 'JASPAR module' do
83
+ should "correctly convert Motifs into jaspar formatted string" do
84
+ corr_jaspar = ">MA0008.1 HAT5\nA [ 3.00 21.00 25.00 0.00 0.00 24.00 1.00 0.00]\nC [ 13.00 1.00 0.00 0.00 5.00 0.00 0.00 0.00]\nG [ 4.00 0.00 0.00 0.00 0.00 1.00 0.00 2.00]\nT [ 5.00 3.00 0.00 25.00 20.00 0.00 24.00 23.00]\n>MA0027.1 En1\nA [ 4.00 5.00 3.00 0.00 4.00 3.00 3.00 2.00 1.00 1.00 1.00]\nC [ 1.00 2.00 0.00 0.00 0.00 0.00 0.00 1.00 3.00 4.00 6.00]\nG [ 2.00 2.00 7.00 2.00 3.00 7.00 0.00 4.00 3.00 1.00 1.00]\nT [ 3.00 1.00 0.00 8.00 3.00 0.00 7.00 3.00 3.00 4.00 2.00]\n>MA0046.1 HNF1A\nA [ 5.00 1.00 1.00 1.00 20.00 16.00 1.00 8.00 14.00 2.00 0.00 13.00 8.00 5.00]\nC [ 0.00 0.00 0.00 0.00 0.00 2.00 0.00 2.00 0.00 0.00 4.00 1.00 8.00 13.00]\nG [ 14.00 20.00 0.00 0.00 0.00 1.00 0.00 4.00 1.00 0.00 0.00 3.00 3.00 0.00]\nT [ 2.00 0.00 20.00 20.00 1.00 2.00 20.00 7.00 6.00 19.00 17.00 4.00 2.00 3.00]\n"
85
+ jaspar = Bio::Jaspar.write(@motifs[0, 3], "jaspar")
86
+ assert_equal corr_jaspar, jaspar
87
+ end
88
+
89
+ should "correctly convert Motifs into pfm formatted string" do
90
+ corr_pfm = " 3.00 21.00 25.00 0.00 0.00 24.00 1.00 0.00\n 13.00 1.00 0.00 0.00 5.00 0.00 0.00 0.00\n 4.00 0.00 0.00 0.00 0.00 1.00 0.00 2.00\n 5.00 3.00 0.00 25.00 20.00 0.00 24.00 23.00\n"
91
+ pfm = Bio::Jaspar.write(@motifs, "pfm")
92
+ assert_equal corr_pfm, pfm
93
+ end
94
+
95
+ should "correctly calculate pseudocounts" do
96
+ corr_pc = {"A" => 1.25, "C" => 1.25, "T" => 1.25, "G" => 1.25}
97
+ pc = Bio::Jaspar.calculate_pseudocounts(@motifs[0])
98
+ assert_equal corr_pc, pc
99
+ end
100
+ end
101
+
102
+ context 'JASPAR Motif class' do
103
+ should "return a correct length" do
104
+ assert_equal 8, @motif.length
105
+ end
106
+
107
+ should "return a correct consensus sequence" do
108
+ assert_equal Bio::Sequence.auto("CAATTATT").to_s, @motif.consensus.to_s
109
+ end
110
+
111
+ should "return a correct anticonsensus sequence" do
112
+ assert_equal Bio::Sequence.auto("AGGGGTGA").to_s, @motif.anticonsensus.to_s
113
+ end
114
+
115
+ should "return a correct degenerate consensus" do
116
+ assert_equal Bio::Sequence.auto("CAATTATT").to_s, @motif.degenerate_consensus.to_s
117
+ end
118
+
119
+ should "return a correct reverse complement" do
120
+ corr_rc_counts = {
121
+ "A" => [23.0, 24.0, 0.0, 20.0, 25.0, 0.0, 3.0, 5.0],
122
+ "C" => [2.0, 0.0, 1.0, 0.0, 0.0, 0.0, 0.0, 4.0],
123
+ "T" => [0.0, 1.0, 24.0, 0.0, 0.0, 25.0, 21.0, 3.0],
124
+ "G" => [0.0, 0.0, 0.0, 5.0, 0.0, 0.0, 1.0, 13.0]
125
+ }
126
+ rc = @motif.reverse_complement
127
+ assert_equal 0.13, rc.counts.gc_content
128
+ assert_equal 8, rc.length
129
+ assert_equal corr_rc_counts, rc.counts
130
+ end
131
+
132
+ should "return a correct mask" do
133
+ assert_equal [1,1,1,1,1,1,1,1], @motif.mask
134
+ end
135
+
136
+ should "return correct pseudocounts" do
137
+ corr_pc = {"A" => 0.0, "C" => 0.0, "T" => 0.0, "G" => 0.0}
138
+ assert_equal corr_pc, @motif.pseudocounts
139
+ end
140
+
141
+ should "return a correct background" do
142
+ corr_bg = {"A" => 0.25, "C" => 0.25, "T" => 0.25, "G" => 0.25}
143
+ assert_equal corr_bg, @motif.background
144
+ end
145
+
146
+ should "return a correct pwm" do
147
+ corr_pwm = {
148
+ "A" => [0.12, 0.84, 1.0, 0.0, 0.0, 0.96, 0.04, 0.0],
149
+ "C" => [0.52, 0.04, 0.0, 0.0, 0.2, 0.0, 0.0, 0.0],
150
+ "T" => [0.2, 0.12, 0.0, 1.0, 0.8, 0.0, 0.96, 0.92],
151
+ "G" => [0.16, 0.0, 0.0, 0.0, 0.0, 0.04, 0.0, 0.08]
152
+ }
153
+ assert_equal corr_pwm, @motif.pwm
154
+ end
155
+
156
+ should "return a correct pssm" do
157
+ corr_pssm = {
158
+ "A" => [-1.0588936890535685, 1.7484612330040357, 2.0, -Float::INFINITY, -Float::INFINITY, 1.9411063109464317, -2.643856189774725, -Float::INFINITY],
159
+ "C" => [1.0565835283663676, -2.643856189774725, -Float::INFINITY, -Float::INFINITY, -0.3219280948873623, -Float::INFINITY, -Float::INFINITY, -Float::INFINITY],
160
+ "T" => [-0.3219280948873623, -1.0588936890535685, -Float::INFINITY, 2.0, 1.6780719051126378, -Float::INFINITY, 1.9411063109464317, 1.8797057662822885],
161
+ "G" => [-0.6438561897747247, -Float::INFINITY, -Float::INFINITY, -Float::INFINITY, -Float::INFINITY, -2.643856189774725, -Float::INFINITY, -1.6438561897747248]
162
+ }
163
+ assert_equal corr_pssm, @motif.pssm
164
+ end
165
+
166
+ should "correctly format motif in jaspar format" do
167
+ corr_jaspar_str = ">MA0008.1 HAT5\nA [ 3.00 21.00 25.00 0.00 0.00 24.00 1.00 0.00]\nC [ 13.00 1.00 0.00 0.00 5.00 0.00 0.00 0.00]\nG [ 4.00 0.00 0.00 0.00 0.00 1.00 0.00 2.00]\nT [ 5.00 3.00 0.00 25.00 20.00 0.00 24.00 23.00]\n"
168
+ assert_equal corr_jaspar_str, @motif.format("jaspar")
169
+ end
170
+
171
+ should "correctly format motif in pfm format" do
172
+ corr_pfm_str = " 3.00 21.00 25.00 0.00 0.00 24.00 1.00 0.00\n 13.00 1.00 0.00 0.00 5.00 0.00 0.00 0.00\n 4.00 0.00 0.00 0.00 0.00 1.00 0.00 2.00\n 5.00 3.00 0.00 25.00 20.00 0.00 24.00 23.00\n"
173
+ assert_equal corr_pfm_str, @motif.format("pfm")
174
+ end
175
+
176
+ end
177
+
178
+ context "matrix" do
179
+ setup do # fixture setup for the tests in the "matrix" context
180
+ @motif2 = @motifs[1] # entry at index 1 of @motifs; the Pearson test compares its PSSM against @motif's
181
+ @non_inf_dist = @motifs[15].pssm.distribution # distribution of entry 15's PSSM; read by the distribution and threshold tests
182
+ end
183
+
184
+ should "correctly return maximum possible score" do
185
+ assert_equal 14.245035054658192, @motif.pssm.max
186
+ end
187
+
188
+ should "correctly return the minimum possible score" do
189
+ assert_equal -Float::INFINITY, @motif.pssm.min
190
+ end
191
+
192
+ should "correctly refuse fraction gc content calculation on pssm" do
193
+ assert_raise do # no exception class is named — NOTE(review): presumably any raised error satisfies this; confirm and pin the expected class
194
+ @motif.pssm.gc_content # asking a PSSM for its GC content is expected to raise
195
+ end
196
+ end
197
+
198
+ should "correctly calculate the mean" do
199
+ assert_equal 11.882147864914165, @motif.pssm.mean
200
+ end
201
+
202
+ should "correctly calculate the std" do
203
+ assert_equal 2.315187013634166, @motif.pssm.std
204
+ end
205
+
206
+ should "correctly calculates the PWM score for the given sequence" do
207
+ corr_res = [-Float::INFINITY, -Float::INFINITY, -Float::INFINITY, 4.7579989] # four expected scores; only the last one is finite
208
+ res = @motif.pssm.calculate(Bio::Sequence.auto("AGTTAATTAAG")).map{ |a| # normalise each returned score before comparing
209
+ if a.infinite? # infinite scores are passed through untouched
210
+ a
211
+ else
212
+ (a * (10 ** 7)).floor / (10.0 ** 7) # truncate (floor) to 7 decimal places so trailing Float digits do not affect the comparison
213
+ end
214
+ }
215
+ assert_equal corr_res, res
216
+ end
217
+
218
+ should "correctly search and return the position of the hits with PWM higher than threshold" do
219
+ corr_hits = [[3, 4.7579989]]
220
+ hits = @motif.pssm.search(Bio::Sequence.auto("AGTTAATTAAG")).map{ |a, b|
221
+ [a, (b * 10 ** 7).floor / (10.0 ** 7)]
222
+ }
223
+ assert_equal corr_hits, hits
224
+ end
225
+
226
+ should "correctly compare sequences using Pearson's correlation" do
227
+ corr_pearson = [0.024879199790793116, -10]
228
+ assert_equal corr_pearson, @motif.pssm.dist_pearson(@motif2.pssm)
229
+ end
230
+
231
+ should "correctly generate a distribution for non-infinite pssms" do
232
+ assert_equal -54.665224748002345, @non_inf_dist.min_score
233
+ assert_equal 15000, @non_inf_dist.n_points
234
+ assert_equal 77.64489601267111, @non_inf_dist.interval
235
+ assert_equal 0.005176671512278893, @non_inf_dist.step
236
+
237
+ corr_md_beg_100 = [3.2600762748340303e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.4153180022070797e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.61062211083769e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 3.782556497068055e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.075095343542538e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.269147502758849e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.410691430657807e-26, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 4.5132776385471104e-26, 0.0, 0.0, 0.0, 0.0, 4.620724355927226e-26, 0.0, 0.0, 0.0, 0.0, 0.0]
238
+ corr_md_end_100 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.006362547198123186, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.007703187540387484]
239
+ assert_equal corr_md_beg_100, @non_inf_dist.mo_density[0, 100]
240
+ assert_equal corr_md_end_100, @non_inf_dist.mo_density[@non_inf_dist.mo_density.length-100, 100]
241
+
242
+ corr_bd_beg_100 = [9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0]
243
+ corr_bd_end_100 = [0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 9.313225746154785e-10]
244
+ assert_equal corr_bd_beg_100, @non_inf_dist.bg_density[0, 100]
245
+ assert_equal corr_bd_end_100, @non_inf_dist.bg_density[@non_inf_dist.bg_density.length-100, 100]
246
+ end
247
+
248
+ should "correctly calculate the threshold for false positive rate" do
249
+ assert_equal -11.00517721344216, @non_inf_dist.threshold_fpr(0.1)
250
+ end
251
+
252
+ should "correctly calculate the threshold for false negative rate" do
253
+ assert_equal 8.655821190193073, @non_inf_dist.threshold_fnr(0.1)
254
+ end
255
+
256
+ should "correctly calculate the balanced threshold" do
257
+ assert_equal 0.3058500408872149, @non_inf_dist.threshold_balanced()
258
+ end
259
+
260
+ should "correctly calculate the patser threshold" do
261
+ assert_equal 11.435693792286841, @non_inf_dist.threshold_patser()
262
+ end
263
+
264
+ end
265
+ end
metadata ADDED
@@ -0,0 +1,171 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: bio-jaspar
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jessica Lee
8
+ - Wasserman Lab
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2015-09-30 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: bio
16
+ requirement: !ruby/object:Gem::Requirement
17
+ requirements:
18
+ - - ">="
19
+ - !ruby/object:Gem::Version
20
+ version: 1.4.2
21
+ type: :runtime
22
+ prerelease: false
23
+ version_requirements: !ruby/object:Gem::Requirement
24
+ requirements:
25
+ - - ">="
26
+ - !ruby/object:Gem::Version
27
+ version: 1.4.2
28
+ - !ruby/object:Gem::Dependency
29
+ name: mysql2
30
+ requirement: !ruby/object:Gem::Requirement
31
+ requirements:
32
+ - - "~>"
33
+ - !ruby/object:Gem::Version
34
+ version: 0.3.19
35
+ type: :runtime
36
+ prerelease: false
37
+ version_requirements: !ruby/object:Gem::Requirement
38
+ requirements:
39
+ - - "~>"
40
+ - !ruby/object:Gem::Version
41
+ version: 0.3.19
42
+ - !ruby/object:Gem::Dependency
43
+ name: shoulda
44
+ requirement: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: '0'
49
+ type: :development
50
+ prerelease: false
51
+ version_requirements: !ruby/object:Gem::Requirement
52
+ requirements:
53
+ - - ">="
54
+ - !ruby/object:Gem::Version
55
+ version: '0'
56
+ - !ruby/object:Gem::Dependency
57
+ name: rake
58
+ requirement: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - "~>"
61
+ - !ruby/object:Gem::Version
62
+ version: 0.9.3
63
+ type: :development
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ requirements:
67
+ - - "~>"
68
+ - !ruby/object:Gem::Version
69
+ version: 0.9.3
70
+ - !ruby/object:Gem::Dependency
71
+ name: rdoc
72
+ requirement: !ruby/object:Gem::Requirement
73
+ requirements:
74
+ - - "~>"
75
+ - !ruby/object:Gem::Version
76
+ version: '3.12'
77
+ type: :development
78
+ prerelease: false
79
+ version_requirements: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - "~>"
82
+ - !ruby/object:Gem::Version
83
+ version: '3.12'
84
+ - !ruby/object:Gem::Dependency
85
+ name: test-unit
86
+ requirement: !ruby/object:Gem::Requirement
87
+ requirements:
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: '0'
91
+ type: :development
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ - !ruby/object:Gem::Dependency
99
+ name: jeweler
100
+ requirement: !ruby/object:Gem::Requirement
101
+ requirements:
102
+ - - "~>"
103
+ - !ruby/object:Gem::Version
104
+ version: 2.0.1
105
+ type: :development
106
+ prerelease: false
107
+ version_requirements: !ruby/object:Gem::Requirement
108
+ requirements:
109
+ - - "~>"
110
+ - !ruby/object:Gem::Version
111
+ version: 2.0.1
112
+ - !ruby/object:Gem::Dependency
113
+ name: bundler
114
+ requirement: !ruby/object:Gem::Requirement
115
+ requirements:
116
+ - - ">="
117
+ - !ruby/object:Gem::Version
118
+ version: 1.0.21
119
+ type: :development
120
+ prerelease: false
121
+ version_requirements: !ruby/object:Gem::Requirement
122
+ requirements:
123
+ - - ">="
124
+ - !ruby/object:Gem::Version
125
+ version: 1.0.21
126
+ description: Basic tools for parsing, searching, and comparing JASPAR motifs; Based
127
+ on Bio.motifs module in Biopython
128
+ email:
129
+ executables: []
130
+ extensions: []
131
+ extra_rdoc_files:
132
+ - LICENSE.txt
133
+ - README.md
134
+ - README.rdoc
135
+ files:
136
+ - ".document"
137
+ - ".travis.yml"
138
+ - Gemfile
139
+ - LICENSE.txt
140
+ - README.md
141
+ - README.rdoc
142
+ - Rakefile
143
+ - lib/bio-jaspar.rb
144
+ - lib/bio-jaspar/jaspar.rb
145
+ - test/helper.rb
146
+ - test/test_bio-jaspar.rb
147
+ homepage: http://github.com/wassermanlab/jaspar-bioruby
148
+ licenses:
149
+ - MIT
150
+ metadata: {}
151
+ post_install_message:
152
+ rdoc_options: []
153
+ require_paths:
154
+ - lib
155
+ required_ruby_version: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - ">="
158
+ - !ruby/object:Gem::Version
159
+ version: '0'
160
+ required_rubygems_version: !ruby/object:Gem::Requirement
161
+ requirements:
162
+ - - ">="
163
+ - !ruby/object:Gem::Version
164
+ version: '0'
165
+ requirements: []
166
+ rubyforge_project:
167
+ rubygems_version: 2.4.6
168
+ signing_key:
169
+ specification_version: 4
170
+ summary: Tools for JASPAR motifs in BioRuby
171
+ test_files: []