opentox-ruby 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +4 -4
- data/Rakefile +35 -35
- data/VERSION +1 -1
- data/lib/algorithm.rb +220 -50
- data/lib/compound.rb +138 -73
- data/lib/dataset.rb +296 -192
- data/lib/environment.rb +44 -29
- data/lib/feature.rb +15 -0
- data/lib/model.rb +240 -112
- data/lib/opentox-ruby.rb +13 -0
- data/lib/opentox.rb +47 -0
- data/lib/overwrite.rb +72 -0
- data/lib/parser.rb +286 -0
- data/lib/rest_client_wrapper.rb +12 -12
- data/lib/serializer.rb +340 -0
- data/lib/task.rb +184 -101
- data/lib/validation.rb +58 -8
- metadata +41 -22
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= opentox-ruby
|
1
|
+
= opentox-ruby
|
2
2
|
|
3
3
|
Ruby wrapper for the OpenTox REST API (http://www.opentox.org)
|
4
4
|
|
@@ -10,14 +10,14 @@ Run the following if you haven't already:
|
|
10
10
|
|
11
11
|
Install the gem:
|
12
12
|
|
13
|
-
sudo gem install helma-opentox-ruby
|
13
|
+
sudo gem install helma-opentox-ruby
|
14
14
|
|
15
15
|
== Usage
|
16
16
|
|
17
17
|
- adjust the settings in $HOME/.opentox/config
|
18
|
-
- require 'opentox-ruby
|
18
|
+
- require 'opentox-ruby' in your ruby application
|
19
19
|
- consult the rdoc API documentation for details
|
20
20
|
|
21
21
|
== Copyright
|
22
22
|
|
23
|
-
Copyright (c) 2009 Christoph Helma. See LICENSE for details.
|
23
|
+
Copyright (c) 2009-2010 Christoph Helma. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -4,46 +4,46 @@ require 'rake'
|
|
4
4
|
begin
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "opentox-ruby
|
7
|
+
gem.name = "opentox-ruby"
|
8
8
|
gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
|
9
9
|
gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
|
10
10
|
gem.email = "helma@in-silico.ch"
|
11
|
-
gem.homepage = "http://github.com/helma/opentox-ruby
|
12
|
-
gem.authors = ["Christoph Helma, Martin Guetlein"]
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
11
|
+
gem.homepage = "http://github.com/helma/opentox-ruby"
|
12
|
+
gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
|
13
|
+
# dependencies
|
14
|
+
[ "sinatra",
|
15
|
+
"emk-sinatra-url-for",
|
16
|
+
"sinatra-respond_to",
|
17
|
+
"sinatra-static-assets",
|
18
|
+
"rest-client",
|
19
|
+
"rack",
|
20
|
+
"rack-contrib",
|
21
|
+
"rack-flash",
|
22
|
+
"nokogiri",
|
23
|
+
"rubyzip",
|
24
|
+
"roo",
|
25
|
+
"spreadsheet",
|
26
|
+
"google-spreadsheet-ruby",
|
27
|
+
"yajl-ruby",
|
28
|
+
"tmail",
|
29
|
+
"rinruby",
|
30
|
+
"rjb"
|
31
|
+
].each { |dep| gem.add_dependency dep }
|
32
|
+
[ "dm-core",
|
33
|
+
'dm-serializer',
|
34
|
+
'dm-timestamps',
|
35
|
+
'dm-types',
|
36
|
+
'dm-migrations',
|
37
|
+
"dm-mysql-adapter",
|
38
38
|
"dm-validations",
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
].each {|dep| gem.add_dependency dep, ">= 1" }
|
40
|
+
gem.add_dependency "haml", ">=3"
|
41
|
+
['jeweler'].each { |dep| gem.add_development_dependency dep }
|
42
|
+
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
|
43
|
+
#gem.files.include %w(lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*)
|
44
44
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
45
45
|
end
|
46
|
-
|
46
|
+
Jeweler::GemcutterTasks.new
|
47
47
|
rescue LoadError
|
48
48
|
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
49
49
|
end
|
@@ -81,7 +81,7 @@ Rake::RDocTask.new do |rdoc|
|
|
81
81
|
end
|
82
82
|
|
83
83
|
rdoc.rdoc_dir = 'rdoc'
|
84
|
-
rdoc.title = "opentox-ruby
|
84
|
+
rdoc.title = "opentox-ruby #{version}"
|
85
85
|
rdoc.rdoc_files.include('README*')
|
86
86
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
87
87
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
0.0.2
|
data/lib/algorithm.rb
CHANGED
@@ -1,82 +1,252 @@
|
|
1
|
+
# R integration
|
2
|
+
# workaround to initialize R non-interactively (former rinruby versions did this by default)
|
3
|
+
# avoids compiling R with X
|
4
|
+
R = nil
|
5
|
+
require "rinruby"
|
1
6
|
|
2
7
|
module OpenTox
|
8
|
+
|
9
|
+
# Wrapper for OpenTox Algorithms
|
3
10
|
module Algorithm
|
11
|
+
|
12
|
+
include OpenTox
|
13
|
+
|
14
|
+
# Run the algorithm by POSTing the given parameters to its URI.
# Acceptable parameters depend on the service; see the OpenTox API and the
# webservice documentation.
# @param [optional,Hash] params Algorithm parameters
# @return [String] URI of new resource (dataset, model, ...)
def run(params=nil)
  headers = {:accept => 'text/uri-list'}
  response = RestClientWrapper.post(@uri, headers, params)
  response.to_s
end
|
4
20
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def initialize(owl)
|
17
|
-
@title = owl.get("title")
|
18
|
-
@date = owl.get("date")
|
19
|
-
@uri = owl.uri
|
20
|
-
end
|
21
|
-
|
21
|
+
# Serialize this algorithm (its URI and metadata) as OWL-DL in RDF/XML.
# @return [application/rdf+xml] RDF/XML representation
def to_rdfxml
  serializer = Serializer::Owl.new
  serializer.add_algorithm(@uri, @metadata)
  serializer.to_rdfxml
end
|
28
|
+
|
29
|
+
# Generic Algorithm class, should work with all OpenTox webservices.
# Adds nothing of its own: all behaviour comes from the Algorithm mixin.
class Generic

  include Algorithm

end
|
23
33
|
|
24
|
-
|
34
|
+
# Fminer algorithms (https://github.com/amaunz/fminer2)
|
35
|
+
module Fminer
|
36
|
+
include Algorithm
|
37
|
+
|
38
|
+
# Backbone Refinement Class mining (http://bbrc.maunz.de/)
class BBRC

  include Fminer

  # Initialize bbrc algorithm: the URI is the configured "opentox-algorithm"
  # service joined with "fminer/bbrc"; metadata is loaded right afterwards.
  def initialize
    service_uri = File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
    super(service_uri)
    load_metadata
  end

end
|
25
47
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
48
|
+
# LAtent STructure Pattern Mining (http://last-pm.maunz.de)
class LAST

  include Fminer

  # Initialize last algorithm: the URI is the configured "opentox-algorithm"
  # service joined with "fminer/last"; metadata is loaded right afterwards.
  def initialize
    service_uri = File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
    super(service_uri)
    load_metadata
  end

end
|
31
57
|
|
32
|
-
def self.uri
|
33
|
-
File.join(@@config[:services]["opentox-algorithm"], "fminer")
|
34
|
-
end
|
35
58
|
end
|
36
59
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
60
|
+
# Create lazar prediction model
class Lazar

  include Algorithm

  # Initialize lazar algorithm: the URI is the configured "opentox-algorithm"
  # service joined with "lazar"; metadata is loaded right afterwards.
  def initialize
    service_uri = File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
    super(service_uri)
    load_metadata
  end

end
|
46
69
|
|
47
|
-
|
48
|
-
File.join(@@config[:services]["opentox-algorithm"], "lazar")
|
49
|
-
end
|
70
|
+
# Utility methods without dedicated webservices
|
50
71
|
|
51
|
-
|
72
|
+
# Similarity calculations
|
73
|
+
module Similarity
|
74
|
+
include Algorithm
|
52
75
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
76
|
+
# Tanimoto similarity
#
# Unweighted: |A ∩ B| / |A ∪ B|.
# Weighted: every feature contributes Algorithm.gauss(weights[feature])
# instead of 1 to both the intersection and the union sum.
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
# @return [Float] (Weighted) tanimoto similarity, 0.0 if no feature is shared
def self.tanimoto(features_a,features_b,weights=nil)
  common_features = features_a & features_b
  # no overlap (this also covers two empty feature lists): avoid 0/0
  return 0.0 if common_features.empty?

  all_features = (features_a + features_b).uniq
  if weights
    common_p_sum = common_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    all_p_sum = all_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    common_p_sum/all_p_sum
  else
    # divide the set sizes; the arrays themselves cannot be divided
    common_features.size.to_f/all_features.size
  end
end
|
67
|
-
|
68
|
-
|
98
|
+
|
99
|
+
# Euclidean similarity, computed as 1/(1+distance) over the properties
# present in both hashes. With weights, each difference is multiplied by
# Algorithm.gauss(weights[property]) before squaring.
# @param [Hash] properties_a Properties of first compound
# @param [Hash] properties_b Properties of second compound
# @param [optional, Hash] weights Weights for all properties
# @return [Float] (Weighted) euclidean similarity, 0.0 unless more than one property is shared
def self.euclidean(properties_a,properties_b,weights=nil)
  shared_keys = properties_a.keys & properties_b.keys
  return 0.0 unless shared_keys.size > 1

  squared_distance = shared_keys.inject(0) do |sum, key|
    delta = properties_a[key] - properties_b[key]
    delta = delta * Algorithm.gauss(weights[key]) if weights
    sum + delta**2
  end
  1/(1+Math.sqrt(squared_distance))
end
|
79
120
|
end
|
80
121
|
|
122
|
+
module Neighbors
|
123
|
+
|
124
|
+
# Classification with majority vote from neighbors weighted by similarity.
# Neighbors whose activity is 'true' add Algorithm.gauss(similarity) to the
# vote, 'false' neighbors subtract it, any other activity is ignored.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
# @return [Hash] Hash with keys `:prediction, :confidence`; prediction is nil on a tie
def self.weighted_majority_vote(neighbors,params={})
  vote = neighbors.inject(0.0) do |sum, neighbor|
    case neighbor[:activity].to_s
    when 'true'
      sum + Algorithm.gauss(neighbor[:similarity])
    when 'false'
      sum - Algorithm.gauss(neighbor[:similarity])
    else
      sum
    end
  end

  prediction =
    if vote > 0.0
      true
    elsif vote < 0.0
      false
    end

  # confidence is the mean vote per neighbor; stays 0.0 without neighbors
  confidence = neighbors.size > 0 ? vote/neighbors.size : 0.0
  {:prediction => prediction, :confidence => confidence.abs}
end
|
149
|
+
|
150
|
+
# Local support vector regression from neighbors.
#
# Activities are log10-transformed, a nu-SVR model (R package "kernlab") is
# trained on a precomputed kernel matrix of pairwise neighbor similarities,
# and the prediction is transformed back with 10**x.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
# @raise [RuntimeError] "No neighbors found" if neighbors is empty
def self.local_svm_regression(neighbors,params )
  # fail fast, before any transformation or R start-up
  raise "No neighbors found" if neighbors.size == 0

  sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
  conf = sims.inject{|sum,x| sum + x }
  acts = neighbors.collect{ |n| Math.log10(n[:activity].to_f) } # activities of neighbors for supervised learning
  neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches

  # gram matrix: square matrix of similarities between neighbors; implements weighted tanimoto kernel
  gram_matrix = []
  (0..(neighbor_matches.length-1)).each do |i|
    gram_matrix[i] ||= []
    # upper triangle
    ((i+1)..(neighbor_matches.length-1)).each do |j|
      # NOTE(review): eval runs params[:similarity_algorithm] as Ruby code; it must
      # never be taken from untrusted input. The evaluated string refers to the
      # locals neighbor_matches, i, j and params, so keep those names.
      sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
      gram_matrix[i][j] = Algorithm.gauss(sim)
      gram_matrix[j] ||= []
      gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
    end
    gram_matrix[i][i] = 1.0
  end

  LOGGER.debug gram_matrix.to_yaml

  # One R instance per call (global R instance leads to Socket errors after a
  # large number of requests). Held in a local, not an instance variable on the
  # module, and always released in the ensure clause.
  r = RinRuby.new(false,false)
  begin
    r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
    LOGGER.debug "Setting R data ..."
    # set data
    r.gram_matrix = gram_matrix.flatten
    r.n = neighbor_matches.size
    r.y = acts
    r.sims = sims

    LOGGER.debug "Preparing R data ..."
    # prepare data
    r.eval "y<-as.vector(y)"
    r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
    r.eval "sims<-as.vector(sims)"

    # model + support vectors
    LOGGER.debug "Creating SVM model ..."
    r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
    r.eval "sv<-as.vector(SVindex(model))"
    r.eval "sims<-sims[sv]"
    r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
    LOGGER.debug "Predicting ..."
    r.eval "p<-predict(model,sims)[1,1]"
    prediction = 10**(r.p.to_f)
    LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
  ensure
    r.quit # free R, even when a step above failed
  end

  confidence = conf/neighbors.size
  {:prediction => prediction, :confidence => confidence}
end
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
module Substructure
|
217
|
+
include Algorithm
|
218
|
+
# Substructure matching; delegates to the compound's own `match` method.
# @param [OpenTox::Compound] compound Compound
# @param [Array] features Array with Smarts strings
# @return [Array] Array with matching Smarts
def self.match(compound, features)
  compound.match(features)
end
|
225
|
+
end
|
226
|
+
|
227
|
+
module Dataset
|
228
|
+
include Algorithm
|
229
|
+
# Placeholder, not implemented yet: the body is empty, so the method returns nil.
# API should match Substructure.match
def features(dataset_uri, compound_uri)
end
|
232
|
+
end
|
233
|
+
|
234
|
+
# Gauss kernel applied to the distance (1.0 - x); yields 1.0 for x == 1.0.
# @param [Numeric] x Value to be smoothed
# @param [optional, Numeric] sigma Kernel width, defaults to 0.3
# @return [Float]
def self.gauss(x, sigma = 0.3)
  distance = 1.0 - x
  exponent = -(distance*distance)/(2*sigma*sigma)
  Math.exp(exponent)
end
|
240
|
+
|
241
|
+
# Median of an array
#
# Works on a sorted copy, so the caller's array is left untouched.
# @param [Array] Array with values
# @return [Float] Median: the middle element for odd sizes, the (floating
#   point) mean of the two middle elements for even sizes; nil for an empty array
def self.median(array)
  return nil if array.empty?
  sorted = array.sort
  m_pos = sorted.size / 2
  return sorted[m_pos] if sorted.size % 2 == 1
  # divide by 2.0: integer division would truncate e.g. (2+3)/2 to 2
  (sorted[m_pos-1] + sorted[m_pos])/2.0
end
|
250
|
+
|
81
251
|
end
|
82
252
|
end
|