opentox-ruby 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +4 -4
- data/Rakefile +35 -35
- data/VERSION +1 -1
- data/lib/algorithm.rb +220 -50
- data/lib/compound.rb +138 -73
- data/lib/dataset.rb +296 -192
- data/lib/environment.rb +44 -29
- data/lib/feature.rb +15 -0
- data/lib/model.rb +240 -112
- data/lib/opentox-ruby.rb +13 -0
- data/lib/opentox.rb +47 -0
- data/lib/overwrite.rb +72 -0
- data/lib/parser.rb +286 -0
- data/lib/rest_client_wrapper.rb +12 -12
- data/lib/serializer.rb +340 -0
- data/lib/task.rb +184 -101
- data/lib/validation.rb +58 -8
- metadata +41 -22
data/README.rdoc
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
= opentox-ruby
|
1
|
+
= opentox-ruby
|
2
2
|
|
3
3
|
Ruby wrapper for the OpenTox REST API (http://www.opentox.org)
|
4
4
|
|
@@ -10,14 +10,14 @@ Run the following if you haven't already:
|
|
10
10
|
|
11
11
|
Install the gem:
|
12
12
|
|
13
|
-
sudo gem install helma-opentox-ruby
|
13
|
+
sudo gem install helma-opentox-ruby
|
14
14
|
|
15
15
|
== Usage
|
16
16
|
|
17
17
|
- adjust the settings in $HOME/.opentox/config
|
18
|
-
- require 'opentox-ruby
|
18
|
+
- require 'opentox-ruby' in your ruby application
|
19
19
|
- consult the rdoc API documentation for details
|
20
20
|
|
21
21
|
== Copyright
|
22
22
|
|
23
|
-
Copyright (c) 2009 Christoph Helma. See LICENSE for details.
|
23
|
+
Copyright (c) 2009-2010 Christoph Helma. See LICENSE for details.
|
data/Rakefile
CHANGED
@@ -4,46 +4,46 @@ require 'rake'
|
|
4
4
|
begin
|
5
5
|
require 'jeweler'
|
6
6
|
Jeweler::Tasks.new do |gem|
|
7
|
-
gem.name = "opentox-ruby
|
7
|
+
gem.name = "opentox-ruby"
|
8
8
|
gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
|
9
9
|
gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
|
10
10
|
gem.email = "helma@in-silico.ch"
|
11
|
-
gem.homepage = "http://github.com/helma/opentox-ruby
|
12
|
-
gem.authors = ["Christoph Helma, Martin Guetlein"]
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
11
|
+
gem.homepage = "http://github.com/helma/opentox-ruby"
|
12
|
+
gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
|
13
|
+
# dependencies
|
14
|
+
[ "sinatra",
|
15
|
+
"emk-sinatra-url-for",
|
16
|
+
"sinatra-respond_to",
|
17
|
+
"sinatra-static-assets",
|
18
|
+
"rest-client",
|
19
|
+
"rack",
|
20
|
+
"rack-contrib",
|
21
|
+
"rack-flash",
|
22
|
+
"nokogiri",
|
23
|
+
"rubyzip",
|
24
|
+
"roo",
|
25
|
+
"spreadsheet",
|
26
|
+
"google-spreadsheet-ruby",
|
27
|
+
"yajl-ruby",
|
28
|
+
"tmail",
|
29
|
+
"rinruby",
|
30
|
+
"rjb"
|
31
|
+
].each { |dep| gem.add_dependency dep }
|
32
|
+
[ "dm-core",
|
33
|
+
'dm-serializer',
|
34
|
+
'dm-timestamps',
|
35
|
+
'dm-types',
|
36
|
+
'dm-migrations',
|
37
|
+
"dm-mysql-adapter",
|
38
38
|
"dm-validations",
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
39
|
+
].each {|dep| gem.add_dependency dep, ">= 1" }
|
40
|
+
gem.add_dependency "haml", ">=3"
|
41
|
+
['jeweler'].each { |dep| gem.add_development_dependency dep }
|
42
|
+
gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
|
43
|
+
#gem.files.include %w(lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*)
|
44
44
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
45
45
|
end
|
46
|
-
|
46
|
+
Jeweler::GemcutterTasks.new
|
47
47
|
rescue LoadError
|
48
48
|
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
49
49
|
end
|
@@ -81,7 +81,7 @@ Rake::RDocTask.new do |rdoc|
|
|
81
81
|
end
|
82
82
|
|
83
83
|
rdoc.rdoc_dir = 'rdoc'
|
84
|
-
rdoc.title = "opentox-ruby
|
84
|
+
rdoc.title = "opentox-ruby #{version}"
|
85
85
|
rdoc.rdoc_files.include('README*')
|
86
86
|
rdoc.rdoc_files.include('lib/**/*.rb')
|
87
87
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
0.0.2
|
data/lib/algorithm.rb
CHANGED
@@ -1,82 +1,252 @@
|
|
1
|
+
# R integration
|
2
|
+
# workaround to initialize R non-interactively (former rinruby versions did this by default)
|
3
|
+
# avoids compiling R with X
|
4
|
+
R = nil
|
5
|
+
require "rinruby"
|
1
6
|
|
2
7
|
module OpenTox
|
8
|
+
|
9
|
+
# Wrapper for OpenTox Algorithms
|
3
10
|
module Algorithm
|
11
|
+
|
12
|
+
include OpenTox
|
13
|
+
|
14
|
+
# Execute algorithm with parameters, please consult the OpenTox API and the webservice documentation for acceptable parameters
|
15
|
+
# @param [optional,Hash] params Algorithm parameters
|
16
|
+
# @return [String] URI of new resource (dataset, model, ...)
|
17
|
+
def run(params=nil)
|
18
|
+
RestClientWrapper.post(@uri, {:accept => 'text/uri-list'}, params).to_s
|
19
|
+
end
|
4
20
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
def initialize(owl)
|
17
|
-
@title = owl.get("title")
|
18
|
-
@date = owl.get("date")
|
19
|
-
@uri = owl.uri
|
20
|
-
end
|
21
|
-
|
21
|
+
# Get OWL-DL representation in RDF/XML format
|
22
|
+
# @return [application/rdf+xml] RDF/XML representation
|
23
|
+
def to_rdfxml
|
24
|
+
s = Serializer::Owl.new
|
25
|
+
s.add_algorithm(@uri,@metadata)
|
26
|
+
s.to_rdfxml
|
27
|
+
end
|
28
|
+
|
29
|
+
# Generic Algorithm class, should work with all OpenTox webservices
|
30
|
+
class Generic
|
31
|
+
include Algorithm
|
22
32
|
end
|
23
33
|
|
24
|
-
|
34
|
+
# Fminer algorithms (https://github.com/amaunz/fminer2)
|
35
|
+
module Fminer
|
36
|
+
include Algorithm
|
37
|
+
|
38
|
+
# Backbone Refinement Class mining (http://bbrc.maunz.de/)
|
39
|
+
class BBRC
|
40
|
+
include Fminer
|
41
|
+
# Initialize bbrc algorithm
|
42
|
+
def initialize
|
43
|
+
super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
|
44
|
+
load_metadata
|
45
|
+
end
|
46
|
+
end
|
25
47
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
48
|
+
# LAtent STructure Pattern Mining (http://last-pm.maunz.de)
|
49
|
+
class LAST
|
50
|
+
include Fminer
|
51
|
+
# Initialize last algorithm
|
52
|
+
def initialize
|
53
|
+
super File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
|
54
|
+
load_metadata
|
55
|
+
end
|
30
56
|
end
|
31
57
|
|
32
|
-
def self.uri
|
33
|
-
File.join(@@config[:services]["opentox-algorithm"], "fminer")
|
34
|
-
end
|
35
58
|
end
|
36
59
|
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
60
|
+
# Create lazar prediction model
|
61
|
+
class Lazar
|
62
|
+
include Algorithm
|
63
|
+
# Initialize lazar algorithm
|
64
|
+
def initialize
|
65
|
+
super File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
|
66
|
+
load_metadata
|
67
|
+
end
|
68
|
+
end
|
46
69
|
|
47
|
-
|
48
|
-
File.join(@@config[:services]["opentox-algorithm"], "lazar")
|
49
|
-
end
|
70
|
+
# Utility methods without dedicated webservices
|
50
71
|
|
51
|
-
|
72
|
+
# Similarity calculations
|
73
|
+
module Similarity
|
74
|
+
include Algorithm
|
52
75
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
76
|
+
# Tanimoto similarity: shared features divided by all distinct features
# of both compounds, optionally weighted per feature.
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features; each weight is
#   passed through Algorithm.gauss before summing
# @return [Float] (Weighted) tanimoto similarity, 0.0 if no feature is shared
def self.tanimoto(features_a,features_b,weights=nil)
  common_features = features_a & features_b
  return 0.0 if common_features.empty?

  all_features = (features_a + features_b).uniq
  if weights
    common_p_sum = common_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    all_p_sum = all_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    common_p_sum / all_p_sum
  else
    # Compare the counts: Arrays respond neither to #to_f nor to division.
    common_features.size.to_f / all_features.size
  end
end
|
67
|
-
|
68
|
-
|
98
|
+
|
99
|
+
# Euclidean similarity over the properties both compounds have in common.
# @param [Hash] properties_a Properties of first compound
# @param [Hash] properties_b Properties of second compound
# @param [optional, Hash] weights Weights for all properties; each difference is
#   scaled by Algorithm.gauss(weight) before squaring
# @return [Float] 1/(1+distance), or 0.0 when fewer than two properties are shared
def self.euclidean(properties_a,properties_b,weights=nil)
  shared = properties_a.keys & properties_b.keys
  return 0.0 unless shared.size > 1

  squared_distance = shared.inject(0) do |acc, key|
    delta = properties_a[key] - properties_b[key]
    delta *= Algorithm.gauss(weights[key]) if weights
    acc + delta**2
  end
  1/(1+Math.sqrt(squared_distance))
end
|
79
120
|
end
|
80
121
|
|
122
|
+
module Neighbors
|
123
|
+
|
124
|
+
# Classification by similarity-weighted majority vote of the neighbors.
# Neighbors whose activity stringifies to 'true' add their gauss-weighted
# similarity to the vote, 'false' ones subtract it, any other activity is ignored.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
# @return [Hash] Hash with keys `:prediction, :confidence`; prediction is nil on a tie
def self.weighted_majority_vote(neighbors,params={})
  vote = 0.0
  neighbors.each do |neighbor|
    activity = neighbor[:activity].to_s
    if activity == 'true'
      vote += Algorithm.gauss(neighbor[:similarity])
    elsif activity == 'false'
      vote -= Algorithm.gauss(neighbor[:similarity])
    end
  end

  prediction =
    if vote > 0.0
      true
    elsif vote < 0.0
      false
    end

  confidence = neighbors.size > 0 ? vote/neighbors.size : 0.0
  {:prediction => prediction, :confidence => confidence.abs}
end
|
149
|
+
|
150
|
+
# Local support vector regression from neighbors.
# Builds a gram matrix of gauss-transformed pairwise similarities between the
# neighbors, fits a nu-SVR model with R/kernlab on the log10 activities and
# predicts the query compound from its similarities to the support vectors.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`; confidence is the
#   mean neighbor similarity, prediction is 10**p (undoing the log10 transform)
# @raise [RuntimeError] "No neighbors found" if neighbors is empty
def self.local_svm_regression(neighbors,params )
  raise "No neighbors found" if neighbors.size == 0

  sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
  conf = sims.inject{|sum,x| sum + x }
  acts = neighbors.collect{ |n| Math.log10(n[:activity].to_f) } # activities of neighbors for supervised learning
  neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches

  # square matrix of similarities between neighbors; implements weighted tanimoto kernel
  size = neighbor_matches.size
  gram_matrix = Array.new(size) { Array.new(size) }
  size.times do |i|
    gram_matrix[i][i] = 1.0
    ((i+1)...size).each do |j|
      # SECURITY NOTE(review): params[:similarity_algorithm] is interpolated into
      # eval, so it must never be derived from untrusted request input.
      sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
      gram_matrix[i][j] = gram_matrix[j][i] = Algorithm.gauss(sim) # upper and lower triangle
    end
  end

  LOGGER.debug gram_matrix.to_yaml

  # One R session per call, held in a local so concurrent calls cannot overwrite
  # each other's session (a global R instance leads to Socket errors after a
  # large number of requests).
  r = RinRuby.new(false,false)
  begin
    r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
    LOGGER.debug "Setting R data ..."
    # set data
    r.gram_matrix = gram_matrix.flatten
    r.n = neighbor_matches.size
    r.y = acts
    r.sims = sims

    LOGGER.debug "Preparing R data ..."
    # prepare data
    r.eval "y<-as.vector(y)"
    r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
    r.eval "sims<-as.vector(sims)"

    # model + support vectors
    LOGGER.debug "Creating SVM model ..."
    r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
    r.eval "sv<-as.vector(SVindex(model))"
    r.eval "sims<-sims[sv]"
    r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
    LOGGER.debug "Predicting ..."
    r.eval "p<-predict(model,sims)[1,1]"
    prediction = 10**(r.p.to_f)
    LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
  ensure
    r.quit # free R, also when one of the R calls above raised
  end

  {:prediction => prediction, :confidence => conf/neighbors.size}
end
|
213
|
+
|
214
|
+
end
|
215
|
+
|
216
|
+
module Substructure
|
217
|
+
include Algorithm
|
218
|
+
# Substructure matching
|
219
|
+
# @param [OpenTox::Compound] compound Compound
|
220
|
+
# @param [Array] features Array with Smarts strings
|
221
|
+
# @return [Array] Array with matching Smarts
|
222
|
+
def self.match(compound,features)
|
223
|
+
compound.match(features)
|
224
|
+
end
|
225
|
+
end
|
226
|
+
|
227
|
+
module Dataset
|
228
|
+
include Algorithm
|
229
|
+
# API should match Substructure.match
|
230
|
+
def features(dataset_uri,compound_uri)
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
# Gauss kernel centred at 1.0: returns 1.0 for x == 1 and decays as x moves away.
# @param [Numeric] x Input value
# @param [optional, Numeric] sigma Kernel width, defaults to 0.3
# @return [Float]
def self.gauss(x, sigma = 0.3)
  distance = 1.0 - x
  exponent = -(distance*distance)/(2*sigma*sigma)
  Math.exp(exponent)
end
|
240
|
+
|
241
|
+
# Median of an array
# @param [Array] array Array with numeric values; it is not modified
# @return [Float, nil] Median: the middle element for odd sizes, the mean of the
#   two middle elements (as Float) for even sizes, nil for an empty array
def self.median(array)
  return nil if array.empty?
  sorted = array.sort # non-destructive: the caller's array keeps its order
  m_pos = sorted.size / 2
  return sorted[m_pos] if sorted.size % 2 == 1
  # divide by a Float so integer inputs are not truncated ([1, 2] => 1.5)
  (sorted[m_pos-1] + sorted[m_pos]) / 2.0
end
|
250
|
+
|
81
251
|
end
|
82
252
|
end
|