opentox-ruby 0.0.1 → 0.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- = opentox-ruby-api-wrapper
1
+ = opentox-ruby
2
2
 
3
3
  Ruby wrapper for the OpenTox REST API (http://www.opentox.org)
4
4
 
@@ -10,14 +10,14 @@ Run the following if you haven't already:
10
10
 
11
11
  Install the gem:
12
12
 
13
- sudo gem install helma-opentox-ruby-api-wrapper
13
+ sudo gem install helma-opentox-ruby
14
14
 
15
15
  == Usage
16
16
 
17
17
  - adjust the settings in $HOME/.opentox/config
18
- - require 'opentox-ruby-api-wrapper' in your ruby application
18
+ - require 'opentox-ruby' in your ruby application
19
19
  - consult the rdoc API documentation for details
20
20
 
21
21
  == Copyright
22
22
 
23
- Copyright (c) 2009 Christoph Helma. See LICENSE for details.
23
+ Copyright (c) 2009-2010 Christoph Helma. See LICENSE for details.
data/Rakefile CHANGED
@@ -4,46 +4,46 @@ require 'rake'
4
4
# Gem packaging tasks. Jeweler is optional: when it (or one of its
# dependencies) cannot be loaded, an install hint is printed instead of
# aborting the Rakefile.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "opentox-ruby"
    gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
    gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
    gem.email = "helma@in-silico.ch"
    gem.homepage = "http://github.com/helma/opentox-ruby"
    # One array element per author (previously a single comma-joined string,
    # which registers as one author with a very long name).
    gem.authors = [
      "Christoph Helma",
      "Martin Guetlein",
      "Andreas Maunz",
      "Micha Rautenberg",
      "David Vorgrimmler"
    ]
    # dependencies
    [ "sinatra",
      "emk-sinatra-url-for",
      "sinatra-respond_to",
      "sinatra-static-assets",
      "rest-client",
      "rack",
      "rack-contrib",
      "rack-flash",
      "nokogiri",
      "rubyzip",
      "roo",
      "spreadsheet",
      "google-spreadsheet-ruby",
      "yajl-ruby",
      "tmail",
      "rinruby",
      "rjb"
    ].each { |dep| gem.add_dependency dep }
    # DataMapper gems are all required at version 1 or later
    [ "dm-core",
      'dm-serializer',
      'dm-timestamps',
      'dm-types',
      'dm-migrations',
      "dm-mysql-adapter",
      "dm-validations",
    ].each {|dep| gem.add_dependency dep, ">= 1" }
    gem.add_dependency "haml", ">=3"
    ['jeweler'].each { |dep| gem.add_development_dependency dep }
    gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
    #gem.files.include %w(lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*)
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end
@@ -81,7 +81,7 @@ Rake::RDocTask.new do |rdoc|
81
81
  end
82
82
 
83
83
  rdoc.rdoc_dir = 'rdoc'
84
- rdoc.title = "opentox-ruby-api-wrapper #{version}"
84
+ rdoc.title = "opentox-ruby #{version}"
85
85
  rdoc.rdoc_files.include('README*')
86
86
  rdoc.rdoc_files.include('lib/**/*.rb')
87
87
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.6.5
1
+ 0.0.2
data/lib/algorithm.rb CHANGED
@@ -1,82 +1,252 @@
1
+ # R integration
2
+ # workaround to initialize R non-interactively (former rinruby versions did this by default)
3
+ # avoids compiling R with X
4
+ R = nil
5
+ require "rinruby"
1
6
 
2
7
  module OpenTox
8
+
9
+ # Wrapper for OpenTox Algorithms
3
10
  module Algorithm
11
+
12
+ include OpenTox
13
+
14
# Execute the algorithm by POSTing the parameters to its URI and asking for a
# text/uri-list response. Please consult the OpenTox API and the webservice
# documentation for acceptable parameters.
# @param [optional,Hash] params Algorithm parameters
# @return [String] URI of new resource (dataset, model, ...)
def run(params=nil)
  headers = {:accept => 'text/uri-list'}
  response = RestClientWrapper.post(@uri, headers, params)
  response.to_s
end
4
20
 
5
-
6
- class Generic
7
-
8
- attr_accessor :uri, :title, :date
9
-
10
- def self.find(uri)
11
- owl = OpenTox::Owl.from_uri(uri, "Algorithm")
12
- return self.new(owl)
13
- end
14
-
15
- protected
16
- def initialize(owl)
17
- @title = owl.get("title")
18
- @date = owl.get("date")
19
- @uri = owl.uri
20
- end
21
-
21
# Serialize this algorithm (its URI and metadata) as OWL-DL in RDF/XML format
# @return [application/rdf+xml] RDF/XML representation
def to_rdfxml
  serializer = Serializer::Owl.new
  serializer.add_algorithm(@uri, @metadata)
  serializer.to_rdfxml
end
28
+
29
+ # Generic Algorithm class, should work with all OpenTox webservices
30
+ class Generic
31
+ include Algorithm
22
32
  end
23
33
 
24
- class Fminer
34
# Fminer algorithms (https://github.com/amaunz/fminer2)
module Fminer
  include Algorithm

  # Backbone Refinement Class mining (http://bbrc.maunz.de/)
  class BBRC
    include Fminer

    # Initialize bbrc algorithm: address the "fminer/bbrc" path below the
    # configured algorithm service, then load its metadata
    def initialize
      bbrc_uri = File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
      super(bbrc_uri)
      load_metadata
    end
  end

  # LAtent STructure Pattern Mining (http://last-pm.maunz.de)
  class LAST
    include Fminer

    # Initialize last algorithm: address the "fminer/last" path below the
    # configured algorithm service, then load its metadata
    def initialize
      last_uri = File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
      super(last_uri)
      load_metadata
    end
  end

end
36
59
 
37
- class Lazar
38
-
39
- def self.create_model(params)
40
- LOGGER.debug params
41
- LOGGER.debug File.basename(__FILE__) + ": creating model"
42
- LOGGER.debug File.join(@@config[:services]["opentox-algorithm"], "lazar")
43
- resource = RestClient::Resource.new(File.join(@@config[:services]["opentox-algorithm"], "lazar"), :user => @@users[:users].keys[0], :password => @@users[:users].values[0], :content_type => "application/x-yaml")
44
- @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).body.chomp
45
- end
60
# Create lazar prediction model
class Lazar
  include Algorithm

  # Initialize lazar algorithm: address the "lazar" path below the configured
  # algorithm service, then load its metadata
  def initialize
    lazar_uri = File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
    super(lazar_uri)
    load_metadata
  end
end
46
69
 
47
- def self.uri
48
- File.join(@@config[:services]["opentox-algorithm"], "lazar")
49
- end
70
+ # Utility methods without dedicated webservices
50
71
 
51
- end
72
+ # Similarity calculations
73
+ module Similarity
74
+ include Algorithm
52
75
 
53
- class Similarity
54
- def self.weighted_tanimoto(fp_a,fp_b,p)
55
- common_features = fp_a & fp_b
56
- all_features = (fp_a + fp_b).uniq
76
# Tanimoto similarity
#
# Without weights this is |A ∩ B| / |A ∪ B|. With weights every feature
# contributes Algorithm.gauss(weights[feature]) instead of 1.
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
# @return [Float] (Weighted) tanimoto similarity, 0.0 if no feature is shared
def self.tanimoto(features_a,features_b,weights=nil)
  common_features = features_a & features_b
  return 0.0 if common_features.empty?

  all_features = (features_a + features_b).uniq
  if weights
    common_p_sum = common_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    all_p_sum = all_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    common_p_sum/all_p_sum
  else
    # divide the set sizes; Array itself has no #to_f and cannot be a divisor
    common_features.size.to_f/all_features.size
  end
end
67
- def self.euclidean(prop_a,prop_b)
68
- common_properties = prop_a.keys & prop_b.keys
98
+
99
# Euclidean similarity, computed as 1/(1 + euclidean distance) over the
# properties present in both hashes. When weights are given, each property
# difference is scaled by Algorithm.gauss(weights[property]) before squaring.
# @param [Hash] properties_a Properties of first compound
# @param [Hash] properties_b Properties of second compound
# @param [optional, Hash] weights Weights for all properties
# @return [Float] (Weighted) euclidean similarity, 0.0 with fewer than two common properties
def self.euclidean(properties_a,properties_b,weights=nil)
  common_properties = properties_a.keys & properties_b.keys
  return 0.0 unless common_properties.size > 1

  dist_sum = common_properties.inject(0) do |sum, key|
    difference = properties_a[key] - properties_b[key]
    difference = difference * Algorithm.gauss(weights[key]) if weights
    sum + difference**2
  end
  1/(1+Math.sqrt(dist_sum))
end
79
120
  end
80
121
 
122
+ module Neighbors
123
+
124
# Classification with majority vote from neighbors weighted by similarity.
# Each neighbor whose activity reads 'true' adds Algorithm.gauss(similarity)
# to the vote, each 'false' neighbor subtracts it; other activities are ignored.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
# @return [Hash] Hash with keys `:prediction, :confidence`
def self.weighted_majority_vote(neighbors,params={})
  vote = neighbors.inject(0.0) do |sum, neighbor|
    case neighbor[:activity].to_s
    when 'true'  then sum + Algorithm.gauss(neighbor[:similarity])
    when 'false' then sum - Algorithm.gauss(neighbor[:similarity])
    else sum
    end
  end

  # positive vote => true, negative vote => false, tie => nil
  prediction =
    if vote > 0.0
      true
    elsif vote < 0.0
      false
    end

  confidence = neighbors.size > 0 ? vote/neighbors.size : 0.0
  {:prediction => prediction, :confidence => confidence.abs}
end
149
+
150
# Local support vector regression from neighbors.
#
# Activities are log10-transformed, a gram matrix of gauss-smoothed pairwise
# neighbor similarities is built, and a nu-SVR model is fitted and queried in
# a private R session (requires the R package "kernlab"). The prediction is
# transformed back with 10**x; the confidence is the mean neighbor similarity.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`
# @raise [RuntimeError] if neighbors is empty
def self.local_svm_regression(neighbors,params )
  raise "No neighbors found" if neighbors.empty?

  sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
  conf = sims.inject{|sum,x| sum + x }
  acts = neighbors.collect{ |n| Math.log10(n[:activity].to_f) } # activities of neighbors for supervised learning
  neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches

  # square matrix of similarities between neighbors; implements weighted tanimoto kernel
  gram_matrix = []
  (0..(neighbor_matches.length-1)).each do |i|
    gram_matrix[i] = [] unless gram_matrix[i]
    # upper triangle
    ((i+1)..(neighbor_matches.length-1)).each do |j|
      # NOTE(review): eval executes the caller-supplied :similarity_algorithm
      # string as Ruby code; it must never come from untrusted input. The
      # string refers to the locals neighbor_matches, i, j and params by name.
      sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
      gram_matrix[i][j] = Algorithm.gauss(sim)
      gram_matrix[j] = [] unless gram_matrix[j]
      gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
    end
    gram_matrix[i][i] = 1.0
  end

  LOGGER.debug gram_matrix.to_yaml

  prediction = nil
  r = nil
  begin
    # One R instance per call, held in a local: a global R instance leads to
    # Socket errors after a large number of requests.
    r = RinRuby.new(false,false)
    r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
    LOGGER.debug "Setting R data ..."
    # set data
    r.gram_matrix = gram_matrix.flatten
    r.n = neighbor_matches.size
    r.y = acts
    r.sims = sims

    LOGGER.debug "Preparing R data ..."
    # prepare data
    r.eval "y<-as.vector(y)"
    r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
    r.eval "sims<-as.vector(sims)"

    # model + support vectors
    LOGGER.debug "Creating SVM model ..."
    r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
    r.eval "sv<-as.vector(SVindex(model))"
    r.eval "sims<-sims[sv]"
    r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
    LOGGER.debug "Predicting ..."
    r.eval "p<-predict(model,sims)[1,1]"
    prediction = 10**(r.p.to_f) # undo the log10 transformation of the activities
    LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
  ensure
    r.quit if r # free R, also when one of the steps above raised
  end

  confidence = conf/neighbors.size
  {:prediction => prediction, :confidence => confidence}
end
213
+
214
+ end
215
+
216
+ module Substructure
217
+ include Algorithm
218
+ # Substructure matching
219
+ # @param [OpenTox::Compound] compound Compound
220
+ # @param [Array] features Array with Smarts strings
221
+ # @return [Array] Array with matching Smarts
222
+ def self.match(compound,features)
223
+ compound.match(features)
224
+ end
225
+ end
226
+
227
+ module Dataset
228
+ include Algorithm
229
+ # API should match Substructure.match
230
+ def features(dataset_uri,compound_uri)
231
+ end
232
+ end
233
+
234
# Gauss kernel applied to the distance (1.0 - x)
# @param [Numeric] x Value to transform
# @param [optional, Numeric] sigma Width of the kernel
# @return [Float]
def self.gauss(x, sigma = 0.3)
  distance = 1.0 - x
  squared_distance = distance*distance
  denominator = 2*sigma*sigma
  Math.exp(-squared_distance/denominator)
end
240
+
241
# Median of an array
#
# The input is not modified; a sorted copy is used. For an even number of
# values the mean of the two middle values is returned as a Float (dividing
# by the Integer 2 would truncate, e.g. [1,2] => 1).
# @param [Array] array Array with values
# @return [Float, nil] Median (the middle element itself for an odd number of values), nil for an empty array
def self.median(array)
  return nil if array.empty?
  sorted = array.sort
  m_pos = sorted.size / 2
  return sorted[m_pos] if sorted.size % 2 == 1
  (sorted[m_pos-1] + sorted[m_pos]) / 2.0
end
250
+
81
251
  end
82
252
  end