opentox-ruby 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,4 +1,4 @@
1
- = opentox-ruby-api-wrapper
1
+ = opentox-ruby
2
2
 
3
3
  Ruby wrapper for the OpenTox REST API (http://www.opentox.org)
4
4
 
@@ -10,14 +10,14 @@ Run the following if you haven't already:
10
10
 
11
11
  Install the gem:
12
12
 
13
- sudo gem install helma-opentox-ruby-api-wrapper
13
+ sudo gem install helma-opentox-ruby
14
14
 
15
15
  == Usage
16
16
 
17
17
  - adjust the settings in $HOME/.opentox/config
18
- - require 'opentox-ruby-api-wrapper' in your ruby application
18
+ - require 'opentox-ruby' in your ruby application
19
19
  - consult the rdoc API documentation for details
20
20
 
21
21
  == Copyright
22
22
 
23
- Copyright (c) 2009 Christoph Helma. See LICENSE for details.
23
+ Copyright (c) 2009-2010 Christoph Helma. See LICENSE for details.
data/Rakefile CHANGED
@@ -4,46 +4,46 @@ require 'rake'
4
4
  begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
- gem.name = "opentox-ruby-api-wrapper"
7
+ gem.name = "opentox-ruby"
8
8
  gem.summary = %Q{Ruby wrapper for the OpenTox REST API}
9
9
  gem.description = %Q{Ruby wrapper for the OpenTox REST API (http://www.opentox.org)}
10
10
  gem.email = "helma@in-silico.ch"
11
- gem.homepage = "http://github.com/helma/opentox-ruby-api-wrapper"
12
- gem.authors = ["Christoph Helma, Martin Guetlein"]
13
- # dependencies
14
- [ "sinatra",
15
- "emk-sinatra-url-for",
16
- "sinatra-respond_to",
17
- "sinatra-static-assets",
18
- "rest-client",
19
- "rack",
20
- "rack-contrib",
21
- "rack-flash",
22
- "nokogiri",
23
- "rubyzip",
24
- "builder",
25
- "roo",
26
- "spreadsheet",
27
- "google-spreadsheet-ruby",
28
- "tmail",
29
- "rinruby",
30
- "rjb"
31
- ].each { |dep| gem.add_dependency dep }
32
- [ "dm-core",
33
- 'dm-serializer',
34
- 'dm-timestamps',
35
- 'dm-types',
36
- 'dm-migrations',
37
- "dm-mysql-adapter",
11
+ gem.homepage = "http://github.com/helma/opentox-ruby"
12
+ gem.authors = ["Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler"]
13
+ # dependencies
14
+ [ "sinatra",
15
+ "emk-sinatra-url-for",
16
+ "sinatra-respond_to",
17
+ "sinatra-static-assets",
18
+ "rest-client",
19
+ "rack",
20
+ "rack-contrib",
21
+ "rack-flash",
22
+ "nokogiri",
23
+ "rubyzip",
24
+ "roo",
25
+ "spreadsheet",
26
+ "google-spreadsheet-ruby",
27
+ "yajl-ruby",
28
+ "tmail",
29
+ "rinruby",
30
+ "rjb"
31
+ ].each { |dep| gem.add_dependency dep }
32
+ [ "dm-core",
33
+ 'dm-serializer',
34
+ 'dm-timestamps',
35
+ 'dm-types',
36
+ 'dm-migrations',
37
+ "dm-mysql-adapter",
38
38
  "dm-validations",
39
- ].each {|dep| gem.add_dependency dep, ">= 1" }
40
- gem.add_dependency "haml", ">=3"
41
- ['cucumber','jeweler'].each { |dep| gem.add_development_dependency dep }
42
- gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
43
- gem.files.include %w(lib/tasks/owl.rb, lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/utils.rb, lib/validation.rb, lib/templates/*)
39
+ ].each {|dep| gem.add_dependency dep, ">= 1" }
40
+ gem.add_dependency "haml", ">=3"
41
+ ['jeweler'].each { |dep| gem.add_development_dependency dep }
42
+ gem.files = FileList["[A-Z]*", "{bin,generators,lib,test}/**/*", 'lib/jeweler/templates/.gitignore']
43
+ #gem.files.include %w(lib/environment.rb, lib/algorithm.rb, lib/compound.rb, lib/dataset.rb, lib/model.rb, lib/validation.rb, lib/templates/*)
44
44
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
45
45
  end
46
- Jeweler::GemcutterTasks.new
46
+ Jeweler::GemcutterTasks.new
47
47
  rescue LoadError
48
48
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
49
49
  end
@@ -81,7 +81,7 @@ Rake::RDocTask.new do |rdoc|
81
81
  end
82
82
 
83
83
  rdoc.rdoc_dir = 'rdoc'
84
- rdoc.title = "opentox-ruby-api-wrapper #{version}"
84
+ rdoc.title = "opentox-ruby #{version}"
85
85
  rdoc.rdoc_files.include('README*')
86
86
  rdoc.rdoc_files.include('lib/**/*.rb')
87
87
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.6.5
1
+ 0.0.2
data/lib/algorithm.rb CHANGED
@@ -1,82 +1,252 @@
1
+ # R integration
2
+ # workaround to initialize R non-interactively (former rinruby versions did this by default)
3
+ # avoids compiling R with X
4
+ R = nil
5
+ require "rinruby"
1
6
 
2
7
  module OpenTox
8
+
9
+ # Wrapper for OpenTox Algorithms
3
10
  module Algorithm
11
+
12
+ include OpenTox
13
+
14
# Execute the algorithm by POSTing the parameters to the algorithm URI.
# Please consult the OpenTox API and the webservice documentation for acceptable parameters.
# The request asks for a 'text/uri-list' response.
# @param [optional,Hash] params Algorithm parameters
# @return [String] URI of new resource (dataset, model, ...)
def run(params=nil)
  headers = {:accept => 'text/uri-list'}
  response = RestClientWrapper.post(@uri, headers, params)
  response.to_s
end
4
20
 
5
-
6
- class Generic
7
-
8
- attr_accessor :uri, :title, :date
9
-
10
- def self.find(uri)
11
- owl = OpenTox::Owl.from_uri(uri, "Algorithm")
12
- return self.new(owl)
13
- end
14
-
15
- protected
16
- def initialize(owl)
17
- @title = owl.get("title")
18
- @date = owl.get("date")
19
- @uri = owl.uri
20
- end
21
-
21
# Get OWL-DL representation in RDF/XML format.
# Registers this algorithm's URI and metadata with a fresh Serializer::Owl
# and returns what the serializer renders.
# @return [application/rdf+xml] RDF/XML representation
def to_rdfxml
  serializer = Serializer::Owl.new
  serializer.add_algorithm(@uri,@metadata)
  serializer.to_rdfxml
end
28
+
29
+ # Generic Algorithm class, should work with all OpenTox webservices; it defines nothing itself and only mixes in Algorithm
30
+ class Generic
31
+ include Algorithm
22
32
  end
23
33
 
24
- class Fminer
34
# Fminer algorithms (https://github.com/amaunz/fminer2)
module Fminer
  include Algorithm

  # Backbone Refinement Class mining (http://bbrc.maunz.de/)
  class BBRC
    include Fminer

    # Initialize bbrc algorithm: passes the "fminer/bbrc" URI below the configured
    # opentox-algorithm service to super, then loads the metadata.
    def initialize
      service_uri = File.join(CONFIG[:services]["opentox-algorithm"], "fminer/bbrc")
      super(service_uri)
      load_metadata
    end
  end

  # LAtent STructure Pattern Mining (http://last-pm.maunz.de)
  class LAST
    include Fminer

    # Initialize last algorithm: passes the "fminer/last" URI below the configured
    # opentox-algorithm service to super, then loads the metadata.
    def initialize
      service_uri = File.join(CONFIG[:services]["opentox-algorithm"], "fminer/last")
      super(service_uri)
      load_metadata
    end
  end

end
36
59
 
37
- class Lazar
38
-
39
- def self.create_model(params)
40
- LOGGER.debug params
41
- LOGGER.debug File.basename(__FILE__) + ": creating model"
42
- LOGGER.debug File.join(@@config[:services]["opentox-algorithm"], "lazar")
43
- resource = RestClient::Resource.new(File.join(@@config[:services]["opentox-algorithm"], "lazar"), :user => @@users[:users].keys[0], :password => @@users[:users].values[0], :content_type => "application/x-yaml")
44
- @uri = resource.post(:dataset_uri => params[:dataset_uri], :prediction_feature => params[:prediction_feature], :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")).body.chomp
45
- end
60
# Create lazar prediction model
class Lazar
  include Algorithm

  # Initialize lazar algorithm: passes the "lazar" URI below the configured
  # opentox-algorithm service to super, then loads the metadata.
  def initialize
    service_uri = File.join(CONFIG[:services]["opentox-algorithm"], "lazar")
    super(service_uri)
    load_metadata
  end
end
46
69
 
47
- def self.uri
48
- File.join(@@config[:services]["opentox-algorithm"], "lazar")
49
- end
70
+ # Utility methods without dedicated webservices
50
71
 
51
- end
72
+ # Similarity calculations
73
+ module Similarity
74
+ include Algorithm
52
75
 
53
- class Similarity
54
- def self.weighted_tanimoto(fp_a,fp_b,p)
55
- common_features = fp_a & fp_b
56
- all_features = (fp_a + fp_b).uniq
76
# Tanimoto similarity
#
# Without weights this is |A ∩ B| / |A ∪ B|. With weights, each feature
# contributes Algorithm.gauss(weights[feature]) instead of 1.
# @param [Array] features_a Features of first compound
# @param [Array] features_b Features of second compound
# @param [optional, Hash] weights Weights for all features
# @return [Float] (Weighted) tanimoto similarity, 0.0 if no feature is shared
def self.tanimoto(features_a,features_b,weights=nil)
  common_features = features_a & features_b
  return 0.0 if common_features.empty?

  all_features = (features_a + features_b).uniq
  if weights
    common_p_sum = common_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    all_p_sum = all_features.inject(0.0) { |sum, f| sum + Algorithm.gauss(weights[f]) }
    common_p_sum/all_p_sum
  else
    # Divide the set sizes; calling to_f on the Array itself raises NoMethodError.
    common_features.size.to_f/all_features.size
  end
end
67
- def self.euclidean(prop_a,prop_b)
68
- common_properties = prop_a.keys & prop_b.keys
98
+
99
# Euclidean similarity
#
# Computed as 1/(1+distance) over the properties present in both hashes. With
# weights, each difference is scaled by Algorithm.gauss(weights[property])
# before squaring.
# @param [Hash] properties_a Properties of first compound
# @param [Hash] properties_b Properties of second compound
# @param [optional, Hash] weights Weights for all properties
# @return [Float] (Weighted) euclidean similarity, 0.0 with fewer than two shared properties
def self.euclidean(properties_a,properties_b,weights=nil)
  shared_keys = properties_a.keys & properties_b.keys
  return 0.0 unless shared_keys.size > 1

  squared_distance = shared_keys.inject(0) do |total, key|
    delta = properties_a[key] - properties_b[key]
    delta = delta * Algorithm.gauss(weights[key]) if weights
    total + delta**2
  end
  1/(1+Math.sqrt(squared_distance))
end
79
120
  end
80
121
 
122
+ module Neighbors
123
+
124
# Classification with majority vote from neighbors weighted by similarity.
#
# Neighbors whose activity prints as 'true' add Algorithm.gauss(similarity) to the
# vote, those printing as 'false' subtract it; any other activity is ignored.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity`
# @param [optional] params Ignored (only for compatibility with local_svm_regression)
# @return [Hash] Hash with keys `:prediction, :confidence`; prediction is nil on a
#   tie, confidence is the absolute vote divided by the number of neighbors
def self.weighted_majority_vote(neighbors,params={})
  vote = neighbors.inject(0.0) do |balance, neighbor|
    activity = neighbor[:activity].to_s
    if activity == 'true'
      balance + Algorithm.gauss(neighbor[:similarity])
    elsif activity == 'false'
      balance - Algorithm.gauss(neighbor[:similarity])
    else
      balance
    end
  end

  prediction =
    if vote > 0.0
      true
    elsif vote < 0.0
      false
    end # stays nil when the votes cancel out

  confidence = neighbors.size > 0 ? vote/neighbors.size : 0.0
  {:prediction => prediction, :confidence => confidence.abs}
end
149
+
150
# Local support vector regression from neighbors.
#
# Builds a gram matrix of gauss-transformed pairwise neighbor similarities, fits a
# nu-SVR with the R package "kernlab" (must be installed) on the log10 activities,
# and predicts from the query/neighbor similarities restricted to the support vectors.
# @param [Array] neighbors, each neighbor is a hash with keys `:similarity, :activity, :features`
# @param [Hash] params Keys `:similarity_algorithm,:p_values` are required
# @return [Hash] Hash with keys `:prediction, :confidence`; confidence is the mean
#   neighbor similarity
# @raise [RuntimeError] "No neighbors found" if neighbors is empty
def self.local_svm_regression(neighbors,params )
  raise "No neighbors found" if neighbors.empty?

  sims = neighbors.collect{ |n| n[:similarity] } # similarity values between query and neighbors
  conf = sims.inject{|sum,x| sum + x }
  acts = neighbors.collect{ |n| Math.log10(n[:activity].to_f) } # activities of neighbors for supervised learning
  neighbor_matches = neighbors.collect{ |n| n[:features] } # as in classification: URIs of matches

  # square matrix of similarities between neighbors; implements weighted tanimoto kernel
  gram_matrix = []
  (0..(neighbor_matches.length-1)).each do |i|
    gram_matrix[i] ||= []
    # upper triangle
    ((i+1)..(neighbor_matches.length-1)).each do |j|
      # NOTE(review): params[:similarity_algorithm] is interpolated into eval. It must never
      # come from untrusted input; the eval'd code relies on the locals neighbor_matches,
      # i, j and params keeping exactly these names.
      sim = eval("#{params[:similarity_algorithm]}(neighbor_matches[i], neighbor_matches[j], params[:p_values])")
      gram_matrix[i][j] = Algorithm.gauss(sim)
      gram_matrix[j] ||= []
      gram_matrix[j][i] = gram_matrix[i][j] # lower triangle
    end
    gram_matrix[i][i] = 1.0
  end

  LOGGER.debug gram_matrix.to_yaml

  prediction = nil
  # A fresh R instance per call: a global R instance leads to Socket errors after a large
  # number of requests. Held in a local so nothing is shared through module state.
  r = RinRuby.new(false,false)
  begin
    r.eval "library('kernlab')" # this requires R package "kernlab" to be installed
    LOGGER.debug "Setting R data ..."
    # set data
    r.gram_matrix = gram_matrix.flatten
    r.n = neighbor_matches.size
    r.y = acts
    r.sims = sims

    LOGGER.debug "Preparing R data ..."
    # prepare data
    r.eval "y<-as.vector(y)"
    r.eval "gram_matrix<-as.kernelMatrix(matrix(gram_matrix,n,n))"
    r.eval "sims<-as.vector(sims)"

    # model + support vectors
    LOGGER.debug "Creating SVM model ..."
    r.eval "model<-ksvm(gram_matrix, y, kernel=matrix, type=\"nu-svr\", nu=0.8)"
    r.eval "sv<-as.vector(SVindex(model))"
    r.eval "sims<-sims[sv]"
    r.eval "sims<-as.kernelMatrix(matrix(sims,1))"
    LOGGER.debug "Predicting ..."
    r.eval "p<-predict(model,sims)[1,1]"
    prediction = 10**(r.p.to_f) # undo the log10 applied to the activities
    LOGGER.debug "Prediction is: '" + prediction.to_s + "'."
  ensure
    r.quit # free R even when one of the steps above raised
  end

  {:prediction => prediction, :confidence => conf/neighbors.size}
end
213
+
214
+ end
215
+
216
+ module Substructure
217
+ include Algorithm
218
# Substructure matching: delegates to the compound's own `match` method.
# @param [OpenTox::Compound] compound Compound
# @param [Array] features Array with Smarts strings
# @return [Array] Array with matching Smarts
def self.match(compound,features)
  matching_smarts = compound.match(features)
  matching_smarts
end
225
+ end
226
+
227
+ module Dataset
228
+ include Algorithm
229
+ # Placeholder without implementation: the empty body ignores both URIs and returns nil. API should match Substructure.match
230
+ def features(dataset_uri,compound_uri)
231
+ end
232
+ end
233
+
234
# Gauss kernel over the distance of x from 1.0: exp(-(1-x)^2 / (2*sigma^2)).
# Yields 1.0 for x == 1.0.
# @param [Numeric] x Value to transform
# @param [optional, Numeric] sigma Kernel width, 0.3 by default
# @return [Float]
def self.gauss(x, sigma = 0.3)
  distance = 1.0 - x
  squared_distance = distance * distance
  Math.exp(-squared_distance / (2 * sigma * sigma))
end
240
+
241
# Median of an array.
#
# Works on a sorted copy, so the caller's array is left untouched. For an even
# number of elements the two middle values are averaged with float division; for
# an odd number the middle element is returned as is.
# @param [Array] array Array with numeric values
# @return [Float, nil] Median, or nil for an empty array
def self.median(array)
  return nil if array.empty?

  sorted = array.sort
  middle = sorted.size / 2
  return sorted[middle] if sorted.size % 2 == 1

  (sorted[middle - 1] + sorted[middle]) / 2.0
end
250
+
81
251
  end
82
252
  end