opentox-ruby 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,82 @@
1
+
2
+ module OpenTox
3
+ module Algorithm
4
+
5
+
6
# Minimal description of an algorithm resource: its URI, title and date,
# all read from an owl object.
class Generic

  attr_accessor :uri, :title, :date

  # Loads the owl representation found at +uri+ (requested as "Algorithm")
  # and returns a new instance built from it.
  def self.find(uri)
    new(OpenTox::Owl.from_uri(uri, "Algorithm"))
  end

  protected

  # Copies the "title" and "date" properties and the URI out of +owl+.
  # +owl+ must respond to +get(name)+ and +uri+.
  def initialize(owl)
    @title, @date = owl.get("title"), owl.get("date")
    @uri = owl.uri
  end

end
23
+
24
# Client helpers for the fminer feature generation algorithm.
class Fminer

  # POSTs :dataset_uri and :feature_uri (taken from +params+) to the
  # resource at params[:feature_generation_uri] and returns the result of
  # that POST call. The first key/value pair of @@users[:users] is used as
  # user name and password.
  def self.create_feature_dataset(params)
    LOGGER.debug File.basename(__FILE__) + ": creating feature dataset"
    accounts = @@users[:users]
    service = RestClient::Resource.new(
      params[:feature_generation_uri],
      :user => accounts.keys[0],
      :password => accounts.values[0]
    )
    service.post(:dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri])
  end

  # URI of the fminer algorithm below the configured "opentox-algorithm" service.
  def self.uri
    algorithm_service = @@config[:services]["opentox-algorithm"]
    File.join(algorithm_service, "fminer")
  end
end
36
+
37
# Client helpers for the lazar algorithm.
class Lazar

  # POSTs a model creation request to the lazar URI and returns the
  # chomped response body. The same value is also stored in the
  # class-level instance variable @uri.
  #
  # Sent parameters: :dataset_uri and :prediction_feature from +params+,
  # plus :feature_generation_uri pointing at the fminer algorithm of the
  # same algorithm service. The first key/value pair of @@users[:users] is
  # used as user name and password.
  def self.create_model(params)
    LOGGER.debug params
    LOGGER.debug File.basename(__FILE__) + ": creating model"
    lazar_uri = uri
    LOGGER.debug lazar_uri

    accounts = @@users[:users]
    service = RestClient::Resource.new(
      lazar_uri,
      :user => accounts.keys[0],
      :password => accounts.values[0],
      :content_type => "application/x-yaml"
    )
    response = service.post(
      :dataset_uri => params[:dataset_uri],
      :prediction_feature => params[:prediction_feature],
      :feature_generation_uri => File.join(@@config[:services]["opentox-algorithm"], "fminer")
    )
    @uri = response.body.chomp
  end

  # URI of the lazar algorithm below the configured "opentox-algorithm" service.
  def self.uri
    algorithm_service = @@config[:services]["opentox-algorithm"]
    File.join(algorithm_service, "lazar")
  end

end
52
+
53
# Similarity measures for feature fingerprints and property hashes.
class Similarity

  # Weighted Tanimoto index of two fingerprints.
  #
  # +fp_a+ and +fp_b+ are arrays of features; +p+ maps each feature to a
  # value that is passed through OpenTox::Utils.gauss to obtain its weight.
  # Returns (sum of weights of shared features) / (sum of weights of all
  # distinct features), or 0.0 when the fingerprints share no feature.
  def self.weighted_tanimoto(fp_a,fp_b,p)
    shared = fp_a & fp_b
    union = (fp_a + fp_b).uniq
    return 0.0 unless shared.size > 0

    weight = lambda { |feature| OpenTox::Utils.gauss(p[feature]) }
    shared_weight = shared.inject(0.0) { |sum, feature| sum + weight.call(feature) }
    union_weight = union.inject(0.0) { |sum, feature| sum + weight.call(feature) }
    shared_weight/union_weight
  end

  # Similarity derived from the euclidean distance over the properties
  # present in both hashes: 1/(1+distance).
  # Returns nil unless the hashes have more than one key in common.
  def self.euclidean(prop_a,prop_b)
    shared_keys = prop_a.keys & prop_b.keys
    return nil unless shared_keys.size > 1

    squared_distance = shared_keys.inject(0) do |sum, key|
      sum + (prop_a[key] - prop_b[key])**2
    end
    1/(1+Math.sqrt(squared_distance))
  end
end
80
+
81
+ end
82
+ end
@@ -0,0 +1,128 @@
1
# Base URI of the structure lookup service at cactus.nci.nih.gov.
# OpenTox::Compound appends an identifier plus "/stdinchi", "/names" or
# "/image" to it.
@@cactus_uri = "http://cactus.nci.nih.gov/chemical/structure/"
# Base URI of the AMBIT depiction service (not referenced by the code visible here).
@@ambit_uri = "http://ambit.uni-plovdiv.bg:8080/ambit2/depict/cdk?search="
3
+
4
+ module OpenTox
5
+
6
# A chemical compound, held as an InChI string together with a URI.
# Format conversions and SMARTS matching go through OpenBabel.
class Compound #< OpenTox

  attr_reader :inchi, :uri

  # Initialize with <tt>:smiles => smiles</tt>, <tt>:inchi => inchi</tt>,
  # <tt>:sdf => sdf</tt>, <tt>:name => name</tt> (name can also be an
  # InChI/InChiKey, CAS number, etc) or <tt>:uri => uri</tt>.
  # The keys are tried in exactly that order and only the first one present
  # is used. If none is present, neither the InChI nor the URI is set.
  def initialize(params)
    # URI of this compound below the configured compound service; needs @inchi.
    service_uri = lambda do
      File.join(@@config[:services]["opentox-compound"],URI.escape(@inchi))
    end

    if params[:smiles]
      @inchi = smiles2inchi(params[:smiles])
      @uri = service_uri.call
    elsif params[:inchi]
      @inchi = params[:inchi]
      @uri = service_uri.call
    elsif params[:sdf]
      @inchi = sdf2inchi(params[:sdf])
      @uri = service_uri.call
    elsif params[:name]
      # paranoid URI encoding to keep SMILES charges and brackets
      unsafe = Regexp.new("[^#{URI::PATTERN::UNRESERVED}]")
      lookup = "#{@@cactus_uri}#{URI.encode(params[:name], unsafe)}/stdinchi"
      # Plain RestClient is used here on purpose: the original author found
      # RestClientWrapper hard to debug for this call and reported
      # additional errors with it.
      @inchi = RestClient.get(lookup).body.chomp
      @uri = service_uri.call
    elsif params[:uri]
      @uri = params[:uri]
      @inchi =
        case @uri
        when /ambit/ # Ambit does not deliver InChIs reliably
          obconversion(RestClientWrapper.get(@uri, :accept => 'chemical/x-daylight-smiles'),'smi','inchi')
        when /InChI/ # shortcut for IST services
          @uri.sub(/^.*InChI/, 'InChI')
        else
          RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi')
        end
    end
  end

  # Get the (canonical) smiles
  def smiles
    obconversion(@inchi, 'inchi', 'can')
  end

  # The compound converted to SDF.
  def sdf
    obconversion(@inchi, 'inchi', 'sdf')
  end

  # Fetches "<cactus uri><inchi>/image" through RestClientWrapper.
  def gif
    RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/image")
  end

  # Fetches the "image" resource below this compound's URI.
  def png
    RestClientWrapper.get(File.join(@uri, "image"))
  end

  # Fetches "<cactus uri><inchi>/names"; returns the string
  # "not available" if that request raises a StandardError.
  def names
    RestClientWrapper.get("#{@@cactus_uri}#{@inchi}/names")
  rescue
    "not available"
  end

  # Builds the compound service URI that displays this compound together
  # with the given activating and deactivating SMARTS arrays, optionally
  # followed by a "highlight" segment.
  # Note: the quoted, "/"-joined SMARTS lists are URI-encoded once when they
  # are built and once more when the URI is assembled.
  def display_smarts_uri(activating, deactivating, highlight = nil)
    LOGGER.debug activating.to_yaml unless activating.nil?
    activating_smarts = URI.encode("\"#{activating.join("\"/\"")}\"")
    deactivating_smarts = URI.encode("\"#{deactivating.join("\"/\"")}\"")
    segments = [
      @@config[:services]["opentox-compound"],
      "smiles", URI.encode(smiles),
      "smarts/activating", URI.encode(activating_smarts),
      "deactivating", URI.encode(deactivating_smarts)
    ]
    segments.push("highlight", URI.encode(highlight)) unless highlight.nil?
    File.join(*segments)
  end

  # URI of the "image" resource below this compound's URI.
  def image_uri
    File.join(@uri, "image")
  end

  # Matches a smarts string against this compound
  def match?(smarts)
    converter = OpenBabel::OBConversion.new
    molecule = OpenBabel::OBMol.new
    converter.set_in_format('inchi')
    converter.read_string(molecule, @inchi)
    pattern = OpenBabel::OBSmartsPattern.new
    pattern.init(smarts)
    pattern.match(molecule)
  end

  # Match an array of smarts features, returns matching features
  def match(smarts_array)
    smarts_array.map { |smarts| match?(smarts) ? smarts : nil }.compact
  end

  # AM
  # Match an array of smarts features, returns 1 for matching and 0 for
  # non-matching features at each position
  def match_all(smarts_array)
    smarts_array.map { |smarts| match?(smarts) ? 1 : 0 }
  end

  # SDF -> InChI
  def sdf2inchi(sdf)
    obconversion(sdf, 'sdf', 'inchi')
  end

  # SMILES -> InChI
  def smiles2inchi(smiles)
    obconversion(smiles, 'smi', 'inchi')
  end

  # SMILES -> canonical SMILES
  def smiles2cansmi(smiles)
    obconversion(smiles, 'smi', 'can')
  end

  # Converts +identifier+ from +input_format+ to +output_format+ with
  # OpenBabel. For output formats matching smi, can or inchi all whitespace
  # is removed from the result; other formats are returned as written.
  def obconversion(identifier,input_format,output_format)
    converter = OpenBabel::OBConversion.new
    molecule = OpenBabel::OBMol.new
    converter.set_in_and_out_formats input_format, output_format
    converter.read_string molecule, identifier
    written = converter.write_string(molecule)
    return written unless /smi|can|inchi/ === output_format
    written.gsub(/\s/,'').chomp
  end
end
128
+ end
@@ -0,0 +1,51 @@
1
+ require 'rubygems'
2
+ require 'rack'
3
+ require 'rack/contrib'
4
+ require 'application.rb'
5
+
6
# log at centralized place:
# append everything written to $stdout and $stderr to
# "<LOG_DIR>/<RACK_ENV>.log" and flush after every write.
logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
log = File.new(logfile, "a+")
$stdout.reopen(log)
$stderr.reopen(log)
$stdout.sync = true
$stderr.sync = true
set :logging, false
set :raise_errors, true

# Make sure the working directories exist.
# File.exist? replaces the deprecated alias File.exists?, which was removed
# in Ruby 3.2; File.exist? is available in every Ruby version.
['public','tmp'].each do |dir|
  FileUtils.mkdir_p dir unless File.exist?(dir)
end
19
+
20
use Rack::ShowExceptions

# Mail notifications are only set up when a MAIL constant is defined.
if defined?(MAIL)

  # Monkeypatch: re-define send_notification with the original method.
  # Without this patch the author's mail server answered with
  # "Connection refused - connect(2)" errors.
  module Rack
    class MailExceptions

      # Builds the mail for +exception+, marks the request environment as
      # notified and sends the mail to every recipient over SMTP.
      # Nothing is sent while the SMTP server is still 'example.com'.
      def send_notification(exception, env)
        mail = generate_mail(exception, env)
        settings = config[:smtp]
        env['mail.sent'] = true
        return if settings[:server] == 'example.com'

        Net::SMTP.start(settings[:server], settings[:port], settings[:domain], settings[:user_name], settings[:password], settings[:authentication]) do |connection|
          mail.to.each { |recipient| connection.send_message(mail.to_s, mail.from, recipient) }
        end
      end
    end
  end


  require "socket"
  # Errors are mailed to MAIL[:user_name], sent from "<hostname>@<MAIL[:domain]>".
  use Rack::MailExceptions do |notifier|
    notifier.to MAIL[:user_name]
    notifier.subject '[ERROR] %s'
    notifier.from "#{Socket.gethostname}@#{MAIL[:domain]}"
    notifier.smtp MAIL
  end
end
@@ -0,0 +1,226 @@
1
+ module OpenTox
2
+
3
# A dataset: compounds, features and the values stored per compound.
#
# @data maps each compound to an array of hashes, and each of those hashes
# maps a feature to its value (see get_value).
# A dataset created from an OpenTox::Owl object loads feature values lazily:
# features whose values have not been read yet are kept in @dirty_features.
class Dataset

  attr_accessor :uri, :title, :creator, :data, :features, :compounds

  # Creates an empty dataset, or a dataset object from an OpenTox::Owl object.
  # Use Dataset.find( <uri> ) to load a dataset from an rdf-supporting dataset service.
  # Note: does not load all feature values, as this is time consuming.
  #
  # Raises "invalid param" if +owl+ is given but is not an OpenTox::Owl.
  def initialize( owl=nil )
    @data = {}
    @features = []
    @compounds = []

    if owl
      raise "invalid param" unless owl.is_a?(OpenTox::Owl)
      @title = owl.get("title")
      @creator = owl.get("creator")
      @uri = owl.uri
      # when loading a dataset from owl, only compound- and feature-uris are loaded
      owl.load_dataset(@compounds, @features)
      # all features are marked as dirty
      # as soon as a feature-value is requested all values for this feature are loaded from the rdf
      # NOTE(review): dclone is not a core Array method; presumably supplied
      # by a library loaded elsewhere - confirm.
      @dirty_features = @features.dclone
      @owl = owl
    end
  end

  # Loads the dataset found at +uri+.
  #
  # +accept_header+ selects the representation ("application/x-yaml" or
  # "application/rdf+xml"). When omitted, yaml is used if the host of +uri+
  # is listed in @@config[:yaml_hosts], rdf otherwise.
  # Raises for any other accept header.
  def self.find(uri, accept_header=nil)

    accept_header ||=
      if @@config[:yaml_hosts].include?(URI.parse(uri).host)
        'application/x-yaml'
      else
        "application/rdf+xml"
      end

    case accept_header
    when "application/x-yaml"
      # NOTE(review): YAML.load deserializes arbitrary objects from the
      # response body; this is only safe for trusted dataset services.
      d = YAML.load RestClientWrapper.get(uri.to_s.strip, :accept => 'application/x-yaml').to_s
      d.uri = uri unless d.uri
    when "application/rdf+xml"
      owl = OpenTox::Owl.from_uri(uri.to_s.strip, "Dataset")
      d = Dataset.new(owl)
    else
      raise "cannot get datset with accept header: "+accept_header.to_s
    end
    d
  end

  # converts a dataset represented in owl to yaml
  # (uses a temporary dataset)
  # note: to_yaml is overwritten, loads complete owl dataset values
  def self.owl_to_yaml( owl_data, uri)
    owl = OpenTox::Owl.from_data(owl_data, uri, "Dataset")
    Dataset.new(owl).to_yaml
  end

  # creates a new dataset, using only those compounds specified in new_compounds
  # and only the features specified in new_features
  # returns uri of new dataset
  #
  # Raises "no new compounds selected" if new_compounds is nil or empty, and
  # "no data for compound ..." if a selected compound has no data.
  def create_new_dataset( new_compounds, new_features, new_title, new_creator )

    # Validate before logging: the log line calls new_compounds.size, which
    # would fail with NoMethodError on nil and hide the intended error.
    raise "no new compounds selected" unless new_compounds and new_compounds.size>0
    LOGGER.debug "create new dataset with "+new_compounds.size.to_s+"/"+compounds.size.to_s+" compounds"

    # load required features that have not been read from the owl yet
    if defined? @dirty_features
      (@dirty_features & new_features).each{|f| load_feature_values(f)}
    end

    dataset = OpenTox::Dataset.new
    dataset.title = new_title
    dataset.creator = new_creator
    dataset.features = new_features
    dataset.compounds = new_compounds

    # Copy dataset data for compounds and features
    # PENDING: why storing feature values in an array?
    new_compounds.each do |c|
      raise "no data for compound '"+c.to_s+"'" if @data[c]==nil
      dataset.data[c] = @data[c].collect do |d|
        m = {}
        new_features.each{|f| m[f] = d[f]}
        m
      end
    end
    return dataset.save
  end

  # returns classification value
  #
  # If the stored value is a hash, the value under the first key matching
  # /classification/ is returned, or the string "no classification key" if
  # there is no such key. Non-hash values are returned as they are; an array
  # value raises.
  def get_predicted_class(compound, feature)
    v = get_value(compound, feature)
    if v.is_a?(Hash)
      k = v.keys.grep(/classification/).first
      # k is nil (not an empty string) when no key matches
      return "no classification key" if k.nil?
      return v[k]
    elsif v.is_a?(Array)
      raise "predicted class value is an array\n"+
        "value "+v.to_s+"\n"+
        "value-class "+v.class.to_s+"\n"+
        "dataset "+@uri.to_s+"\n"+
        "compound "+compound.to_s+"\n"+
        "feature "+feature.to_s+"\n"
    else
      return v
    end
  end

  # returns regression value
  #
  # If the stored value is a hash, the value under the first key matching
  # /regression/ is returned, or the string "no regression key" if there is
  # no such key. Non-hash values are returned as they are; an array value
  # raises.
  def get_predicted_regression(compound, feature)
    v = get_value(compound, feature)
    if v.is_a?(Hash)
      k = v.keys.grep(/regression/).first
      # k is nil (not an empty string) when no key matches
      return "no regression key" if k.nil?
      return v[k]
    elsif v.is_a?(Array)
      raise "predicted regression value is an array\n"+
        "value "+v.to_s+"\n"+
        "value-class "+v.class.to_s+"\n"+
        "dataset "+@uri.to_s+"\n"+
        "compound "+compound.to_s+"\n"+
        "feature "+feature.to_s+"\n"
    else
      return v
    end
  end

  # returns prediction confidence if available
  #
  # If the stored value is a hash, the absolute value found under the first
  # key matching /confidence/ is returned; raises "no confidence key" if
  # there is no such key. For non-hash values a warning is logged and 1 is
  # returned.
  def get_prediction_confidence(compound, feature)
    v = get_value(compound, feature)
    if v.is_a?(Hash)
      k = v.keys.grep(/confidence/).first
      # PENDING: return nil instead of raising an exception
      # k is nil (not an empty string) when no key matches
      raise "no confidence key" if k.nil?
      return v[k].abs
    else
      LOGGER.warn "no confidence for compound: "+compound.to_s+", feature: "+feature.to_s
      return 1
    end
  end

  # return compound-feature value
  #
  # Loads the values of +feature+ first if they have not been read from the
  # owl yet. Returns nil if the compound has no data or no entry for the
  # feature. Raises if the stored data has an unexpected shape.
  def get_value(compound, feature)
    if (defined? @dirty_features) && @dirty_features.include?(feature)
      load_feature_values(feature)
    end

    v = @data[compound]
    return nil if v.nil? # missing values for all features
    unless v.is_a?(Array)
      raise "value is not an array\n"+
        "value "+v.to_s+"\n"+
        "value-class "+v.class.to_s+"\n"+
        "dataset "+@uri.to_s+"\n"+
        "compound "+compound.to_s+"\n"+
        "feature "+feature.to_s+"\n"
    end

    # PENDING: why using an array here?
    v.each do |e|
      raise "invalid internal value type" unless e.is_a?(Hash)
      return e[feature] if e.has_key?(feature)
    end
    return nil #missing value
  end

  # loads specified feature and removes dirty-flag, loads all features if feature is nil
  #
  # Raises "feature already loaded" if +feature+ is given but not dirty.
  def load_feature_values(feature=nil)
    if feature
      raise "feature already loaded" unless @dirty_features.include?(feature)
      @owl.load_dataset_feature_values(@compounds, @data, [feature])
      @dirty_features.delete(feature)
    else
      @data = {} unless @data
      @owl.load_dataset_feature_values(@compounds, @data, @dirty_features)
      @dirty_features.clear
    end
  end

  # overwrite to yaml:
  # in case dataset is loaded from owl:
  # * load all values
  def to_yaml
    # loads all features that are still dirty
    load_feature_values if (defined? @dirty_features) && @dirty_features.size > 0
    super
  end

  # * remove @owl from yaml, not necessary
  def to_yaml_properties
    super - ["@owl"]
  end

  # saves (changes) as new dataset in dataset service
  # returns uri
  # uses to yaml method (which is overwritten)
  def save
    OpenTox::RestClientWrapper.post(@@config[:services]["opentox-dataset"],{:content_type => "application/x-yaml"},self.to_yaml).strip
  end
end
226
+ end