opentox-ruby 2.0.1 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +39 -46
- data/VERSION +1 -1
- data/lib/algorithm.rb +797 -80
- data/lib/compound.rb +40 -0
- data/lib/config/config_ru.rb +2 -0
- data/lib/dataset.rb +57 -18
- data/lib/environment.rb +3 -3
- data/lib/feature.rb +15 -13
- data/lib/helper.rb +1 -2
- data/lib/model.rb +185 -82
- data/lib/opentox-ruby.rb +1 -1
- data/lib/overwrite.rb +2 -1
- data/lib/parser.rb +247 -69
- data/lib/rest_client_wrapper.rb +3 -2
- data/lib/serializer.rb +24 -10
- data/lib/task.rb +10 -3
- data/lib/to-html.rb +66 -41
- data/lib/validation.rb +93 -29
- metadata +206 -117
data/lib/compound.rb
CHANGED
@@ -21,6 +21,17 @@ module OpenTox
|
|
21
21
|
else
|
22
22
|
@inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
|
23
23
|
end
|
24
|
+
|
25
|
+
if @uri and @inchi.to_s.size==0
|
26
|
+
LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
|
27
|
+
@inchi = Compound.smiles2inchi(Compound.smiles(@uri))
|
28
|
+
end
|
29
|
+
end
|
30
|
+
|
31
|
+
# Request the SMILES representation of a compound from the compound service.
# The representation is selected via the Accept header of the GET request.
# @param [String] uri Compound URI
# @return [String] SMILES string, trailing line break removed
def self.smiles(uri)
  response = RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles')
  response.to_s.chomp
end
|
25
36
|
|
26
37
|
# Create a compound from smiles string
|
@@ -153,6 +164,35 @@ module OpenTox
|
|
153
164
|
#smarts_array.collect { |s| s if match?(s)}.compact
|
154
165
|
end
|
155
166
|
|
167
|
+
# Match an array of smarts strings against this compound and count the hits.
# Only smarts that match appear in the result; the count is the size of the
# map list reported by OpenBabel for the pattern (non-unique hits).
# @example
#   compound = OpenTox::Compound.from_name("Benzene")
#   compound.match_hits(['cc','cN']) # returns a Hash with 'cc' as only key, 'cN' does not match
# @param [Array] smarts_array Array with Smarts strings
# @return [Hash] Hash with matching smarts as key and number of non-unique hits as value
def match_hits(smarts_array)
  # avoid recreation of OpenBabel objects: molecule and pattern are built once
  obconversion = OpenBabel::OBConversion.new
  obmol = OpenBabel::OBMol.new
  obconversion.set_in_format('inchi')
  obconversion.read_string(obmol, @inchi)
  smarts_pattern = OpenBabel::OBSmartsPattern.new
  smarts_hits = {}
  smarts_array.each do |smarts|
    smarts_pattern.init(smarts)
    # the map list is only read after a successful match
    smarts_hits[smarts] = smarts_pattern.get_map_list.size if smarts_pattern.match(obmol)
  end
  smarts_hits
end
|
194
|
+
|
195
|
+
|
156
196
|
# Get URI of compound image with highlighted fragments
|
157
197
|
#
|
158
198
|
# @param [Array] activating Array with activating Smarts strings
|
data/lib/config/config_ru.rb
CHANGED
data/lib/dataset.rb
CHANGED
@@ -102,6 +102,13 @@ module OpenTox
|
|
102
102
|
copy parser.load_uri(subjectid)
|
103
103
|
end
|
104
104
|
|
105
|
+
# Load an SDF string into this dataset through Parser::Sdf.
# @param sdf SDF content that is handed to the parser
# @param subjectid passed to save when the dataset has no URI yet
# @return whatever Parser::Sdf#load_sdf returns
def load_sdf(sdf,subjectid=nil)
  # get a uri for creating features
  save(subjectid) if !@uri
  Parser::Sdf.new.tap { |sdf_parser| sdf_parser.dataset = self }.load_sdf(sdf)
end
|
111
|
+
|
105
112
|
# Load CSV string (format specification: http://toxcreate.org/help)
|
106
113
|
# - loads data_entries, compounds, features
|
107
114
|
# - sets metadata (warnings) for parser errors
|
@@ -149,7 +156,11 @@ module OpenTox
|
|
149
156
|
# Load and return only compound URIs from the dataset service
|
150
157
|
# @return [Array] Compound URIs in the dataset
|
151
158
|
def load_compounds(subjectid=nil)
|
152
|
-
|
159
|
+
# fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50
|
160
|
+
u = URI::parse(uri)
|
161
|
+
u.path = File.join(u.path,"compounds")
|
162
|
+
u = u.to_s
|
163
|
+
RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
|
153
164
|
@compounds << compound_uri.chomp
|
154
165
|
end
|
155
166
|
@compounds.uniq!
|
@@ -167,19 +178,15 @@ module OpenTox
|
|
167
178
|
@features
|
168
179
|
end
|
169
180
|
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
175
|
-
|
176
|
-
|
177
|
-
else
|
178
|
-
nil
|
179
|
-
end
|
181
|
+
# returns the accept_values of a feature, i.e. the classification domain / all possible feature values
# @param [String] feature the URI of the feature
# @return [Array] sorted copy of the accept values (the stored metadata is left untouched),
#   nil if value is not set (e.g. when feature is numeric)
def accept_values(feature)
  values = features[feature][OT.acceptValue]
  # Array#sort does not sort in place, so the sorted array itself has to be returned
  values && values.sort
end
|
181
189
|
|
182
|
-
=begin
|
183
190
|
# Detect feature type(s) in the dataset
|
184
191
|
# @return [String] "classification", "regression", "mixed" or "unknown"
|
185
192
|
def feature_type(subjectid=nil)
|
@@ -193,6 +200,7 @@ module OpenTox
|
|
193
200
|
"unknown"
|
194
201
|
end
|
195
202
|
end
|
203
|
+
=begin
|
196
204
|
=end
|
197
205
|
|
198
206
|
# Get Spreadsheet representation
|
@@ -229,6 +237,30 @@ module OpenTox
|
|
229
237
|
s.to_rdfxml
|
230
238
|
end
|
231
239
|
|
240
|
+
# Get SDF representation of compounds
# For every compound the InChI, the SDF block (without its "$$$$" terminator),
# one data item per feature and a closing "$$$$" line are appended.
# @return [String] SDF representation
def to_sdf
  sum = ""
  @compounds.each do |c|
    # create the compound object only once per compound
    compound = OpenTox::Compound.new(c)
    sum << compound.to_inchi
    sum << compound.to_sdf.sub(/\n\$\$\$\$/,'')
    # compounds without data entries only get the structure and the terminator
    (@data_entries[c] || {}).each do |f,v|
      sum << "> <\"#{f}\">\n"
      sum << v.join(", ")
      sum << "\n\n"
    end
    sum << "$$$$\n"
  end
  sum
end
|
256
|
+
|
257
|
+
# Get URI list representation of the compounds
# @return [String] compound URIs, each one terminated by a line break
#   (empty String if the dataset has no compounds)
def to_urilist
  # build a new string instead of appending to the first entry of @compounds
  @compounds.collect { |c| "#{OpenTox::Compound.new(c).uri}\n" }.join
end
|
263
|
+
|
232
264
|
# Get name (DC.title) of a feature
|
233
265
|
# @param [String] feature Feature URI
|
234
266
|
# @return [String] Feature title
|
@@ -307,6 +339,12 @@ module OpenTox
|
|
307
339
|
end
|
308
340
|
end
|
309
341
|
end
|
342
|
+
# set feature metadata in new dataset accordingly (including accept values)
|
343
|
+
features.each do |f|
|
344
|
+
self.features[f].each do |k,v|
|
345
|
+
dataset.features[f][k] = v
|
346
|
+
end
|
347
|
+
end
|
310
348
|
dataset.add_metadata(metadata)
|
311
349
|
dataset.save(subjectid)
|
312
350
|
dataset
|
@@ -369,12 +407,14 @@ module OpenTox
|
|
369
407
|
end
|
370
408
|
|
371
409
|
def value(compound)
|
372
|
-
|
410
|
+
v = nil
|
411
|
+
v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri]
|
412
|
+
v = nil if v.is_a? Array and v.empty?
|
413
|
+
v
|
373
414
|
end
|
374
415
|
|
375
416
|
def confidence(compound)
|
376
|
-
|
377
|
-
@features[feature_uri][OT.confidence]
|
417
|
+
@data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri]
|
378
418
|
end
|
379
419
|
|
380
420
|
def descriptors(compound)
|
@@ -382,12 +422,11 @@ module OpenTox
|
|
382
422
|
end
|
383
423
|
|
384
424
|
def measured_activities(compound)
|
385
|
-
|
386
|
-
@data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
|
425
|
+
@data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri]
|
387
426
|
end
|
388
427
|
|
389
428
|
def neighbors(compound)
|
390
|
-
@data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
|
429
|
+
@data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
|
391
430
|
end
|
392
431
|
|
393
432
|
# def errors(compound)
|
data/lib/environment.rb
CHANGED
@@ -27,7 +27,7 @@ end
|
|
27
27
|
Ohm.connect :thread_safe => true
|
28
28
|
|
29
29
|
# load mail settings for error messages
|
30
|
-
load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
|
30
|
+
#load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
|
31
31
|
|
32
32
|
logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
|
33
33
|
#LOGGER = OTLogger.new(logfile,'daily') # daily rotation
|
@@ -40,8 +40,8 @@ else
|
|
40
40
|
end
|
41
41
|
|
42
42
|
# Regular expressions for parsing classification data
|
43
|
-
TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i
|
44
|
-
FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i
|
43
|
+
# the dot of the decimal alternatives is escaped, so only a literal "1.0" / "0.0" is accepted
TRUE_REGEXP = /^(true|active|1|1\.0|tox|activating)$/i
FALSE_REGEXP = /^(false|inactive|0|0\.0|low tox|deactivating)$/i
|
45
45
|
|
46
46
|
# Task durations
|
47
47
|
DEFAULT_TASK_MAX_DURATION = 36000
|
data/lib/feature.rb
CHANGED
@@ -2,6 +2,8 @@ module OpenTox
|
|
2
2
|
class Feature
|
3
3
|
include OpenTox
|
4
4
|
|
5
|
+
attr_accessor :subjectid
|
6
|
+
|
5
7
|
# Find a feature
|
6
8
|
# @param [String] uri Feature URI
|
7
9
|
# @return [OpenTox::Task] Feature object
|
@@ -13,31 +15,31 @@ module OpenTox
|
|
13
15
|
else
|
14
16
|
feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata
|
15
17
|
end
|
18
|
+
feature.subjectid = subjectid
|
16
19
|
feature
|
17
20
|
end
|
18
|
-
|
21
|
+
|
19
22
|
# provides feature type, possible types are "regression" or "classification"
|
20
23
|
# @return [String] feature type, unknown if OT.isA property is unknown/ not set
|
21
24
|
def feature_type
|
25
|
+
raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type]
|
22
26
|
if metadata[RDF.type].flatten.include?(OT.NominalFeature)
|
23
27
|
"classification"
|
24
28
|
elsif metadata[RDF.type].flatten.include?(OT.NumericFeature)
|
25
29
|
"regression"
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
"
|
30
|
+
elsif metadata[OWL.sameAs]
|
31
|
+
metadata[OWL.sameAs].each do |f|
|
32
|
+
begin
|
33
|
+
type = Feature.find(f, subjectid).feature_type
|
34
|
+
return type unless type=="unknown"
|
35
|
+
rescue => ex
|
36
|
+
LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s
|
37
|
+
end
|
38
|
+
end
|
39
|
+
"unknown"
|
36
40
|
else
|
37
41
|
"unknown"
|
38
42
|
end
|
39
|
-
=end
|
40
43
|
end
|
41
|
-
|
42
44
|
end
|
43
45
|
end
|
data/lib/helper.rb
CHANGED
@@ -81,7 +81,7 @@ helpers do
|
|
81
81
|
when "css"
|
82
82
|
@accept = 'text/css'
|
83
83
|
else
|
84
|
-
#
|
84
|
+
# raise OpenTox::NotFoundError.new "File format #{extension} not supported."
|
85
85
|
end
|
86
86
|
end
|
87
87
|
end
|
@@ -94,4 +94,3 @@ before do
|
|
94
94
|
protected!(@subjectid)
|
95
95
|
end
|
96
96
|
end
|
97
|
-
|
data/lib/model.rb
CHANGED
@@ -23,7 +23,7 @@ module OpenTox
|
|
23
23
|
# Generic OpenTox model class for all API compliant services
|
24
24
|
class Generic
|
25
25
|
include Model
|
26
|
-
|
26
|
+
|
27
27
|
# Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
|
28
28
|
# @param [String] uri Model URI
|
29
29
|
# @return [OpenTox::Model::Generic] Model instance
|
@@ -34,42 +34,75 @@ module OpenTox
|
|
34
34
|
raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
|
35
35
|
model
|
36
36
|
end
|
37
|
-
|
38
|
-
|
39
|
-
|
37
|
+
|
38
|
+
# provides feature type, possible types are "regression" or "classification"
|
39
|
+
# @return [String] feature type, "unknown" if type could not be estimated
|
40
40
|
def feature_type(subjectid=nil)
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
|
45
|
-
algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
|
46
|
-
algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
|
47
|
-
algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil
|
48
|
-
dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
|
49
|
-
dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
|
50
|
-
type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten
|
51
|
-
type_indicators.each do |type|
|
52
|
-
case type
|
53
|
-
when /(?i)classification/
|
54
|
-
@feature_type = "classification"
|
55
|
-
break
|
56
|
-
when /(?i)regression/
|
57
|
-
@feature_type = "regression"
|
58
|
-
end
|
41
|
+
unless @feature_type
|
42
|
+
load_predicted_variables( subjectid ) unless @predicted_variable
|
43
|
+
@feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
|
59
44
|
end
|
60
|
-
raise "unknown model "+type_indicators.inspect unless @feature_type
|
61
45
|
@feature_type
|
62
46
|
end
|
63
|
-
|
64
|
-
end
|
65
47
|
|
48
|
+
# URI of the predicted variable of this model, loaded via load_predicted_variables on first use
# @param [optional,String] subjectid passed on to load_predicted_variables
# @return [String] value of @predicted_variable after loading
def predicted_variable( subjectid=nil )
  load_predicted_variables( subjectid ) unless @predicted_variable
  @predicted_variable
end
|
52
|
+
|
53
|
+
# URIs of all predicted variables of this model, loaded without confidence detection on first use
# NOTE(review): load_predicted_variables fills only @predicted_variable when the model lists
# exactly one predicted variable, so this returns nil in that case - confirm whether intended
# @param [optional,String] subjectid passed on to load_predicted_variables
# @return [Array] value of @predicted_variables after loading
def predicted_variables( subjectid=nil )
  load_predicted_variables( subjectid, false ) unless @predicted_variables
  @predicted_variables
end
|
57
|
+
|
58
|
+
# URI of the confidence feature of this model, loaded via load_predicted_variables on first use
# @param [optional,String] subjectid passed on to load_predicted_variables
# @return [String] value of @predicted_confidence after loading (nil if loading did not set it)
def predicted_confidence( subjectid=nil )
  load_predicted_variables( subjectid ) unless @predicted_confidence
  @predicted_confidence
end
|
62
|
+
|
63
|
+
private
|
64
|
+
# Reads OT.predictedVariables from the model metadata (loading the metadata first when it is
# missing, empty or only holds the model URI) and stores the result:
# - one entry: @predicted_variable
# - two entries with use_confidence: the feature whose DC.title matches "confidence"
#   (case-insensitive) becomes @predicted_confidence, the other one @predicted_variable
# - otherwise: @predicted_variables
# Raises if nothing could be determined.
def load_predicted_variables( subjectid=nil, use_confidence=true )
  metadata_missing = @metadata==nil || @metadata.size==0 || (@metadata.size==1 && @metadata.values[0]==@uri)
  load_metadata(subjectid) if metadata_missing

  predicted = @metadata[OT.predictedVariables]
  if predicted
    unless predicted.is_a?(Array)
      raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
    end
    case predicted.size
    when 0
      raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"
    when 1
      @predicted_variable = predicted[0]
    else
      # PENDING identify confidence
      if use_confidence
        conf_index = -1
        predicted.each_with_index do |feature_uri, i|
          f = OpenTox::Feature.find(feature_uri, subjectid)
          # the last feature with a matching title wins
          conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
        end
        if conf_index==-1
          raise "could not estimate predicted variable from model: '"+uri.to_s+
            "', number of predicted-variables==2, but no confidence found"
        end
      end
      if use_confidence && predicted.size==2
        @predicted_variable = predicted[1-conf_index]
        @predicted_confidence = predicted[conf_index]
      else
        @predicted_variables = predicted
      end
    end
  end
  raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
end
|
97
|
+
end
|
98
|
+
|
66
99
|
# Lazy Structure Activity Relationship class
|
67
100
|
class Lazar
|
68
101
|
|
69
|
-
include Model
|
70
102
|
include Algorithm
|
103
|
+
include Model
|
71
104
|
|
72
|
-
attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
|
105
|
+
attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
|
73
106
|
|
74
107
|
def initialize(uri=nil)
|
75
108
|
|
@@ -78,7 +111,7 @@ module OpenTox
|
|
78
111
|
else
|
79
112
|
super CONFIG[:services]["opentox-model"]
|
80
113
|
end
|
81
|
-
|
114
|
+
|
82
115
|
@metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
|
83
116
|
|
84
117
|
@features = []
|
@@ -86,12 +119,18 @@ module OpenTox
|
|
86
119
|
@activities = {}
|
87
120
|
@p_values = {}
|
88
121
|
@fingerprints = {}
|
122
|
+
@value_map = {}
|
123
|
+
@prediction_min_max = []
|
89
124
|
|
90
125
|
@feature_calculation_algorithm = "Substructure.match"
|
91
126
|
@similarity_algorithm = "Similarity.tanimoto"
|
92
127
|
@prediction_algorithm = "Neighbors.weighted_majority_vote"
|
93
|
-
|
128
|
+
|
129
|
+
@nr_hits = false
|
94
130
|
@min_sim = 0.3
|
131
|
+
@prop_kernel = false
|
132
|
+
@transform = { "class" => "NOP" }
|
133
|
+
@conf_stdev = false
|
95
134
|
|
96
135
|
end
|
97
136
|
|
@@ -111,13 +150,25 @@ module OpenTox
|
|
111
150
|
# Create a new lazar model
|
112
151
|
# @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
|
113
152
|
# @return [OpenTox::Model::Lazar] lazar model
|
114
|
-
def self.create(params)
|
153
|
+
def self.create(params, waiting_task=nil )
|
115
154
|
subjectid = params[:subjectid]
|
116
155
|
lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
|
117
|
-
model_uri = lazar_algorithm.run(params)
|
156
|
+
model_uri = lazar_algorithm.run(params, waiting_task)
|
118
157
|
OpenTox::Model::Lazar.find(model_uri, subjectid)
|
119
158
|
end
|
120
159
|
|
160
|
+
# Run the model by posting the parameters to the model URI.
# Without an explicit accept header, YAML is requested from hosts listed in
# CONFIG[:yaml_hosts] and RDF/XML from all other hosts.
# @param params parameters posted to the model
# @param accept_header optional value for the Accept header
# @param waiting_task optional task that is handed to RestClientWrapper.post
# @return [String] response of the POST request as string
def run( params, accept_header=nil, waiting_task=nil )
  accept_header ||= CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) ? 'application/x-yaml' : 'application/rdf+xml'
  LOGGER.info "running model #{@uri}, params: #{params.inspect}, accept: #{accept_header}"
  response = RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task)
  response.to_s
end
|
171
|
+
|
121
172
|
# Get a parameter value
|
122
173
|
# @param [String] param Parameter name
|
123
174
|
# @return [String] Parameter value
|
@@ -131,6 +182,7 @@ module OpenTox
|
|
131
182
|
# @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
|
132
183
|
# @return [OpenTox::Dataset] Dataset with predictions
|
133
184
|
def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
|
185
|
+
|
134
186
|
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
|
135
187
|
@prediction_dataset.add_metadata({
|
136
188
|
OT.hasSource => @uri,
|
@@ -150,7 +202,7 @@ module OpenTox
|
|
150
202
|
LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
|
151
203
|
end
|
152
204
|
end
|
153
|
-
|
205
|
+
#@prediction_dataset.save(subjectid)
|
154
206
|
@prediction_dataset
|
155
207
|
end
|
156
208
|
|
@@ -164,49 +216,52 @@ module OpenTox
|
|
164
216
|
features = {}
|
165
217
|
|
166
218
|
unless @prediction_dataset
|
167
|
-
#@prediction_dataset = cached_prediction
|
168
|
-
#return @prediction_dataset if cached_prediction
|
169
219
|
@prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
|
170
220
|
@prediction_dataset.add_metadata( {
|
171
221
|
OT.hasSource => @uri,
|
172
222
|
DC.creator => @uri,
|
173
|
-
# TODO: fix dependentVariable
|
174
223
|
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
|
175
224
|
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
|
176
225
|
} )
|
177
226
|
end
|
178
227
|
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
# TODO: fix dependentVariable
|
186
|
-
@prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
|
187
|
-
|
188
|
-
if @neighbors.size == 0
|
189
|
-
@prediction_dataset.add_feature(prediction_feature_uri, {
|
190
|
-
RDF.type => [OT.MeasuredFeature],
|
191
|
-
OT.hasSource => @uri,
|
192
|
-
DC.creator => @uri,
|
193
|
-
DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
|
194
|
-
OT.error => "No similar compounds in training dataset.",
|
195
|
-
OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
|
196
|
-
})
|
197
|
-
@prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
|
228
|
+
if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression"
|
229
|
+
all_activities = []
|
230
|
+
all_activities = @activities.values.flatten.collect! { |i| i.to_f }
|
231
|
+
@prediction_min_max[0] = (all_activities.to_scale.min/2)
|
232
|
+
@prediction_min_max[1] = (all_activities.to_scale.max*2)
|
233
|
+
end
|
198
234
|
|
199
|
-
|
200
|
-
|
201
|
-
|
202
|
-
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
235
|
+
unless database_activity(subjectid) # adds database activity to @prediction_dataset
|
236
|
+
|
237
|
+
neighbors
|
238
|
+
prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
|
239
|
+
:compound => @compound,
|
240
|
+
:features => @features,
|
241
|
+
:p_values => @p_values,
|
242
|
+
:fingerprints => @fingerprints,
|
243
|
+
:similarity_algorithm => @similarity_algorithm,
|
244
|
+
:prop_kernel => @prop_kernel,
|
245
|
+
:value_map => @value_map,
|
246
|
+
:nr_hits => @nr_hits,
|
247
|
+
:conf_stdev => @conf_stdev,
|
248
|
+
:prediction_min_max => @prediction_min_max,
|
249
|
+
:transform => @transform } ) ")
|
250
|
+
|
251
|
+
value_feature_uri = File.join( @uri, "predicted", "value")
|
252
|
+
confidence_feature_uri = File.join( @uri, "predicted", "confidence")
|
253
|
+
|
254
|
+
@prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
|
255
|
+
@prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
|
256
|
+
|
257
|
+
if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
|
258
|
+
@prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
|
259
|
+
else
|
260
|
+
@prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
|
261
|
+
end
|
262
|
+
@prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
|
263
|
+
@prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
|
264
|
+
@prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
|
210
265
|
|
211
266
|
if verbose
|
212
267
|
if @feature_calculation_algorithm == "Substructure.match"
|
@@ -260,7 +315,6 @@ module OpenTox
|
|
260
315
|
end
|
261
316
|
n+=1
|
262
317
|
end
|
263
|
-
# what happens with dataset predictions?
|
264
318
|
end
|
265
319
|
end
|
266
320
|
|
@@ -268,33 +322,53 @@ module OpenTox
|
|
268
322
|
@prediction_dataset
|
269
323
|
end
|
270
324
|
|
271
|
-
|
272
|
-
def neighbors
|
325
|
+
|
273
326
|
|
327
|
+
# Find neighbors and store them as object variable, access all compounds for that.
|
328
|
+
def neighbors
|
274
329
|
@compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
|
275
|
-
|
276
330
|
@neighbors = []
|
277
|
-
@fingerprints.each do |training_compound
|
278
|
-
|
279
|
-
if sim > @min_sim
|
280
|
-
@activities[training_compound].each do |act|
|
281
|
-
@neighbors << {
|
282
|
-
:compound => training_compound,
|
283
|
-
:similarity => sim,
|
284
|
-
:features => training_features,
|
285
|
-
:activity => act
|
286
|
-
}
|
287
|
-
end
|
288
|
-
end
|
331
|
+
@fingerprints.keys.each do |training_compound| # AM: access all compounds
|
332
|
+
add_neighbor @fingerprints[training_compound].keys, training_compound
|
289
333
|
end
|
334
|
+
end
|
290
335
|
|
336
|
+
# Adds a neighbor to @neighbors if it passes the similarity threshold.
# The similarity is computed by evaluating @similarity_algorithm; hit counts for the query
# compound and the training compound are only passed along when @nr_hits is set.
# One neighbor entry is appended per activity of the training compound.
# @param training_features features of the training compound
# @param training_compound key of the training compound in @fingerprints / @activities
def add_neighbor(training_features, training_compound)
  # the local has to be called params: the evaluated expression below refers to it by name
  params = {
    :nr_hits => @nr_hits,
    :compound_features_hits => (@nr_hits ? @compound.match_hits(@compound_features) : {}),
    :training_compound_features_hits => (@nr_hits ? @fingerprints[training_compound] : {})
  }

  sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
  return unless sim > @min_sim

  @activities[training_compound].each do |act|
    @neighbors << {
      :compound => training_compound,
      :similarity => sim,
      :features => training_features,
      :activity => act
    }
  end
end
|
292
362
|
|
293
363
|
# Find database activities and store them in @prediction_dataset
|
294
364
|
# @return [Boolean] true if compound has database activities, false if not
|
295
365
|
def database_activity(subjectid)
|
296
366
|
if @activities[@compound.uri]
|
297
|
-
|
367
|
+
if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
|
368
|
+
@activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
|
369
|
+
else
|
370
|
+
@activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
|
371
|
+
end
|
298
372
|
@prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
|
299
373
|
@prediction_dataset.save(subjectid)
|
300
374
|
true
|
@@ -303,6 +377,35 @@ module OpenTox
|
|
303
377
|
end
|
304
378
|
end
|
305
379
|
|
380
|
+
# Both prediction features of this model
# @return [Array] prediction value feature followed by prediction confidence feature
def prediction_features
  value_feature = prediction_value_feature
  confidence_feature = prediction_confidence_feature
  [value_feature, confidence_feature]
end
|
383
|
+
|
384
|
+
# Feature object for the predicted value, located at <model uri>/predicted/value
# Title is the decoded basename of the dependent variable URI, OWL.sameAs points to the
# dependent variable.
# @return [OpenTox::Feature] feature with metadata set (not saved by this method)
def prediction_value_feature
  # OT.dependentVariables may hold a single URI string or an array of URIs
  dependent_uri = [@metadata[OT.dependentVariables]].flatten.first
  feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
  feature.add_metadata( {
    RDF.type => [OT.ModelPrediction],
    OT.hasSource => @uri,
    DC.creator => @uri,
    DC.title => URI.decode(File.basename( dependent_uri )),
    OWL.sameAs => dependent_uri
  })
  feature
end
|
396
|
+
|
397
|
+
# Feature object for the prediction confidence, located at <model uri>/predicted/confidence
# Title is the decoded basename of the dependent variable URI followed by " confidence".
# @return [OpenTox::Feature] feature with metadata set (not saved by this method)
def prediction_confidence_feature
  # OT.dependentVariables may hold a single URI string or an array of URIs
  dependent_uri = [@metadata[OT.dependentVariables]].flatten.first
  feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
  feature.add_metadata( {
    RDF.type => [OT.ModelPrediction],
    OT.hasSource => @uri,
    DC.creator => @uri,
    DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
  })
  feature
end
|
408
|
+
|
306
409
|
# Save model at model service
|
307
410
|
def save(subjectid)
|
308
411
|
self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})
|