opentox-ruby 2.0.1 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -21,6 +21,17 @@ module OpenTox
21
21
  else
22
22
  @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
23
23
  end
24
+
25
+ if @uri and @inchi.to_s.size==0
26
+ LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
27
+ @inchi = Compound.smiles2inchi(Compound.smiles(@uri))
28
+ end
29
+ end
30
+
31
# Fetch the SMILES representation of a compound from the compound service;
# the format is selected through the accept header
# @param uri Compound URI handed to RestClientWrapper.get
# @return [String] SMILES string with the trailing line break removed
def self.smiles(uri)
  response = RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles')
  response.to_s.chomp
end
25
36
 
26
37
  # Create a compound from smiles string
@@ -153,6 +164,35 @@ module OpenTox
153
164
  #smarts_array.collect { |s| s if match?(s)}.compact
154
165
  end
155
166
 
167
# Match an array of smarts strings, returns hash with matching smarts as key and number of non-unique hits as value
# @example
#   compound = OpenTox::Compound.from_name("Benzene")
#   compound.match_hits(['cc','cN']) # returns a Hash with 'cc' as only key and its number of hits as value
# @param [Array] smarts_array Array with Smarts strings
# @return [Hash] Hash with matching smarts as key and number of non-unique hits as value
def match_hits(smarts_array)
  # avoid recreation of OpenBabel objects
  obconversion = OpenBabel::OBConversion.new
  obmol = OpenBabel::OBMol.new
  obconversion.set_in_format('inchi')
  obconversion.read_string(obmol,@inchi)
  smarts_pattern = OpenBabel::OBSmartsPattern.new
  smarts_hits = {}
  # plain iteration: only the hash is of interest, not a collected array
  smarts_array.each do |smarts|
    smarts_pattern.init(smarts)
    # smarts without a match get no entry at all
    smarts_hits[smarts] = smarts_pattern.get_map_list.size if smarts_pattern.match(obmol)
  end
  smarts_hits
end
194
+
195
+
156
196
  # Get URI of compound image with highlighted fragments
157
197
  #
158
198
  # @param [Array] activating Array with activating Smarts strings
@@ -19,6 +19,7 @@ set :lock, true
19
19
  end
20
20
 
21
21
  use Rack::ShowExceptions
22
+ =begin
22
23
  if defined?(MAIL)
23
24
 
24
25
  # monkeypatch with the original method
@@ -50,3 +51,4 @@ if defined?(MAIL)
50
51
  mail.smtp MAIL
51
52
  end
52
53
  end
54
+ =end
@@ -102,6 +102,13 @@ module OpenTox
102
102
  copy parser.load_uri(subjectid)
103
103
  end
104
104
 
105
# Load SDF data into this dataset with Parser::Sdf
# @param sdf SDF data, handed unchanged to Parser::Sdf#load_sdf
# @param [optional] subjectid used when the dataset has to be saved first
# @return result of Parser::Sdf#load_sdf
def load_sdf(sdf,subjectid=nil)
  # a dataset uri is needed for creating features, so save when there is none yet
  save(subjectid) if !@uri
  sdf_parser = Parser::Sdf.new
  sdf_parser.dataset = self
  sdf_parser.load_sdf(sdf)
end
111
+
105
112
  # Load CSV string (format specification: http://toxcreate.org/help)
106
113
  # - loads data_entries, compounds, features
107
114
  # - sets metadata (warnings) for parser errors
@@ -149,7 +156,11 @@ module OpenTox
149
156
  # Load and return only compound URIs from the dataset service
150
157
  # @return [Array] Compound URIs in the dataset
151
158
  def load_compounds(subjectid=nil)
152
- RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
159
+ # fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50
160
+ u = URI::parse(uri)
161
+ u.path = File.join(u.path,"compounds")
162
+ u = u.to_s
163
+ RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
153
164
  @compounds << compound_uri.chomp
154
165
  end
155
166
  @compounds.uniq!
@@ -167,19 +178,15 @@ module OpenTox
167
178
  @features
168
179
  end
169
180
 
170
- def feature_classes(feature, subjectid=nil)
171
- if Feature.find(feature, subjectid).feature_type == "classification"
172
- classes = []
173
- @data_entries.each do |c,e|
174
- e[feature].each { |v| classes << v.to_s }
175
- end
176
- classes.uniq.sort
177
- else
178
- nil
179
- end
181
# returns the accept_values of a feature, i.e. the classification domain / all possible feature values
# @param [String] feature the URI of the feature
# @return [Array] sorted array with strings, nil if value is not set (e.g. when feature is numeric)
def accept_values(feature)
  values = features[feature][OT.acceptValue]
  # Array#sort returns a new array, so the sorted copy itself has to be the
  # return value (the stored feature metadata keeps its order)
  values && values.sort
end
181
189
 
182
- =begin
183
190
  # Detect feature type(s) in the dataset
184
191
  # @return [String] "classification", "regression", "mixed" or "unknown"
185
192
  def feature_type(subjectid=nil)
@@ -193,6 +200,7 @@ module OpenTox
193
200
  "unknown"
194
201
  end
195
202
  end
203
+ =begin
196
204
  =end
197
205
 
198
206
  # Get Spreadsheet representation
@@ -229,6 +237,30 @@ module OpenTox
229
237
  s.to_rdfxml
230
238
  end
231
239
 
240
# Get SDF representation of compounds
# Each record consists of the compound's InChI, directly followed by its SDF
# (record terminator removed), one "> <feature>" block per data entry and a
# closing "$$$$" line.
# @return [String] SDF representation
def to_sdf
  sum=""
  @compounds.each{ |c|
    # create the compound object once and use it for both representations
    compound = OpenTox::Compound.new(c)
    sum << compound.to_inchi
    sum << compound.to_sdf.sub(/\n\$\$\$\$/,'')
    # a compound without data entries simply gets no property blocks
    (@data_entries[c] || {}).each{ |f,v|
      sum << "> <\"#{f}\">\n"
      sum << v.join(", ")
      sum << "\n\n"
    }
    sum << "$$$$\n"
  }
  sum
end
256
+
257
# Get URI list representation of the compounds
# @return [String] compound URIs, each one terminated by a line break (empty string if there are no compounds)
def to_urilist
  # build a fresh string per compound: an inject without start value would take
  # the first compound URI as accumulator and modify it in place
  @compounds.collect { |c| "#{OpenTox::Compound.new(c).uri}\n" }.join
end
263
+
232
264
  # Get name (DC.title) of a feature
233
265
  # @param [String] feature Feature URI
234
266
  # @return [String] Feature title
@@ -307,6 +339,12 @@ module OpenTox
307
339
  end
308
340
  end
309
341
  end
342
+ # set feature metadata in new dataset accordingly (including accept values)
343
+ features.each do |f|
344
+ self.features[f].each do |k,v|
345
+ dataset.features[f][k] = v
346
+ end
347
+ end
310
348
  dataset.add_metadata(metadata)
311
349
  dataset.save(subjectid)
312
350
  dataset
@@ -369,12 +407,14 @@ module OpenTox
369
407
  end
370
408
 
371
409
  def value(compound)
372
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first
410
+ v = nil
411
+ v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri]
412
+ v = nil if v.is_a? Array and v.empty?
413
+ v
373
414
  end
374
415
 
375
416
  def confidence(compound)
376
- feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first
377
- @features[feature_uri][OT.confidence]
417
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri]
378
418
  end
379
419
 
380
420
  def descriptors(compound)
@@ -382,12 +422,11 @@ module OpenTox
382
422
  end
383
423
 
384
424
  def measured_activities(compound)
385
- source = @metadata[OT.hasSource]
386
- @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
425
+ @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri]
387
426
  end
388
427
 
389
428
  def neighbors(compound)
390
- @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
429
+ @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
391
430
  end
392
431
 
393
432
  # def errors(compound)
@@ -27,7 +27,7 @@ end
27
27
  Ohm.connect :thread_safe => true
28
28
 
29
29
  # load mail settings for error messages
30
- load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
30
+ #load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
31
31
 
32
32
  logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
33
33
  #LOGGER = OTLogger.new(logfile,'daily') # daily rotation
@@ -40,8 +40,8 @@ else
40
40
  end
41
41
 
42
42
  # Regular expressions for parsing classification data
43
- TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i
44
- FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i
43
+ TRUE_REGEXP = /^(true|active|1|1.0|tox|activating)$/i
44
+ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating)$/i
45
45
 
46
46
  # Task durations
47
47
  DEFAULT_TASK_MAX_DURATION = 36000
@@ -2,6 +2,8 @@ module OpenTox
2
2
  class Feature
3
3
  include OpenTox
4
4
 
5
+ attr_accessor :subjectid
6
+
5
7
  # Find a feature
6
8
  # @param [String] uri Feature URI
7
9
  # @return [OpenTox::Task] Feature object
@@ -13,31 +15,31 @@ module OpenTox
13
15
  else
14
16
  feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata
15
17
  end
18
+ feature.subjectid = subjectid
16
19
  feature
17
20
  end
18
-
21
+
19
22
  # provides feature type, possible types are "regression" or "classification"
20
23
  # @return [String] feature type, unknown if OT.isA property is unknown/ not set
21
24
  def feature_type
25
+ raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type]
22
26
  if metadata[RDF.type].flatten.include?(OT.NominalFeature)
23
27
  "classification"
24
28
  elsif metadata[RDF.type].flatten.include?(OT.NumericFeature)
25
29
  "regression"
26
- else
27
- #"unknown"
28
- metadata[RDF.type].inspect
29
- end
30
- =begin
31
- case metadata[RDF.type]
32
- when /NominalFeature/
33
- "classification"
34
- when /NumericFeature/
35
- "regression"
30
+ elsif metadata[OWL.sameAs]
31
+ metadata[OWL.sameAs].each do |f|
32
+ begin
33
+ type = Feature.find(f, subjectid).feature_type
34
+ return type unless type=="unknown"
35
+ rescue => ex
36
+ LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s
37
+ end
38
+ end
39
+ "unknown"
36
40
  else
37
41
  "unknown"
38
42
  end
39
- =end
40
43
  end
41
-
42
44
  end
43
45
  end
@@ -81,7 +81,7 @@ helpers do
81
81
  when "css"
82
82
  @accept = 'text/css'
83
83
  else
84
- # halt 404, "File format #{extension} not supported."
84
+ # raise OpenTox::NotFoundError.new "File format #{extension} not supported."
85
85
  end
86
86
  end
87
87
  end
@@ -94,4 +94,3 @@ before do
94
94
  protected!(@subjectid)
95
95
  end
96
96
  end
97
-
@@ -23,7 +23,7 @@ module OpenTox
23
23
  # Generic OpenTox model class for all API compliant services
24
24
  class Generic
25
25
  include Model
26
-
26
+
27
27
  # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
28
28
  # @param [String] uri Model URI
29
29
  # @return [OpenTox::Model::Generic] Model instance
@@ -34,42 +34,75 @@ module OpenTox
34
34
  raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
35
35
  model
36
36
  end
37
-
38
- # provides feature type, possible types are "regression" or "classification"
39
- # @return [String] feature type, "unknown" if type could not be estimated
37
+
38
# Feature type of the model, taken from the feature type of its predicted
# variable; the result is kept in @feature_type for later calls
# @param [optional] subjectid passed on to the lookups
# @return [String] feature type as reported by OpenTox::Feature#feature_type
def feature_type(subjectid=nil)
  return @feature_type if @feature_type
  load_predicted_variables( subjectid ) if !@predicted_variable
  predicted_feature = OpenTox::Feature.find( @predicted_variable, subjectid )
  @feature_type = predicted_feature.feature_type
end
63
-
64
- end
65
47
 
48
# Predicted variable of the model, loaded via load_predicted_variables if not yet set
# @param subjectid passed on to load_predicted_variables
# @return value of @predicted_variable
def predicted_variable( subjectid )
  @predicted_variable || load_predicted_variables( subjectid )
  @predicted_variable
end
52
+
53
# Predicted variables of the model, loaded (without confidence detection) if not yet set
# @param subjectid passed on to load_predicted_variables
# @return value of @predicted_variables
def predicted_variables( subjectid )
  @predicted_variables || load_predicted_variables( subjectid, false )
  @predicted_variables
end
57
+
58
# Confidence feature of the model, loaded via load_predicted_variables if not yet set
# @param subjectid passed on to load_predicted_variables
# @return value of @predicted_confidence
def predicted_confidence( subjectid )
  @predicted_confidence || load_predicted_variables( subjectid )
  @predicted_confidence
end
62
+
63
+ private
64
# Reads OT.predictedVariables from the model metadata (loading the metadata
# first if it looks empty) and sets
# - @predicted_variable for a single predicted variable
# - @predicted_variable and @predicted_confidence for two variables when use_confidence is set
# - @predicted_variables in all other cases with two or more variables
# With use_confidence set, the confidence variable is the one whose DC.title matches "confidence".
# Raises a RuntimeError if the predicted variable(s) cannot be determined.
def load_predicted_variables( subjectid=nil, use_confidence=true )
  load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
  candidates = @metadata[OT.predictedVariables]
  if candidates
    unless candidates.is_a?(Array)
      raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
    end
    case candidates.size
    when 0
      raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"
    when 1
      @predicted_variable = candidates[0]
    else
      # PENDING identify confidence
      if use_confidence
        conf_index = -1
        candidates.each_with_index do |candidate, i|
          f = OpenTox::Feature.find(candidate, subjectid)
          conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
        end
        if conf_index==-1
          raise "could not estimate predicted variable from model: '"+uri.to_s+
            "', number of predicted-variables==2, but no confidence found"
        end
      end
      if (candidates.size==2) && use_confidence
        # with exactly two variables the non-confidence one is the prediction
        @predicted_variable = candidates[1-conf_index]
        @predicted_confidence = candidates[conf_index]
      else
        @predicted_variables = candidates
      end
    end
  end
  raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
end
97
+ end
98
+
66
99
  # Lazy Structure Activity Relationship class
67
100
  class Lazar
68
101
 
69
- include Model
70
102
  include Algorithm
103
+ include Model
71
104
 
72
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
105
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
73
106
 
74
107
  def initialize(uri=nil)
75
108
 
@@ -78,7 +111,7 @@ module OpenTox
78
111
  else
79
112
  super CONFIG[:services]["opentox-model"]
80
113
  end
81
-
114
+
82
115
  @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
83
116
 
84
117
  @features = []
@@ -86,12 +119,18 @@ module OpenTox
86
119
  @activities = {}
87
120
  @p_values = {}
88
121
  @fingerprints = {}
122
+ @value_map = {}
123
+ @prediction_min_max = []
89
124
 
90
125
  @feature_calculation_algorithm = "Substructure.match"
91
126
  @similarity_algorithm = "Similarity.tanimoto"
92
127
  @prediction_algorithm = "Neighbors.weighted_majority_vote"
93
-
128
+
129
+ @nr_hits = false
94
130
  @min_sim = 0.3
131
+ @prop_kernel = false
132
+ @transform = { "class" => "NOP" }
133
+ @conf_stdev = false
95
134
 
96
135
  end
97
136
 
@@ -111,13 +150,25 @@ module OpenTox
111
150
  # Create a new lazar model
112
151
  # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
113
152
  # @return [OpenTox::Model::Lazar] lazar model
114
- def self.create(params)
153
+ def self.create(params, waiting_task=nil )
115
154
  subjectid = params[:subjectid]
116
155
  lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
117
- model_uri = lazar_algorithm.run(params)
156
+ model_uri = lazar_algorithm.run(params, waiting_task)
118
157
  OpenTox::Model::Lazar.find(model_uri, subjectid)
119
158
  end
120
159
 
160
# Run the model by posting the parameters to the model URI
# @param params parameters posted to the model
# @param [optional,String] accept_header accept header of the request; if omitted,
#   'application/x-yaml' is used for hosts listed in CONFIG[:yaml_hosts] and 'application/rdf+xml' otherwise
# @param [optional] waiting_task passed on to RestClientWrapper.post
# @return [String] response of the model service
def run( params, accept_header=nil, waiting_task=nil )
  accept_header ||= CONFIG[:yaml_hosts].include?(URI.parse(@uri).host) ? 'application/x-yaml' : 'application/rdf+xml'
  LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
  response = RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task)
  response.to_s
end
171
+
121
172
  # Get a parameter value
122
173
  # @param [String] param Parameter name
123
174
  # @return [String] Parameter value
@@ -131,6 +182,7 @@ module OpenTox
131
182
  # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
132
183
  # @return [OpenTox::Dataset] Dataset with predictions
133
184
  def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
185
+
134
186
  @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
135
187
  @prediction_dataset.add_metadata({
136
188
  OT.hasSource => @uri,
@@ -150,7 +202,7 @@ module OpenTox
150
202
  LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
151
203
  end
152
204
  end
153
- @prediction_dataset.save(subjectid)
205
+ #@prediction_dataset.save(subjectid)
154
206
  @prediction_dataset
155
207
  end
156
208
 
@@ -164,49 +216,52 @@ module OpenTox
164
216
  features = {}
165
217
 
166
218
  unless @prediction_dataset
167
- #@prediction_dataset = cached_prediction
168
- #return @prediction_dataset if cached_prediction
169
219
  @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
170
220
  @prediction_dataset.add_metadata( {
171
221
  OT.hasSource => @uri,
172
222
  DC.creator => @uri,
173
- # TODO: fix dependentVariable
174
223
  DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
175
224
  OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
176
225
  } )
177
226
  end
178
227
 
179
- return @prediction_dataset if database_activity(subjectid)
180
-
181
- neighbors
182
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
183
-
184
- prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
185
- # TODO: fix dependentVariable
186
- @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
187
-
188
- if @neighbors.size == 0
189
- @prediction_dataset.add_feature(prediction_feature_uri, {
190
- RDF.type => [OT.MeasuredFeature],
191
- OT.hasSource => @uri,
192
- DC.creator => @uri,
193
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
194
- OT.error => "No similar compounds in training dataset.",
195
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
196
- })
197
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
228
+ if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression"
229
+ all_activities = []
230
+ all_activities = @activities.values.flatten.collect! { |i| i.to_f }
231
+ @prediction_min_max[0] = (all_activities.to_scale.min/2)
232
+ @prediction_min_max[1] = (all_activities.to_scale.max*2)
233
+ end
198
234
 
199
- else
200
- @prediction_dataset.add_feature(prediction_feature_uri, {
201
- RDF.type => [OT.ModelPrediction],
202
- OT.hasSource => @uri,
203
- DC.creator => @uri,
204
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
205
- OT.prediction => prediction[:prediction],
206
- OT.confidence => prediction[:confidence],
207
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
208
- })
209
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
235
+ unless database_activity(subjectid) # adds database activity to @prediction_dataset
236
+
237
+ neighbors
238
+ prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
239
+ :compound => @compound,
240
+ :features => @features,
241
+ :p_values => @p_values,
242
+ :fingerprints => @fingerprints,
243
+ :similarity_algorithm => @similarity_algorithm,
244
+ :prop_kernel => @prop_kernel,
245
+ :value_map => @value_map,
246
+ :nr_hits => @nr_hits,
247
+ :conf_stdev => @conf_stdev,
248
+ :prediction_min_max => @prediction_min_max,
249
+ :transform => @transform } ) ")
250
+
251
+ value_feature_uri = File.join( @uri, "predicted", "value")
252
+ confidence_feature_uri = File.join( @uri, "predicted", "confidence")
253
+
254
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
255
+ @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
256
+
257
+ if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
258
+ @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
259
+ else
260
+ @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
261
+ end
262
+ @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
263
+ @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
264
+ @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
210
265
 
211
266
  if verbose
212
267
  if @feature_calculation_algorithm == "Substructure.match"
@@ -260,7 +315,6 @@ module OpenTox
260
315
  end
261
316
  n+=1
262
317
  end
263
- # what happens with dataset predictions?
264
318
  end
265
319
  end
266
320
 
@@ -268,33 +322,53 @@ module OpenTox
268
322
  @prediction_dataset
269
323
  end
270
324
 
271
- # Find neighbors and store them as object variable
272
- def neighbors
325
+
273
326
 
327
# Collect the neighbors of @compound in @neighbors; every compound in
# @fingerprints is handed to add_neighbor together with its feature keys.
def neighbors
  if @feature_calculation_algorithm
    # the configured algorithm name is resolved by eval
    @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)")
  end
  @neighbors = []
  @fingerprints.keys.each { |training_compound| add_neighbor(@fingerprints[training_compound].keys, training_compound) }
end
290
335
 
336
# Adds a neighbor to @neighbors if it passes the similarity threshold.
# One entry is added per activity of the training compound.
def add_neighbor(training_features, training_compound)
  # hit counts are only gathered when @nr_hits is set
  # NOTE: `training_features` and `params` are referenced by name in the eval'd call below
  params = {
    :nr_hits => @nr_hits,
    :compound_features_hits => (@nr_hits ? @compound.match_hits(@compound_features) : {}),
    :training_compound_features_hits => (@nr_hits ? @fingerprints[training_compound] : {})
  }

  sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
  return unless sim > @min_sim

  @activities[training_compound].each do |act|
    @neighbors << {
      :compound => training_compound,
      :similarity => sim,
      :features => training_features,
      :activity => act
    }
  end
end
292
362
 
293
363
  # Find database activities and store them in @prediction_dataset
294
364
  # @return [Boolean] true if compound has database activities, false if not
295
365
  def database_activity(subjectid)
296
366
  if @activities[@compound.uri]
297
- @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
367
+ if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
368
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
369
+ else
370
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
371
+ end
298
372
  @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
299
373
  @prediction_dataset.save(subjectid)
300
374
  true
@@ -303,6 +377,35 @@ module OpenTox
303
377
  end
304
378
  end
305
379
 
380
# Features of a prediction
# @return [Array] prediction value feature followed by prediction confidence feature
def prediction_features
  value_feature = prediction_value_feature
  confidence_feature = prediction_confidence_feature
  [value_feature, confidence_feature]
end
383
+
384
# Feature for predicted values, located at <model uri>/predicted/value
# Its title is derived from the first dependent variable, which is also set as OWL.sameAs.
# @return [OpenTox::Feature] feature with metadata set
def prediction_value_feature
  dependent_uri = @metadata[OT.dependentVariables].first
  value_feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
  value_metadata = {
    RDF.type => [OT.ModelPrediction],
    OT.hasSource => @uri,
    DC.creator => @uri,
    DC.title => URI.decode(File.basename( dependent_uri )),
    OWL.sameAs => dependent_uri
  }
  value_feature.add_metadata( value_metadata )
  value_feature
end
396
+
397
# Feature for prediction confidences, located at <model uri>/predicted/confidence
# Its title is the decoded basename of the first dependent variable followed by " confidence".
# @return [OpenTox::Feature] feature with metadata set
def prediction_confidence_feature
  dependent_uri = @metadata[OT.dependentVariables].first
  confidence_feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
  confidence_metadata = {
    RDF.type => [OT.ModelPrediction],
    OT.hasSource => @uri,
    DC.creator => @uri,
    DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
  }
  confidence_feature.add_metadata( confidence_metadata )
  confidence_feature
end
408
+
306
409
  # Save model at model service
307
410
  def save(subjectid)
308
411
  self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})