opentox-ruby 2.0.1 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,17 @@ module OpenTox
21
21
  else
22
22
  @inchi = RestClientWrapper.get(@uri, :accept => 'chemical/x-inchi').to_s.chomp if @uri
23
23
  end
24
+
25
+ if @uri and @inchi.to_s.size==0
26
+ LOGGER.warn "REMOVE ABMIT HACK: no inchi for compound "+@uri.to_s+", load via smiles"
27
+ @inchi = Compound.smiles2inchi(Compound.smiles(@uri))
28
+ end
29
+ end
30
+
31
+ # request smiles from compound service via accept header
32
+ # @return smiles as string
33
+ def self.smiles(uri)
34
+ RestClientWrapper.get(uri, :accept => 'chemical/x-daylight-smiles').to_s.chomp
24
35
  end
25
36
 
26
37
  # Create a compound from smiles string
@@ -153,6 +164,35 @@ module OpenTox
153
164
  #smarts_array.collect { |s| s if match?(s)}.compact
154
165
  end
155
166
 
167
+ # Match an array of smarts strings against the compound, returns a hash with each matching smarts as key and its number of non-unique hits as value
168
+ # @example
169
+ # compound = OpenTox::Compound.from_name("Benzene")
170
+ # compound.match_hits(['cc','cN']) # returns a hash like {'cc' => <number of hits>}; non-matching smarts are omitted
171
+ # @param [Array] smarts_array Array with Smarts strings
172
+ # @return [Hash] Hash with matching smarts as key and number of non-unique hits as value
173
+ def match_hits(smarts_array)
174
+ # avoid recreation of OpenBabel objects
175
+ obconversion = OpenBabel::OBConversion.new
176
+ obmol = OpenBabel::OBMol.new
177
+ obconversion.set_in_format('inchi')
178
+ obconversion.read_string(obmol,@inchi)
179
+ smarts_pattern = OpenBabel::OBSmartsPattern.new
180
+ smarts_hits = {}
181
+ #LOGGER.debug "dv ----------- obmol #{Compound.new(@inchi).to_smiles}"
182
+ smarts_array.collect do |smarts|
183
+ #LOGGER.debug "dv ----------- all smarts #{smarts}"
184
+ smarts_pattern.init(smarts)
185
+ if smarts_pattern.match(obmol)
186
+ hits = smarts_pattern.get_map_list
187
+ smarts_hits[smarts] = hits.size
188
+ end
189
+ end
190
+ #LOGGER.debug "dv ----------- smarts => hits #{smarts_hits}"
191
+ return smarts_hits
192
+ #smarts_array.collect { |s| s if match?(s)}.compact
193
+ end
194
+
195
+
156
196
  # Get URI of compound image with highlighted fragments
157
197
  #
158
198
  # @param [Array] activating Array with activating Smarts strings
@@ -19,6 +19,7 @@ set :lock, true
19
19
  end
20
20
 
21
21
  use Rack::ShowExceptions
22
+ =begin
22
23
  if defined?(MAIL)
23
24
 
24
25
  # monkeypatch with the original method
@@ -50,3 +51,4 @@ if defined?(MAIL)
50
51
  mail.smtp MAIL
51
52
  end
52
53
  end
54
+ =end
@@ -102,6 +102,13 @@ module OpenTox
102
102
  copy parser.load_uri(subjectid)
103
103
  end
104
104
 
105
+ def load_sdf(sdf,subjectid=nil)
106
+ save(subjectid) unless @uri # get a uri for creating features
107
+ parser = Parser::Sdf.new
108
+ parser.dataset = self
109
+ parser.load_sdf(sdf)
110
+ end
111
+
105
112
  # Load CSV string (format specification: http://toxcreate.org/help)
106
113
  # - loads data_entries, compounds, features
107
114
  # - sets metadata (warnings) for parser errors
@@ -149,7 +156,11 @@ module OpenTox
149
156
  # Load and return only compound URIs from the dataset service
150
157
  # @return [Array] Compound URIs in the dataset
151
158
  def load_compounds(subjectid=nil)
152
- RestClientWrapper.get(File.join(uri,"compounds"),{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
159
+ # fix for datasets like http://apps.ideaconsult.net:8080/ambit2/dataset/272?max=50
160
+ u = URI::parse(uri)
161
+ u.path = File.join(u.path,"compounds")
162
+ u = u.to_s
163
+ RestClientWrapper.get(u,{:accept=> "text/uri-list", :subjectid => subjectid}).to_s.each_line do |compound_uri|
153
164
  @compounds << compound_uri.chomp
154
165
  end
155
166
  @compounds.uniq!
@@ -167,19 +178,15 @@ module OpenTox
167
178
  @features
168
179
  end
169
180
 
170
- def feature_classes(feature, subjectid=nil)
171
- if Feature.find(feature, subjectid).feature_type == "classification"
172
- classes = []
173
- @data_entries.each do |c,e|
174
- e[feature].each { |v| classes << v.to_s }
175
- end
176
- classes.uniq.sort
177
- else
178
- nil
179
- end
181
+ # returns the accept_values of a feature, i.e. the classification domain / all possible feature values
182
+ # @param [String] feature the URI of the feature
183
+ # @return [Array] return array with strings, nil if value is not set (e.g. when feature is numeric)
184
+ def accept_values(feature)
185
+ accept_values = features[feature][OT.acceptValue]
186
+ accept_values.sort if accept_values
187
+ accept_values
180
188
  end
181
189
 
182
- =begin
183
190
  # Detect feature type(s) in the dataset
184
191
  # @return [String] "classification", "regression", "mixed" or "unknown"
185
192
  def feature_type(subjectid=nil)
@@ -193,6 +200,7 @@ module OpenTox
193
200
  "unknown"
194
201
  end
195
202
  end
203
+ =begin
196
204
  =end
197
205
 
198
206
  # Get Spreadsheet representation
@@ -229,6 +237,30 @@ module OpenTox
229
237
  s.to_rdfxml
230
238
  end
231
239
 
240
+ # Get SDF representation of compounds
241
+ # @return [String] SDF representation
242
+ def to_sdf
243
+ sum=""
244
+ @compounds.each{ |c|
245
+ sum << OpenTox::Compound.new(c).to_inchi
246
+ sum << OpenTox::Compound.new(c).to_sdf.sub(/\n\$\$\$\$/,'')
247
+ @data_entries[c].each{ |f,v|
248
+ sum << "> <\"#{f}\">\n"
249
+ sum << v.join(", ")
250
+ sum << "\n\n"
251
+ }
252
+ sum << "$$$$\n"
253
+ }
254
+ sum
255
+ end
256
+
257
+ def to_urilist
258
+ @compounds.inject { |sum, c|
259
+ sum << OpenTox::Compound.new(c).uri
260
+ sum + "\n"
261
+ }
262
+ end
263
+
232
264
  # Get name (DC.title) of a feature
233
265
  # @param [String] feature Feature URI
234
266
  # @return [String] Feature title
@@ -307,6 +339,12 @@ module OpenTox
307
339
  end
308
340
  end
309
341
  end
342
+ # set feature metadata in new dataset accordingly (including accept values)
343
+ features.each do |f|
344
+ self.features[f].each do |k,v|
345
+ dataset.features[f][k] = v
346
+ end
347
+ end
310
348
  dataset.add_metadata(metadata)
311
349
  dataset.save(subjectid)
312
350
  dataset
@@ -369,12 +407,14 @@ module OpenTox
369
407
  end
370
408
 
371
409
  def value(compound)
372
- @data_entries[compound.uri].collect{|f,v| v.first if f.match(/prediction/)}.compact.first
410
+ v = nil
411
+ v = @data_entries[compound.uri].collect{|f,v| v.first if f.match(/value/)}.compact.first if @data_entries[compound.uri]
412
+ v = nil if v.is_a? Array and v.empty?
413
+ v
373
414
  end
374
415
 
375
416
  def confidence(compound)
376
- feature_uri = @data_entries[compound.uri].collect{|f,v| f if f.match(/prediction/)}.compact.first
377
- @features[feature_uri][OT.confidence]
417
+ @data_entries[compound.uri].collect{|f,v| v.first if f.match(/confidence/)}.compact.first if @data_entries[compound.uri]
378
418
  end
379
419
 
380
420
  def descriptors(compound)
@@ -382,12 +422,11 @@ module OpenTox
382
422
  end
383
423
 
384
424
  def measured_activities(compound)
385
- source = @metadata[OT.hasSource]
386
- @data_entries[compound.uri].collect{|f,v| v if f.match(/#{source}/)}.compact.flatten
425
+ @data_entries[compound.uri].collect{|f,v| v if f.match(/#{@metadata[OT.hasSource]}/)}.compact.flatten if @data_entries[compound.uri]
387
426
  end
388
427
 
389
428
  def neighbors(compound)
390
- @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact
429
+ @data_entries[compound.uri].collect{|f,v| @features[f] if f.match(/neighbor/)}.compact if @data_entries[compound.uri]
391
430
  end
392
431
 
393
432
  # def errors(compound)
@@ -27,7 +27,7 @@ end
27
27
  Ohm.connect :thread_safe => true
28
28
 
29
29
  # load mail settings for error messages
30
- load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
30
+ #load File.join config_dir,"mail.rb" if File.exists?(File.join config_dir,"mail.rb")
31
31
 
32
32
  logfile = "#{LOG_DIR}/#{ENV["RACK_ENV"]}.log"
33
33
  #LOGGER = OTLogger.new(logfile,'daily') # daily rotation
@@ -40,8 +40,8 @@ else
40
40
  end
41
41
 
42
42
  # Regular expressions for parsing classification data
43
- TRUE_REGEXP = /^(true|active|1|1.0|tox)$/i
44
- FALSE_REGEXP = /^(false|inactive|0|0.0|low tox)$/i
43
+ TRUE_REGEXP = /^(true|active|1|1.0|tox|activating)$/i
44
+ FALSE_REGEXP = /^(false|inactive|0|0.0|low tox|deactivating)$/i
45
45
 
46
46
  # Task durations
47
47
  DEFAULT_TASK_MAX_DURATION = 36000
@@ -2,6 +2,8 @@ module OpenTox
2
2
  class Feature
3
3
  include OpenTox
4
4
 
5
+ attr_accessor :subjectid
6
+
5
7
  # Find a feature
6
8
  # @param [String] uri Feature URI
7
9
  # @return [OpenTox::Feature] Feature object
@@ -13,31 +15,31 @@ module OpenTox
13
15
  else
14
16
  feature.add_metadata Parser::Owl::Dataset.new(uri).load_metadata
15
17
  end
18
+ feature.subjectid = subjectid
16
19
  feature
17
20
  end
18
-
21
+
19
22
  # provides feature type, possible types are "regression" or "classification"
20
23
  # @return [String] feature type, "unknown" if the type cannot be determined from the feature metadata (RDF.type, OWL.sameAs)
21
24
  def feature_type
25
+ raise OpenTox::BadRequestError.new("rdf type of feature '"+uri.to_s+"' not set") unless metadata[RDF.type]
22
26
  if metadata[RDF.type].flatten.include?(OT.NominalFeature)
23
27
  "classification"
24
28
  elsif metadata[RDF.type].flatten.include?(OT.NumericFeature)
25
29
  "regression"
26
- else
27
- #"unknown"
28
- metadata[RDF.type].inspect
29
- end
30
- =begin
31
- case metadata[RDF.type]
32
- when /NominalFeature/
33
- "classification"
34
- when /NumericFeature/
35
- "regression"
30
+ elsif metadata[OWL.sameAs]
31
+ metadata[OWL.sameAs].each do |f|
32
+ begin
33
+ type = Feature.find(f, subjectid).feature_type
34
+ return type unless type=="unknown"
35
+ rescue => ex
36
+ LOGGER.warn "could not load same-as-feature '"+f.to_s+"' for feature '"+uri.to_s+"' : "+ex.message.to_s
37
+ end
38
+ end
39
+ "unknown"
36
40
  else
37
41
  "unknown"
38
42
  end
39
- =end
40
43
  end
41
-
42
44
  end
43
45
  end
@@ -81,7 +81,7 @@ helpers do
81
81
  when "css"
82
82
  @accept = 'text/css'
83
83
  else
84
- # halt 404, "File format #{extension} not supported."
84
+ # raise OpenTox::NotFoundError.new "File format #{extension} not supported."
85
85
  end
86
86
  end
87
87
  end
@@ -94,4 +94,3 @@ before do
94
94
  protected!(@subjectid)
95
95
  end
96
96
  end
97
-
@@ -23,7 +23,7 @@ module OpenTox
23
23
  # Generic OpenTox model class for all API compliant services
24
24
  class Generic
25
25
  include Model
26
-
26
+
27
27
  # Find Generic Opentox Model via URI, and loads metadata, could raise NotFound/NotAuthorized error
28
28
  # @param [String] uri Model URI
29
29
  # @return [OpenTox::Model::Generic] Model instance
@@ -34,42 +34,75 @@ module OpenTox
34
34
  raise "could not load model metadata '"+uri.to_s+"'" if model.metadata==nil or model.metadata.size==0
35
35
  model
36
36
  end
37
-
38
- # provides feature type, possible types are "regression" or "classification"
39
- # @return [String] feature type, "unknown" if type could not be estimated
37
+
38
+ # provides feature type, possible types are "regression" or "classification"
39
+ # @return [String] feature type, "unknown" if type could not be estimated
40
40
  def feature_type(subjectid=nil)
41
- return @feature_type if @feature_type
42
-
43
- # dynamically perform restcalls if necessary
44
- load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
45
- algorithm = OpenTox::Algorithm::Generic.find(@metadata[OT.algorithm], subjectid)
46
- algorithm_title = algorithm ? algorithm.metadata[DC.title] : nil
47
- algorithm_type = algorithm ? algorithm.metadata[RDF.type] : nil
48
- dependent_variable = OpenTox::Feature.find( @metadata[OT.dependentVariables],subjectid )
49
- dependent_variable_type = dependent_variable ? dependent_variable.feature_type : nil
50
- type_indicators = [dependent_variable_type, @metadata[RDF.type], @metadata[DC.title], @uri, algorithm_type, algorithm_title].flatten
51
- type_indicators.each do |type|
52
- case type
53
- when /(?i)classification/
54
- @feature_type = "classification"
55
- break
56
- when /(?i)regression/
57
- @feature_type = "regression"
58
- end
41
+ unless @feature_type
42
+ load_predicted_variables( subjectid ) unless @predicted_variable
43
+ @feature_type = OpenTox::Feature.find( @predicted_variable, subjectid ).feature_type
59
44
  end
60
- raise "unknown model "+type_indicators.inspect unless @feature_type
61
45
  @feature_type
62
46
  end
63
-
64
- end
65
47
 
48
+ def predicted_variable( subjectid )
49
+ load_predicted_variables( subjectid ) unless @predicted_variable
50
+ @predicted_variable
51
+ end
52
+
53
+ def predicted_variables( subjectid )
54
+ load_predicted_variables( subjectid, false ) unless @predicted_variables
55
+ @predicted_variables
56
+ end
57
+
58
+ def predicted_confidence( subjectid )
59
+ load_predicted_variables( subjectid ) unless @predicted_confidence
60
+ @predicted_confidence
61
+ end
62
+
63
+ private
64
+ def load_predicted_variables( subjectid=nil, use_confidence=true )
65
+ load_metadata(subjectid) if @metadata==nil or @metadata.size==0 or (@metadata.size==1 && @metadata.values[0]==@uri)
66
+ if @metadata[OT.predictedVariables]
67
+ predictedVariables = @metadata[OT.predictedVariables]
68
+ if predictedVariables.is_a?(Array)
69
+ if (predictedVariables.size==1)
70
+ @predicted_variable = predictedVariables[0]
71
+ elsif (predictedVariables.size>=2)
72
+ # PENDING identify confidence
73
+ if use_confidence
74
+ conf_index = -1
75
+ predictedVariables.size.times do |i|
76
+ f = OpenTox::Feature.find(predictedVariables[i], subjectid)
77
+ conf_index = i if f.metadata[DC.title]=~/(?i)confidence/
78
+ end
79
+ raise "could not estimate predicted variable from model: '"+uri.to_s+
80
+ "', number of predicted-variables==2, but no confidence found" if conf_index==-1
81
+ end
82
+ if (predictedVariables.size==2) && use_confidence
83
+ @predicted_variable = predictedVariables[1-conf_index]
84
+ @predicted_confidence = predictedVariables[conf_index]
85
+ else
86
+ @predicted_variables = predictedVariables
87
+ end
88
+ else
89
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"', number of predicted-variables == 0"
90
+ end
91
+ else
92
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"', predicted-variables is no array"
93
+ end
94
+ end
95
+ raise "could not estimate predicted variable from model: '"+uri.to_s+"'" unless (@predicted_variable || @predicted_variables)
96
+ end
97
+ end
98
+
66
99
  # Lazy Structure Activity Relationship class
67
100
  class Lazar
68
101
 
69
- include Model
70
102
  include Algorithm
103
+ include Model
71
104
 
72
- attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid
105
+ attr_accessor :compound, :prediction_dataset, :features, :effects, :activities, :p_values, :fingerprints, :feature_calculation_algorithm, :similarity_algorithm, :prediction_algorithm, :min_sim, :subjectid, :prop_kernel, :value_map, :nr_hits, :transform, :conf_stdev, :prediction_min_max
73
106
 
74
107
  def initialize(uri=nil)
75
108
 
@@ -78,7 +111,7 @@ module OpenTox
78
111
  else
79
112
  super CONFIG[:services]["opentox-model"]
80
113
  end
81
-
114
+
82
115
  @metadata[OT.algorithm] = File.join(CONFIG[:services]["opentox-algorithm"],"lazar")
83
116
 
84
117
  @features = []
@@ -86,12 +119,18 @@ module OpenTox
86
119
  @activities = {}
87
120
  @p_values = {}
88
121
  @fingerprints = {}
122
+ @value_map = {}
123
+ @prediction_min_max = []
89
124
 
90
125
  @feature_calculation_algorithm = "Substructure.match"
91
126
  @similarity_algorithm = "Similarity.tanimoto"
92
127
  @prediction_algorithm = "Neighbors.weighted_majority_vote"
93
-
128
+
129
+ @nr_hits = false
94
130
  @min_sim = 0.3
131
+ @prop_kernel = false
132
+ @transform = { "class" => "NOP" }
133
+ @conf_stdev = false
95
134
 
96
135
  end
97
136
 
@@ -111,13 +150,25 @@ module OpenTox
111
150
  # Create a new lazar model
112
151
  # @param [optional,Hash] params Parameters for the lazar algorithm (OpenTox::Algorithm::Lazar)
113
152
  # @return [OpenTox::Model::Lazar] lazar model
114
- def self.create(params)
153
+ def self.create(params, waiting_task=nil )
115
154
  subjectid = params[:subjectid]
116
155
  lazar_algorithm = OpenTox::Algorithm::Generic.new File.join( CONFIG[:services]["opentox-algorithm"],"lazar")
117
- model_uri = lazar_algorithm.run(params)
156
+ model_uri = lazar_algorithm.run(params, waiting_task)
118
157
  OpenTox::Model::Lazar.find(model_uri, subjectid)
119
158
  end
120
159
 
160
+ def run( params, accept_header=nil, waiting_task=nil )
161
+ unless accept_header
162
+ if CONFIG[:yaml_hosts].include?(URI.parse(@uri).host)
163
+ accept_header = 'application/x-yaml'
164
+ else
165
+ accept_header = 'application/rdf+xml'
166
+ end
167
+ end
168
+ LOGGER.info "running model "+@uri.to_s+", params: "+params.inspect+", accept: "+accept_header.to_s
169
+ RestClientWrapper.post(@uri,params,{:accept => accept_header},waiting_task).to_s
170
+ end
171
+
121
172
  # Get a parameter value
122
173
  # @param [String] param Parameter name
123
174
  # @return [String] Parameter value
@@ -131,6 +182,7 @@ module OpenTox
131
182
  # @param [optional,OpenTox::Task] waiting_task (can be a OpenTox::Subtask as well), progress is updated accordingly
132
183
  # @return [OpenTox::Dataset] Dataset with predictions
133
184
  def predict_dataset(dataset_uri, subjectid=nil, waiting_task=nil)
185
+
134
186
  @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
135
187
  @prediction_dataset.add_metadata({
136
188
  OT.hasSource => @uri,
@@ -150,7 +202,7 @@ module OpenTox
150
202
  LOGGER.warn "prediction for compound "+compound_uri.to_s+" failed: "+ex.message
151
203
  end
152
204
  end
153
- @prediction_dataset.save(subjectid)
205
+ #@prediction_dataset.save(subjectid)
154
206
  @prediction_dataset
155
207
  end
156
208
 
@@ -164,49 +216,52 @@ module OpenTox
164
216
  features = {}
165
217
 
166
218
  unless @prediction_dataset
167
- #@prediction_dataset = cached_prediction
168
- #return @prediction_dataset if cached_prediction
169
219
  @prediction_dataset = Dataset.create(CONFIG[:services]["opentox-dataset"], subjectid)
170
220
  @prediction_dataset.add_metadata( {
171
221
  OT.hasSource => @uri,
172
222
  DC.creator => @uri,
173
- # TODO: fix dependentVariable
174
223
  DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
175
224
  OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
176
225
  } )
177
226
  end
178
227
 
179
- return @prediction_dataset if database_activity(subjectid)
180
-
181
- neighbors
182
- prediction = eval("#{@prediction_algorithm}(@neighbors,{:similarity_algorithm => @similarity_algorithm, :p_values => @p_values})")
183
-
184
- prediction_feature_uri = File.join( @prediction_dataset.uri, "feature", "prediction", File.basename(@metadata[OT.dependentVariables]),@prediction_dataset.compounds.size.to_s)
185
- # TODO: fix dependentVariable
186
- @prediction_dataset.metadata[OT.dependentVariables] = prediction_feature_uri
187
-
188
- if @neighbors.size == 0
189
- @prediction_dataset.add_feature(prediction_feature_uri, {
190
- RDF.type => [OT.MeasuredFeature],
191
- OT.hasSource => @uri,
192
- DC.creator => @uri,
193
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
194
- OT.error => "No similar compounds in training dataset.",
195
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
196
- })
197
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
228
+ if OpenTox::Feature.find(metadata[OT.dependentVariables]).feature_type == "regression"
229
+ all_activities = []
230
+ all_activities = @activities.values.flatten.collect! { |i| i.to_f }
231
+ @prediction_min_max[0] = (all_activities.to_scale.min/2)
232
+ @prediction_min_max[1] = (all_activities.to_scale.max*2)
233
+ end
198
234
 
199
- else
200
- @prediction_dataset.add_feature(prediction_feature_uri, {
201
- RDF.type => [OT.ModelPrediction],
202
- OT.hasSource => @uri,
203
- DC.creator => @uri,
204
- DC.title => URI.decode(File.basename( @metadata[OT.dependentVariables] )),
205
- OT.prediction => prediction[:prediction],
206
- OT.confidence => prediction[:confidence],
207
- OT.parameters => [{DC.title => "compound_uri", OT.paramValue => compound_uri}]
208
- })
209
- @prediction_dataset.add @compound.uri, prediction_feature_uri, prediction[:prediction]
235
+ unless database_activity(subjectid) # adds database activity to @prediction_dataset
236
+
237
+ neighbors
238
+ prediction = eval("#{@prediction_algorithm} ( { :neighbors => @neighbors,
239
+ :compound => @compound,
240
+ :features => @features,
241
+ :p_values => @p_values,
242
+ :fingerprints => @fingerprints,
243
+ :similarity_algorithm => @similarity_algorithm,
244
+ :prop_kernel => @prop_kernel,
245
+ :value_map => @value_map,
246
+ :nr_hits => @nr_hits,
247
+ :conf_stdev => @conf_stdev,
248
+ :prediction_min_max => @prediction_min_max,
249
+ :transform => @transform } ) ")
250
+
251
+ value_feature_uri = File.join( @uri, "predicted", "value")
252
+ confidence_feature_uri = File.join( @uri, "predicted", "confidence")
253
+
254
+ @prediction_dataset.metadata[OT.dependentVariables] = @metadata[OT.dependentVariables] unless @prediction_dataset.metadata[OT.dependentVariables]
255
+ @prediction_dataset.metadata[OT.predictedVariables] = [value_feature_uri, confidence_feature_uri] unless @prediction_dataset.metadata[OT.predictedVariables]
256
+
257
+ if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
258
+ @prediction_dataset.add @compound.uri, value_feature_uri, @value_map[prediction[:prediction]]
259
+ else
260
+ @prediction_dataset.add @compound.uri, value_feature_uri, prediction[:prediction]
261
+ end
262
+ @prediction_dataset.add @compound.uri, confidence_feature_uri, prediction[:confidence]
263
+ @prediction_dataset.features[value_feature_uri][DC.title] = @prediction_dataset.metadata[DC.title]
264
+ @prediction_dataset.features[confidence_feature_uri][DC.title] = "Confidence"
210
265
 
211
266
  if verbose
212
267
  if @feature_calculation_algorithm == "Substructure.match"
@@ -260,7 +315,6 @@ module OpenTox
260
315
  end
261
316
  n+=1
262
317
  end
263
- # what happens with dataset predictions?
264
318
  end
265
319
  end
266
320
 
@@ -268,33 +322,53 @@ module OpenTox
268
322
  @prediction_dataset
269
323
  end
270
324
 
271
- # Find neighbors and store them as object variable
272
- def neighbors
325
+
273
326
 
327
+ # Find neighbors among all training compounds (keys of @fingerprints) and store them in @neighbors.
328
+ def neighbors
274
329
  @compound_features = eval("#{@feature_calculation_algorithm}(@compound,@features)") if @feature_calculation_algorithm
275
-
276
330
  @neighbors = []
277
- @fingerprints.each do |training_compound,training_features|
278
- sim = eval("#{@similarity_algorithm}(@compound_features,training_features,@p_values)")
279
- if sim > @min_sim
280
- @activities[training_compound].each do |act|
281
- @neighbors << {
282
- :compound => training_compound,
283
- :similarity => sim,
284
- :features => training_features,
285
- :activity => act
286
- }
287
- end
288
- end
331
+ @fingerprints.keys.each do |training_compound| # AM: access all compounds
332
+ add_neighbor @fingerprints[training_compound].keys, training_compound
289
333
  end
334
+ end
290
335
 
336
+ # Adds a neighbor to @neighbors if it passes the similarity threshold.
337
+ def add_neighbor(training_features, training_compound)
338
+ compound_features_hits = {}
339
+ training_compound_features_hits = {}
340
+ if @nr_hits
341
+ compound_features_hits = @compound.match_hits(@compound_features)
342
+ training_compound_features_hits = @fingerprints[training_compound]
343
+ #LOGGER.debug "dv ------------ training_compound_features_hits:#{training_compound_features_hits.class} #{training_compound_features_hits}"
344
+ end
345
+ params = {}
346
+ params[:nr_hits] = @nr_hits
347
+ params[:compound_features_hits] = compound_features_hits
348
+ params[:training_compound_features_hits] = training_compound_features_hits
349
+
350
+ sim = eval("#{@similarity_algorithm}(training_features, @compound_features, @p_values, params)")
351
+ if sim > @min_sim
352
+ @activities[training_compound].each do |act|
353
+ @neighbors << {
354
+ :compound => training_compound,
355
+ :similarity => sim,
356
+ :features => training_features,
357
+ :activity => act
358
+ }
359
+ end
360
+ end
291
361
  end
292
362
 
293
363
  # Find database activities and store them in @prediction_dataset
294
364
  # @return [Boolean] true if compound has database activities, false if not
295
365
  def database_activity(subjectid)
296
366
  if @activities[@compound.uri]
297
- @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
367
+ if OpenTox::Feature.find(metadata[OT.dependentVariables], subjectid).feature_type == "classification"
368
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], @value_map[act] }
369
+ else
370
+ @activities[@compound.uri].each { |act| @prediction_dataset.add @compound.uri, @metadata[OT.dependentVariables], act }
371
+ end
298
372
  @prediction_dataset.add_metadata(OT.hasSource => @metadata[OT.trainingDataset])
299
373
  @prediction_dataset.save(subjectid)
300
374
  true
@@ -303,6 +377,35 @@ module OpenTox
303
377
  end
304
378
  end
305
379
 
380
+ def prediction_features
381
+ [prediction_value_feature,prediction_confidence_feature]
382
+ end
383
+
384
+ def prediction_value_feature
385
+ dependent_uri = @metadata[OT.dependentVariables].first
386
+ feature = OpenTox::Feature.new File.join( @uri, "predicted", "value")
387
+ feature.add_metadata( {
388
+ RDF.type => [OT.ModelPrediction],
389
+ OT.hasSource => @uri,
390
+ DC.creator => @uri,
391
+ DC.title => URI.decode(File.basename( dependent_uri )),
392
+ OWL.sameAs => dependent_uri
393
+ })
394
+ feature
395
+ end
396
+
397
+ def prediction_confidence_feature
398
+ dependent_uri = @metadata[OT.dependentVariables].first
399
+ feature = OpenTox::Feature.new File.join( @uri, "predicted", "confidence")
400
+ feature.add_metadata( {
401
+ RDF.type => [OT.ModelPrediction],
402
+ OT.hasSource => @uri,
403
+ DC.creator => @uri,
404
+ DC.title => "#{URI.decode(File.basename( dependent_uri ))} confidence"
405
+ })
406
+ feature
407
+ end
408
+
306
409
  # Save model at model service
307
410
  def save(subjectid)
308
411
  self.uri = RestClientWrapper.post(@uri,self.to_yaml,{:content_type => "application/x-yaml", :subjectid => subjectid})