opentox-ruby-api-wrapper 1.2.1 → 1.2.2

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.2.1
1
+ 1.2.2
@@ -0,0 +1,18 @@
1
#!/usr/bin/env ruby
# Convert a YAML dataset description into an OWL/RDF file.
#
# Usage: <this script> FILE.yaml
#
# The YAML document is read as a hash with :title, :source and :data keys;
# :data is walked as compound => { feature => [values] } and every value is
# added to a new OpenTox::Dataset. The RDF is written relative to this
# script's directory, under the input name with ".yaml" replaced by ".owl".
require 'rubygems'
require 'yaml'
require 'opentox-ruby-api-wrapper'

input_file = ARGV[0]
abort "Usage: #{File.basename(__FILE__)} FILE.yaml" unless input_file

input = YAML.load_file(input_file)
dataset = OpenTox::Dataset.new
dataset.title = input[:title]
dataset.source = input[:source]
input[:data].each do |c,f|
  f.each do |k,v|
    v.each do |value|
      dataset.add c,k,value
    end
  end
end

# Swap only a trailing ".yaml" (or append ".owl" when there is none). A bare
# /yaml/ would rewrite the first "yaml" anywhere in the path, e.g. a directory
# name, and could leave the output name equal to the input name.
owl_name = input_file.sub(/(\.yaml)?\z/, '.owl')
outfile = File.expand_path(File.join(File.dirname(__FILE__), owl_name))
dataset.uri = outfile
File.open(outfile,'w+'){|f| f.puts dataset.rdf}
@@ -14,6 +14,10 @@ module OpenTox
14
14
  "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri" }
15
15
  }
16
16
  end
17
+
18
# POST a feature-generation request and return what RestClient.post returns.
#
# params[:feature_generation_uri] is the service that gets called;
# params[:dataset_uri] and params[:feature_uri] are forwarded as the
# :dataset_uri and :feature_uri request parameters.
def self.create_feature_dataset(params)
  service_uri = params[:feature_generation_uri]
  payload = { :dataset_uri => params[:dataset_uri], :feature_uri => params[:feature_uri] }
  RestClient.post(service_uri, payload)
end
17
21
  end
18
22
 
19
23
  class Lazar #< OpenTox
@@ -35,5 +39,23 @@ module OpenTox
35
39
  end
36
40
  end
37
41
 
42
class Similarity

  # Weighted Tanimoto similarity of two fingerprints.
  #
  # fp_a, fp_b:: arrays of features
  # p::          weight lookup; p[feature] must return a number for every
  #              feature that occurs in either fingerprint
  #
  # Returns the summed weight of the features both fingerprints share divided
  # by the summed weight of the features that occur in at least one of them.
  # Returns 0.0 when nothing is shared or when the total weight is zero.
  def self.weighted_tanimoto(fp_a,fp_b,p)
    common_features = fp_a & fp_b
    return 0.0 if common_features.empty?

    # Union, not concatenation: with fp_a + fp_b every shared feature was
    # counted twice in the denominator, so identical fingerprints scored 0.5.
    all_features = fp_a | fp_b
    common_p_sum = common_features.inject(0.0) { |sum, f| sum + p[f] }
    all_p_sum = all_features.inject(0.0) { |sum, f| sum + p[f] }

    # 0.0 / 0.0 would be NaN, which callers comparing against a threshold
    # cannot tell apart from "not similar".
    return 0.0 if all_p_sum.zero?

    common_p_sum / all_p_sum
  end

end
59
+
38
60
  end
39
61
  end
@@ -3,12 +3,55 @@ module OpenTox
3
3
  class Dataset
4
4
  include Owl
5
5
 
6
- #attr_accessor :model
7
-
8
6
  def initialize
9
7
  super
10
8
  end
11
9
 
10
# Attach one feature value to the data entry of a compound.
#
# compound and feature are used as they are when they are exactly a
# Redland::Resource; anything else is handed to find_or_create_compound /
# find_or_create_feature first (presumably a URI - confirm against callers).
# The compound's data entry is looked up through its OT compound statement and
# created when missing. The value is stored as a string on a new
# OT FeatureValue node linked from the entry via OT values.
def add(compound,feature,value)
  compound = find_or_create_compound(compound) unless compound.instance_of?(Redland::Resource)
  feature = find_or_create_feature(feature) unless feature.instance_of?(Redland::Resource)

  entry = @model.subject(OT['compound'], compound)
  if entry.nil?
    entry = @model.create_resource
    @model.add(entry, RDF['type'], OT["DataEntry"])
    @model.add(entry, OT['compound'], compound)
  end

  value_node = @model.create_resource
  @model.add(entry, OT['values'], value_node)
  @model.add(value_node, RDF['type'], OT['FeatureValue'])
  @model.add(value_node, OT['feature'], feature)
  @model.add(value_node, OT['value'], value.to_s)
end
26
+
27
# Attach an existing tuple node to the data entry of a compound.
#
# compound is used as it is when it is exactly a Redland::Resource; anything
# else goes through find_or_create_compound first. The compound's data entry
# is created when the model has none. tuple is linked from the entry via
# OT values without further checks (create_tuple builds such nodes).
def add_tuple(compound,tuple)
  compound = find_or_create_compound(compound) unless compound.instance_of?(Redland::Resource)

  entry = @model.subject(OT['compound'], compound)
  if entry.nil?
    entry = @model.create_resource
    @model.add(entry, RDF['type'], OT["DataEntry"])
    @model.add(entry, OT['compound'], compound)
  end

  @model.add(entry, OT['values'], tuple)
end
37
+
38
# Build a tuple node for a feature and return it.
#
# feature:: the feature the tuple belongs to; used as it is when it is exactly
#           a Redland::Resource, otherwise resolved with find_or_create_feature
# t::       component name => value pairs; each name follows the same
#           Resource-or-lookup rule as feature
#
# The tuple is typed OT Tuple and linked to its feature. Every pair becomes an
# OT FeatureValue node, linked from the tuple via OT complexValue, that
# carries the component feature and the value converted to a string.
# The tuple is not attached to any compound here.
def create_tuple(feature,t)
  feature = find_or_create_feature(feature) unless feature.instance_of?(Redland::Resource)
  tuple = @model.create_resource
  @model.add tuple, RDF['type'], OT["Tuple"]
  @model.add tuple, OT['feature'], feature
  t.each do |name,value|
    # A name that already is a resource must be used directly; previously the
    # component feature ended up nil in that case, and a second, redundant
    # lookup overwrote the tuple's own feature variable.
    component_feature = name.instance_of?(Redland::Resource) ? name : find_or_create_feature(name)
    complex_value = @model.create_resource
    @model.add tuple, OT['complexValue'], complex_value
    @model.add complex_value, RDF['type'], OT["FeatureValue"]
    @model.add complex_value, OT['feature'], component_feature
    @model.add complex_value, OT['value'], value.to_s
  end
  tuple
end
54
+
12
55
  # find or create a new compound and return the resource
13
56
  def find_or_create_compound(uri)
14
57
  compound = @model.subject(DC["identifier"], uri)
@@ -21,74 +64,18 @@ module OpenTox
21
64
  end
22
65
 
23
66
  # find or create a new feature and return the resource
24
- def find_or_create_feature(f)
25
- feature = @model.subject(DC["title"], f[:name].to_s)
67
+ def find_or_create_feature(uri)
68
+ feature = @model.subject(DC["identifier"], uri)
26
69
  if feature.nil?
27
70
  feature = @model.create_resource
28
71
  @model.add feature, RDF['type'], OT["Feature"]
29
- @model.add feature, DC["identifier"], File.join("feature",feature.to_s.gsub(/[()]/,'')) # relative uri as we don know the final uri
30
- @model.add feature, DC["title"], f[:name].to_s
31
- @model.add feature, OT['hasSource'], f[:source].to_s if f[:source]
72
+ @model.add feature, DC["identifier"], uri
73
+ @model.add feature, DC["title"], File.basename(uri).split(/#/)[1]
74
+ @model.add feature, DC['source'], uri
32
75
  end
33
76
  feature
34
77
  end
35
78
 
36
- # find or create a new value and return the resource
37
- def find_or_create_value(v)
38
- value = @model.subject OT["value"], v.to_s
39
- if value.nil?
40
- value = @model.create_resource
41
- @model.add value, RDF['type'], OT["FeatureValue"]
42
- @model.add value, OT["value"], v.to_s
43
- end
44
- value
45
- end
46
-
47
- def tuple?(t)
48
- statements = []
49
- has_tuple = true
50
- t.each do |name,v|
51
- feature = self.find_or_create_feature(:name => name)
52
- value = self.find_or_create_value(v)
53
- tuple = @model.subject(feature,value)
54
- has_tuple = false if tuple.nil?
55
- statements << [tuple,feature,value]
56
- end
57
- tuples_found = statements.collect{|s| s[0]}.uniq
58
- has_tuple = false unless tuples_found.size == 1
59
- has_tuple
60
- end
61
-
62
- def create_tuple(t)
63
- tuple = @model.create_resource
64
- @model.add tuple, RDF['type'], OT["Tuple"]
65
- t.each do |name,value|
66
- feature = self.find_or_create_feature(:name => name)
67
- value = self.find_or_create_value(value)
68
- pair = @model.create_resource
69
- @model.add tuple, OT['tuple'], pair
70
- @model.add pair, OT['feature'], feature
71
- @model.add pair, OT['value'], value
72
- end
73
- tuple
74
- end
75
-
76
- def find_or_create_tuple(t)
77
- if self.tuple?(t)
78
- t
79
- else
80
- self.create_tuple(t)
81
- end
82
- end
83
-
84
- def add_data_entry(compound,feature,value)
85
- data_entry = @model.create_resource
86
- @model.add data_entry, RDF['type'], OT["DataEntry"]
87
- @model.add data_entry, OT['compound'], compound
88
- @model.add data_entry, OT['feature'], feature
89
- @model.add data_entry, OT['values'], value
90
- end
91
-
92
79
  def self.create(data, content_type = 'application/rdf+xml')
93
80
  uri = RestClient.post @@config[:services]["opentox-dataset"], data, :content_type => content_type
94
81
  dataset = Dataset.new
@@ -98,7 +85,7 @@ module OpenTox
98
85
 
99
86
  def self.find(uri)
100
87
  begin
101
- RestClient.get uri # check if the resource is available
88
+ RestClient.get uri, :accept => 'application/rdf+xml' # check if the resource is available
102
89
  dataset = Dataset.new
103
90
  dataset.read uri.to_s
104
91
  dataset
@@ -108,26 +95,61 @@ module OpenTox
108
95
  end
109
96
 
110
97
  def features
98
+ features = []
99
+ @model.subjects(RDF['type'], OT["Feature"]).each do |feature_node|
100
+ features << @model.object(feature_node, DC["identifier"])#
101
+ end
102
+ features
103
+ end
104
+
105
# Collect all data entries of the dataset as plain Ruby structures.
#
# Returns a hash keyed by compound identifier (string). Each value is an array
# with one hash per OT values node of that compound's data entries:
# * for an OT FeatureValue node: { feature identifier => value string }
# * for an OT Tuple node: { feature identifier => { component title => value string } }
# * for any other node type: an empty hash
def data
  entries_by_compound = {}
  @model.subjects(RDF['type'], OT['DataEntry']).each do |data_entry|
    compound_node = @model.object(data_entry, OT['compound'])
    # A leftover debugging statement used to print to stdout here on every
    # call; it has been removed because it had no effect on the result.
    compound_uri = @model.object(compound_node, DC['identifier']).to_s
    entries_by_compound[compound_uri] ||= []
    @model.find(data_entry, OT['values'], nil) do |_s,_p,values|
      entry = {}
      feature_node = @model.object values, OT['feature']
      feature_uri = @model.object(feature_node, DC['identifier']).to_s
      # TODO simple features
      type = @model.object(values, RDF['type'])
      if type == OT['FeatureValue']
        entry[feature_uri] = @model.object(values, OT['value']).to_s
      elsif type == OT['Tuple']
        components = {}
        @model.find(values, OT['complexValue'],nil) do |_ts,_tp,complex_value|
          name_node = @model.object complex_value, OT['feature']
          name = @model.object(name_node, DC['title']).to_s
          components[name] = @model.object(complex_value, OT['value']).to_s
        end
        entry[feature_uri] = components
      end
      entries_by_compound[compound_uri] << entry
    end
  end
  entries_by_compound
end
112
135
 
113
- def feature_values(uri)
136
+ def feature_values(feature_uri)
114
137
  features = {}
115
- feature = @model.subject(DC["identifier"],uri)
138
+ feature = @model.subject(DC["identifier"],feature_uri)
116
139
  @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node|
117
140
  compound = @model.object(compound_node, DC["identifier"]).to_s.sub(/^\[(.*)\]$/,'\1')
118
141
  features[compound] = [] unless features[compound]
119
- @model.subjects(OT['compound'], compound_node).each do |data_entry|
120
- if feature == @model.object(data_entry, OT['feature'])
121
- values_node = @model.object(data_entry, OT['values'])
122
- @model.find(values_node, OT['value'], nil) do |s,p,value|
123
- case value.to_s
124
- when "true"
125
- features[compound] << true
126
- when "false"
127
- features[compound] << false
128
- else
129
- features[compound] << value.to_s
130
- end
142
+ data_entry = @model.subject(OT['compound'], compound_node)
143
+ @model.find( data_entry, OT['values'], nil ) do |s,p,values|
144
+ if feature == @model.object(values, OT['feature'])
145
+ value = @model.object(values, OT['value'])
146
+ case value.to_s
147
+ when "true"
148
+ features[compound] << true
149
+ when "false"
150
+ features[compound] << false
151
+ else
152
+ features[compound] << value.to_s
131
153
  end
132
154
  end
133
155
  end
@@ -135,6 +157,7 @@ module OpenTox
135
157
  features
136
158
  end
137
159
 
160
+ =begin
138
161
  def tuples
139
162
  tuples = []
140
163
  @model.subjects(RDF['type'], OT["Tuple"]).each do |t|
@@ -172,11 +195,12 @@ module OpenTox
172
195
  #puts values_node
173
196
  end
174
197
  end
198
+ =end
175
199
 
176
200
  def compounds
177
201
  compounds = []
178
202
  @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node|
179
- compounds << @model.object(compound_node, DC["identifier"])#.to_s.sub(/^\[(.*)\]$/,'\1')
203
+ compounds << @model.object(compound_node, DC["identifier"]).to_s
180
204
  end
181
205
  compounds
182
206
  end
@@ -190,60 +214,19 @@ module OpenTox
190
214
  RestClient.post(@@config[:services]["opentox-dataset"], self.rdf, :content_type => "application/rdf+xml").to_s
191
215
  end
192
216
 
217
# Serialize a summary of this dataset as a YAML document.
#
# The document is a hash with the keys :uri, :opentox_class, :title, :source,
# :identifier, :compounds (identifiers with a surrounding [...] stripped),
# :features (identifiers as strings) and :data_entries.
def to_yaml
  # NOTE(review): no data_entries method is visible in this class any more,
  # while data builds the compound => entries hash. Use data_entries when
  # something (e.g. an included module) still provides it, otherwise fall back
  # to data instead of failing with NoMethodError - confirm the intended source.
  entries = respond_to?(:data_entries, true) ? data_entries : data
  {
    :uri => self.uri,
    :opentox_class => self.owl_class,
    :title => self.title,
    :source => self.source,
    :identifier => self.identifier,
    :compounds => self.compounds.collect{|c| c.to_s.sub(/^\[(.*)\]$/,'\1')},
    :features => self.features.collect{|f| f.to_s },
    :data_entries => entries,
  }.to_yaml
end
229
+
193
230
  end
194
231
 
195
232
  end
196
-
197
-
198
- # def tanimoto(dataset)
199
- # RestClient.get(File.join(@uri,'tanimoto',dataset.path))
200
- # end
201
- #
202
- # def weighted_tanimoto(dataset)
203
- # RestClient.get(File.join(@uri,'weighted_tanimoto',dataset.path))
204
- # end
205
- =begin
206
- def data_entries
207
- data = {}
208
- @model.subjects(RDF['type'], OT["Compound"]).each do |compound_node|
209
- compound = @model.object(compound_node, DC["identifier"]).to_s#.sub(/^\[(.*)\]$/,'\1')
210
- #compound = OpenTox::Compound.new(:inchi => compound).smiles
211
- data[compound] = [] unless data[compound]
212
- #puts compound
213
- @model.subjects(OT['compound'], compound_node).each do |data_entry|
214
- feature_node = @model.object(data_entry, OT['feature'])
215
- feature = @model.object(feature_node, DC["identifier"]).to_s
216
- values_node = @model.object(data_entry, OT['values'])
217
- type = @model.object(values_node,RDF['type']).to_s
218
- case type
219
- when /FeatureValue/
220
- @model.find(values_node, OT['value'], nil) do |s,p,value|
221
- case value.to_s
222
- when "true"
223
- data[compound] << {feature => true}
224
- when "false"
225
- data[compound] << {feature => false}
226
- else
227
- data[compound] << {feature => value.to_s}
228
- end
229
- end
230
- when /Tuple/ # this is really slow
231
- t = {}
232
- @model.find(values_node, OT['tuple'], nil) do |s,p,tuple|
233
- @model.find(tuple, OT['feature'], nil) do |s,p,feature|
234
- @name = @model.object(feature,DC['title']).to_s
235
- end
236
- @model.find(tuple, OT['value'], nil) do |s,p,value|
237
- v = @model.object(value,OT['value']).to_s
238
- t[@name] = v
239
- #print @name + ": "
240
- #puts v
241
- end
242
- end
243
- data[compound] << t
244
- end
245
- end
246
- end
247
- data
248
- end
249
- =end
@@ -1,16 +1,18 @@
1
1
  module OpenTox
2
2
  module Model
3
+
3
4
  class Lazar
4
5
  include Owl
6
+
7
+ attr_accessor :dataset, :predictions
5
8
 
6
9
  # Create a new prediction model from a dataset
7
- def initialize
8
- super
9
- end
10
-
11
- def read_yaml(id,yaml)
12
- @lazar = YAML.load yaml
13
- self.identifier = File.join(@@config[:services]["opentox-model"],'lazar',id)
10
+ def initialize(yaml)
11
+ super()
12
+ id = File.basename(yaml,'.yaml')
13
+ # TODO Untyped Individual: http://localhost:4003/lazar/{id} ????
14
+ @lazar = YAML.load_file yaml
15
+ self.uri = File.join(@@config[:services]["opentox-model"],'lazar',id)
14
16
  self.title = "lazar model for #{@lazar[:endpoint]}"
15
17
  self.source = "http://github.com/helma/opentox-model"
16
18
  self.parameters = {
@@ -18,19 +20,24 @@ module OpenTox
18
20
  "Feature URI for dependent variable" => { :scope => "mandatory", :value => "feature_uri=#{@lazar[:endpoint]}" },
19
21
  "Feature generation URI" => { :scope => "mandatory", :value => "feature_generation_uri=" } #TODO write to yaml
20
22
  }
21
- self.algorithm = File.join(@@config[:services]["opentox-model"],"lazar")
23
+ self.algorithm = File.join(@@config[:services]["opentox-algorithm"],"lazar")
22
24
  self.trainingDataset = @lazar[:activity_dataset]
23
25
  self.dependentVariables = @lazar[:endpoint]
24
- self.predictedVariables = @lazar[:endpoint] + " lazar prediction"
26
+ self.independentVariables = "http://localhost:4002/fminer#BBRC_representative" # TODO read this from dataset
27
+ self.predictedVariables = @lazar[:endpoint] #+ " lazar prediction"
28
+ @dataset = OpenTox::Dataset.new
29
+ @predictions = {}
25
30
  end
26
31
 
27
32
  def self.find(uri)
33
+ =begin
28
34
  begin
29
35
  YAML.load(RestClient.get uri)
30
36
  Lazar.new uri
31
37
  rescue
32
38
  halt 404, "Model #{uri} not found."
33
39
  end
40
+ =end
34
41
  end
35
42
 
36
43
  def self.find_all
@@ -42,6 +49,65 @@ module OpenTox
42
49
  RestClient.post(@uri, :compound_uri => compound.uri)
43
50
  end
44
51
 
52
# Report whether the model holds measured activities for a compound, and
# record them when it does.
#
# When @lazar[:activities] has an entry for compound_uri, the activities are
# joined with ',' and added to @dataset as the value of the endpoint feature,
# @predictions[compound_uri] is set to
# { endpoint => { :measured_activities => activities } } and true is returned.
# Otherwise nothing is recorded and false is returned.
def database_activity?(compound_uri)
  measured = @lazar[:activities][compound_uri]
  return false unless measured

  endpoint = @lazar[:endpoint]
  compound = @dataset.find_or_create_compound(compound_uri)
  feature = @dataset.find_or_create_feature(endpoint)
  @dataset.add(compound, feature, measured.join(','))
  @predictions[compound_uri] = { endpoint => { :measured_activities => measured } }
  true
end
66
+
67
# Classify a compound from the activities of its similar training compounds.
#
# The compound is matched against @lazar[:features]; every training
# fingerprint whose weighted Tanimoto similarity to those matches exceeds 0.3
# counts as a neighbor. Each 'true' activity of a neighbor adds
# OpenTox::Utils.gauss(similarity) to the confidence, each 'false' activity
# subtracts it, and the sum is divided by the number of neighbors.
# A positive confidence classifies as true, a negative one as false;
# otherwise (including when there are no neighbors) the classification is nil
# and the confidence is 0.0.
#
# The prediction is stored as a tuple in @dataset and under
# @predictions[compound_uri]; the stored @predictions entry is returned.
def classify(compound_uri)

  compound = OpenTox::Compound.new(:uri => compound_uri)
  compound_matches = compound.match @lazar[:features]

  conf = 0.0
  neighbors = []
  classification = nil

  @lazar[:fingerprints].each do |uri,matches|

    sim = OpenTox::Algorithm::Similarity.weighted_tanimoto(compound_matches,matches,@lazar[:p_values])
    next unless sim > 0.3

    neighbors << uri
    @lazar[:activities][uri].each do |act|
      case act.to_s
      when 'true'
        conf += OpenTox::Utils.gauss(sim)
      when 'false'
        conf -= OpenTox::Utils.gauss(sim)
      end
    end
  end

  # Without neighbors there is nothing to average: 0.0 / 0 would turn the
  # confidence into NaN, which then ends up in the dataset and the prediction.
  conf = conf/neighbors.size unless neighbors.empty?
  if conf > 0.0
    classification = true
  elsif conf < 0.0
    classification = false
  end

  compound = @dataset.find_or_create_compound(compound_uri)
  feature = @dataset.find_or_create_feature(@lazar[:endpoint])
  tuple = @dataset.create_tuple(feature,{ 'lazar#classification' => classification, 'lazar#confidence' => conf})
  @dataset.add_tuple compound,tuple
  @predictions[compound_uri] = { @lazar[:endpoint] => { :lazar_prediction => {
    :classification => classification,
    :confidence => conf,
    :neighbors => neighbors,
    :features => compound_matches
  } } }
end
110
+
45
111
  def self.base_uri
46
112
  @@config[:services]["opentox-model"]
47
113
  end
@@ -54,50 +120,35 @@ module OpenTox
54
120
  YAML.load(RestClient.get uri)[:endpoint]
55
121
  end
56
122
 
57
- end
58
- end
59
-
60
-
61
- =begin
62
- module Model
63
-
64
- class LazarClassification < OpenTox
65
-
66
-
67
- end
68
-
69
- end
70
-
71
- module Prediction
72
-
73
- module Classification
74
-
75
- class Lazar < OpenTox
76
-
77
- def initialize(params)
78
- super(params[:uri])
79
- end
80
-
81
- def classification
82
- YAML.load(RestClient.get(@uri))[:classification]
83
- end
84
-
85
- def confidence
86
- YAML.load(RestClient.get(@uri))[:confidence]
87
- end
123
# Link this model to the algorithm at the given URI (typed OT Algorithm).
def algorithm=(algorithm)
  add_typed_reference('algorithm', algorithm, 'Algorithm')
end

# Link this model to its training dataset URI (typed OT Dataset).
def trainingDataset=(trainingDataset)
  add_typed_reference('trainingDataset', trainingDataset, 'Dataset')
end

# Link this model to its dependent variable URI (typed OT Feature).
def dependentVariables=(dependentVariables)
  add_typed_reference('dependentVariables', dependentVariables, 'Feature')
end

# Link this model to its independent variable URI (typed OT Feature).
def independentVariables=(independentVariables)
  add_typed_reference('independentVariables', independentVariables, 'Feature')
end

# Link this model to its predicted variable URI (typed OT Feature).
def predictedVariables=(predictedVariables)
  add_typed_reference('predictedVariables', predictedVariables, 'Feature')
end

# Shared implementation of the setters above.
#
# Looks up the node typed OT[owl_class] in @model, adds the statement
# (node, OT[property], uri) and types the URI itself as OT[type].
# NOTE: the original author observed an "untyped individual" originating from
# the first statement and did not know why; that question is still open.
def add_typed_reference(property, uri, type)
  me = @model.subject(RDF['type'],OT[owl_class])
  @model.add me, OT[property], Redland::Uri.new(uri)
  @model.add Redland::Uri.new(uri), RDF['type'], OT[type]
end
private :add_typed_reference
99
152
  end
100
-
101
153
  end
102
- =end
103
154
  end