lazar-gui 1.1.3 → 1.3.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/FAQ.md +22 -2
- data/Gemfile +2 -1
- data/README.md +5 -4
- data/VERSION +1 -1
- data/application.rb +151 -66
- data/bin/lazar-start.sh +2 -2
- data/lazar-gui.gemspec +6 -4
- data/public/javascripts/google_analytics_lazar.js +7 -0
- data/qmrf_report.rb +254 -0
- data/tmp/.gitignore +2 -0
- data/unicorn.rb +1 -6
- data/views/batch.haml +83 -74
- data/views/error.haml +1 -2
- data/views/info.haml +1 -1
- data/views/layout.haml +16 -11
- data/views/model_details.haml +157 -137
- data/views/neighbors.haml +2 -2
- data/views/predict.haml +17 -10
- data/views/prediction.haml +28 -20
- data/views/style.scss +2 -4
- metadata +39 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 41811630fcbcad38d926cc7be7a0bf15f5a2f776
|
4
|
+
data.tar.gz: 650146eee1929807db04ac7e0e0a71cd527af9a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fcc1ee841ecba46e71b425033933836f74f6138b5df5e2c4defddfbe851c5559150fd3032cea96a179c01dc2efd46959fb63e01002742e2a9f6255925b03abf0
|
7
|
+
data.tar.gz: 81da5cb2d840cd315f732ec54f90c98cdc69353fa85988facbf366029ec52878d4e613a78ba08c20c8b1e367022cbfcac2312407590deb384f4665f0ec15f37d
|
data/FAQ.md
CHANGED
@@ -1,6 +1,6 @@
|
|
1
|
-
## lazar Frequently Asked Questions
|
1
|
+
## *lazar* Frequently Asked Questions
|
2
2
|
|
3
|
-
####The
|
3
|
+
#### The *lazar* program's interface has changed, and I am not sure how to use the information given with regard to its confidence. In the former version, I would consider a confidence value higher than 0.025 as reliable. But now, there is no such parameter in the prediction results. How can I consider a prediction as presenting high or low confidence?
|
4
4
|
|
5
5
|
In the past, many users had problems interpreting the confidence level,
|
6
6
|
for this reason we now provide the probabilities that the prediction
|
@@ -16,3 +16,23 @@ activities of similar compounds.
|
|
16
16
|
Probabilities are calculated from the activities and similarities of
|
17
17
|
neighbors, please make sure to inspect the neighbors list for any
|
18
18
|
inconsistencies that might affect the prediction.
|
19
|
+
|
20
|
+
#### How can I use *lazar* locally on my computer?
|
21
|
+
If you are familiar with docker, you can use one of our docker images to run lazar locally:
|
22
|
+
https://hub.docker.com/r/insilicotox/lazar
|
23
|
+
https://hub.docker.com/r/insilicotox/nano-lazar
|
24
|
+
|
25
|
+
If you want to install lazar/nano-lazar without docker, you should know how to use UNIX/Linux and the Ruby programming language. Source code and brief installation instructions for the GUIs are available at:
|
26
|
+
|
27
|
+
https://github.com/opentox/lazar-gui
|
28
|
+
https://github.com/opentox/nano-lazar
|
29
|
+
|
30
|
+
You can also use just the libraries from the command line:
|
31
|
+
|
32
|
+
https://github.com/opentox/lazar
|
33
|
+
|
34
|
+
Documentation is available at:
|
35
|
+
|
36
|
+
http://www.rubydoc.info/gems/lazar
|
37
|
+
|
38
|
+
lazar depends on a couple of external libraries and programs that can be difficult to install. Due to limited resources we cannot provide support; please use the docker version if you cannot manage the installation on your own.
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -5,11 +5,12 @@ Installation:
|
|
5
5
|
-------------
|
6
6
|
bundle install
|
7
7
|
|
8
|
-
|
8
|
+
Service start:
|
9
9
|
------
|
10
|
-
sudo
|
11
|
-
|
10
|
+
sudo mongod &
|
11
|
+
R CMD Rserve --vanilla &
|
12
|
+
unicorn -p 8088 -c unicorn.rb -E production
|
12
13
|
|
13
14
|
Visit:
|
14
15
|
------
|
15
|
-
http://localhost:
|
16
|
+
http://localhost:8088
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1
|
1
|
+
1.3.1
|
data/application.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
|
-
#require_relative 'helper.rb'
|
2
1
|
require 'rdiscount'
|
2
|
+
require_relative 'qmrf_report.rb'
|
3
3
|
include OpenTox
|
4
4
|
|
5
5
|
|
@@ -13,15 +13,6 @@ configure :development do
|
|
13
13
|
enable :reloader
|
14
14
|
end
|
15
15
|
|
16
|
-
helpers do
|
17
|
-
class Numeric
|
18
|
-
def percent_of(n)
|
19
|
-
self.to_f / n.to_f * 100.0
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
end
|
24
|
-
|
25
16
|
before do
|
26
17
|
@version = File.read("VERSION").chomp
|
27
18
|
end
|
@@ -30,6 +21,11 @@ not_found do
|
|
30
21
|
redirect to('/predict')
|
31
22
|
end
|
32
23
|
|
24
|
+
error do
|
25
|
+
@error = request.env['sinatra.error']
|
26
|
+
haml :error
|
27
|
+
end
|
28
|
+
|
33
29
|
get '/?' do
|
34
30
|
redirect to('/predict')
|
35
31
|
end
|
@@ -38,7 +34,12 @@ get '/predict/?' do
|
|
38
34
|
@models = OpenTox::Model::Validation.all
|
39
35
|
@models = @models.delete_if{|m| m.model.name =~ /\b(Net cell association)\b/}
|
40
36
|
@endpoints = @models.collect{|m| m.endpoint}.sort.uniq
|
41
|
-
@models.count
|
37
|
+
if @models.count > 0
|
38
|
+
rodent_index = 0
|
39
|
+
@models.each_with_index{|model,idx| rodent_index = idx if model.species =~ /Rodent/}
|
40
|
+
@models.insert(rodent_index-1,@models.delete_at(rodent_index))
|
41
|
+
end
|
42
|
+
@models.count > 0 ? (haml :predict) : (haml :info)
|
42
43
|
end
|
43
44
|
|
44
45
|
get '/predict/modeldetails/:model' do
|
@@ -73,43 +74,10 @@ get '/predict/dataset/:name' do
|
|
73
74
|
csv
|
74
75
|
end
|
75
76
|
|
76
|
-
get '/predict
|
77
|
+
get '/predict/:tmppath/:filename/?' do
|
77
78
|
response['Content-Type'] = "text/csv"
|
78
|
-
|
79
|
-
|
80
|
-
compound = key
|
81
|
-
smiles = compound.smiles
|
82
|
-
values.each do |array|
|
83
|
-
model = array[0]
|
84
|
-
type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
|
85
|
-
prediction = array[1]
|
86
|
-
endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
|
87
|
-
if prediction[:confidence] == "measured"
|
88
|
-
if prediction[:value].is_a?(Array)
|
89
|
-
prediction[:value].each do |value|
|
90
|
-
pred = value.numeric? ? "#{value} (#{model.unit}), #{compound.mmol_to_mg(value.delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : value
|
91
|
-
int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
|
92
|
-
interval = (int.nil? ? "--" : "#{int[1].delog10} - #{int[0].delog10} (#{model.unit})")
|
93
|
-
@csv += "\"#{smiles}\",\"#{endpoint}\",\"#{type}\",\"#{pred}\",\"#{interval}\"\n"
|
94
|
-
end
|
95
|
-
else
|
96
|
-
pred = prediction[:value].numeric? ? "#{prediction[:value]} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
|
97
|
-
confidence = "measured activity"
|
98
|
-
end
|
99
|
-
elsif prediction[:neighbors].size > 0
|
100
|
-
type = model.model.class.to_s.match("Classification") ? "Classification" : "Regression"
|
101
|
-
pred = prediction[:value].numeric? ? "#{prediction[:value].delog10} (#{model.unit}), #{compound.mmol_to_mg(prediction[:value].delog10)} #{(model.unit =~ /\b(mol\/L)\b/) ? "(mg/L)" : "(mg/kg_bw/day)"}" : prediction[:value]
|
102
|
-
int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
|
103
|
-
interval = (int.nil? ? "--" : "#{int[1].delog10} - #{int[0].delog10} (#{model.unit})")
|
104
|
-
else
|
105
|
-
type = ""
|
106
|
-
pred = "Not enough similar compounds in training dataset."
|
107
|
-
interval = ""
|
108
|
-
end
|
109
|
-
@csv += "\"#{smiles}\",\"#{endpoint}\",\"#{type}\",\"#{pred}\",\"#{interval}\"\n" unless prediction[:value].is_a?(Array)
|
110
|
-
end
|
111
|
-
end
|
112
|
-
@csv
|
79
|
+
path = "/tmp/#{params[:tmppath]}"
|
80
|
+
send_file path, :filename => "lazar_batch_prediction_#{params[:filename]}", :type => "text/csv", :disposition => "attachment"
|
113
81
|
end
|
114
82
|
|
115
83
|
post '/predict/?' do
|
@@ -117,8 +85,7 @@ post '/predict/?' do
|
|
117
85
|
# process batch prediction
|
118
86
|
if !params[:fileselect].blank?
|
119
87
|
if params[:fileselect][:filename] !~ /\.csv$/
|
120
|
-
|
121
|
-
return haml :error
|
88
|
+
bad_request_error "Please submit a csv file."
|
122
89
|
end
|
123
90
|
File.open('tmp/' + params[:fileselect][:filename], "w") do |f|
|
124
91
|
f.write(params[:fileselect][:tempfile].read)
|
@@ -129,46 +96,153 @@ post '/predict/?' do
|
|
129
96
|
if input.class == OpenTox::Dataset
|
130
97
|
dataset = OpenTox::Dataset.find input
|
131
98
|
else
|
132
|
-
|
133
|
-
return haml :error
|
99
|
+
bad_request_error "Could not serialize file '#{@filename}'."
|
134
100
|
end
|
135
101
|
rescue
|
136
|
-
|
137
|
-
return haml :error
|
102
|
+
bad_request_error "Could not serialize file '#{@filename}'."
|
138
103
|
end
|
139
104
|
@compounds = dataset.compounds
|
140
105
|
if @compounds.size == 0
|
141
|
-
|
106
|
+
message = dataset[:warnings]
|
142
107
|
dataset.delete
|
143
|
-
|
108
|
+
bad_request_error message
|
144
109
|
end
|
110
|
+
|
111
|
+
# for csv export
|
145
112
|
@batch = {}
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
113
|
+
# for haml table
|
114
|
+
@view = {}
|
115
|
+
|
116
|
+
@compounds.each{|c| @view[c] = []}
|
117
|
+
params[:selection].keys.each do |model_id|
|
118
|
+
model = OpenTox::Model::Validation.find model_id
|
119
|
+
@batch[model] = []
|
120
|
+
@compounds.each_with_index do |compound,idx|
|
150
121
|
prediction = model.predict(compound)
|
151
|
-
@batch[
|
122
|
+
@batch[model] << [compound, prediction]
|
123
|
+
@view[compound] << [model,prediction]
|
152
124
|
end
|
153
125
|
end
|
154
|
-
|
126
|
+
|
127
|
+
@csvhash = {}
|
155
128
|
@warnings = dataset[:warnings]
|
129
|
+
dupEntries = {}
|
130
|
+
delEntries = ""
|
131
|
+
|
132
|
+
# split duplicates and deleted entries
|
133
|
+
@warnings.each do |w|
|
134
|
+
substring = w.match(/line .* of/)
|
135
|
+
unless substring.nil?
|
136
|
+
delEntries += "\"#{w.sub(/\b(tmp\/)\b/,"")}\"\n"
|
137
|
+
end
|
138
|
+
substring = w.match(/rows .* Entries/)
|
139
|
+
unless substring.nil?
|
140
|
+
lines = []
|
141
|
+
substring[0].split(",").each{|s| lines << s[/\d+/]}
|
142
|
+
lines.shift
|
143
|
+
lines.each{|l| dupEntries[l.to_i] = w.split(".").first}
|
144
|
+
end
|
145
|
+
end
|
146
|
+
|
147
|
+
@batch.each_with_index do |hash, idx|
|
148
|
+
@csvhash[idx] = ""
|
149
|
+
model = hash[0]
|
150
|
+
# create header
|
151
|
+
if model.regression?
|
152
|
+
predAunit = "(#{model.unit})"
|
153
|
+
predBunit = "(#{model.unit =~ /mmol\/L/ ? "(mol/L)" : "(mg/kg_bw/day)"})"
|
154
|
+
@csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction #{predAunit}\",\"Prediction #{predBunit}\",\"95% Prediction interval (low) #{predAunit}\",\"95% Prediction interval (high) #{predAunit}\",\"95% Prediction interval (low) #{predBunit}\",\"95% Prediction interval (high) #{predBunit}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
|
155
|
+
else #classification
|
156
|
+
av = model.prediction_feature.accept_values
|
157
|
+
probFirst = av[0].capitalize
|
158
|
+
probLast = av[1].capitalize
|
159
|
+
@csvhash[idx] = "\"ID\",\"Endpoint\",\"Type\",\"Unique SMILES\",\"Prediction\",\"predProbability#{probFirst}\",\"predProbability#{probLast}\",\"inApplicabilityDomain\",\"inTrainningSet\",\"Note\"\n"
|
160
|
+
end
|
161
|
+
values = hash[1]
|
162
|
+
dupEntries.keys.each{|k| values.insert(k-1, dupEntries[k])}.compact!
|
163
|
+
|
164
|
+
values.each_with_index do |array, id|
|
165
|
+
type = (model.regression? ? "Regression" : "Classification")
|
166
|
+
endpoint = "#{model.endpoint.gsub('_', ' ')} (#{model.species})"
|
167
|
+
|
168
|
+
if id == 0
|
169
|
+
@csvhash[idx] += delEntries unless delEntries.blank?
|
170
|
+
end
|
171
|
+
unless array.kind_of? String
|
172
|
+
compound = array[0]
|
173
|
+
prediction = array[1]
|
174
|
+
smiles = compound.smiles
|
175
|
+
|
176
|
+
if prediction[:neighbors]
|
177
|
+
if prediction[:value]
|
178
|
+
pred = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
|
179
|
+
predA = prediction[:value].numeric? ? "#{prediction[:value].delog10.signif(3)}" : prediction[:value]
|
180
|
+
predAunit = prediction[:value].numeric? ? "(#{model.unit})" : ""
|
181
|
+
predB = prediction[:value].numeric? ? "#{compound.mmol_to_mg(prediction[:value].delog10).signif(3)}" : prediction[:value]
|
182
|
+
predBunit = prediction[:value].numeric? ? "#{model.unit =~ /\b(mmol\/L)\b/ ? "(mg/L)" : "(mg/kg_bw/day)"}" : ""
|
183
|
+
int = (prediction[:prediction_interval].nil? ? nil : prediction[:prediction_interval])
|
184
|
+
intervalLow = (int.nil? ? "" : "#{int[1].delog10.signif(3)}")
|
185
|
+
intervalHigh = (int.nil? ? "" : "#{int[0].delog10.signif(3)}")
|
186
|
+
intervalLowMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[1].delog10).signif(3)}")
|
187
|
+
intervalHighMg = (int.nil? ? "" : "#{compound.mmol_to_mg(int[0].delog10).signif(3)}")
|
188
|
+
inApp = "yes"
|
189
|
+
inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
|
190
|
+
note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
|
191
|
+
|
192
|
+
unless prediction[:probabilities].nil?
|
193
|
+
av = model.prediction_feature.accept_values
|
194
|
+
propA = "#{prediction[:probabilities][av[0]].to_f.signif(3)}"
|
195
|
+
propB = "#{prediction[:probabilities][av[1]].to_f.signif(3)}"
|
196
|
+
end
|
197
|
+
else
|
198
|
+
# no prediction value only one neighbor
|
199
|
+
inApp = "no"
|
200
|
+
inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
|
201
|
+
note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
|
202
|
+
end
|
203
|
+
else
|
204
|
+
# no prediction value
|
205
|
+
inApp = "no"
|
206
|
+
inT = prediction[:info] =~ /\b(identical)\b/i ? "yes" : "no"
|
207
|
+
note = prediction[:warnings].join("\n") + ( prediction[:info] ? prediction[:info].sub(/\'.*\'/,"") : "\n" )
|
208
|
+
end
|
209
|
+
if @warnings
|
210
|
+
@warnings.each do |w|
|
211
|
+
note += (w.split(".").first + ".") if /\b(#{Regexp.escape(smiles)})\b/ === w
|
212
|
+
end
|
213
|
+
end
|
214
|
+
else
|
215
|
+
# string note for duplicates
|
216
|
+
endpoint = type = smiles = pred = predA = predB = propA = propB = intervalLow = intervalHigh = intervalLowMg = intervalHighMg = inApp = inT = ""
|
217
|
+
note = array
|
218
|
+
end
|
219
|
+
if model.regression?
|
220
|
+
@csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{predA}\",\"#{predB}\",\"#{intervalLow}\",\"#{intervalHigh}\",\"#{intervalLowMg}\",\"#{intervalHighMg}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
|
221
|
+
else
|
222
|
+
@csvhash[idx] += "\"#{id+1}\",\"#{endpoint}\",\"#{type}\",\"#{smiles}\",\"#{pred}\",\"#{propA}\",\"#{propB}\",\"#{inApp}\",\"#{inT}\",\"#{note.chomp}\"\n"
|
223
|
+
end
|
224
|
+
end
|
225
|
+
end
|
226
|
+
t = Tempfile.new
|
227
|
+
@csvhash.each do |model, csv|
|
228
|
+
t.write(csv)
|
229
|
+
t.write("\n")
|
230
|
+
end
|
231
|
+
t.rewind
|
232
|
+
@tmppath = t.path.split("/").last
|
233
|
+
|
156
234
|
dataset.delete
|
157
235
|
File.delete File.join("tmp", params[:fileselect][:filename])
|
158
236
|
return haml :batch
|
159
237
|
end
|
160
238
|
|
161
239
|
# validate identifier input
|
162
|
-
# transfered input
|
163
240
|
if !params[:identifier].blank?
|
164
241
|
@identifier = params[:identifier]
|
165
242
|
$logger.debug "input:#{@identifier}"
|
166
243
|
# get compound from SMILES
|
167
244
|
@compound = Compound.from_smiles @identifier
|
168
|
-
if @compound.blank?
|
169
|
-
@error_report = "'#{@identifier}' is not a valid SMILES string."
|
170
|
-
return haml :error
|
171
|
-
end
|
245
|
+
bad_request_error "'#{@identifier}' is not a valid SMILES string." if @compound.blank?
|
172
246
|
|
173
247
|
@models = []
|
174
248
|
@predictions = []
|
@@ -181,6 +255,17 @@ post '/predict/?' do
|
|
181
255
|
end
|
182
256
|
end
|
183
257
|
|
258
|
+
get "/report/:id/?" do
|
259
|
+
prediction_model = Model::Validation.find params[:id]
|
260
|
+
bad_request_error "model with id: '#{params[:id]}' not found." unless prediction_model
|
261
|
+
report = qmrf_report params[:id]
|
262
|
+
# output
|
263
|
+
t = Tempfile.new
|
264
|
+
t << report.to_xml
|
265
|
+
name = prediction_model.species.sub(/\s/,"-")+"-"+prediction_model.endpoint.downcase.sub(/\s/,"-")
|
266
|
+
send_file t.path, :filename => "QMRF_report_#{name.gsub!(/[^0-9A-Za-z]/, '_')}.xml", :type => "application/xml", :disposition => "attachment"
|
267
|
+
end
|
268
|
+
|
184
269
|
get '/license' do
|
185
270
|
@license = RDiscount.new(File.read("LICENSE.md")).to_html
|
186
271
|
haml :license, :layout => false
|
data/bin/lazar-start.sh
CHANGED
data/lazar-gui.gemspec
CHANGED
@@ -13,17 +13,19 @@ Gem::Specification.new do |s|
|
|
13
13
|
s.rubyforge_project = "lazar-gui"
|
14
14
|
s.files = `git ls-files`.split("\n")
|
15
15
|
|
16
|
-
s.add_runtime_dependency "lazar"
|
17
|
-
s.add_runtime_dependency "gem-path"
|
16
|
+
s.add_runtime_dependency "lazar"
|
18
17
|
s.add_runtime_dependency "sinatra"
|
18
|
+
s.add_runtime_dependency "sinatra-reloader"
|
19
19
|
s.add_runtime_dependency "rdiscount"
|
20
20
|
s.add_runtime_dependency "haml"
|
21
21
|
s.add_runtime_dependency "sass"
|
22
22
|
s.add_runtime_dependency "unicorn"
|
23
|
+
s.add_runtime_dependency "qsar-report"
|
24
|
+
s.add_runtime_dependency "gem-path"
|
23
25
|
|
24
26
|
s.post_install_message = %q{
|
25
|
-
Service
|
26
|
-
lazar-start
|
27
|
+
Service commands:
|
28
|
+
lazar-start
|
27
29
|
lazar-stop
|
28
30
|
}
|
29
31
|
end
|
@@ -0,0 +1,7 @@
|
|
1
|
+
(function(i,s,o,g,r,a,m){i['GoogleAnalyticsObject']=r;i[r]=i[r]||function(){
|
2
|
+
(i[r].q=i[r].q||[]).push(arguments)},i[r].l=1*new Date();a=s.createElement(o),
|
3
|
+
m=s.getElementsByTagName(o)[0];a.async=1;a.src=g;m.parentNode.insertBefore(a,m)
|
4
|
+
})(window,document,'script','//www.google-analytics.com/analytics.js','ga');
|
5
|
+
|
6
|
+
ga('create', 'UA-73247946-2', 'auto');
|
7
|
+
ga('send', 'pageview');
|
data/qmrf_report.rb
ADDED
@@ -0,0 +1,254 @@
|
|
1
|
+
def qmrf_report id
|
2
|
+
lazarpath = `gem path lazar`
|
3
|
+
lazarpath = File.dirname lazarpath
|
4
|
+
lazarpath = File.dirname lazarpath
|
5
|
+
qmrfpath = `gem path qsar-report`
|
6
|
+
qmrfpath = File.dirname qmrfpath
|
7
|
+
qmrfpath = File.dirname qmrfpath
|
8
|
+
prediction_model = Model::Validation.find id
|
9
|
+
model = prediction_model.model
|
10
|
+
|
11
|
+
if File.directory?(lazarpath)
|
12
|
+
lazar_commit = `cd #{lazarpath}; git rev-parse HEAD`.strip
|
13
|
+
lazar_commit = "https://github.com/opentox/lazar/tree/#{lazar_commit}"
|
14
|
+
else
|
15
|
+
lazar_commit = "https://github.com/opentox/lazar/releases/tag/v#{Gem.loaded_specs["lazar"].version}"
|
16
|
+
end
|
17
|
+
|
18
|
+
report = OpenTox::QMRFReport.new
|
19
|
+
|
20
|
+
# QSAR Identifier Title 1.1
|
21
|
+
report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint.downcase}"
|
22
|
+
|
23
|
+
# Software coding the model 1.3
|
24
|
+
report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships. See #{lazar_commit}", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
|
25
|
+
report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware
|
26
|
+
|
27
|
+
# Date of QMRF 2.1
|
28
|
+
report.value "qmrf_date", "#{Time.now.strftime('%d %B %Y')}"
|
29
|
+
|
30
|
+
# QMRF author(s) and contact details 2.2
|
31
|
+
report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"}
|
32
|
+
report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor
|
33
|
+
|
34
|
+
# Date of QMRF update(s) 2.3
|
35
|
+
$logger.debug prediction_model
|
36
|
+
if prediction_model.model.name =~ /TD50|multiple/
|
37
|
+
report.value "qmrf_date_revision", "2014-12-05"
|
38
|
+
end
|
39
|
+
|
40
|
+
# Date of QMRF update(s) 2.4
|
41
|
+
if prediction_model.model.name =~ /TD50/
|
42
|
+
report.value "qmrf_revision", "Q29-44-39-423"
|
43
|
+
elsif prediction_model.model.name =~ /multiple/
|
44
|
+
report.value "qmrf_revision", "Q28-43-38-420"
|
45
|
+
end
|
46
|
+
|
47
|
+
# Model developer(s) and contact details 2.5
|
48
|
+
report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"}
|
49
|
+
report.ref_catalog :model_authors, :authors_catalog, :modelauthor
|
50
|
+
|
51
|
+
# Date of model development and/or publication 2.6
|
52
|
+
report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}"
|
53
|
+
|
54
|
+
# Reference(s) to main scientific papers and/or software package 2.7
|
55
|
+
report.change_catalog :publications_catalog, :publications_catalog_4, {:title => "Maunz A., Guetlein M., Rautenberg M., Vorgrimmler D., Gebele D. and Helma C. (2013), lazar: a modular predictive toxicology framework ", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"}
|
56
|
+
|
57
|
+
report.ref_catalog :references, :publications_catalog, :publications_catalog_4
|
58
|
+
|
59
|
+
report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Helma C., Gebele D., Rautenberg M. (2017) lazar, software available at https://lazar.in-silico.ch,source code available at #{lazar_commit}", :url => "https://doi.org/10.5281/zenodo.215483"}
|
60
|
+
|
61
|
+
report.ref_catalog :references, :publications_catalog, :publications_catalog_1
|
62
|
+
|
63
|
+
# Availability of information about the model 2.8
|
64
|
+
report.value "info_availability", "Prediction interface and validation results available at https://lazar.in-silico.ch"
|
65
|
+
|
66
|
+
# Species 3.1
|
67
|
+
report.value "model_species", prediction_model.species
|
68
|
+
|
69
|
+
# Endpoint 3.2
|
70
|
+
report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.qmrf["name"], :group => "#{prediction_model.qmrf["group"]}"}
|
71
|
+
report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1
|
72
|
+
|
73
|
+
# Endpoint Units 3.4
|
74
|
+
report.value "endpoint_units", "#{prediction_model.unit}"
|
75
|
+
|
76
|
+
# Dependent variable 3.5
|
77
|
+
report.value "endpoint_variable", "#{prediction_model.endpoint}"
|
78
|
+
|
79
|
+
# Type of model 4.1
|
80
|
+
model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','')
|
81
|
+
report.value "algorithm_type", "#{model_type}"
|
82
|
+
|
83
|
+
# Explicit algorithm 4.2
|
84
|
+
report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1
|
85
|
+
report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "", :description => "modified k-nearest neighbor #{model_type.downcase} (#{model_type =~ /regression/i ? "local random forest" : "weighted majority vote"}), see #{lazar_commit}" }
|
86
|
+
|
87
|
+
# Descriptors in the model 4.3
|
88
|
+
if model.algorithms["descriptors"][:type]
|
89
|
+
report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "(Bender et al. 2004)", :name => "#{model.algorithms["descriptors"][:type]} fingerprints", :publication_ref => "", :units => ""}
|
90
|
+
report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1
|
91
|
+
end
|
92
|
+
|
93
|
+
# Descriptor selection 4.4
|
94
|
+
report.value "descriptors_selection", (model.class == OpenTox::Model::LazarRegression ? "Correlation with dependent variable (Pearson p <= 0.05)" : "none")
|
95
|
+
|
96
|
+
# Algorithm and descriptor generation 4.5
|
97
|
+
report.value "descriptors_generation", "lazar"
|
98
|
+
|
99
|
+
# Software name and version for descriptor generation 4.6
|
100
|
+
report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "2", :url => "", :contact => ""}
|
101
|
+
report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2
|
102
|
+
|
103
|
+
# Chemicals/Descriptors ratio 4.7
|
104
|
+
report.value "descriptors_chemicals_ratio", (model.class == OpenTox::Model::LazarRegression ? "variable (local regression models)" : "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)")
|
105
|
+
|
106
|
+
# Description of the applicability domain of the model 5.1
|
107
|
+
report.value "app_domain_description", "<html><head></head><body>
|
108
|
+
<p>
|
109
|
+
No predictions are made for query compounds without similar structures
|
110
|
+
in the training data. Similarity is determined as the Tanimoto coefficient of
|
111
|
+
Molprint 2D fingerprints with a threshold of 0.1.
|
112
|
+
</p>
|
113
|
+
<p>
|
114
|
+
Predictions based on a low number and/or very dissimilar neighbors or
|
115
|
+
on neighbors with conflicting experimental measurements
|
116
|
+
should be treated with caution.
|
117
|
+
</p>
|
118
|
+
</body>
|
119
|
+
</html>"
|
120
|
+
|
121
|
+
# Method used to assess the applicability domain 5.2
|
122
|
+
report.value "app_domain_method", "Number and similarity of training set compounds (part of the main lazar algorithm)"
|
123
|
+
|
124
|
+
# Software name and version for applicability domain assessment 5.3
|
125
|
+
report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "", :number => "3", :url => "", :contact => ""}
|
126
|
+
report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3
|
127
|
+
|
128
|
+
# Limits of applicability 5.4
|
129
|
+
report.value "applicability_limits", "Compounds without similar substances in the training dataset"
|
130
|
+
|
131
|
+
# Availability of the training set 6.1
|
132
|
+
report.change_attributes "training_set_availability", {:answer => "Yes"}
|
133
|
+
|
134
|
+
# Available information for the training set 6.2
|
135
|
+
report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"}
|
136
|
+
|
137
|
+
# Data for each descriptor variable for the training set 6.3
|
138
|
+
report.change_attributes "training_set_descriptors", {:answer => "on demand"}
|
139
|
+
|
140
|
+
# Data for the dependent variable for the training set 6.4
|
141
|
+
report.change_attributes "dependent_var_availability", {:answer => "Yes"}
|
142
|
+
|
143
|
+
# Other information about the training set 6.5
|
144
|
+
report.value "other_info", "Original data from: #{prediction_model.source}"
|
145
|
+
|
146
|
+
# Pre-processing of data before modelling 6.6
|
147
|
+
report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none")
|
148
|
+
|
149
|
+
# Robustness - Statistics obtained by leave-many-out cross-validation 6.9
|
150
|
+
if prediction_model.repeated_crossvalidation
|
151
|
+
crossvalidations = prediction_model.crossvalidations
|
152
|
+
block = ""
|
153
|
+
crossvalidations.each do |cv|
|
154
|
+
block += "<p>
|
155
|
+
<p>Num folds: #{cv.folds}</p>
|
156
|
+
<p>Num instances: #{cv.nr_instances}</p>
|
157
|
+
<p>Num unpredicted: #{cv.nr_unpredicted}</p>"
|
158
|
+
if model_type =~ /classification/i
|
159
|
+
block += "<p>Accuracy: #{cv.accuracy.signif(3)}</p>
|
160
|
+
<p>Weighted accuracy: #{cv.weighted_accuracy.signif(3)}</p>
|
161
|
+
<p>True positive rate: #{cv.true_rate[cv.accept_values[0]].signif(3)}</p>
|
162
|
+
<p>True negative rate: #{cv.true_rate[cv.accept_values[1]].signif(3)}</p>
|
163
|
+
<p>Positive predictive value: #{cv.predictivity[cv.accept_values[0]].signif(3)}</p>
|
164
|
+
<p>Negative predictive value: #{cv.predictivity[cv.accept_values[1]].signif(3)}</p>"
|
165
|
+
end
|
166
|
+
if model_type =~ /regression/i
|
167
|
+
block += "<p>RMSE: #{cv.rmse.signif(3)}</p>
|
168
|
+
<p>MAE: #{cv.mae.signif(3)}</p>
|
169
|
+
<p>R<sup>2</sup>: #{cv.r_squared.signif(3)}</p>"
|
170
|
+
end
|
171
|
+
block += "</p>"
|
172
|
+
end
|
173
|
+
report.value "lmo", "<html><head></head><body><b>3 independent 10-fold crossvalidations:</b>"+block+"</body></html>"
|
174
|
+
end
|
175
|
+
|
176
|
+
# Availability of the external validation set 7.1
|
177
|
+
report.change_attributes "validation_set_availability", {:answer => "No"}
|
178
|
+
|
179
|
+
# Available information for the external validation set 7.2
|
180
|
+
report.change_attributes "validation_set_data", {:cas => "", :chemname => "", :formula => "", :inchi => "", :mol => "", :smiles => ""}
|
181
|
+
|
182
|
+
# Data for each descriptor variable for the external validation set 7.3
|
183
|
+
report.change_attributes "validation_set_descriptors", {:answer => "Unknown"}
|
184
|
+
|
185
|
+
# Data for the dependent variable for the external validation set 7.4
|
186
|
+
report.change_attributes "validation_dependent_var_availability", {:answer => "Unknown"}
|
187
|
+
|
188
|
+
# Mechanistic basis of the model 8.1
|
189
|
+
report.value "mechanistic_basis","<html><head></head><body>
|
190
|
+
<p>
|
191
|
+
Compounds with similar structures (neighbors) are assumed to have
|
192
|
+
similar activities as the query compound.
|
193
|
+
</p>
|
194
|
+
</body>
|
195
|
+
</html>"
|
196
|
+
|
197
|
+
# A priori or a posteriori mechanistic interpretation 8.2
|
198
|
+
report.value "mechanistic_basis_comments","A posteriori for individual predictions"
|
199
|
+
|
200
|
+
# Other information about the mechanistic interpretation 8.3
|
201
|
+
report.value "mechanistic_basis_info","<html><head></head><body>
|
202
|
+
<p>
|
203
|
+
Hypothesis about biochemical mechanisms can be derived from individual
|
204
|
+
predictions by inspecting neighbors and relevant descriptors.
|
205
|
+
</p>
|
206
|
+
<p>
|
207
|
+
Neighbors are compounds that are similar in respect to a certain
|
208
|
+
endpoint and it is likely that compounds with high similarity act by
|
209
|
+
similar mechanisms as the query compound. Links at the webinterface
|
210
|
+
prove an easy access to additional experimental data and literature
|
211
|
+
citations for the neighbors and the query structure.
|
212
|
+
</p>
|
213
|
+
<p>
|
214
|
+
Please note that lazar predictions are based on neighbors.
|
215
|
+
Descriptors are only used for the calculation of similarities.
|
216
|
+
</p>
|
217
|
+
</body>
|
218
|
+
</html>"
|
219
|
+
|
220
|
+
# Comments 9.1
|
221
|
+
report.value "comments", "<html><head></head><body>
|
222
|
+
<p>
|
223
|
+
Public model interface: https://lazar.in-silico.ch
|
224
|
+
</p>
|
225
|
+
<p>
|
226
|
+
Source code: #{lazar_commit}
|
227
|
+
</p>
|
228
|
+
<p>
|
229
|
+
Docker image: https://hub.docker.com/r/insilicotox/lazar/
|
230
|
+
</p>
|
231
|
+
</body>
|
232
|
+
</html>"
|
233
|
+
|
234
|
+
# Bibliography 9.2
|
235
|
+
|
236
|
+
report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Helma C., Rautenberg M. and Gebele D. (2017), Nano-Lazar: Read across Predictions for Nanoparticle Toxicities with Calculated and Measured Properties", :url => "https://dx.doi.org/10.3389%2Ffphar.2017.00377"}
|
237
|
+
|
238
|
+
report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Lo Piparo et al. (2014), Automated and reproducible read-across like models for predicting carcinogenic potency", :url => "https://doi.org/10.1016/j.yrtph.2014.07.010"}
|
239
|
+
|
240
|
+
report.change_catalog :publications_catalog, :publications_catalog_5, {:title => "Maunz A. and Helma C. (2008), Prediction of chemical toxicity with local support vector regression and activity-specific kernels", :url => "http://dx.doi.org/10.1080/10629360802358430"}
|
241
|
+
|
242
|
+
report.change_catalog :publications_catalog, :publications_catalog_6, {:title => "Helma C. (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"}
|
243
|
+
|
244
|
+
report.change_catalog :publications_catalog, :publications_catalog_7, {:title => "Bender et al. (2004), Molecular similarity searching using atom environments, information-based feature selection, and a nave bayesian classifier.", :url => "https://doi.org/10.1021/ci034207y"}
|
245
|
+
|
246
|
+
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2
|
247
|
+
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3
|
248
|
+
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_5
|
249
|
+
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_6
|
250
|
+
report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_7
|
251
|
+
|
252
|
+
report
|
253
|
+
|
254
|
+
end
|