lazar-rest 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,5 @@
1
# Rack entry point for the lazar-rest service.
# Defines the service name, loads all bundled gems and the application
# library, then hands control to Sinatra.
SERVICE = "lazar-rest"

require 'bundler'
Bundler.require

require File.expand_path './lib/lazar-rest.rb'

run Sinatra::Application
@@ -0,0 +1,26 @@
1
# -*- encoding: utf-8 -*-

# Gem specification for the lazar-rest REST service.
Gem::Specification.new do |s|
  s.name        = "lazar-rest"
  s.version     = File.read("./VERSION")
  s.authors     = ["Christoph Helma","Micha Rautenberg","Denis Gebele"]
  # fix: typo "supprot@in-silico.ch" -> "support@in-silico.ch"
  s.email       = ["support@in-silico.ch"]
  s.homepage    = "http://github.com/opentox/lazar-rest"
  s.summary     = %q{lazar-rest}
  s.description = %q{REST Interface for Lazar Toxicology Predictions}
  s.license     = 'GPL-3'

  s.rubyforge_project = "lazar-rest"

  s.files = `git ls-files`.split("\n")
  s.require_paths = ["lib"]
  s.required_ruby_version = '>= 2.0.0'

  # Runtime dependencies: the lazar prediction engine, QMRF reporting,
  # the Sinatra web stack and its rendering/serving helpers.
  s.add_runtime_dependency "lazar"
  s.add_runtime_dependency "qsar-report"
  s.add_runtime_dependency "sinatra"
  s.add_runtime_dependency "haml"
  s.add_runtime_dependency "sass"
  s.add_runtime_dependency "unicorn"
  s.add_runtime_dependency 'rack-cors'
end
@@ -0,0 +1,82 @@
1
# POST /aa/authenticate
# Authenticates a user against OpenSSO and returns the subjectid token
# as plain text. Requires :username and :password form parameters.
post "/aa/authenticate/?" do
  supported = ["text/plain"]
  bad_request_error "Mime type #{@accept} not supported here. Please request data as #{supported.join(', ')}." unless supported.include? @accept
  bad_request_error "Please send formdata username." unless params[:username]
  bad_request_error "Please send formdata password." unless params[:password]
  if @accept == "text/plain"
    if OpenTox::Authorization.authenticate(params[:username], params[:password])
      return OpenTox::RestClientWrapper.subjectid
    else
      return nil
    end
  else
    bad_request_error "'#{@accept}' is not a supported content type."
  end
end
17
+
18
# POST /aa/logout
# Invalidates the given subjectid token via OpenSSO and reports the
# outcome as plain text. Requires the :subjectid form parameter.
post "/aa/logout/?" do
  supported = ["text/plain"]
  bad_request_error "Mime type #{@accept} not supported here. Please request data as #{supported.join(', ')}." unless supported.include? @accept
  bad_request_error "Please send formdata subjectid." unless params[:subjectid]
  if @accept == "text/plain"
    if OpenTox::Authorization.logout(params[:subjectid])
      return "Successfully logged out. \n"
    else
      return "Logout failed.\n"
    end
  else
    bad_request_error "'#{@accept}' is not a supported content type."
  end
end
33
+
34
module OpenTox

  # Base URI of the OpenSSO authentication/authorization service.
  AA = "https://opensso.in-silico.ch"

  module Authorization
    # Authentication against OpenSSO. Returns token. Requires Username and Password.
    # On success the token is stored in RestClientWrapper.subjectid.
    # @param user [String] Username
    # @param pw [String] Password
    # @return [Boolean] true if successful
    def self.authenticate(user, pw)
      begin
        res = RestClientWrapper.post("#{AA}/auth/authenticate",{:username=>user, :password => pw},{:subjectid => ""}).sub("token.id=","").sub("\n","")
        if is_token_valid(res)
          RestClientWrapper.subjectid = res
          return true
        else
          bad_request_error "Authentication failed #{res.inspect}"
        end
      rescue
        # res is nil here when the POST itself raised
        bad_request_error "Authentication failed #{res.inspect}"
      end
    end

    # Logout on opensso. Make token invalid. Requires token
    # @param [String] subjectid the subjectid
    # @return [Boolean] true if logout is OK
    def self.logout(subjectid=RestClientWrapper.subjectid)
      begin
        # fix: removed the unused local `out` that captured the response
        RestClientWrapper.post("#{AA}/auth/logout", :subjectid => subjectid)
        return true unless is_token_valid(subjectid)
      rescue
        return false
      end
      return false
    end

    # Checks if a token is a valid token
    # @param [String]subjectid subjectid from openSSO session
    # @return [Boolean] subjectid is valid or not.
    def self.is_token_valid(subjectid=RestClientWrapper.subjectid)
      begin
        return true if RestClientWrapper.post("#{AA}/auth/isTokenValid",:tokenid => subjectid) == "boolean=true\n"
      rescue # do rescue because openSSO throws 401
        return false
      end
      return false
    end
  end
end
@@ -0,0 +1,9 @@
1
# route to swagger API file
# Serves api/api.json with the "host" field rewritten to the requesting host.
get "/api/api.json" do
  response['Content-Type'] = "application/json"
  api_file = File.join("api", "api.json")
  # fix: File.exists? is deprecated (removed in Ruby 3.2); use File.exist?
  bad_request_error "API Documentation in Swagger JSON is not implemented.", uri("/#{SERVICE}/api") unless File.exist?(api_file)
  api_hash = JSON.parse(File.read(api_file))
  api_hash["host"] = request.env['HTTP_HOST']
  return api_hash.to_json
end
@@ -0,0 +1,64 @@
1
# Get a list of a single or all descriptors
# @param [Header] Accept one of text/plain, application/json
# @param [Path] Descriptor name or descriptor ID (e.G.: Openbabel.HBA1, 5755f8eb3cf99a00d8fedf2f)
# @return [text/plain, application/json] list of all prediction models
get "/compound/descriptor/?:descriptor?" do
  name = params[:descriptor]
  case @accept
  when "application/json"
    return "#{JSON.pretty_generate PhysChem::DESCRIPTORS} " unless name
    return PhysChem.find_by(:name => name).to_json if PhysChem::DESCRIPTORS.include?(name)
    return PhysChem.find(name).to_json if PhysChem.find(name)
  else
    # plain-text fallback: one "name: description" line per descriptor
    return PhysChem::DESCRIPTORS.collect{|k, v| "#{k}: #{v}\n"} unless name
    return PhysChem::DESCRIPTORS[name] if PhysChem::DESCRIPTORS.include?(name)
    return "#{PhysChem.find(name).name}: #{PhysChem.find(name).description}" if PhysChem.find(name)
  end
end
17
+
18
# POST /compound/descriptor
# Computes the requested physchem descriptors (comma separated in
# :descriptor) for the compound given as SMILES in :identifier.
# Returns CSV or JSON depending on the Accept header.
post "/compound/descriptor/?" do
  bad_request_error "Missing Parameter " unless params[:identifier] && params[:descriptor]
  requested = params['descriptor'].split(',')
  compound = Compound.from_smiles params[:identifier]
  physchem_descriptors = requested.collect { |name| PhysChem.find_by(:name => name) }
  result = compound.physchem physchem_descriptors
  csv = result.collect{|k,v| "\"#{PhysChem.find(k).name}\",#{v}" }.join("\n")
  csv = "SMILES,#{params[:identifier]}\n#{csv}" if params[:identifier]
  case @accept
  when "text/csv","application/csv"
    return csv
  when "application/json"
    result_hash = result.collect{|k,v| {"#{PhysChem.find(k).name}" => "#{v}"}}
    data = {"compound" => {"SMILES" => "#{params[:identifier]}"}}
    data["compound"]["InChI"] = "#{compound.inchi}" if compound.inchi
    data["compound"]["results"] = result_hash
    return JSON.pretty_generate(data)
  end
end
40
+
41
# GET /compound/<InChI>
# Renders the compound identified by an (URL-escaped) InChI in the
# representation selected by the Accept header.
get %r{/compound/(.+)} do |inchi|
  bad_request_error "Input parameter #{inchi} is not an InChI" unless inchi.match(/^InChI=/)
  # fix: URI.unescape is deprecated (removed in Ruby 3.0);
  # URI::DEFAULT_PARSER.unescape is the direct equivalent.
  compound = Compound.from_inchi URI::DEFAULT_PARSER.unescape(inchi)
  response['Content-Type'] = @accept
  case @accept
  when "application/json"
    return JSON.pretty_generate JSON.parse(compound.to_json)
  when "chemical/x-daylight-smiles"
    return compound.smiles
  when "chemical/x-inchi"
    return compound.inchi
  when "chemical/x-mdl-sdfile"
    return compound.sdf
  when "chemical/x-mdl-molfile"
    # NOTE(review): empty branch — molfile requests return an empty body.
    # Looks like a missing statement (compound.sdf or a mol variant?);
    # behavior preserved here pending confirmation.
  when "image/png"
    return compound.png
  when "image/svg+xml"
    return compound.svg
  when "text/plain"
    return "#{compound.names}\n"
  else
    return compound.inspect
  end
end
@@ -0,0 +1,47 @@
1
# Get all datasets
# Returns either a URI list or a JSON array with per-dataset URIs added.
get "/dataset/?" do
  datasets = Dataset.all
  case @accept
  when "text/uri-list"
    return datasets.collect { |dataset| uri("/dataset/#{dataset.id}") }.join("\n") + "\n"
  when "application/json"
    serialized = JSON.parse datasets.to_json
    serialized.each { |entry| entry[:URI] = uri("/dataset/#{entry["_id"]["$oid"]}") }
    return serialized.to_json
  else
    bad_request_error "Mime type #{@accept} is not supported."
  end
end
18
+
19
# Get a dataset
# JSON responses are enriched with URIs for the dataset, its substances
# and features; CSV returns the raw table.
get "/dataset/:id/?" do
  dataset = Dataset.find :id => params[:id]
  resource_not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
  case @accept
  when "application/json"
    dataset.data_entries.each do |substance_id, entry|
      entry[:URI] = uri("/substance/#{substance_id}")
    end
    dataset[:URI] = uri("/dataset/#{dataset.id}")
    dataset[:substances] = uri("/dataset/#{dataset.id}/substances")
    dataset[:features] = uri("/dataset/#{dataset.id}/features")
    return dataset.to_json
  when "text/csv", "application/csv"
    return dataset.to_csv
  else
    bad_request_error "Mime type #{@accept} is not supported."
  end
end
38
+
39
# Get a dataset attribute. One of compounds, nanoparticles, substances, features
get "/dataset/:id/:attribute/?" do
  dataset = Dataset.find :id => params[:id]
  resource_not_found_error "Dataset with id: #{params[:id]} not found." unless dataset
  attribs = ["compounds", "nanoparticles", "substances", "features"]
  # fix: reject unknown attributes with bad_request_error (400) instead of
  # returning the error text with a 200 status — consistent with the other
  # routes in this service.
  bad_request_error "Attribute '#{params[:attribute]}' is not available. Choose one of #{attribs.join(', ')}." unless attribs.include? params[:attribute]
  out = dataset.send(params[:attribute]) # safe: attribute is whitelisted above
  return out.to_json
end
@@ -0,0 +1,25 @@
1
# Get all Features
# Returns either a URI list or a JSON array with per-feature URIs added.
get "/feature/?" do
  features = Feature.all
  case @accept
  when "text/uri-list"
    return features.collect { |feature| uri("/feature/#{feature.id}") }.join("\n") + "\n"
  when "application/json"
    serialized = JSON.parse features.to_json
    serialized.each { |entry| entry[:URI] = uri("/feature/#{entry["_id"]["$oid"]}") }
    return serialized.to_json
  else
    bad_request_error "Mime type #{@accept} is not supported."
  end
end
18
+
19
# Get a feature
# Looks up a single feature by id and returns it as JSON with its URI added.
get "/feature/:id/?" do
  feature = Feature.find :id => params[:id]
  resource_not_found_error "Feature with id: #{params[:id]} not found." unless feature
  feature[:URI] = uri("/feature/#{feature.id}")
  return feature.to_json
end
@@ -0,0 +1,37 @@
1
require "sinatra"
require "../lazar/lib/lazar.rb"
require "../qsar-report/lib/qsar-report.rb"
include OpenTox

require 'rack/cors'

# Disable Sinatra's HTML exception pages.
set :show_exceptions => false

# add CORS support for swagger
use Rack::Cors do |config|
  config.allow do |allow|
    allow.origins '*'
    allow.resource "/#{SERVICE}/*",
      :methods => [:head, :get, :post, :put, :delete, :options],
      :headers => :any,
      :max_age => 0
  end
end

# Remember the requested mime type and echo it as the response content type.
before do
  @accept = request.env['HTTP_ACCEPT']
  response['Content-Type'] = @accept
end

# Load all route definitions.
%w[
  aa.rb
  api.rb
  compound.rb
  dataset.rb
  feature.rb
  model.rb
  nanoparticle.rb
  report.rb
  substance.rb
  validation.rb
].each { |route_file| require_relative route_file }
@@ -0,0 +1,50 @@
1
+
2
# Get a list of all prediction models
# @param [Header] Accept one of text/uri-list,
# @return [text/uri-list] list of all prediction models
get "/model/?" do
  models = Model::Prediction.all
  case @accept
  when "text/uri-list"
    return models.collect { |model| uri("/model/#{model.model_id}") }.join("\n") + "\n"
  when "application/json"
    serialized = JSON.parse models.to_json
    serialized.each do |entry|
      entry[:URI] = uri("/model/#{entry["model_id"]["$oid"]}")
      entry[:crossvalidation_uri] = uri("/crossvalidation/#{entry["crossvalidation_id"]["$oid"]}") if entry["crossvalidation_id"]
    end
    return serialized.to_json
  else
    bad_request_error "Mime type #{@accept} is not supported."
  end
end
22
+
23
# GET /model/:id
# Returns a single lazar model as JSON, with URIs added for the model
# itself and for its feature/training datasets and prediction feature.
get "/model/:id/?" do
  model = Model::Lazar.find params[:id]
  resource_not_found_error "Model with id: #{params[:id]} not found." unless model
  model[:URI] = uri("/model/#{model.id}")
  model[:neighbor_algorithm_parameters][:feature_dataset_uri] = uri("/dataset/#{model[:neighbor_algorithm_parameters][:feature_dataset_id]}") if model[:neighbor_algorithm_parameters][:feature_dataset_id]
  model[:training_dataset_uri] = uri("/dataset/#{model.training_dataset_id}") if model.training_dataset_id
  model[:prediction_feature_uri] = uri("/dataset/#{model.prediction_feature_id}") if model.prediction_feature_id
  return model.to_json
end
32
+
33
+
34
# POST /model/:id
# Predicts one or more compounds (comma separated SMILES in :identifier)
# with the model given by :id; returns a JSON hash per compound.
post "/model/:id/?" do
  smiles_list = params[:identifier].split(",")
  begin
    # get compound from SMILES
    compounds = smiles_list.collect { |smiles| Compound.from_smiles smiles.strip }
  rescue
    @error_report = "Attention, '#{params[:identifier]}' is not a valid SMILES string."
    return @error_report
  end
  model = Model::Lazar.find params[:id]
  batch = compounds.each_with_object({}) do |compound, acc|
    prediction = model.predict(compound)
    acc[compound] = {:id => compound.id, :inchi => compound.inchi, :smiles => compound.smiles, :model => model, :prediction => prediction}
  end
  return batch.to_json
end
@@ -0,0 +1,25 @@
1
# Get all Nanoparticles
# Returns either a URI list or a JSON array with per-particle URIs added.
get "/nanoparticle/?" do
  nanoparticles = Nanoparticle.all
  case @accept
  when "text/uri-list"
    return nanoparticles.collect { |nanoparticle| uri("/nanoparticle/#{nanoparticle.id}") }.join("\n") + "\n"
  when "application/json"
    serialized = JSON.parse nanoparticles.to_json
    serialized.each { |entry| entry[:URI] = uri("/nanoparticle/#{entry["_id"]["$oid"]}") }
    return serialized.to_json
  else
    bad_request_error "Mime type #{@accept} is not supported."
  end
end
18
+
19
# Get a nanoparticle
# Looks up a single nanoparticle by id and returns it as JSON with its URI.
get "/nanoparticle/:id/?" do
  nanoparticle = Nanoparticle.find :id => params[:id]
  resource_not_found_error "Nanoparticle with id: #{params[:id]} not found." unless nanoparticle
  nanoparticle[:URI] = uri("/nanoparticle/#{nanoparticle.id}")
  return nanoparticle.to_json
end
@@ -0,0 +1,209 @@
1
# Get a list of all possible reports to prediction models
# @param [Header] Accept one of text/uri-list,
# @return [text/uri-list] list of all prediction models
get "/report/?" do
  models = Model::Prediction.all
  case @accept
  when "text/uri-list"
    return models.collect { |model| uri("/report/#{model.model_id}") }.join("\n") + "\n"
  when "application/json"
    reports = [{}]
    models.each_index do |idx|
      entry = {}
      entry[:URI] = uri("/report/#{models[idx]["model_id"]}")
      entry[:repeated_crossvalidation_uri] = uri("/validation/repeatedcrossvalidation/#{models[idx]["repeated_crossvalidation_id"]}") if models[idx]["repeated_crossvalidation_id"]
      entry[:leave_one_out_validation_uri] = uri("/validation/leaveoneoutvalidation/#{models[idx]["leave_one_out_validation_id"]}") if models[idx]["leave_one_out_validation_id"]
      entry[:training_dataset_URI] = uri("/dataset/#{models[idx].training_dataset.id}") if models[idx].training_dataset.id
      reports[idx] = entry
    end
    return reports.to_json
  else
    bad_request_error "Mime type #{@accept} is not supported."
  end
end
25
+
26
# GET /report/:id
# Builds a QMRF (QSAR Model Reporting Format) XML document for the lazar
# model with the given id by filling in the numbered QMRF sections.
get "/report/:id/?" do
  model = Model::Lazar.find params[:id]
  resource_not_found_error "Model with id: #{params[:id]} not found." unless model
  prediction_model = Model::Prediction.find_by :model_id => params[:id]
  validation_template = File.join(File.dirname(__FILE__),"../views/model_details.haml")

  # Link either the exact git commit of a source checkout or the released
  # gem version of lazar.
  if File.directory?("#{File.dirname(__FILE__)}/../../lazar")
    lazar_commit = `cd #{File.dirname(__FILE__)}/../../lazar; git rev-parse HEAD`.strip
    lazar_commit = "https://github.com/opentox/lazar/tree/#{lazar_commit}"
  else
    lazar_commit = "https://github.com/opentox/lazar/releases/tag/v#{Gem.loaded_specs["lazar"].version}"
  end

  report = OpenTox::QMRFReport.new

  # QSAR Identifier Title 1.1
  report.value "QSAR_title", "Lazar model for #{prediction_model.species} #{prediction_model.endpoint}"

  # Software coding the model 1.3
  report.change_catalog :software_catalog, :firstsoftware, {:name => "lazar", :description => "lazar Lazy Structure- Activity Relationships", :number => "1", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
  report.ref_catalog :QSAR_software, :software_catalog, :firstsoftware

  # Date of QMRF 2.1
  report.value "qmrf_date", "#{Time.now.strftime('%d %B %Y')}"

  # QMRF author(s) and contact details 2.1
  report.change_catalog :authors_catalog, :firstauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"}
  report.ref_catalog :qmrf_authors, :authors_catalog, :firstauthor

  # Model developer(s) and contact details 2.5
  report.change_catalog :authors_catalog, :modelauthor, {:name => "Christoph Helma", :affiliation => "in silico toxicology gmbh", :contact => "Rastatterstr. 41, CH-4057 Basel", :email => "info@in-silico.ch", :number => "1", :url => "www.in-silico.ch"}
  report.ref_catalog :model_authors, :authors_catalog, :modelauthor

  # Date of model development and/or publication 2.6
  report.value "model_date", "#{Time.parse(model.created_at.to_s).strftime('%Y')}"

  # Reference(s) to main scientific papers and/or software package 2.7
  report.change_catalog :publications_catalog, :publications_catalog_1, {:title => "Maunz, Guetlein, Rautenberg, Vorgrimmler, Gebele and Helma (2013), lazar: a modular predictive toxicology framework ", :url => "http://dx.doi.org/10.3389/fphar.2013.00038"}
  report.ref_catalog :references, :publications_catalog, :publications_catalog_1

  # Reference(s) to main scientific papers and/or software package 2.7
  report.change_catalog :publications_catalog, :publications_catalog_2, {:title => "Maunz A and Helma C (2008) Prediction of chemical toxicity with local support vector regression and activity-specific kernels. SAR & QSAR in Environmental Research 19 (5-6), 413-431", :url => "http://dx.doi.org/10.1080/10629360802358430"}
  report.ref_catalog :references, :publications_catalog, :publications_catalog_2

  # Species 3.1
  report.value "model_species", prediction_model.species

  # Endpoint 3.2
  report.change_catalog :endpoints_catalog, :endpoints_catalog_1, {:name => prediction_model.endpoint, :group => ""}
  report.ref_catalog :model_endpoint, :endpoints_catalog, :endpoints_catalog_1

  # Endpoint Units 3.4
  report.value "endpoint_units", "#{prediction_model.unit}"

  model_type = model.class.to_s.gsub('OpenTox::Model::Lazar','')

  # Type of model 4.1
  report.value "algorithm_type", "#{model_type}"

  # Explicit algorithm 4.2
  report.change_catalog :algorithms_catalog, :algorithms_catalog_1, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Neighbor algorithm: #{model.neighbor_algorithm.gsub('_',' ').titleize}#{(model.neighbor_algorithm_parameters[:min_sim] ? ' with similarity > ' + model.neighbor_algorithm_parameters[:min_sim].to_s : '')}"}
  report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_1
  report.change_catalog :algorithms_catalog, :algorithms_catalog_3, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "modified k-nearest neighbor #{model_type}"}
  report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_3
  if model.prediction_algorithm_parameters
    pred_algorithm_params = (model.prediction_algorithm_parameters[:method] == "rf" ? "random forest" : model.prediction_algorithm_parameters[:method])
  end
  report.change_catalog :algorithms_catalog, :algorithms_catalog_2, {:definition => "see Helma 2016 and lazar.in-silico.ch, submitted version: #{lazar_commit}", :description => "Prediction algorithm: #{model.prediction_algorithm.gsub('OpenTox::Algorithm::','').gsub('_',' ').gsub('.', ' with ')} #{(pred_algorithm_params ? pred_algorithm_params : '')}"}
  report.ref_catalog :algorithm_explicit, :algorithms_catalog, :algorithms_catalog_2

  # Descriptors in the model 4.3
  if model.neighbor_algorithm_parameters[:type]
    report.change_catalog :descriptors_catalog, :descriptors_catalog_1, {:description => "", :name => "#{model.neighbor_algorithm_parameters[:type]}", :publication_ref => "", :units => ""}
    report.ref_catalog :algorithms_descriptors, :descriptors_catalog, :descriptors_catalog_1
  end

  # Descriptor selection 4.4
  report.value "descriptors_selection", "#{model.feature_selection_algorithm.gsub('_',' ')} #{model.feature_selection_algorithm_parameters.collect{|k,v| k.to_s + ': ' + v.to_s}.join(', ')}" if model.feature_selection_algorithm

  # Algorithm and descriptor generation 4.5
  report.value "descriptors_generation", "exhaustive breadth first search for paths in chemical graphs (simplified MolFea algorithm)"

  # Software name and version for descriptor generation 4.6
  report.change_catalog :software_catalog, :software_catalog_2, {:name => "lazar, submitted version: #{lazar_commit}", :description => "simplified MolFea algorithm", :number => "2", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
  report.ref_catalog :descriptors_generation_software, :software_catalog, :software_catalog_2

  # Chemicals/Descriptors ratio 4.7
  report.value "descriptors_chemicals_ratio", "not applicable (classification based on activities of neighbors, descriptors are used for similarity calculation)"

  # Description of the applicability domain of the model 5.1
  report.value "app_domain_description", "<html><head></head><body>
  <p>
  The applicability domain (AD) of the training set is characterized by
  the confidence index of a prediction (high confidence index: close to
  the applicability domain of the training set/reliable prediction, low
  confidence: far from the applicability domain of the
  trainingset/unreliable prediction). The confidence index considers (i)
  the similarity and number of neighbors and (ii) contradictory examples
  within the neighbors. A formal definition can be found in Helma 2006.
  </p>
  <p>
  The reliability of predictions decreases gradually with increasing
  distance from the applicability domain (i.e. decreasing confidence index)
  </p>
  </body>
  </html>"

  # Method used to assess the applicability domain 5.2
  report.value "app_domain_method", "see Helma 2006 and Maunz 2008"

  # Software name and version for applicability domain assessment 5.3
  report.change_catalog :software_catalog, :software_catalog_3, {:name => "lazar, submitted version: #{lazar_commit}", :description => "integrated into main lazar algorithm", :number => "3", :url => "https://lazar.in-silico.ch", :contact => "info@in-silico.ch"}
  report.ref_catalog :app_domain_software, :software_catalog, :software_catalog_3

  # Limits of applicability 5.4
  report.value "applicability_limits", "Predictions with low confidence index, unknown substructures and neighbors that might act by different mechanisms"

  # Availability of the training set 6.1
  report.change_attributes "training_set_availability", {:answer => "Yes"}

  # Available information for the training set 6.2
  report.change_attributes "training_set_data", {:cas => "Yes", :chemname => "Yes", :formula => "Yes", :inchi => "Yes", :mol => "Yes", :smiles => "Yes"}

  # Data for each descriptor variable for the training set 6.3
  report.change_attributes "training_set_descriptors", {:answer => "No"}

  # Data for the dependent variable for the training set 6.4
  report.change_attributes "dependent_var_availability", {:answer => "All"}

  # Other information about the training set 6.5
  report.value "other_info", "#{prediction_model.source}"

  # Pre-processing of data before modelling 6.6
  report.value "preprocessing", (model.class == OpenTox::Model::LazarRegression ? "-log10 transformation" : "none")

  # Robustness - Statistics obtained by leave-many-out cross-validation 6.9
  if prediction_model.repeated_crossvalidation
    crossvalidations = prediction_model.crossvalidations
    out = haml File.read(validation_template), :layout=> false, :locals => {:model => prediction_model}
    report.value "lmo", out
  end

  # Mechanistic basis of the model 8.1
  report.value "mechanistic_basis","<html><head></head><body>
  <p>
  Compounds with similar structures (neighbors) are assumed to have
  similar activities as the query compound. For the determination of
  activity specific similarities only statistically relevant subtructures
  (paths) are used. For this reason there is a priori no bias towards
  specific mechanistic hypothesis.
  </p>
  </body>
  </html>"

  # A priori or a posteriori mechanistic interpretation 8.2
  report.value "mechanistic_basis_comments","a posteriori for individual predictions"

  # Other information about the mechanistic interpretation 8.3
  report.value "mechanistic_basis_info","<html><head></head><body><p>Hypothesis about biochemical mechanisms can be derived from individual
  predictions by inspecting neighbors and relevant fragments.</p>
  <p>Neighbors are compounds that are similar in respect to a certain
  endpoint and it is likely that compounds with high similarity act by
  similar mechanisms as the query compound. Links at the webinterface
  prove an easy access to additional experimental data and literature
  citations for the neighbors and the query structure.</p>
  <p>Activating and deactivating parts of the query compound are highlighted
  in red and green on the webinterface. Fragments that are unknown (or too
  infrequent for statistical evaluation are marked in yellow and
  additional statistical information about the individual fragments can be
  retrieved. Please note that lazar predictions are based on neighbors and
  not on fragments. Fragments and their statistical significance are used
  for the calculation of activity specific similarities.</p>"

  # Bibliography 9.2
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_1
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_2
  report.change_catalog :publications_catalog, :publications_catalog_3, {:title => "Helma (2006), Lazy structure-activity relationships (lazar) for the prediction of rodent carcinogenicity and Salmonella mutagenicity.", :url => "http://dx.doi.org/10.1007/s11030-005-9001-5"}
  report.ref_catalog :bibliography, :publications_catalog, :publications_catalog_3

  # output
  response['Content-Type'] = "application/xml"
  return report.to_xml

end