lazar 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/README.md +2 -1
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +15 -76
  6. data/ext/lazar/rinstall.R +9 -0
  7. data/lazar.gemspec +7 -7
  8. data/lib/classification.rb +5 -78
  9. data/lib/compound.rb +201 -44
  10. data/lib/crossvalidation.rb +224 -121
  11. data/lib/dataset.rb +83 -93
  12. data/lib/error.rb +1 -1
  13. data/lib/experiment.rb +99 -0
  14. data/lib/feature.rb +2 -54
  15. data/lib/lazar.rb +47 -34
  16. data/lib/leave-one-out-validation.rb +205 -0
  17. data/lib/model.rb +131 -76
  18. data/lib/opentox.rb +2 -2
  19. data/lib/overwrite.rb +37 -0
  20. data/lib/physchem.rb +133 -0
  21. data/lib/regression.rb +117 -189
  22. data/lib/rest-client-wrapper.rb +4 -5
  23. data/lib/unique_descriptors.rb +6 -7
  24. data/lib/validation.rb +63 -69
  25. data/test/all.rb +2 -2
  26. data/test/classification.rb +41 -0
  27. data/test/compound.rb +116 -7
  28. data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
  29. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
  30. data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
  31. data/test/data/batch_prediction.csv +25 -0
  32. data/test/data/batch_prediction_inchi_small.csv +4 -0
  33. data/test/data/batch_prediction_smiles_small.csv +4 -0
  34. data/test/data/hamster_carcinogenicity.json +3 -0
  35. data/test/data/loael.csv +568 -0
  36. data/test/dataset-long.rb +5 -8
  37. data/test/dataset.rb +31 -11
  38. data/test/default_environment.rb +11 -0
  39. data/test/descriptor.rb +26 -41
  40. data/test/error.rb +1 -3
  41. data/test/experiment.rb +301 -0
  42. data/test/feature.rb +22 -10
  43. data/test/lazar-long.rb +43 -23
  44. data/test/lazar-physchem-short.rb +19 -16
  45. data/test/prediction_models.rb +20 -0
  46. data/test/regression.rb +43 -0
  47. data/test/setup.rb +3 -1
  48. data/test/test_environment.rb +10 -0
  49. data/test/validation.rb +92 -26
  50. metadata +64 -38
  51. data/lib/SMARTS_InteLigand.txt +0 -983
  52. data/lib/bbrc.rb +0 -165
  53. data/lib/descriptor.rb +0 -247
  54. data/lib/neighbor.rb +0 -25
  55. data/lib/similarity.rb +0 -58
  56. data/mongoid.yml +0 -8
  57. data/test/descriptor-long.rb +0 -26
  58. data/test/fminer-long.rb +0 -38
  59. data/test/fminer.rb +0 -52
  60. data/test/lazar-fminer.rb +0 -50
  61. data/test/lazar-regression.rb +0 -27
data/lib/physchem.rb ADDED
@@ -0,0 +1,133 @@
1
module OpenTox

  # Feature for physico-chemical descriptors.
  #
  # Each instance names one descriptor of one descriptor library
  # (`library` is "Openbabel", "Cdk" or "Joelib") and can calculate its value
  # for a compound. The catalogue of available descriptors is collected once,
  # when this class is loaded, by querying OpenBabel and the bundled Java
  # helper programs.
  class PhysChem < NumericFeature

    field :library, type: String
    field :descriptor, type: String
    field :description, type: String

    JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
    # Sort the glob result so the choice is deterministic when several CDK jars are present.
    CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].sort.last
    JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
    LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
    JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")

    # OpenBabel descriptors: "Openbabel.<name>" => description, sorted by name.
    obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
    OBDESCRIPTORS = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
      name,description = d.split(/\s+/,2)
      ["Openbabel."+name,description] unless obexclude.include? name
    end.compact.sort{|a,b| a[0] <=> b[0]}]

    # CDK descriptors: "Cdk.<class without 'Descriptor'>.<value name>" => description.
    # The output of the CdkDescriptorInfo Java program is parsed as YAML.
    cdkdescriptors = {}
    CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
    CDK_DESCRIPTIONS.each do |d|
      prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
      d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
    end
    CDKDESCRIPTORS = cdkdescriptors

    # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
    joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
    # strip Joelib messages from stdout (everything up to the first "---" line)
    JOELIBDESCRIPTORS = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
      # dots are escaped so that only the literal package prefix is removed
      name = d[:java_class].sub(/^joelib2\.feature\.types\./,'')
      ["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/JoelibDescriptors.java for details."] unless joelibexclude.include? name
    end.compact.sort{|a,b| a[0] <=> b[0]}]

    DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))

    require_relative "unique_descriptors.rb"

    # Find or create the features for a set of descriptors.
    # @param desc [Hash] descriptor name ("<Library>.<descriptor>") => description
    # @return [Array] one feature per entry of desc
    # NOTE(review): for CDK entries the stored :descriptor here is
    # "<class>.<value name>", while unique_descriptors stores only "<class>"
    # - confirm which form the CdkDescriptors Java program expects.
    def self.descriptors desc=DESCRIPTORS
      desc.collect do |name,description|
        lib,descriptor = name.split('.',2)
        find_or_create_descriptor name, lib, descriptor, description
      end
    end

    # Find or create the features listed in UNIQUEDESCRIPTORS.
    # A CDK entry names a descriptor class and expands to one feature per value
    # that class provides; CDK entries without a matching class in
    # CDK_DESCRIPTIONS are skipped.
    # @return [Array] the features, in UNIQUEDESCRIPTORS order
    def self.unique_descriptors
      udesc = []
      UNIQUEDESCRIPTORS.each do |name|
        lib,descriptor = name.split('.',2)
        if lib == "Cdk"
          cdk = CDK_DESCRIPTIONS.find{|d| descriptor == d[:java_class].split('.').last.sub('Descriptor','') }
          next unless cdk # descriptor class not provided by the bundled CDK jar
          cdk[:names].each do |n|
            dname = "#{name}.#{n}"
            udesc << find_or_create_descriptor(dname, lib, descriptor, DESCRIPTORS[dname])
          end
        else
          udesc << find_or_create_descriptor(name, lib, descriptor, DESCRIPTORS[name])
        end
      end
      udesc
    end

    # @return [Array] features for all OpenBabel descriptors
    def self.openbabel_descriptors
      descriptors OBDESCRIPTORS
    end

    # @return [Array] features for all CDK descriptors
    def self.cdk_descriptors
      descriptors CDKDESCRIPTORS
    end

    # @return [Array] features for all JOELib descriptors
    def self.joelib_descriptors
      descriptors JOELIBDESCRIPTORS
    end

    # Shared find_or_create_by call for calculated, numeric descriptor features.
    def self.find_or_create_descriptor name, lib, descriptor, description
      find_or_create_by(:name => name, :library => lib, :descriptor => descriptor, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
    end
    private_class_method :find_or_create_descriptor

    # Calculate this descriptor for a compound.
    # Dispatches to the instance method named after the downcased library
    # (openbabel, cdk or joelib) and picks this feature's value from the result.
    # @param compound the compound to calculate the descriptor for
    # @return the value stored under this feature's name in the library result
    def calculate compound
      result = send library.downcase,descriptor,compound
      result[self.name]
    end

    # Calculate an OpenBabel descriptor from the compound's SMILES.
    # @return [Hash] "<Library>.<descriptor>" => value
    def openbabel descriptor, compound
      obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
      obmol = OpenBabel::OBMol.new
      obconversion = OpenBabel::OBConversion.new
      obconversion.set_in_format 'smi'
      obconversion.read_string obmol, compound.smiles
      {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
    end

    # Calculate a CDK descriptor (see java_descriptor).
    def cdk descriptor, compound
      java_descriptor "cdk", descriptor, compound
    end

    # Calculate a JOELib descriptor (see java_descriptor).
    def joelib descriptor, compound
      java_descriptor "joelib", descriptor, compound
    end

    private

    # Run one of the bundled Java descriptor programs on the compound's SDF.
    # The SDF is written to a uniquely named file in /tmp; the result is read
    # from "<sdf file><lib>.yaml". Both files are removed afterwards, also
    # when the Java call or the YAML parsing fails.
    # @param lib [String] "cdk" or "joelib"
    # @return the first entry of the YAML result
    def java_descriptor lib, descriptor, compound

      sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
      yaml_file = "#{sdf_3d}#{lib}.yaml"
      File.open(sdf_3d,"w+"){|f| f.print compound.sdf}

      # use java system call (rjb blocks within tasks)
      # use Tempfiles to avoid "Argument list too long" error
      case lib
      when "cdk"
        `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf_3d} #{descriptor}`
      when "joelib"
        `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf_3d} #{descriptor}`
      end
      YAML.load_file(yaml_file).first
    ensure
      # do not leave the temporary SDF and YAML files behind in /tmp
      [sdf_3d, yaml_file].each{|f| File.delete(f) if f && File.exist?(f)}
    end

    # Normalize a descriptor value: unwrap single-element arrays, convert
    # numeric values to Float and map "NaN", NaN and infinite values to nil.
    # NOTE(review): relies on a `numeric?` method that is not defined in this
    # class - presumably a project-wide extension; confirm it accepts nil.
    def fix_value val
      val = val.first if val.is_a?(Array) && val.size == 1
      val = nil if val == "NaN"
      if val.numeric?
        val = Float(val)
        val = nil if val.nan? || val.infinite?
      end
      val
    end

  end

end
data/lib/regression.rb CHANGED
@@ -1,223 +1,151 @@
1
- # TODO install R packages kernlab, caret, doMC, class, e1071
2
-
3
-
4
- # log transform activities (create new dataset)
5
- # scale, normalize features, might not be necessary
6
- # http://stats.stackexchange.com/questions/19216/variables-are-often-adjusted-e-g-standardised-before-making-a-model-when-is
7
- # http://stats.stackexchange.com/questions/7112/when-and-how-to-use-standardized-explanatory-variables-in-linear-regression
8
- # zero-order correlation and the semi-partial correlation
9
- # seems to be necessary for svm
10
- # http://stats.stackexchange.com/questions/77876/why-would-scaling-features-decrease-svm-performance?lq=1
11
- # http://stackoverflow.com/questions/15436367/svm-scaling-input-values
12
- # use lasso or elastic net??
13
- # select relevant features
14
- # remove features with a single value
15
- # remove correlated features
16
- # remove features not correlated with endpoint
17
1
  module OpenTox
18
2
  module Algorithm
19
3
 
20
4
  class Regression
21
5
 
22
- def self.weighted_average neighbors
6
+ def self.local_weighted_average compound, params
23
7
  weighted_sum = 0.0
24
8
  sim_sum = 0.0
9
+ neighbors = params[:neighbors]
25
10
  neighbors.each do |row|
26
- n,sim,acts = row
27
- acts.each do |act|
28
- weighted_sum += sim*Math.log10(act)
29
- sim_sum += sim
11
+ sim = row["tanimoto"]
12
+ if row["features"][params[:prediction_feature_id].to_s]
13
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
14
+ weighted_sum += sim*Math.log10(act)
15
+ sim_sum += sim
16
+ end
30
17
  end
31
18
  end
32
- confidence = sim_sum/neighbors.size.to_f
33
19
  sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
34
- {:value => prediction,:confidence => confidence}
20
+ {:value => prediction}
35
21
  end
36
22
 
37
- def self.weighted_average_with_relevant_fingerprints neighbors
38
- weighted_sum = 0.0
39
- sim_sum = 0.0
40
- fingerprint_features = []
41
- neighbors.each do |row|
42
- n,sim,acts = row
43
- neighbor = Compound.find n
44
- fingerprint_features += neighbor.fp4
45
- end
46
- fingerprint_features.uniq!
47
- p fingerprint_features
48
- =begin
49
- p n
50
- acts.each do |act|
51
- weighted_sum += sim*Math.log10(act)
52
- sim_sum += sim
23
+ # TODO explicit neighbors, also for physchem
24
+ def self.local_fingerprint_regression compound, params, method='pls'#, method_params="sigma=0.05"
25
+ neighbors = params[:neighbors]
26
+ return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
27
+ activities = []
28
+ fingerprints = {}
29
+ weights = []
30
+ fingerprint_ids = neighbors.collect{|row| Compound.find(row["_id"]).fingerprint}.flatten.uniq.sort
31
+
32
+ neighbors.each_with_index do |row,i|
33
+ neighbor = Compound.find row["_id"]
34
+ fingerprint = neighbor.fingerprint
35
+ if row["features"][params[:prediction_feature_id].to_s]
36
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
37
+ activities << Math.log10(act)
38
+ weights << row["tanimoto"]
39
+ fingerprint_ids.each_with_index do |id,j|
40
+ fingerprints[id] ||= []
41
+ fingerprints[id] << fingerprint.include?(id)
42
+ end
43
+ end
53
44
  end
54
45
  end
55
- =end
56
- confidence = sim_sum/neighbors.size.to_f
57
- sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
58
- {:value => prediction,:confidence => confidence}
59
- end
60
46
 
61
- # Local support vector regression from neighbors
62
- # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
63
- # @return [Numeric] A prediction value.
64
- def self.local_svm_regression neighbors, params={:min_train_performance => 0.1}
47
+ variables = []
48
+ data_frame = [activities]
49
+ fingerprints.each do |k,v|
50
+ unless v.uniq.size == 1
51
+ data_frame << v.collect{|m| m ? "T" : "F"}
52
+ variables << k
53
+ end
54
+ end
65
55
 
66
- confidence = 0.0
67
- prediction = nil
56
+ if variables.empty?
57
+ result = local_weighted_average(compound, params)
58
+ result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
59
+ return result
68
60
 
69
- $logger.debug "Local SVM."
70
- props = neighbors.collect{|row| row[3] }
71
- neighbors.shift
72
- activities = neighbors.collect{|n| n[2]}
73
- prediction = self.local_svm_prop( props, activities, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
74
- prediction = nil if (!prediction.nil? && prediction.infinite?)
75
- $logger.debug "Prediction: '#{prediction}' ('#{prediction.class}')."
76
- if prediction
77
- confidence = get_confidence({:sims => neighbors.collect{|n| n[1]}, :activities => activities})
78
61
  else
79
- confidence = nil if prediction.nil?
62
+ compound_features = variables.collect{|f| compound.fingerprint.include?(f) ? "T" : "F"}
63
+ prediction = r_model_prediction method, data_frame, variables, weights, compound_features
64
+ if prediction.nil? or prediction[:value].nil?
65
+ prediction = local_weighted_average(compound, params)
66
+ prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
67
+ return prediction
68
+ else
69
+ prediction[:prediction_interval] = [10**(prediction[:value]-1.96*prediction[:rmse]), 10**(prediction[:value]+1.96*prediction[:rmse])]
70
+ prediction[:value] = 10**prediction[:value]
71
+ prediction[:rmse] = 10**prediction[:rmse]
72
+ prediction
73
+ end
80
74
  end
81
- [prediction, confidence]
82
-
75
+
83
76
  end
84
77
 
78
+ def self.local_physchem_regression compound, params, method="plsr"#, method_params="ncomp = 4"
79
+
80
+ neighbors = params[:neighbors]
81
+ return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
82
+ return {:value => neighbors.first["features"][params[:prediction_feature_id]], :confidence => nil, :warning => "Only one similar compound in the training set"} unless neighbors.size > 1
83
+
84
+ activities = []
85
+ weights = []
86
+ physchem = {}
87
+
88
+ neighbors.each_with_index do |row,i|
89
+ neighbor = Compound.find row["_id"]
90
+ if row["features"][params[:prediction_feature_id].to_s]
91
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
92
+ activities << Math.log10(act)
93
+ weights << row["tanimoto"] # TODO cosine ?
94
+ neighbor.physchem.each do |pid,v| # insert physchem only if there is an activity
95
+ physchem[pid] ||= []
96
+ physchem[pid] << v
97
+ end
98
+ end
99
+ end
100
+ end
85
101
 
86
- # Local support vector prediction from neighbors.
87
- # Uses propositionalized setting.
88
- # Not to be called directly (use local_svm_regression or local_svm_classification).
89
- # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
90
- # @param [Array] activities, activities for neighbors.
91
- # @param [Float] min_train_performance, parameter to control censoring
92
- # @return [Numeric] A prediction value.
93
- def self.local_svm_prop(props, activities, min_train_performance)
102
+ # remove properties with a single value
103
+ physchem.each do |pid,v|
104
+ physchem.delete(pid) if v.uniq.size <= 1
105
+ end
94
106
 
95
- $logger.debug "Local SVM (Propositionalization / Kernlab Kernel)."
96
- n_prop = props[1..-1] # is a matrix, i.e. two nested Arrays.
97
- q_prop = props[0] # is an Array.
107
+ if physchem.empty?
108
+ result = local_weighted_average(compound, params)
109
+ result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
110
+ return result
98
111
 
99
- prediction = nil
100
- if activities.uniq.size == 1
101
- prediction = activities[0]
102
112
  else
103
- t = Time.now
104
- #$logger.debug gram_matrix.to_yaml
105
- #@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
106
- @r = Rserve::Connection.new#(true,false) # global R instance leads to Socket errors after a large number of requests
107
- rs = []
108
- ["caret", "doMC", "class"].each do |lib|
109
- #raise "failed to load R-package #{lib}" unless @r.void_eval "suppressPackageStartupMessages(library('#{lib}'))"
110
- rs << "suppressPackageStartupMessages(library('#{lib}'))"
113
+ data_frame = [activities] + physchem.keys.collect { |pid| physchem[pid] }
114
+ prediction = r_model_prediction method, data_frame, physchem.keys, weights, physchem.keys.collect{|pid| compound.physchem[pid]}
115
+ if prediction.nil?
116
+ prediction = local_weighted_average(compound, params)
117
+ prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
118
+ return prediction
119
+ else
120
+ prediction[:value] = 10**prediction[:value]
121
+ prediction
111
122
  end
112
- #@r.eval "registerDoMC()" # switch on parallel processing
113
- rs << "registerDoMC()" # switch on parallel processing
114
- #@r.eval "set.seed(1)"
115
- rs << "set.seed(1)"
116
- $logger.debug "Loading R packages: #{Time.now-t}"
117
- t = Time.now
118
- p n_prop
119
- begin
120
-
121
- # set data
122
- rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
123
- rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
124
- rs << "n_prop_x_size <- c(#{n_prop.size})"
125
- rs << "n_prop_y_size <- c(#{n_prop[0].size})"
126
- rs << "y <- c(#{activities.join(',')})"
127
- rs << "q_prop <- c(#{q_prop.join(',')})"
128
- rs << "y = matrix(y)"
129
- rs << "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
130
- rs << "q_prop = matrix(q_prop, 1, n_prop_y_size, byrow=T)"
131
-
132
- $logger.debug "Setting R data: #{Time.now-t}"
133
- t = Time.now
134
- # prepare data
135
- rs << "
136
- weights=NULL
137
- if (!(class(y) == 'numeric')) {
138
- y = factor(y)
139
- weights=unlist(as.list(prop.table(table(y))))
140
- weights=(weights-1)^2
141
- }
142
- "
143
-
144
- rs << "
145
- rem = nearZeroVar(prop_matrix)
146
- if (length(rem) > 0) {
147
- prop_matrix = prop_matrix[,-rem,drop=F]
148
- q_prop = q_prop[,-rem,drop=F]
149
- }
150
- rem = findCorrelation(cor(prop_matrix))
151
- if (length(rem) > 0) {
152
- prop_matrix = prop_matrix[,-rem,drop=F]
153
- q_prop = q_prop[,-rem,drop=F]
154
- }
155
- "
156
-
157
- #p @r.eval("y").to_ruby
158
- #p "weights"
159
- #p @r.eval("weights").to_ruby
160
- $logger.debug "Preparing R data: #{Time.now-t}"
161
- t = Time.now
162
- # model + support vectors
163
- #train_success = @r.eval <<-EOR
164
- rs << '
165
- model = train(prop_matrix,y,
166
- method="svmRadial",
167
- preProcess=c("center", "scale"),
168
- class.weights=weights,
169
- trControl=trainControl(method="LGOCV",number=10),
170
- tuneLength=8
171
- )
172
- perf = ifelse ( class(y)!="numeric", max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
173
- '
174
- File.open("/tmp/r.r","w+"){|f| f.puts rs.join("\n")}
175
- p rs.join("\n")
176
- p `Rscript /tmp/r.r`
177
- =begin
178
- @r.void_eval <<-EOR
179
- model = train(prop_matrix,y,
180
- method="svmRadial",
181
- #preProcess=c("center", "scale"),
182
- #class.weights=weights,
183
- #trControl=trainControl(method="LGOCV",number=10),
184
- #tuneLength=8
185
- )
186
- perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
187
- EOR
188
- =end
189
-
190
- $logger.debug "Creating R SVM model: #{Time.now-t}"
191
- t = Time.now
192
- if train_success
193
- # prediction
194
- @r.eval "predict(model,q_prop); p = predict(model,q_prop)" # kernlab bug: predict twice
195
- #@r.eval "p = predict(model,q_prop)" # kernlab bug: predict twice
196
- @r.eval "if (class(y)!='numeric') p = as.character(p)"
197
- prediction = @r.p
123
+ end
124
+
125
+ end
198
126
 
199
- # censoring
200
- prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance.to_f )
201
- prediction = nil if prediction =~ /NA/
202
- $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'"
203
- else
204
- $logger.debug "Model creation failed."
205
- prediction = nil
206
- end
207
- $logger.debug "R Prediction: #{Time.now-t}"
208
- rescue Exception => e
209
- $logger.debug "#{e.class}: #{e.message}"
210
- $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
211
- ensure
212
- #puts @r.inspect
213
- #TODO: broken pipe
214
- #@r.quit # free R
215
- end
127
+ def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values
128
+ R.assign "weights", training_weights
129
+ r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})"
130
+ R.eval "data <- #{r_data_frame}"
131
+ R.assign "features", training_features
132
+ R.eval "names(data) <- append(c('activities'),features)" #
133
+ begin
134
+ R.eval "model <- train(activities ~ ., data = data, method = '#{method}')"
135
+ rescue
136
+ return nil
216
137
  end
217
- prediction
138
+ R.eval "fingerprint <- data.frame(rbind(c(#{query_feature_values.join ','})))"
139
+ R.eval "names(fingerprint) <- features"
140
+ R.eval "prediction <- predict(model,fingerprint)"
141
+ {
142
+ :value => R.eval("prediction").to_f,
143
+ :rmse => R.eval("getTrainPerf(model)$TrainRMSE").to_f,
144
+ :r_squared => R.eval("getTrainPerf(model)$TrainRsquared").to_f,
145
+ }
218
146
  end
219
- end
220
147
 
148
+ end
221
149
  end
222
150
  end
223
151
 
@@ -26,15 +26,14 @@ module OpenTox
26
26
  define_singleton_method method do |uri,payload={},headers={},waiting_task=nil|
27
27
 
28
28
  # check input
29
- bad_request_error "Headers are not a hash: #{headers.inspect}", uri unless headers==nil or headers.is_a?(Hash)
29
+ bad_request_error "Headers are not a hash: #{headers.inspect} for #{uri}." unless headers==nil or headers.is_a?(Hash)
30
30
  headers[:subjectid] ||= @@subjectid
31
- bad_request_error "Invalid URI: '#{uri}'", uri unless URI.valid? uri
32
- #resource_not_found_error "URI '#{uri}' not found.", uri unless URI.accessible?(uri, @subjectid) unless URI.ssl?(uri)
31
+ bad_request_error "Invalid URI: '#{uri}'" unless URI.valid? uri
33
32
  # make sure that no header parameters are set in the payload
34
33
  [:accept,:content_type,:subjectid].each do |header|
35
34
  if defined? $aa || URI(uri).host == URI($aa[:uri]).host
36
35
  else
37
- bad_request_error "#{header} should be submitted in the headers", uri if payload and payload.is_a?(Hash) and payload[header]
36
+ bad_request_error "#{header} should be submitted in the headers of URI: #{uri}" if payload and payload.is_a?(Hash) and payload[header]
38
37
  end
39
38
  end
40
39
 
@@ -72,7 +71,7 @@ module OpenTox
72
71
  msg = "Could not parse error response from rest call '#{method}' to '#{uri}':\n#{response}"
73
72
  cause = nil
74
73
  end
75
- Object.method(error[:method]).call msg, uri, cause # call error method
74
+ Object.method(error[:method]).call "#{msg}, #{uri}, #{cause}" # call error method
76
75
  else
77
76
  response
78
77
  end
@@ -12,7 +12,7 @@ UNIQUEDESCRIPTORS = [
12
12
  "Openbabel.HBA1", #Number of Hydrogen Bond Acceptors 1 (JoelLib)
13
13
  "Openbabel.HBA2", #Number of Hydrogen Bond Acceptors 2 (JoelLib)
14
14
  "Openbabel.HBD", #Number of Hydrogen Bond Donors (JoelLib)
15
- "Openbabel.L5", #Lipinski Rule of Five
15
+ #"Openbabel.L5", #Lipinski Rule of Five # TODO Openbabel.L5 returns nil, investigate!!!
16
16
  "Openbabel.logP", #octanol/water partition coefficient
17
17
  "Openbabel.MP", #Melting point
18
18
  "Openbabel.MR", #molar refractivity
@@ -24,7 +24,7 @@ UNIQUEDESCRIPTORS = [
24
24
  "Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen and
25
25
  "Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens).
26
26
  "Cdk.AcidicGroupCount", #Returns the number of acidic groups.
27
- "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
27
+ #"Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
28
28
  #"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule.
29
29
  #"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule.
30
30
  #"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type.
@@ -56,7 +56,7 @@ UNIQUEDESCRIPTORS = [
56
56
  "Cdk.LengthOverBreadth", #Calculates the ratio of length to breadth.
57
57
  "Cdk.LongestAliphaticChain", #Returns the number of atoms in the longest aliphatic chain
58
58
  "Cdk.MDE", #Evaluate molecular distance edge descriptors for C, N and O
59
- "Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms .
59
+ #"Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms .
60
60
  "Cdk.MomentOfInertia", #Descriptor that calculates the principal moments of inertia and ratios of the principal moments. Als calculates the radius of gyration.
61
61
  "Cdk.PetitjeanNumber", #Descriptor that calculates the Petitjean Number of a molecule.
62
62
  "Cdk.PetitjeanShapeIndex", #The topological and geometric shape indices described Petitjean and Bath et al. respectively. Both measure the anisotropy in a molecule.
@@ -75,7 +75,7 @@ UNIQUEDESCRIPTORS = [
75
75
  "Joelib.count.NumberOfP", #no description available
76
76
  "Joelib.count.NumberOfO", #no description available
77
77
  "Joelib.count.NumberOfN", #no description available
78
- #"Joelib.count.AromaticBonds", #no description available
78
+ #"Joeli#.count.AromaticBonds", #no description available
79
79
  "Joelib.count.NumberOfI", #no description available
80
80
  "Joelib.count.NumberOfF", #no description available
81
81
  "Joelib.count.NumberOfC", #no description available
@@ -91,7 +91,7 @@ UNIQUEDESCRIPTORS = [
91
91
  "Joelib.GeometricalShapeCoefficient", #no description available
92
92
  #"Joelib.MolecularWeight", #no description available
93
93
  "Joelib.FractionRotatableBonds", #no description available
94
- #"Joelib.count.HBD2", #no description available
94
+ #"Joeli..count.HBD2", #no description available
95
95
  #"Joelib.count.HBD1", #no description available
96
96
  "Joelib.LogP", #no description available
97
97
  "Joelib.GraphShapeCoefficient", #no description available
@@ -116,5 +116,4 @@ UNIQUEDESCRIPTORS = [
116
116
  "Joelib.count.SOGroups", #no description available
117
117
  "Joelib.TopologicalDiameter", #no description available
118
118
  "Joelib.count.NumberOfHal", #no description available
119
-
120
- ].sort
119
+ ]