lazar 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/README.md +2 -1
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +15 -76
  6. data/ext/lazar/rinstall.R +9 -0
  7. data/lazar.gemspec +7 -7
  8. data/lib/classification.rb +5 -78
  9. data/lib/compound.rb +201 -44
  10. data/lib/crossvalidation.rb +224 -121
  11. data/lib/dataset.rb +83 -93
  12. data/lib/error.rb +1 -1
  13. data/lib/experiment.rb +99 -0
  14. data/lib/feature.rb +2 -54
  15. data/lib/lazar.rb +47 -34
  16. data/lib/leave-one-out-validation.rb +205 -0
  17. data/lib/model.rb +131 -76
  18. data/lib/opentox.rb +2 -2
  19. data/lib/overwrite.rb +37 -0
  20. data/lib/physchem.rb +133 -0
  21. data/lib/regression.rb +117 -189
  22. data/lib/rest-client-wrapper.rb +4 -5
  23. data/lib/unique_descriptors.rb +6 -7
  24. data/lib/validation.rb +63 -69
  25. data/test/all.rb +2 -2
  26. data/test/classification.rb +41 -0
  27. data/test/compound.rb +116 -7
  28. data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
  29. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
  30. data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
  31. data/test/data/batch_prediction.csv +25 -0
  32. data/test/data/batch_prediction_inchi_small.csv +4 -0
  33. data/test/data/batch_prediction_smiles_small.csv +4 -0
  34. data/test/data/hamster_carcinogenicity.json +3 -0
  35. data/test/data/loael.csv +568 -0
  36. data/test/dataset-long.rb +5 -8
  37. data/test/dataset.rb +31 -11
  38. data/test/default_environment.rb +11 -0
  39. data/test/descriptor.rb +26 -41
  40. data/test/error.rb +1 -3
  41. data/test/experiment.rb +301 -0
  42. data/test/feature.rb +22 -10
  43. data/test/lazar-long.rb +43 -23
  44. data/test/lazar-physchem-short.rb +19 -16
  45. data/test/prediction_models.rb +20 -0
  46. data/test/regression.rb +43 -0
  47. data/test/setup.rb +3 -1
  48. data/test/test_environment.rb +10 -0
  49. data/test/validation.rb +92 -26
  50. metadata +64 -38
  51. data/lib/SMARTS_InteLigand.txt +0 -983
  52. data/lib/bbrc.rb +0 -165
  53. data/lib/descriptor.rb +0 -247
  54. data/lib/neighbor.rb +0 -25
  55. data/lib/similarity.rb +0 -58
  56. data/mongoid.yml +0 -8
  57. data/test/descriptor-long.rb +0 -26
  58. data/test/fminer-long.rb +0 -38
  59. data/test/fminer.rb +0 -52
  60. data/test/lazar-fminer.rb +0 -50
  61. data/test/lazar-regression.rb +0 -27
data/lib/physchem.rb ADDED
@@ -0,0 +1,133 @@
1
+ module OpenTox
2
+
3
+ # Feature for physico-chemical descriptors
4
+ class PhysChem < NumericFeature
5
+
6
+ field :library, type: String
7
+ field :descriptor, type: String
8
+ field :description, type: String
9
+
10
+ JAVA_DIR = File.join(File.dirname(__FILE__),"..","java")
11
+ CDK_JAR = Dir[File.join(JAVA_DIR,"cdk-*jar")].last
12
+ JOELIB_JAR = File.join(JAVA_DIR,"joelib2.jar")
13
+ LOG4J_JAR = File.join(JAVA_DIR,"log4j.jar")
14
+ JMOL_JAR = File.join(JAVA_DIR,"Jmol.jar")
15
+
16
+ obexclude = ["cansmi","cansmiNS","formula","InChI","InChIKey","s","smarts","title","L5"]
17
+ OBDESCRIPTORS = Hash[OpenBabel::OBDescriptor.list_as_string("descriptors").split("\n").collect do |d|
18
+ name,description = d.split(/\s+/,2)
19
+ ["Openbabel."+name,description] unless obexclude.include? name
20
+ end.compact.sort{|a,b| a[0] <=> b[0]}]
21
+
22
+ cdkdescriptors = {}
23
+ CDK_DESCRIPTIONS = YAML.load(`java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptorInfo`)
24
+ CDK_DESCRIPTIONS.each do |d|
25
+ prefix="Cdk."+d[:java_class].split('.').last.sub(/Descriptor/,'')
26
+ d[:names].each { |name| cdkdescriptors[prefix+"."+name] = d[:description] }
27
+ end
28
+ CDKDESCRIPTORS = cdkdescriptors
29
+
30
+ # exclude Hashcode (not a physchem property) and GlobalTopologicalChargeIndex (Joelib bug)
31
+ joelibexclude = ["MoleculeHashcode","GlobalTopologicalChargeIndex"]
32
+ # strip Joelib messages from stdout
33
+ JOELIBDESCRIPTORS = Hash[YAML.load(`java -classpath #{JOELIB_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptorInfo | sed '0,/---/d'`).collect do |d|
34
+ name = d[:java_class].sub(/^joelib2.feature.types./,'')
35
+ ["Joelib."+name, "JOELIb does not provide meaningful descriptions, see java/JoelibDescriptors.java for details."] unless joelibexclude.include? name
36
+ end.compact.sort{|a,b| a[0] <=> b[0]}]
37
+
38
+ DESCRIPTORS = OBDESCRIPTORS.merge(CDKDESCRIPTORS.merge(JOELIBDESCRIPTORS))
39
+
40
+ require_relative "unique_descriptors.rb"
41
+
42
+ def self.descriptors desc=DESCRIPTORS
43
+ desc.collect do |name,description|
44
+ lib,desc = name.split('.',2)
45
+ self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
46
+ end
47
+ end
48
+
49
+ def self.unique_descriptors
50
+ udesc = []
51
+ UNIQUEDESCRIPTORS.each do |name|
52
+ lib,desc = name.split('.',2)
53
+ if lib == "Cdk"
54
+ CDK_DESCRIPTIONS.select{|d| desc == d[:java_class].split('.').last.sub('Descriptor','') }.first[:names].each do |n|
55
+ dname = "#{name}.#{n}"
56
+ description = DESCRIPTORS[dname]
57
+ udesc << self.find_or_create_by(:name => dname, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
58
+ end
59
+ else
60
+ description = DESCRIPTORS[name]
61
+ udesc << self.find_or_create_by(:name => name, :library => lib, :descriptor => desc, :description => description, :measured => false, :calculated => true, :numeric => true, :nominal => false)
62
+ end
63
+ end
64
+ udesc
65
+ end
66
+
67
+ def self.openbabel_descriptors
68
+ descriptors OBDESCRIPTORS
69
+ end
70
+
71
+ def self.cdk_descriptors
72
+ descriptors CDKDESCRIPTORS
73
+ end
74
+
75
+ def self.joelib_descriptors
76
+ descriptors JOELIBDESCRIPTORS
77
+ end
78
+
79
+ def calculate compound
80
+ result = send library.downcase,descriptor,compound
81
+ result[self.name]
82
+ end
83
+
84
+ def openbabel descriptor, compound
85
+ obdescriptor = OpenBabel::OBDescriptor.find_type descriptor
86
+ obmol = OpenBabel::OBMol.new
87
+ obconversion = OpenBabel::OBConversion.new
88
+ obconversion.set_in_format 'smi'
89
+ obconversion.read_string obmol, compound.smiles
90
+ {"#{library.capitalize}.#{descriptor}" => fix_value(obdescriptor.predict(obmol))}
91
+ end
92
+
93
+ def cdk descriptor, compound
94
+ java_descriptor "cdk", descriptor, compound
95
+ end
96
+
97
+ def joelib descriptor, compound
98
+ java_descriptor "joelib", descriptor, compound
99
+ end
100
+
101
+ private
102
+
103
+ def java_descriptor lib, descriptor, compound
104
+
105
+ sdf_3d = "/tmp/#{SecureRandom.uuid}.sdf"
106
+ File.open(sdf_3d,"w+"){|f| f.print compound.sdf}
107
+
108
+ # use java system call (rjb blocks within tasks)
109
+ # use Tempfiles to avoid "Argument list too long" error
110
+ case lib
111
+ when "cdk"
112
+ `java -classpath #{CDK_JAR}:#{JAVA_DIR} CdkDescriptors #{sdf_3d} #{descriptor}`
113
+ when "joelib"
114
+ `java -classpath #{JOELIB_JAR}:#{JMOL_JAR}:#{LOG4J_JAR}:#{JAVA_DIR} JoelibDescriptors #{sdf_3d} #{descriptor}`
115
+ end
116
+ result = YAML.load_file("#{sdf_3d}#{lib}.yaml").first
117
+ result.keys.each{|k| result[k] = result.delete(k)}
118
+ result
119
+ end
120
+
121
+ def fix_value val
122
+ val = val.first if val.is_a? Array and val.size == 1
123
+ val = nil if val == "NaN"
124
+ if val.numeric?
125
+ val = Float(val)
126
+ val = nil if val.nan? or val.infinite?
127
+ end
128
+ val
129
+ end
130
+
131
+ end
132
+
133
+ end
data/lib/regression.rb CHANGED
@@ -1,223 +1,151 @@
1
- # TODO install R packages kernlab, caret, doMC, class, e1071
2
-
3
-
4
- # log transform activities (create new dataset)
5
- # scale, normalize features, might not be necessary
6
- # http://stats.stackexchange.com/questions/19216/variables-are-often-adjusted-e-g-standardised-before-making-a-model-when-is
7
- # http://stats.stackexchange.com/questions/7112/when-and-how-to-use-standardized-explanatory-variables-in-linear-regression
8
- # zero-order correlation and the semi-partial correlation
9
- # seems to be necessary for svm
10
- # http://stats.stackexchange.com/questions/77876/why-would-scaling-features-decrease-svm-performance?lq=1
11
- # http://stackoverflow.com/questions/15436367/svm-scaling-input-values
12
- # use lasso or elastic net??
13
- # select relevant features
14
- # remove features with a single value
15
- # remove correlated features
16
- # remove features not correlated with endpoint
17
1
  module OpenTox
18
2
  module Algorithm
19
3
 
20
4
  class Regression
21
5
 
22
- def self.weighted_average neighbors
6
+ def self.local_weighted_average compound, params
23
7
  weighted_sum = 0.0
24
8
  sim_sum = 0.0
9
+ neighbors = params[:neighbors]
25
10
  neighbors.each do |row|
26
- n,sim,acts = row
27
- acts.each do |act|
28
- weighted_sum += sim*Math.log10(act)
29
- sim_sum += sim
11
+ sim = row["tanimoto"]
12
+ if row["features"][params[:prediction_feature_id].to_s]
13
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
14
+ weighted_sum += sim*Math.log10(act)
15
+ sim_sum += sim
16
+ end
30
17
  end
31
18
  end
32
- confidence = sim_sum/neighbors.size.to_f
33
19
  sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
34
- {:value => prediction,:confidence => confidence}
20
+ {:value => prediction}
35
21
  end
36
22
 
37
- def self.weighted_average_with_relevant_fingerprints neighbors
38
- weighted_sum = 0.0
39
- sim_sum = 0.0
40
- fingerprint_features = []
41
- neighbors.each do |row|
42
- n,sim,acts = row
43
- neighbor = Compound.find n
44
- fingerprint_features += neighbor.fp4
45
- end
46
- fingerprint_features.uniq!
47
- p fingerprint_features
48
- =begin
49
- p n
50
- acts.each do |act|
51
- weighted_sum += sim*Math.log10(act)
52
- sim_sum += sim
23
+ # TODO explicit neighbors, also for physchem
24
+ def self.local_fingerprint_regression compound, params, method='pls'#, method_params="sigma=0.05"
25
+ neighbors = params[:neighbors]
26
+ return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
27
+ activities = []
28
+ fingerprints = {}
29
+ weights = []
30
+ fingerprint_ids = neighbors.collect{|row| Compound.find(row["_id"]).fingerprint}.flatten.uniq.sort
31
+
32
+ neighbors.each_with_index do |row,i|
33
+ neighbor = Compound.find row["_id"]
34
+ fingerprint = neighbor.fingerprint
35
+ if row["features"][params[:prediction_feature_id].to_s]
36
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
37
+ activities << Math.log10(act)
38
+ weights << row["tanimoto"]
39
+ fingerprint_ids.each_with_index do |id,j|
40
+ fingerprints[id] ||= []
41
+ fingerprints[id] << fingerprint.include?(id)
42
+ end
43
+ end
53
44
  end
54
45
  end
55
- =end
56
- confidence = sim_sum/neighbors.size.to_f
57
- sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
58
- {:value => prediction,:confidence => confidence}
59
- end
60
46
 
61
- # Local support vector regression from neighbors
62
- # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
63
- # @return [Numeric] A prediction value.
64
- def self.local_svm_regression neighbors, params={:min_train_performance => 0.1}
47
+ variables = []
48
+ data_frame = [activities]
49
+ fingerprints.each do |k,v|
50
+ unless v.uniq.size == 1
51
+ data_frame << v.collect{|m| m ? "T" : "F"}
52
+ variables << k
53
+ end
54
+ end
65
55
 
66
- confidence = 0.0
67
- prediction = nil
56
+ if variables.empty?
57
+ result = local_weighted_average(compound, params)
58
+ result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
59
+ return result
68
60
 
69
- $logger.debug "Local SVM."
70
- props = neighbors.collect{|row| row[3] }
71
- neighbors.shift
72
- activities = neighbors.collect{|n| n[2]}
73
- prediction = self.local_svm_prop( props, activities, params[:min_train_performance]) # params[:props].nil? signals non-prop setting
74
- prediction = nil if (!prediction.nil? && prediction.infinite?)
75
- $logger.debug "Prediction: '#{prediction}' ('#{prediction.class}')."
76
- if prediction
77
- confidence = get_confidence({:sims => neighbors.collect{|n| n[1]}, :activities => activities})
78
61
  else
79
- confidence = nil if prediction.nil?
62
+ compound_features = variables.collect{|f| compound.fingerprint.include?(f) ? "T" : "F"}
63
+ prediction = r_model_prediction method, data_frame, variables, weights, compound_features
64
+ if prediction.nil? or prediction[:value].nil?
65
+ prediction = local_weighted_average(compound, params)
66
+ prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
67
+ return prediction
68
+ else
69
+ prediction[:prediction_interval] = [10**(prediction[:value]-1.96*prediction[:rmse]), 10**(prediction[:value]+1.96*prediction[:rmse])]
70
+ prediction[:value] = 10**prediction[:value]
71
+ prediction[:rmse] = 10**prediction[:rmse]
72
+ prediction
73
+ end
80
74
  end
81
- [prediction, confidence]
82
-
75
+
83
76
  end
84
77
 
78
+ def self.local_physchem_regression compound, params, method="plsr"#, method_params="ncomp = 4"
79
+
80
+ neighbors = params[:neighbors]
81
+ return {:value => nil, :confidence => nil, :warning => "No similar compounds in the training data"} unless neighbors.size > 0
82
+ return {:value => neighbors.first["features"][params[:prediction_feature_id]], :confidence => nil, :warning => "Only one similar compound in the training set"} unless neighbors.size > 1
83
+
84
+ activities = []
85
+ weights = []
86
+ physchem = {}
87
+
88
+ neighbors.each_with_index do |row,i|
89
+ neighbor = Compound.find row["_id"]
90
+ if row["features"][params[:prediction_feature_id].to_s]
91
+ row["features"][params[:prediction_feature_id].to_s].each do |act|
92
+ activities << Math.log10(act)
93
+ weights << row["tanimoto"] # TODO cosine ?
94
+ neighbor.physchem.each do |pid,v| # insert physchem only if there is an activity
95
+ physchem[pid] ||= []
96
+ physchem[pid] << v
97
+ end
98
+ end
99
+ end
100
+ end
85
101
 
86
- # Local support vector prediction from neighbors.
87
- # Uses propositionalized setting.
88
- # Not to be called directly (use local_svm_regression or local_svm_classification).
89
- # @param [Array] props, propositionalization of neighbors and query structure e.g. [ Array_for_q, two-nested-Arrays_for_n ]
90
- # @param [Array] activities, activities for neighbors.
91
- # @param [Float] min_train_performance, parameter to control censoring
92
- # @return [Numeric] A prediction value.
93
- def self.local_svm_prop(props, activities, min_train_performance)
102
+ # remove properties with a single value
103
+ physchem.each do |pid,v|
104
+ physchem.delete(pid) if v.uniq.size <= 1
105
+ end
94
106
 
95
- $logger.debug "Local SVM (Propositionalization / Kernlab Kernel)."
96
- n_prop = props[1..-1] # is a matrix, i.e. two nested Arrays.
97
- q_prop = props[0] # is an Array.
107
+ if physchem.empty?
108
+ result = local_weighted_average(compound, params)
109
+ result[:warning] = "No variables for regression model. Using weighted average of similar compounds."
110
+ return result
98
111
 
99
- prediction = nil
100
- if activities.uniq.size == 1
101
- prediction = activities[0]
102
112
  else
103
- t = Time.now
104
- #$logger.debug gram_matrix.to_yaml
105
- #@r = RinRuby.new(true,false) # global R instance leads to Socket errors after a large number of requests
106
- @r = Rserve::Connection.new#(true,false) # global R instance leads to Socket errors after a large number of requests
107
- rs = []
108
- ["caret", "doMC", "class"].each do |lib|
109
- #raise "failed to load R-package #{lib}" unless @r.void_eval "suppressPackageStartupMessages(library('#{lib}'))"
110
- rs << "suppressPackageStartupMessages(library('#{lib}'))"
113
+ data_frame = [activities] + physchem.keys.collect { |pid| physchem[pid] }
114
+ prediction = r_model_prediction method, data_frame, physchem.keys, weights, physchem.keys.collect{|pid| compound.physchem[pid]}
115
+ if prediction.nil?
116
+ prediction = local_weighted_average(compound, params)
117
+ prediction[:warning] = "Could not create local PLS model. Using weighted average of similar compounds."
118
+ return prediction
119
+ else
120
+ prediction[:value] = 10**prediction[:value]
121
+ prediction
111
122
  end
112
- #@r.eval "registerDoMC()" # switch on parallel processing
113
- rs << "registerDoMC()" # switch on parallel processing
114
- #@r.eval "set.seed(1)"
115
- rs << "set.seed(1)"
116
- $logger.debug "Loading R packages: #{Time.now-t}"
117
- t = Time.now
118
- p n_prop
119
- begin
120
-
121
- # set data
122
- rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
123
- rs << "n_prop <- c(#{n_prop.flatten.join(',')})"
124
- rs << "n_prop_x_size <- c(#{n_prop.size})"
125
- rs << "n_prop_y_size <- c(#{n_prop[0].size})"
126
- rs << "y <- c(#{activities.join(',')})"
127
- rs << "q_prop <- c(#{q_prop.join(',')})"
128
- rs << "y = matrix(y)"
129
- rs << "prop_matrix = matrix(n_prop, n_prop_x_size, n_prop_y_size, byrow=T)"
130
- rs << "q_prop = matrix(q_prop, 1, n_prop_y_size, byrow=T)"
131
-
132
- $logger.debug "Setting R data: #{Time.now-t}"
133
- t = Time.now
134
- # prepare data
135
- rs << "
136
- weights=NULL
137
- if (!(class(y) == 'numeric')) {
138
- y = factor(y)
139
- weights=unlist(as.list(prop.table(table(y))))
140
- weights=(weights-1)^2
141
- }
142
- "
143
-
144
- rs << "
145
- rem = nearZeroVar(prop_matrix)
146
- if (length(rem) > 0) {
147
- prop_matrix = prop_matrix[,-rem,drop=F]
148
- q_prop = q_prop[,-rem,drop=F]
149
- }
150
- rem = findCorrelation(cor(prop_matrix))
151
- if (length(rem) > 0) {
152
- prop_matrix = prop_matrix[,-rem,drop=F]
153
- q_prop = q_prop[,-rem,drop=F]
154
- }
155
- "
156
-
157
- #p @r.eval("y").to_ruby
158
- #p "weights"
159
- #p @r.eval("weights").to_ruby
160
- $logger.debug "Preparing R data: #{Time.now-t}"
161
- t = Time.now
162
- # model + support vectors
163
- #train_success = @r.eval <<-EOR
164
- rs << '
165
- model = train(prop_matrix,y,
166
- method="svmRadial",
167
- preProcess=c("center", "scale"),
168
- class.weights=weights,
169
- trControl=trainControl(method="LGOCV",number=10),
170
- tuneLength=8
171
- )
172
- perf = ifelse ( class(y)!="numeric", max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
173
- '
174
- File.open("/tmp/r.r","w+"){|f| f.puts rs.join("\n")}
175
- p rs.join("\n")
176
- p `Rscript /tmp/r.r`
177
- =begin
178
- @r.void_eval <<-EOR
179
- model = train(prop_matrix,y,
180
- method="svmRadial",
181
- #preProcess=c("center", "scale"),
182
- #class.weights=weights,
183
- #trControl=trainControl(method="LGOCV",number=10),
184
- #tuneLength=8
185
- )
186
- perf = ifelse ( class(y)!='numeric', max(model$results$Accuracy), model$results[which.min(model$results$RMSE),]$Rsquared )
187
- EOR
188
- =end
189
-
190
- $logger.debug "Creating R SVM model: #{Time.now-t}"
191
- t = Time.now
192
- if train_success
193
- # prediction
194
- @r.eval "predict(model,q_prop); p = predict(model,q_prop)" # kernlab bug: predict twice
195
- #@r.eval "p = predict(model,q_prop)" # kernlab bug: predict twice
196
- @r.eval "if (class(y)!='numeric') p = as.character(p)"
197
- prediction = @r.p
123
+ end
124
+
125
+ end
198
126
 
199
- # censoring
200
- prediction = nil if ( @r.perf.nan? || @r.perf < min_train_performance.to_f )
201
- prediction = nil if prediction =~ /NA/
202
- $logger.debug "Performance: '#{sprintf("%.2f", @r.perf)}'"
203
- else
204
- $logger.debug "Model creation failed."
205
- prediction = nil
206
- end
207
- $logger.debug "R Prediction: #{Time.now-t}"
208
- rescue Exception => e
209
- $logger.debug "#{e.class}: #{e.message}"
210
- $logger.debug "Backtrace:\n\t#{e.backtrace.join("\n\t")}"
211
- ensure
212
- #puts @r.inspect
213
- #TODO: broken pipe
214
- #@r.quit # free R
215
- end
127
+ def self.r_model_prediction method, training_data, training_features, training_weights, query_feature_values
128
+ R.assign "weights", training_weights
129
+ r_data_frame = "data.frame(#{training_data.collect{|r| "c(#{r.join(',')})"}.join(', ')})"
130
+ R.eval "data <- #{r_data_frame}"
131
+ R.assign "features", training_features
132
+ R.eval "names(data) <- append(c('activities'),features)" #
133
+ begin
134
+ R.eval "model <- train(activities ~ ., data = data, method = '#{method}')"
135
+ rescue
136
+ return nil
216
137
  end
217
- prediction
138
+ R.eval "fingerprint <- data.frame(rbind(c(#{query_feature_values.join ','})))"
139
+ R.eval "names(fingerprint) <- features"
140
+ R.eval "prediction <- predict(model,fingerprint)"
141
+ {
142
+ :value => R.eval("prediction").to_f,
143
+ :rmse => R.eval("getTrainPerf(model)$TrainRMSE").to_f,
144
+ :r_squared => R.eval("getTrainPerf(model)$TrainRsquared").to_f,
145
+ }
218
146
  end
219
- end
220
147
 
148
+ end
221
149
  end
222
150
  end
223
151
 
@@ -26,15 +26,14 @@ module OpenTox
26
26
  define_singleton_method method do |uri,payload={},headers={},waiting_task=nil|
27
27
 
28
28
  # check input
29
- bad_request_error "Headers are not a hash: #{headers.inspect}", uri unless headers==nil or headers.is_a?(Hash)
29
+ bad_request_error "Headers are not a hash: #{headers.inspect} for #{uri}." unless headers==nil or headers.is_a?(Hash)
30
30
  headers[:subjectid] ||= @@subjectid
31
- bad_request_error "Invalid URI: '#{uri}'", uri unless URI.valid? uri
32
- #resource_not_found_error "URI '#{uri}' not found.", uri unless URI.accessible?(uri, @subjectid) unless URI.ssl?(uri)
31
+ bad_request_error "Invalid URI: '#{uri}'" unless URI.valid? uri
33
32
  # make sure that no header parameters are set in the payload
34
33
  [:accept,:content_type,:subjectid].each do |header|
35
34
  if defined? $aa || URI(uri).host == URI($aa[:uri]).host
36
35
  else
37
- bad_request_error "#{header} should be submitted in the headers", uri if payload and payload.is_a?(Hash) and payload[header]
36
+ bad_request_error "#{header} should be submitted in the headers of URI: #{uri}" if payload and payload.is_a?(Hash) and payload[header]
38
37
  end
39
38
  end
40
39
 
@@ -72,7 +71,7 @@ module OpenTox
72
71
  msg = "Could not parse error response from rest call '#{method}' to '#{uri}':\n#{response}"
73
72
  cause = nil
74
73
  end
75
- Object.method(error[:method]).call msg, uri, cause # call error method
74
+ Object.method(error[:method]).call "#{msg}, #{uri}, #{cause}" # call error method
76
75
  else
77
76
  response
78
77
  end
@@ -12,7 +12,7 @@ UNIQUEDESCRIPTORS = [
12
12
  "Openbabel.HBA1", #Number of Hydrogen Bond Acceptors 1 (JoelLib)
13
13
  "Openbabel.HBA2", #Number of Hydrogen Bond Acceptors 2 (JoelLib)
14
14
  "Openbabel.HBD", #Number of Hydrogen Bond Donors (JoelLib)
15
- "Openbabel.L5", #Lipinski Rule of Five
15
+ #"Openbabel.L5", #Lipinski Rule of Five # TODO Openbabel.L5 returns nil, investigate!!!
16
16
  "Openbabel.logP", #octanol/water partition coefficient
17
17
  "Openbabel.MP", #Melting point
18
18
  "Openbabel.MR", #molar refractivity
@@ -24,7 +24,7 @@ UNIQUEDESCRIPTORS = [
24
24
  "Cdk.ALOGP", #Calculates atom additive logP and molar refractivity values as described by Ghose and Crippen and
25
25
  "Cdk.APol", #Descriptor that calculates the sum of the atomic polarizabilities (including implicit hydrogens).
26
26
  "Cdk.AcidicGroupCount", #Returns the number of acidic groups.
27
- "Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
27
+ #"Cdk.AminoAcidCount", #Returns the number of amino acids found in the system
28
28
  #"Cdk.AromaticAtomsCount", #Descriptor based on the number of aromatic atoms of a molecule.
29
29
  #"Cdk.AromaticBondsCount", #Descriptor based on the number of aromatic bonds of a molecule.
30
30
  #"Cdk.AtomCount", #Descriptor based on the number of atoms of a certain element type.
@@ -56,7 +56,7 @@ UNIQUEDESCRIPTORS = [
56
56
  "Cdk.LengthOverBreadth", #Calculates the ratio of length to breadth.
57
57
  "Cdk.LongestAliphaticChain", #Returns the number of atoms in the longest aliphatic chain
58
58
  "Cdk.MDE", #Evaluate molecular distance edge descriptors for C, N and O
59
- "Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms .
59
+ #"Cdk.MannholdLogP", #Descriptor that calculates the LogP based on a simple equation using the number of carbons and hetero atoms .
60
60
  "Cdk.MomentOfInertia", #Descriptor that calculates the principal moments of inertia and ratios of the principal moments. Also calculates the radius of gyration.
61
61
  "Cdk.PetitjeanNumber", #Descriptor that calculates the Petitjean Number of a molecule.
62
62
  "Cdk.PetitjeanShapeIndex", #The topological and geometric shape indices described by Petitjean and Bath et al. respectively. Both measure the anisotropy in a molecule.
@@ -75,7 +75,7 @@ UNIQUEDESCRIPTORS = [
75
75
  "Joelib.count.NumberOfP", #no description available
76
76
  "Joelib.count.NumberOfO", #no description available
77
77
  "Joelib.count.NumberOfN", #no description available
78
- #"Joelib.count.AromaticBonds", #no description available
78
+ #"Joelib.count.AromaticBonds", #no description available
79
79
  "Joelib.count.NumberOfI", #no description available
80
80
  "Joelib.count.NumberOfF", #no description available
81
81
  "Joelib.count.NumberOfC", #no description available
@@ -91,7 +91,7 @@ UNIQUEDESCRIPTORS = [
91
91
  "Joelib.GeometricalShapeCoefficient", #no description available
92
92
  #"Joelib.MolecularWeight", #no description available
93
93
  "Joelib.FractionRotatableBonds", #no description available
94
- #"Joelib.count.HBD2", #no description available
94
+ #"Joelib.count.HBD2", #no description available
95
95
  #"Joelib.count.HBD1", #no description available
96
96
  "Joelib.LogP", #no description available
97
97
  "Joelib.GraphShapeCoefficient", #no description available
@@ -116,5 +116,4 @@ UNIQUEDESCRIPTORS = [
116
116
  "Joelib.count.SOGroups", #no description available
117
117
  "Joelib.TopologicalDiameter", #no description available
118
118
  "Joelib.count.NumberOfHal", #no description available
119
-
120
- ].sort
119
+ ]