lazar 0.0.7 → 0.0.9

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/README.md +2 -1
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +15 -76
  6. data/ext/lazar/rinstall.R +9 -0
  7. data/lazar.gemspec +7 -7
  8. data/lib/classification.rb +5 -78
  9. data/lib/compound.rb +201 -44
  10. data/lib/crossvalidation.rb +224 -121
  11. data/lib/dataset.rb +83 -93
  12. data/lib/error.rb +1 -1
  13. data/lib/experiment.rb +99 -0
  14. data/lib/feature.rb +2 -54
  15. data/lib/lazar.rb +47 -34
  16. data/lib/leave-one-out-validation.rb +205 -0
  17. data/lib/model.rb +131 -76
  18. data/lib/opentox.rb +2 -2
  19. data/lib/overwrite.rb +37 -0
  20. data/lib/physchem.rb +133 -0
  21. data/lib/regression.rb +117 -189
  22. data/lib/rest-client-wrapper.rb +4 -5
  23. data/lib/unique_descriptors.rb +6 -7
  24. data/lib/validation.rb +63 -69
  25. data/test/all.rb +2 -2
  26. data/test/classification.rb +41 -0
  27. data/test/compound.rb +116 -7
  28. data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
  29. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
  30. data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
  31. data/test/data/batch_prediction.csv +25 -0
  32. data/test/data/batch_prediction_inchi_small.csv +4 -0
  33. data/test/data/batch_prediction_smiles_small.csv +4 -0
  34. data/test/data/hamster_carcinogenicity.json +3 -0
  35. data/test/data/loael.csv +568 -0
  36. data/test/dataset-long.rb +5 -8
  37. data/test/dataset.rb +31 -11
  38. data/test/default_environment.rb +11 -0
  39. data/test/descriptor.rb +26 -41
  40. data/test/error.rb +1 -3
  41. data/test/experiment.rb +301 -0
  42. data/test/feature.rb +22 -10
  43. data/test/lazar-long.rb +43 -23
  44. data/test/lazar-physchem-short.rb +19 -16
  45. data/test/prediction_models.rb +20 -0
  46. data/test/regression.rb +43 -0
  47. data/test/setup.rb +3 -1
  48. data/test/test_environment.rb +10 -0
  49. data/test/validation.rb +92 -26
  50. metadata +64 -38
  51. data/lib/SMARTS_InteLigand.txt +0 -983
  52. data/lib/bbrc.rb +0 -165
  53. data/lib/descriptor.rb +0 -247
  54. data/lib/neighbor.rb +0 -25
  55. data/lib/similarity.rb +0 -58
  56. data/mongoid.yml +0 -8
  57. data/test/descriptor-long.rb +0 -26
  58. data/test/fminer-long.rb +0 -38
  59. data/test/fminer.rb +0 -52
  60. data/test/lazar-fminer.rb +0 -50
  61. data/test/lazar-regression.rb +0 -27
data/test/dataset.rb CHANGED
@@ -8,10 +8,22 @@ class DatasetTest < MiniTest::Test
8
8
  d1 = Dataset.new
9
9
  d1.save
10
10
  datasets = Dataset.all
11
- assert_equal Dataset, datasets.first.class
11
+ assert datasets.first.is_a?(Dataset), "#{datasets.first} is not a Dataset."
12
12
  d1.delete
13
13
  end
14
14
 
15
+ def test_create_without_features_smiles_and_inchi
16
+ ["smiles", "inchi"].each do |type|
17
+ d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
18
+ assert_equal Dataset, d.class
19
+ refute_nil d.id
20
+ dataset = Dataset.find d.id
21
+ #p dataset.compounds
22
+ assert_equal 3, d.compounds.size.to_i
23
+ d.delete
24
+ end
25
+ end
26
+
15
27
  def test_create_empty
16
28
  d = Dataset.new
17
29
  assert_equal Dataset, d.class
@@ -57,19 +69,15 @@ class DatasetTest < MiniTest::Test
57
69
  assert_equal 3, d.compounds.size
58
70
  assert_equal 2, d.features.size
59
71
  assert_equal [[1,2],[4,5],[6,7]], d.data_entries
60
- d.save_all
72
+ d.save
61
73
  # check if dataset has been saved correctly
62
74
  new_dataset = Dataset.find d.id
63
75
  assert_equal 3, new_dataset.compounds.size
64
76
  assert_equal 2, new_dataset.features.size
65
77
  assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
66
78
  d.delete
67
- assert_raises Mongoid::Errors::DocumentNotFound do
68
- Dataset.find d.id
69
- end
70
- assert_raises Mongoid::Errors::DocumentNotFound do
71
- Dataset.find new_dataset.id
72
- end
79
+ assert_nil Dataset.find d.id
80
+ assert_nil Dataset.find new_dataset.id
73
81
  end
74
82
 
75
83
  def test_dataset_accessors
@@ -78,7 +86,7 @@ class DatasetTest < MiniTest::Test
78
86
  new_dataset = Dataset.find d.id
79
87
  # get metadata
80
88
  assert_match "multicolumn.csv", new_dataset.source
81
- assert_equal "multicolumn.csv", new_dataset.title
89
+ assert_equal "multicolumn", new_dataset.name
82
90
  # get features
83
91
  assert_equal 6, new_dataset.features.size
84
92
  assert_equal 7, new_dataset.compounds.size
@@ -119,7 +127,7 @@ class DatasetTest < MiniTest::Test
119
127
  original_csv.shift
120
128
  csv.each_with_index do |row,i|
121
129
  compound = Compound.from_smiles row.shift
122
- original_compound = Compound.from_smiles original_csv[i].shift
130
+ original_compound = Compound.from_smiles original_csv[i].shift.strip
123
131
  assert_equal original_compound.inchi, compound.inchi
124
132
  row.each_with_index do |v,j|
125
133
  if v.numeric?
@@ -161,7 +169,7 @@ class DatasetTest < MiniTest::Test
161
169
  def test_from_csv2
162
170
  File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
163
171
  dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
164
- assert_equal "Cannot parse SMILES compound ' ' at position 3, all entries are ignored.", dataset.warnings.join
172
+ assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join
165
173
  File.delete "#{DATA_DIR}/temp_test.csv"
166
174
  dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
167
175
  dataset.delete
@@ -195,5 +203,17 @@ class DatasetTest < MiniTest::Test
195
203
  assert_equal 0.00323, d2.data_entries[5][0]
196
204
  end
197
205
 
206
+ def test_folds
207
+ dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
208
+ dataset.folds(10).each do |fold|
209
+ fold.each do |d|
210
+ assert_equal d.data_entries.size, d.compound_ids.size
211
+ assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size
212
+ end
213
+ assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size
214
+ end
215
+ #puts dataset.folds 10
216
+ end
217
+
198
218
  end
199
219
 
@@ -0,0 +1,11 @@
1
+ require 'minitest/autorun'
2
+ require_relative '../lib/lazar.rb'
3
+ include OpenTox
4
+ class DefaultEnvironmentTest < MiniTest::Test
5
+ def test_lazar_environment
6
+ assert_equal "production", ENV["LAZAR_ENV"]
7
+ assert_equal "production", ENV["MONGOID_ENV"]
8
+ assert_equal "production", ENV["RACK_ENV"]
9
+ assert_equal "production", Mongoid.clients["default"]["database"]
10
+ end
11
+ end
data/test/descriptor.rb CHANGED
@@ -4,80 +4,65 @@ class DescriptorTest < MiniTest::Test
4
4
 
5
5
  def test_list
6
6
  # check available descriptors
7
- @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
8
- assert_equal 111,@descriptors.size,"wrong num physchem descriptors"
9
- @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES
10
- assert_equal 356,@descriptor_values.size,"wrong num physchem descriptors"
11
- sum = 0
12
- [ @descriptors, @descriptor_values ].each do |desc|
13
- {"Openbabel"=>16,"Cdk"=>(desc==@descriptors ? 50 : 295),"Joelib"=>45}.each do |k,v|
14
- assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
15
- sum += v
16
- end
17
- end
18
- assert_equal (111+356),sum
7
+ assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
8
+ assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
9
+ assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
10
+ assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
19
11
  end
20
12
 
21
13
  def test_smarts
22
14
  c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
23
15
  File.open("tmp.png","w+"){|f| f.puts c.png}
24
16
  s = Smarts.find_or_create_by(:smarts => "F=F")
25
- result = OpenTox::Algorithm::Descriptor.smarts_match c, s
17
+ result = c.smarts_match [s]
26
18
  assert_equal [1], result
27
19
  smarts = ["CC", "C", "C=C", "CO", "F=F", "C1CCCC1", "NN"].collect{|s| Smarts.find_or_create_by(:smarts => s)}
28
- result = OpenTox::Algorithm::Descriptor.smarts_match c, smarts
20
+ result = c.smarts_match smarts
29
21
  assert_equal [1, 1, 1, 0, 1, 1, 0], result
30
22
  smarts_count = [10, 6, 2, 0, 2, 10, 0]
31
- result = OpenTox::Algorithm::Descriptor.smarts_count c, smarts
23
+ result = c.smarts_match smarts, true
32
24
  assert_equal smarts_count, result
33
25
  end
34
26
 
35
27
  def test_compound_openbabel_single
36
28
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
37
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Openbabel.logP"]
38
- assert_equal 1.12518, result.first
29
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
30
+ assert_equal 1.12518, result.first.last.round(5)
39
31
  end
40
32
 
41
33
  def test_compound_cdk_single
42
34
  c = OpenTox::Compound.from_smiles "c1ccccc1"
43
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
44
- assert_equal [12], result
35
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
36
+ assert_equal 12, result.first.last
45
37
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
46
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
47
- assert_equal [17], result
48
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.CarbonTypes"]
38
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
39
+ assert_equal 17, result.first.last
49
40
  c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
50
- assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
41
+ physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
42
+ result = c.physchem physchem_features
43
+ assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
51
44
  end
52
45
 
53
46
  def test_compound_joelib_single
54
47
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
55
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Joelib.LogP"]
56
- assert_equal [2.65908], result
48
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
49
+ assert_equal 2.65908, result.first.last
57
50
  end
58
51
 
59
52
  def test_compound_all
60
53
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
61
- result = OpenTox::Algorithm::Descriptor.physchem c
62
- assert_equal 332, result.size
63
- assert_equal 30.8723, result[2]
64
- assert_equal 1.12518, result[328]
54
+ result = c.physchem PhysChem.descriptors
55
+ amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
56
+ sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
57
+ assert_equal 30.8723, result[amr.id.to_s]
58
+ assert_equal 5, result[sbonds.id.to_s]
65
59
  end
66
60
 
67
61
  def test_compound_descriptor_parameters
68
62
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
69
- result = OpenTox::Algorithm::Descriptor.physchem c, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]#, true
70
- assert_equal 12, result.size
71
- assert_equal [1.12518, 17.0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2.65908], result#.last
72
- end
73
-
74
- def test_dataset_descriptor_parameters
75
- dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
76
- d = OpenTox::Algorithm::Descriptor.physchem dataset, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]
77
- assert_kind_of Dataset, d
78
- assert_equal dataset.compounds, d.compounds
79
- assert_equal dataset.compounds.size, d.data_entries.size
80
- assert_equal 12, d.data_entries.first.size
63
+ result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
64
+ assert_equal 3, result.size
65
+ assert_equal [1.12518, 17.0, 2.65908], result.values.collect{|v| v.round 5}
81
66
  end
82
67
 
83
68
  end
data/test/error.rb CHANGED
@@ -4,9 +4,7 @@ class ErrorTest < MiniTest::Test
4
4
 
5
5
  def test_bad_request
6
6
  object = OpenTox::Feature.new
7
- assert_raises Mongoid::Errors::DocumentNotFound do
8
- response = OpenTox::Feature.find(object.id)
9
- end
7
+ assert_nil OpenTox::Feature.find(object.id)
10
8
  end
11
9
 
12
10
  def test_error_methods
@@ -0,0 +1,301 @@
1
+ require_relative "setup.rb"
2
+
3
+ class ExperimentTest < MiniTest::Test
4
+
5
+ def test_regression_experiment
6
+ skip
7
+ datasets = [
8
+ "EPAFHM.medi.csv",
9
+ #"EPAFHM.csv",
10
+ #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
11
+ "LOAEL_mmol_corrected_smiles.csv"
12
+ ]
13
+ experiment = Experiment.create(
14
+ :name => "Default regression for datasets #{datasets}.",
15
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
16
+ :model_settings => [
17
+ {
18
+ :algorithm => "OpenTox::Model::LazarRegression",
19
+ }
20
+ ]
21
+ )
22
+ #experiment.run
23
+ puts experiment.report.to_yaml
24
+ assert_equal datasets.size, experiment.results.size
25
+ experiment.results.each do |dataset_id, result|
26
+ assert_equal 1, result.size
27
+ result.each do |r|
28
+ assert_kind_of BSON::ObjectId, r[:model_id]
29
+ assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
30
+ end
31
+ end
32
+ end
33
+
34
+ def test_classification_experiment
35
+
36
+ skip
37
+ datasets = [ "hamster_carcinogenicity.csv" ]
38
+ experiment = Experiment.create(
39
+ :name => "Fminer vs fingerprint classification for datasets #{datasets}.",
40
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
41
+ :model_settings => [
42
+ {
43
+ :algorithm => "OpenTox::Model::LazarClassification",
44
+ },{
45
+ :algorithm => "OpenTox::Model::LazarClassification",
46
+ :neighbor_algorithm_parameter => {:min_sim => 0.3}
47
+ },
48
+ #{
49
+ #:algorithm => "OpenTox::Model::LazarFminerClassification",
50
+ #}
51
+ ]
52
+ )
53
+ #experiment.run
54
+ =begin
55
+ experiment = Experiment.find "55f944a22b72ed7de2000000"
56
+ =end
57
+ puts experiment.report.to_yaml
58
+ experiment.results.each do |dataset_id, result|
59
+ assert_equal 2, result.size
60
+ result.each do |r|
61
+ assert_kind_of BSON::ObjectId, r[:model_id]
62
+ assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
63
+ end
64
+ end
65
+ end
66
+
67
+ def test_regression_fingerprints
68
+ skip
69
+ #=begin
70
+ datasets = [
71
+ "EPAFHM.medi.csv",
72
+ #"LOAEL_mmol_corrected_smiles.csv"
73
+ ]
74
+ min_sims = [0.3,0.7]
75
+ #min_sims = [0.7]
76
+ #types = ["FP2","FP3","FP4","MACCS","MP2D"]
77
+ types = ["MP2D","FP3"]
78
+ experiment = Experiment.create(
79
+ :name => "Fingerprint regression with different types for datasets #{datasets}.",
80
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
81
+ )
82
+ types.each do |type|
83
+ min_sims.each do |min_sim|
84
+ experiment.model_settings << {
85
+ :model_algorithm => "OpenTox::Model::LazarRegression",
86
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
87
+ :neighbor_algorithm => "fingerprint_neighbors",
88
+ :neighbor_algorithm_parameters => {
89
+ :type => type,
90
+ :min_sim => min_sim,
91
+ }
92
+ }
93
+ end
94
+ end
95
+ experiment.run
96
+ #=end
97
+ =begin
98
+ experiment = Experiment.find '56029cb92b72ed673d000000'
99
+ =end
100
+ p experiment.id
101
+ experiment.results.each do |dataset,result|
102
+ result.each do |r|
103
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
104
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
105
+ cv.validation_ids.each do |vid|
106
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
107
+ assert_equal params[:type], model_params[:type]
108
+ assert_equal params[:min_sim], model_params[:min_sim]
109
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
110
+ end
111
+ end
112
+ end
113
+ end
114
+ puts experiment.report.to_yaml
115
+ p experiment.summary
116
+ end
117
+
118
+ def test_mpd_fingerprints
119
+ skip
120
+ datasets = [
121
+ "EPAFHM.medi.csv",
122
+ ]
123
+ types = ["FP2","MP2D"]
124
+ experiment = Experiment.create(
125
+ :name => "FP2 vs MP2D fingerprint regression for datasets #{datasets}.",
126
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
127
+ )
128
+ types.each do |type|
129
+ experiment.model_settings << {
130
+ :algorithm => "OpenTox::Model::LazarRegression",
131
+ :neighbor_algorithm => "fingerprint_neighbors",
132
+ :neighbor_algorithm_parameter => {
133
+ :type => type,
134
+ :min_sim => 0.7,
135
+ }
136
+ }
137
+ end
138
+ experiment.run
139
+ p experiment.id
140
+ =begin
141
+ =end
142
+ #experiment = Experiment.find '55ffd0c02b72ed123c000000'
143
+ p experiment
144
+ puts experiment.report.to_yaml
145
+ end
146
+
147
+ def test_multiple_datasets
148
+ skip
149
+ datasets = [
150
+ "EPAFHM.medi.csv",
151
+ "LOAEL_mmol_corrected_smiles.csv"
152
+ ]
153
+ min_sims = [0.3]
154
+ types = ["FP2"]
155
+ experiment = Experiment.create(
156
+ :name => "Fingerprint regression with mutiple datasets #{datasets}.",
157
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
158
+ )
159
+ types.each do |type|
160
+ min_sims.each do |min_sim|
161
+ experiment.model_settings << {
162
+ :model_algorithm => "OpenTox::Model::LazarRegression",
163
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
164
+ :neighbor_algorithm => "fingerprint_neighbors",
165
+ :neighbor_algorithm_parameters => {
166
+ :type => type,
167
+ :min_sim => min_sim,
168
+ }
169
+ }
170
+ end
171
+ end
172
+ experiment.run
173
+ p experiment.id
174
+ experiment.results.each do |dataset,result|
175
+ result.each do |r|
176
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
177
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
178
+ cv.validation_ids.each do |vid|
179
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
180
+ assert_equal params[:type], model_params[:type]
181
+ assert_equal params[:min_sim], model_params[:min_sim]
182
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
183
+ end
184
+ end
185
+ end
186
+ end
187
+ puts experiment.report.to_yaml
188
+ p experiment.summary
189
+ end
190
+
191
+ def test_mpd_mna_regression_fingerprints
192
+ skip
193
+ datasets = [
194
+ "EPAFHM.medi.csv",
195
+ #"hamster_carcinogenicity.csv"
196
+ ]
197
+ min_sims = [0.0,0.3]
198
+ types = ["MP2D","MNA"]
199
+ neighbor_algos = [
200
+ "fingerprint_neighbors",
201
+ "fingerprint_count_neighbors",
202
+ ]
203
+ experiment = Experiment.create(
204
+ :name => "MNA vs MPD descriptors",
205
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
206
+ )
207
+ types.each do |type|
208
+ min_sims.each do |min_sim|
209
+ neighbor_algos.each do |neighbor_algo|
210
+ experiment.model_settings << {
211
+ :model_algorithm => "OpenTox::Model::LazarRegression",
212
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
213
+ :neighbor_algorithm => neighbor_algo,
214
+ :neighbor_algorithm_parameters => {
215
+ :type => type,
216
+ :min_sim => min_sim,
217
+ }
218
+ }
219
+ end
220
+ end
221
+ end
222
+ experiment.run
223
+ #=end
224
+ =begin
225
+ experiment = Experiment.find '56029cb92b72ed673d000000'
226
+ =end
227
+ p experiment.id
228
+ puts experiment.report.to_yaml
229
+ #p experiment.summary
230
+ experiment.results.each do |dataset,result|
231
+ result.each do |r|
232
+ p r
233
+ # TODO fix r["model_id"]
234
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
235
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
236
+ cv.validation_ids.each do |vid|
237
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
238
+ assert_equal params[:type], model_params[:type]
239
+ assert_equal params[:min_sim], model_params[:min_sim]
240
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
241
+ end
242
+ end
243
+ end
244
+ end
245
+ end
246
+
247
+ def test_mpd_mna_classification_fingerprints
248
+ skip
249
+ datasets = [
250
+ #"EPAFHM.medi.csv",
251
+ "hamster_carcinogenicity.csv"
252
+ ]
253
+ min_sims = [0.0,0.3]
254
+ types = ["MP2D","MNA"]
255
+ neighbor_algos = [
256
+ "fingerprint_count_neighbors",
257
+ "fingerprint_neighbors",
258
+ ]
259
+ experiment = Experiment.create(
260
+ :name => "MNA vs MPD descriptors",
261
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
262
+ )
263
+ types.each do |type|
264
+ min_sims.each do |min_sim|
265
+ neighbor_algos.each do |neighbor_algo|
266
+ experiment.model_settings << {
267
+ :model_algorithm => "OpenTox::Model::LazarClassification",
268
+ :prediction_algorithm => "OpenTox::Algorithm::Classification.weighted_majority_vote",
269
+ :neighbor_algorithm => neighbor_algo,
270
+ :neighbor_algorithm_parameters => {
271
+ :type => type,
272
+ :min_sim => min_sim,
273
+ }
274
+ }
275
+ end
276
+ end
277
+ end
278
+ experiment.run
279
+ #=end
280
+ =begin
281
+ experiment = Experiment.find '56029cb92b72ed673d000000'
282
+ =end
283
+ p experiment.id
284
+ puts experiment.report.to_yaml
285
+ #p experiment.summary
286
+ experiment.results.each do |dataset,result|
287
+ result.each do |r|
288
+ # TODO fix r["model_id"]
289
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
290
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
291
+ cv.validation_ids.each do |vid|
292
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
293
+ assert_equal params[:type], model_params[:type]
294
+ assert_equal params[:min_sim], model_params[:min_sim]
295
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
296
+ end
297
+ end
298
+ end
299
+ end
300
+ end
301
+ end
data/test/feature.rb CHANGED
@@ -26,16 +26,13 @@ class FeatureTest < MiniTest::Test
26
26
 
27
27
  id = @feature2.id
28
28
  @feature2.delete
29
- assert_raises Mongoid::Errors::DocumentNotFound do
30
- OpenTox::Feature.find(id)
31
- end
29
+ assert_nil OpenTox::Feature.find(id)
32
30
  end
33
31
 
34
32
  def test_duplicated_features
35
33
  metadata = {
36
34
  :name => "feature duplication test",
37
35
  :nominal => true,
38
- :description => "feature duplication test"
39
36
  }
40
37
  feature = NumericBioAssay.find_or_create_by metadata
41
38
  dup_feature = NumericBioAssay.find_or_create_by metadata
@@ -44,12 +41,8 @@ class FeatureTest < MiniTest::Test
44
41
  assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
45
42
  assert_equal feature.id, dup_feature.id
46
43
  feature.delete
47
- assert_raises Mongoid::Errors::DocumentNotFound do
48
- OpenTox::Feature.find(feature.id)
49
- end
50
- assert_raises Mongoid::Errors::DocumentNotFound do
51
- OpenTox::Feature.find(dup_feature.id)
52
- end
44
+ assert_nil OpenTox::Feature.find(feature.id)
45
+ assert_nil OpenTox::Feature.find(dup_feature.id)
53
46
  end
54
47
 
55
48
  def test_smarts_feature
@@ -62,4 +55,23 @@ class FeatureTest < MiniTest::Test
62
55
  assert original.smarts, "CN"
63
56
  end
64
57
 
58
+ def test_physchem_description
59
+ assert_equal 355, PhysChem.descriptors.size
60
+ assert_equal 15, PhysChem.openbabel_descriptors.size
61
+ assert_equal 295, PhysChem.cdk_descriptors.size
62
+ assert_equal 45, PhysChem.joelib_descriptors.size
63
+ assert_equal 310, PhysChem.unique_descriptors.size
64
+ end
65
+
66
+ def test_physchem
67
+ assert_equal 355, PhysChem.descriptors.size
68
+ c = Compound.from_smiles "CC(=O)CC(C)C"
69
+ logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
70
+ assert_equal 1.6215, logP.calculate(c)
71
+ jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
72
+ assert_equal 3.5951, jlogP.calculate(c)
73
+ alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
74
+ assert_equal 0.35380000000000034, alogP.calculate(c)
75
+ end
76
+
65
77
  end