lazar 0.0.7 → 0.0.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61)
  1. checksums.yaml +4 -4
  2. data/.gitignore +3 -0
  3. data/README.md +2 -1
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +15 -76
  6. data/ext/lazar/rinstall.R +9 -0
  7. data/lazar.gemspec +7 -7
  8. data/lib/classification.rb +5 -78
  9. data/lib/compound.rb +201 -44
  10. data/lib/crossvalidation.rb +224 -121
  11. data/lib/dataset.rb +83 -93
  12. data/lib/error.rb +1 -1
  13. data/lib/experiment.rb +99 -0
  14. data/lib/feature.rb +2 -54
  15. data/lib/lazar.rb +47 -34
  16. data/lib/leave-one-out-validation.rb +205 -0
  17. data/lib/model.rb +131 -76
  18. data/lib/opentox.rb +2 -2
  19. data/lib/overwrite.rb +37 -0
  20. data/lib/physchem.rb +133 -0
  21. data/lib/regression.rb +117 -189
  22. data/lib/rest-client-wrapper.rb +4 -5
  23. data/lib/unique_descriptors.rb +6 -7
  24. data/lib/validation.rb +63 -69
  25. data/test/all.rb +2 -2
  26. data/test/classification.rb +41 -0
  27. data/test/compound.rb +116 -7
  28. data/test/data/LOAEL_log_mg_corrected_smiles.csv +567 -567
  29. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +566 -566
  30. data/test/data/LOAEL_mmol_corrected_smiles.csv +568 -0
  31. data/test/data/batch_prediction.csv +25 -0
  32. data/test/data/batch_prediction_inchi_small.csv +4 -0
  33. data/test/data/batch_prediction_smiles_small.csv +4 -0
  34. data/test/data/hamster_carcinogenicity.json +3 -0
  35. data/test/data/loael.csv +568 -0
  36. data/test/dataset-long.rb +5 -8
  37. data/test/dataset.rb +31 -11
  38. data/test/default_environment.rb +11 -0
  39. data/test/descriptor.rb +26 -41
  40. data/test/error.rb +1 -3
  41. data/test/experiment.rb +301 -0
  42. data/test/feature.rb +22 -10
  43. data/test/lazar-long.rb +43 -23
  44. data/test/lazar-physchem-short.rb +19 -16
  45. data/test/prediction_models.rb +20 -0
  46. data/test/regression.rb +43 -0
  47. data/test/setup.rb +3 -1
  48. data/test/test_environment.rb +10 -0
  49. data/test/validation.rb +92 -26
  50. metadata +64 -38
  51. data/lib/SMARTS_InteLigand.txt +0 -983
  52. data/lib/bbrc.rb +0 -165
  53. data/lib/descriptor.rb +0 -247
  54. data/lib/neighbor.rb +0 -25
  55. data/lib/similarity.rb +0 -58
  56. data/mongoid.yml +0 -8
  57. data/test/descriptor-long.rb +0 -26
  58. data/test/fminer-long.rb +0 -38
  59. data/test/fminer.rb +0 -52
  60. data/test/lazar-fminer.rb +0 -50
  61. data/test/lazar-regression.rb +0 -27
data/test/dataset.rb CHANGED
@@ -8,10 +8,22 @@ class DatasetTest < MiniTest::Test
8
8
  d1 = Dataset.new
9
9
  d1.save
10
10
  datasets = Dataset.all
11
- assert_equal Dataset, datasets.first.class
11
+ assert datasets.first.is_a?(Dataset), "#{datasets.first} is not a Dataset."
12
12
  d1.delete
13
13
  end
14
14
 
15
+ def test_create_without_features_smiles_and_inchi
16
+ ["smiles", "inchi"].each do |type|
17
+ d = Dataset.from_csv_file File.join(DATA_DIR,"batch_prediction_#{type}_small.csv")
18
+ assert_equal Dataset, d.class
19
+ refute_nil d.id
20
+ dataset = Dataset.find d.id
21
+ #p dataset.compounds
22
+ assert_equal 3, d.compounds.size.to_i
23
+ d.delete
24
+ end
25
+ end
26
+
15
27
  def test_create_empty
16
28
  d = Dataset.new
17
29
  assert_equal Dataset, d.class
@@ -57,19 +69,15 @@ class DatasetTest < MiniTest::Test
57
69
  assert_equal 3, d.compounds.size
58
70
  assert_equal 2, d.features.size
59
71
  assert_equal [[1,2],[4,5],[6,7]], d.data_entries
60
- d.save_all
72
+ d.save
61
73
  # check if dataset has been saved correctly
62
74
  new_dataset = Dataset.find d.id
63
75
  assert_equal 3, new_dataset.compounds.size
64
76
  assert_equal 2, new_dataset.features.size
65
77
  assert_equal [[1,2],[4,5],[6,7]], new_dataset.data_entries
66
78
  d.delete
67
- assert_raises Mongoid::Errors::DocumentNotFound do
68
- Dataset.find d.id
69
- end
70
- assert_raises Mongoid::Errors::DocumentNotFound do
71
- Dataset.find new_dataset.id
72
- end
79
+ assert_nil Dataset.find d.id
80
+ assert_nil Dataset.find new_dataset.id
73
81
  end
74
82
 
75
83
  def test_dataset_accessors
@@ -78,7 +86,7 @@ class DatasetTest < MiniTest::Test
78
86
  new_dataset = Dataset.find d.id
79
87
  # get metadata
80
88
  assert_match "multicolumn.csv", new_dataset.source
81
- assert_equal "multicolumn.csv", new_dataset.title
89
+ assert_equal "multicolumn", new_dataset.name
82
90
  # get features
83
91
  assert_equal 6, new_dataset.features.size
84
92
  assert_equal 7, new_dataset.compounds.size
@@ -119,7 +127,7 @@ class DatasetTest < MiniTest::Test
119
127
  original_csv.shift
120
128
  csv.each_with_index do |row,i|
121
129
  compound = Compound.from_smiles row.shift
122
- original_compound = Compound.from_smiles original_csv[i].shift
130
+ original_compound = Compound.from_smiles original_csv[i].shift.strip
123
131
  assert_equal original_compound.inchi, compound.inchi
124
132
  row.each_with_index do |v,j|
125
133
  if v.numeric?
@@ -161,7 +169,7 @@ class DatasetTest < MiniTest::Test
161
169
  def test_from_csv2
162
170
  File.open("#{DATA_DIR}/temp_test.csv", "w+") { |file| file.write("SMILES,Hamster\nCC=O,true\n ,true\nO=C(N),true") }
163
171
  dataset = Dataset.from_csv_file "#{DATA_DIR}/temp_test.csv"
164
- assert_equal "Cannot parse SMILES compound ' ' at position 3, all entries are ignored.", dataset.warnings.join
172
+ assert_equal "Cannot parse SMILES compound '' at position 3, all entries are ignored.", dataset.warnings.join
165
173
  File.delete "#{DATA_DIR}/temp_test.csv"
166
174
  dataset.features.each{|f| feature = Feature.find f.id; feature.delete}
167
175
  dataset.delete
@@ -195,5 +203,17 @@ class DatasetTest < MiniTest::Test
195
203
  assert_equal 0.00323, d2.data_entries[5][0]
196
204
  end
197
205
 
206
+ def test_folds
207
+ dataset = Dataset.from_csv_file File.join(DATA_DIR,"loael.csv")
208
+ dataset.folds(10).each do |fold|
209
+ fold.each do |d|
210
+ assert_equal d.data_entries.size, d.compound_ids.size
211
+ assert_operator d.compound_ids.size, :>=, d.compound_ids.uniq.size
212
+ end
213
+ assert_operator fold[0].compound_ids.uniq.size, :>=, fold[1].compound_ids.uniq.size
214
+ end
215
+ #puts dataset.folds 10
216
+ end
217
+
198
218
  end
199
219
 
data/test/default_environment.rb ADDED
@@ -0,0 +1,11 @@
1
+ require 'minitest/autorun'
2
+ require_relative '../lib/lazar.rb'
3
+ include OpenTox
4
+ class DefaultEnvironmentTest < MiniTest::Test
5
+ def test_lazar_environment
6
+ assert_equal "production", ENV["LAZAR_ENV"]
7
+ assert_equal "production", ENV["MONGOID_ENV"]
8
+ assert_equal "production", ENV["RACK_ENV"]
9
+ assert_equal "production", Mongoid.clients["default"]["database"]
10
+ end
11
+ end
data/test/descriptor.rb CHANGED
@@ -4,80 +4,65 @@ class DescriptorTest < MiniTest::Test
4
4
 
5
5
  def test_list
6
6
  # check available descriptors
7
- @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
8
- assert_equal 111,@descriptors.size,"wrong num physchem descriptors"
9
- @descriptor_values = OpenTox::Algorithm::Descriptor::DESCRIPTOR_VALUES
10
- assert_equal 356,@descriptor_values.size,"wrong num physchem descriptors"
11
- sum = 0
12
- [ @descriptors, @descriptor_values ].each do |desc|
13
- {"Openbabel"=>16,"Cdk"=>(desc==@descriptors ? 50 : 295),"Joelib"=>45}.each do |k,v|
14
- assert_equal v,desc.select{|x| x=~/^#{k}\./}.size,"wrong num #{k} descriptors"
15
- sum += v
16
- end
17
- end
18
- assert_equal (111+356),sum
7
+ assert_equal 355,PhysChem.descriptors.size,"incorrect number of physchem descriptors"
8
+ assert_equal 15,PhysChem.openbabel_descriptors.size,"incorrect number of Openbabel descriptors"
9
+ assert_equal 295,PhysChem.cdk_descriptors.size,"incorrect number of Cdk descriptors"
10
+ assert_equal 45,PhysChem.joelib_descriptors.size,"incorrect number of Joelib descriptors"
19
11
  end
20
12
 
21
13
  def test_smarts
22
14
  c = OpenTox::Compound.from_smiles "N=C=C1CCC(=F=FO)C1"
23
15
  File.open("tmp.png","w+"){|f| f.puts c.png}
24
16
  s = Smarts.find_or_create_by(:smarts => "F=F")
25
- result = OpenTox::Algorithm::Descriptor.smarts_match c, s
17
+ result = c.smarts_match [s]
26
18
  assert_equal [1], result
27
19
  smarts = ["CC", "C", "C=C", "CO", "F=F", "C1CCCC1", "NN"].collect{|s| Smarts.find_or_create_by(:smarts => s)}
28
- result = OpenTox::Algorithm::Descriptor.smarts_match c, smarts
20
+ result = c.smarts_match smarts
29
21
  assert_equal [1, 1, 1, 0, 1, 1, 0], result
30
22
  smarts_count = [10, 6, 2, 0, 2, 10, 0]
31
- result = OpenTox::Algorithm::Descriptor.smarts_count c, smarts
23
+ result = c.smarts_match smarts, true
32
24
  assert_equal smarts_count, result
33
25
  end
34
26
 
35
27
  def test_compound_openbabel_single
36
28
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
37
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Openbabel.logP"]
38
- assert_equal 1.12518, result.first
29
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Openbabel.logP")]
30
+ assert_equal 1.12518, result.first.last.round(5)
39
31
  end
40
32
 
41
33
  def test_compound_cdk_single
42
34
  c = OpenTox::Compound.from_smiles "c1ccccc1"
43
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
44
- assert_equal [12], result
35
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
36
+ assert_equal 12, result.first.last
45
37
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
46
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.AtomCount"]
47
- assert_equal [17], result
48
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Cdk.CarbonTypes"]
38
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Cdk.AtomCount.nAtom")]
39
+ assert_equal 17, result.first.last
49
40
  c_types = {"Cdk.CarbonTypes.C1SP1"=>1, "Cdk.CarbonTypes.C2SP1"=>0, "Cdk.CarbonTypes.C1SP2"=>0, "Cdk.CarbonTypes.C2SP2"=>1, "Cdk.CarbonTypes.C3SP2"=>0, "Cdk.CarbonTypes.C1SP3"=>2, "Cdk.CarbonTypes.C2SP3"=>1, "Cdk.CarbonTypes.C3SP3"=>1, "Cdk.CarbonTypes.C4SP3"=>0}
50
- assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result
41
+ physchem_features = c_types.collect{|t,nr| PhysChem.find_or_create_by(:name => t)}
42
+ result = c.physchem physchem_features
43
+ assert_equal [1, 0, 0, 1, 0, 2, 1, 1, 0], result.values
51
44
  end
52
45
 
53
46
  def test_compound_joelib_single
54
47
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
55
- result = OpenTox::Algorithm::Descriptor.physchem c, ["Joelib.LogP"]
56
- assert_equal [2.65908], result
48
+ result = c.physchem [PhysChem.find_or_create_by(:name => "Joelib.LogP")]
49
+ assert_equal 2.65908, result.first.last
57
50
  end
58
51
 
59
52
  def test_compound_all
60
53
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
61
- result = OpenTox::Algorithm::Descriptor.physchem c
62
- assert_equal 332, result.size
63
- assert_equal 30.8723, result[2]
64
- assert_equal 1.12518, result[328]
54
+ result = c.physchem PhysChem.descriptors
55
+ amr = PhysChem.find_or_create_by(:name => "Cdk.ALOGP.AMR", :library => "Cdk")
56
+ sbonds = PhysChem.find_by(:name => "Openbabel.sbonds")
57
+ assert_equal 30.8723, result[amr.id.to_s]
58
+ assert_equal 5, result[sbonds.id.to_s]
65
59
  end
66
60
 
67
61
  def test_compound_descriptor_parameters
68
62
  c = OpenTox::Compound.from_smiles "CC(=O)CC(C)C#N"
69
- result = OpenTox::Algorithm::Descriptor.physchem c, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]#, true
70
- assert_equal 12, result.size
71
- assert_equal [1.12518, 17.0, 1, 0, 0, 1, 0, 2, 1, 1, 0, 2.65908], result#.last
72
- end
73
-
74
- def test_dataset_descriptor_parameters
75
- dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.mini.csv")
76
- d = OpenTox::Algorithm::Descriptor.physchem dataset, [ "Openbabel.logP", "Cdk.AtomCount", "Cdk.CarbonTypes", "Joelib.LogP" ]
77
- assert_kind_of Dataset, d
78
- assert_equal dataset.compounds, d.compounds
79
- assert_equal dataset.compounds.size, d.data_entries.size
80
- assert_equal 12, d.data_entries.first.size
63
+ result = c.physchem [ "Openbabel.logP", "Cdk.AtomCount.nAtom", "Joelib.LogP" ].collect{|d| PhysChem.find_or_create_by(:name => d)}
64
+ assert_equal 3, result.size
65
+ assert_equal [1.12518, 17.0, 2.65908], result.values.collect{|v| v.round 5}
81
66
  end
82
67
 
83
68
  end
data/test/error.rb CHANGED
@@ -4,9 +4,7 @@ class ErrorTest < MiniTest::Test
4
4
 
5
5
  def test_bad_request
6
6
  object = OpenTox::Feature.new
7
- assert_raises Mongoid::Errors::DocumentNotFound do
8
- response = OpenTox::Feature.find(object.id)
9
- end
7
+ assert_nil OpenTox::Feature.find(object.id)
10
8
  end
11
9
 
12
10
  def test_error_methods
data/test/experiment.rb ADDED
@@ -0,0 +1,301 @@
1
+ require_relative "setup.rb"
2
+
3
+ class ExperimentTest < MiniTest::Test
4
+
5
+ def test_regression_experiment
6
+ skip
7
+ datasets = [
8
+ "EPAFHM.medi.csv",
9
+ #"EPAFHM.csv",
10
+ #"FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv",
11
+ "LOAEL_mmol_corrected_smiles.csv"
12
+ ]
13
+ experiment = Experiment.create(
14
+ :name => "Default regression for datasets #{datasets}.",
15
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
16
+ :model_settings => [
17
+ {
18
+ :algorithm => "OpenTox::Model::LazarRegression",
19
+ }
20
+ ]
21
+ )
22
+ #experiment.run
23
+ puts experiment.report.to_yaml
24
+ assert_equal datasets.size, experiment.results.size
25
+ experiment.results.each do |dataset_id, result|
26
+ assert_equal 1, result.size
27
+ result.each do |r|
28
+ assert_kind_of BSON::ObjectId, r[:model_id]
29
+ assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
30
+ end
31
+ end
32
+ end
33
+
34
+ def test_classification_experiment
35
+
36
+ skip
37
+ datasets = [ "hamster_carcinogenicity.csv" ]
38
+ experiment = Experiment.create(
39
+ :name => "Fminer vs fingerprint classification for datasets #{datasets}.",
40
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
41
+ :model_settings => [
42
+ {
43
+ :algorithm => "OpenTox::Model::LazarClassification",
44
+ },{
45
+ :algorithm => "OpenTox::Model::LazarClassification",
46
+ :neighbor_algorithm_parameter => {:min_sim => 0.3}
47
+ },
48
+ #{
49
+ #:algorithm => "OpenTox::Model::LazarFminerClassification",
50
+ #}
51
+ ]
52
+ )
53
+ #experiment.run
54
+ =begin
55
+ experiment = Experiment.find "55f944a22b72ed7de2000000"
56
+ =end
57
+ puts experiment.report.to_yaml
58
+ experiment.results.each do |dataset_id, result|
59
+ assert_equal 2, result.size
60
+ result.each do |r|
61
+ assert_kind_of BSON::ObjectId, r[:model_id]
62
+ assert_kind_of BSON::ObjectId, r[:repeated_crossvalidation_id]
63
+ end
64
+ end
65
+ end
66
+
67
+ def test_regression_fingerprints
68
+ skip
69
+ #=begin
70
+ datasets = [
71
+ "EPAFHM.medi.csv",
72
+ #"LOAEL_mmol_corrected_smiles.csv"
73
+ ]
74
+ min_sims = [0.3,0.7]
75
+ #min_sims = [0.7]
76
+ #types = ["FP2","FP3","FP4","MACCS","MP2D"]
77
+ types = ["MP2D","FP3"]
78
+ experiment = Experiment.create(
79
+ :name => "Fingerprint regression with different types for datasets #{datasets}.",
80
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
81
+ )
82
+ types.each do |type|
83
+ min_sims.each do |min_sim|
84
+ experiment.model_settings << {
85
+ :model_algorithm => "OpenTox::Model::LazarRegression",
86
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
87
+ :neighbor_algorithm => "fingerprint_neighbors",
88
+ :neighbor_algorithm_parameters => {
89
+ :type => type,
90
+ :min_sim => min_sim,
91
+ }
92
+ }
93
+ end
94
+ end
95
+ experiment.run
96
+ #=end
97
+ =begin
98
+ experiment = Experiment.find '56029cb92b72ed673d000000'
99
+ =end
100
+ p experiment.id
101
+ experiment.results.each do |dataset,result|
102
+ result.each do |r|
103
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
104
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
105
+ cv.validation_ids.each do |vid|
106
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
107
+ assert_equal params[:type], model_params[:type]
108
+ assert_equal params[:min_sim], model_params[:min_sim]
109
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
110
+ end
111
+ end
112
+ end
113
+ end
114
+ puts experiment.report.to_yaml
115
+ p experiment.summary
116
+ end
117
+
118
+ def test_mpd_fingerprints
119
+ skip
120
+ datasets = [
121
+ "EPAFHM.medi.csv",
122
+ ]
123
+ types = ["FP2","MP2D"]
124
+ experiment = Experiment.create(
125
+ :name => "FP2 vs MP2D fingerprint regression for datasets #{datasets}.",
126
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
127
+ )
128
+ types.each do |type|
129
+ experiment.model_settings << {
130
+ :algorithm => "OpenTox::Model::LazarRegression",
131
+ :neighbor_algorithm => "fingerprint_neighbors",
132
+ :neighbor_algorithm_parameter => {
133
+ :type => type,
134
+ :min_sim => 0.7,
135
+ }
136
+ }
137
+ end
138
+ experiment.run
139
+ p experiment.id
140
+ =begin
141
+ =end
142
+ #experiment = Experiment.find '55ffd0c02b72ed123c000000'
143
+ p experiment
144
+ puts experiment.report.to_yaml
145
+ end
146
+
147
+ def test_multiple_datasets
148
+ skip
149
+ datasets = [
150
+ "EPAFHM.medi.csv",
151
+ "LOAEL_mmol_corrected_smiles.csv"
152
+ ]
153
+ min_sims = [0.3]
154
+ types = ["FP2"]
155
+ experiment = Experiment.create(
156
+ :name => "Fingerprint regression with mutiple datasets #{datasets}.",
157
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
158
+ )
159
+ types.each do |type|
160
+ min_sims.each do |min_sim|
161
+ experiment.model_settings << {
162
+ :model_algorithm => "OpenTox::Model::LazarRegression",
163
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
164
+ :neighbor_algorithm => "fingerprint_neighbors",
165
+ :neighbor_algorithm_parameters => {
166
+ :type => type,
167
+ :min_sim => min_sim,
168
+ }
169
+ }
170
+ end
171
+ end
172
+ experiment.run
173
+ p experiment.id
174
+ experiment.results.each do |dataset,result|
175
+ result.each do |r|
176
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
177
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
178
+ cv.validation_ids.each do |vid|
179
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
180
+ assert_equal params[:type], model_params[:type]
181
+ assert_equal params[:min_sim], model_params[:min_sim]
182
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
183
+ end
184
+ end
185
+ end
186
+ end
187
+ puts experiment.report.to_yaml
188
+ p experiment.summary
189
+ end
190
+
191
+ def test_mpd_mna_regression_fingerprints
192
+ skip
193
+ datasets = [
194
+ "EPAFHM.medi.csv",
195
+ #"hamster_carcinogenicity.csv"
196
+ ]
197
+ min_sims = [0.0,0.3]
198
+ types = ["MP2D","MNA"]
199
+ neighbor_algos = [
200
+ "fingerprint_neighbors",
201
+ "fingerprint_count_neighbors",
202
+ ]
203
+ experiment = Experiment.create(
204
+ :name => "MNA vs MPD descriptors",
205
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
206
+ )
207
+ types.each do |type|
208
+ min_sims.each do |min_sim|
209
+ neighbor_algos.each do |neighbor_algo|
210
+ experiment.model_settings << {
211
+ :model_algorithm => "OpenTox::Model::LazarRegression",
212
+ :prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average",
213
+ :neighbor_algorithm => neighbor_algo,
214
+ :neighbor_algorithm_parameters => {
215
+ :type => type,
216
+ :min_sim => min_sim,
217
+ }
218
+ }
219
+ end
220
+ end
221
+ end
222
+ experiment.run
223
+ #=end
224
+ =begin
225
+ experiment = Experiment.find '56029cb92b72ed673d000000'
226
+ =end
227
+ p experiment.id
228
+ puts experiment.report.to_yaml
229
+ #p experiment.summary
230
+ experiment.results.each do |dataset,result|
231
+ result.each do |r|
232
+ p r
233
+ # TODO fix r["model_id"]
234
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
235
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
236
+ cv.validation_ids.each do |vid|
237
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
238
+ assert_equal params[:type], model_params[:type]
239
+ assert_equal params[:min_sim], model_params[:min_sim]
240
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
241
+ end
242
+ end
243
+ end
244
+ end
245
+ end
246
+
247
+ def test_mpd_mna_classification_fingerprints
248
+ skip
249
+ datasets = [
250
+ #"EPAFHM.medi.csv",
251
+ "hamster_carcinogenicity.csv"
252
+ ]
253
+ min_sims = [0.0,0.3]
254
+ types = ["MP2D","MNA"]
255
+ neighbor_algos = [
256
+ "fingerprint_count_neighbors",
257
+ "fingerprint_neighbors",
258
+ ]
259
+ experiment = Experiment.create(
260
+ :name => "MNA vs MPD descriptors",
261
+ :dataset_ids => datasets.collect{|d| Dataset.from_csv_file(File.join(DATA_DIR, d)).id},
262
+ )
263
+ types.each do |type|
264
+ min_sims.each do |min_sim|
265
+ neighbor_algos.each do |neighbor_algo|
266
+ experiment.model_settings << {
267
+ :model_algorithm => "OpenTox::Model::LazarClassification",
268
+ :prediction_algorithm => "OpenTox::Algorithm::Classification.weighted_majority_vote",
269
+ :neighbor_algorithm => neighbor_algo,
270
+ :neighbor_algorithm_parameters => {
271
+ :type => type,
272
+ :min_sim => min_sim,
273
+ }
274
+ }
275
+ end
276
+ end
277
+ end
278
+ experiment.run
279
+ #=end
280
+ =begin
281
+ experiment = Experiment.find '56029cb92b72ed673d000000'
282
+ =end
283
+ p experiment.id
284
+ puts experiment.report.to_yaml
285
+ #p experiment.summary
286
+ experiment.results.each do |dataset,result|
287
+ result.each do |r|
288
+ # TODO fix r["model_id"]
289
+ params = Model::Lazar.find(r["model_id"])[:neighbor_algorithm_parameters]
290
+ RepeatedCrossValidation.find(r["repeated_crossvalidation_id"]).crossvalidations.each do |cv|
291
+ cv.validation_ids.each do |vid|
292
+ model_params = Model::Lazar.find(Validation.find(vid).model_id)[:neighbor_algorithm_parameters]
293
+ assert_equal params[:type], model_params[:type]
294
+ assert_equal params[:min_sim], model_params[:min_sim]
295
+ refute_equal params[:training_dataset_id], model_params[:training_dataset_id]
296
+ end
297
+ end
298
+ end
299
+ end
300
+ end
301
+ end
data/test/feature.rb CHANGED
@@ -26,16 +26,13 @@ class FeatureTest < MiniTest::Test
26
26
 
27
27
  id = @feature2.id
28
28
  @feature2.delete
29
- assert_raises Mongoid::Errors::DocumentNotFound do
30
- OpenTox::Feature.find(id)
31
- end
29
+ assert_nil OpenTox::Feature.find(id)
32
30
  end
33
31
 
34
32
  def test_duplicated_features
35
33
  metadata = {
36
34
  :name => "feature duplication test",
37
35
  :nominal => true,
38
- :description => "feature duplication test"
39
36
  }
40
37
  feature = NumericBioAssay.find_or_create_by metadata
41
38
  dup_feature = NumericBioAssay.find_or_create_by metadata
@@ -44,12 +41,8 @@ class FeatureTest < MiniTest::Test
44
41
  assert !feature.id.nil?, "No Feature ID in #{dup_feature.inspect}"
45
42
  assert_equal feature.id, dup_feature.id
46
43
  feature.delete
47
- assert_raises Mongoid::Errors::DocumentNotFound do
48
- OpenTox::Feature.find(feature.id)
49
- end
50
- assert_raises Mongoid::Errors::DocumentNotFound do
51
- OpenTox::Feature.find(dup_feature.id)
52
- end
44
+ assert_nil OpenTox::Feature.find(feature.id)
45
+ assert_nil OpenTox::Feature.find(dup_feature.id)
53
46
  end
54
47
 
55
48
  def test_smarts_feature
@@ -62,4 +55,23 @@ class FeatureTest < MiniTest::Test
62
55
  assert original.smarts, "CN"
63
56
  end
64
57
 
58
+ def test_physchem_description
59
+ assert_equal 355, PhysChem.descriptors.size
60
+ assert_equal 15, PhysChem.openbabel_descriptors.size
61
+ assert_equal 295, PhysChem.cdk_descriptors.size
62
+ assert_equal 45, PhysChem.joelib_descriptors.size
63
+ assert_equal 310, PhysChem.unique_descriptors.size
64
+ end
65
+
66
+ def test_physchem
67
+ assert_equal 355, PhysChem.descriptors.size
68
+ c = Compound.from_smiles "CC(=O)CC(C)C"
69
+ logP = PhysChem.find_or_create_by :name => "Openbabel.logP"
70
+ assert_equal 1.6215, logP.calculate(c)
71
+ jlogP = PhysChem.find_or_create_by :name => "Joelib.LogP"
72
+ assert_equal 3.5951, jlogP.calculate(c)
73
+ alogP = PhysChem.find_or_create_by :name => "Cdk.ALOGP.ALogP"
74
+ assert_equal 0.35380000000000034, alogP.calculate(c)
75
+ end
76
+
65
77
  end