lazar 0.9.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
@@ -0,0 +1,133 @@
1
+ require_relative "setup.rb"
2
+
3
+ class NanoparticleValidationTest < MiniTest::Test
4
+ include OpenTox::Validation
5
+
6
+ def setup
7
+ @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
8
+ @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
9
+ end
10
+
11
+ def test_validate_default_nanoparticle_model
12
+ model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
13
+ cv = CrossValidation.create model
14
+ p cv.id
15
+ #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
16
+ refute_nil cv.r_squared
17
+ refute_nil cv.rmse
18
+ end
19
+
20
+ def test_validate_pls_pchem_model
21
+ algorithms = {
22
+ :descriptors => {
23
+ :method => "properties",
24
+ :categories => ["P-CHEM"]
25
+ },
26
+ :prediction => {:method => 'Algorithm::Caret.pls' },
27
+ :feature_selection => {
28
+ :method => "Algorithm::FeatureSelection.correlation_filter",
29
+ },
30
+ }
31
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
32
+ assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
33
+ cv = CrossValidation.create model
34
+ p cv.id
35
+ #File.open("tmp2.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
36
+ refute_nil cv.r_squared
37
+ refute_nil cv.rmse
38
+ end
39
+
40
+ =begin
41
+ def test_validate_proteomics_pls_pchem_model
42
+ algorithms = {
43
+ :descriptors => {
44
+ :method => "properties",
45
+ :categories => ["Proteomics"]
46
+ },
47
+ :prediction => {:method => 'Algorithm::Caret.pls' },
48
+ :feature_selection => {
49
+ :method => "Algorithm::FeatureSelection.correlation_filter",
50
+ },
51
+ }
52
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
53
+ assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
54
+ cv = CrossValidation.create model
55
+ refute_nil cv.r_squared
56
+ refute_nil cv.rmse
57
+ end
58
+ =end
59
+
60
+ def test_validate_proteomics_pchem_default_model
61
+ algorithms = {
62
+ :descriptors => {
63
+ :method => "properties",
64
+ :categories => ["Proteomics","P-CHEM"]
65
+ },
66
+ :feature_selection => {
67
+ :method => "Algorithm::FeatureSelection.correlation_filter",
68
+ },
69
+ }
70
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
71
+ cv = CrossValidation.create model
72
+ refute_nil cv.r_squared
73
+ refute_nil cv.rmse
74
+ end
75
+
76
+ def test_nanoparticle_fingerprint_model_without_feature_selection
77
+ algorithms = {
78
+ :descriptors => {
79
+ :method => "fingerprint",
80
+ :type => "MP2D",
81
+ },
82
+ :similarity => {
83
+ :method => "Algorithm::Similarity.tanimoto",
84
+ :min => 0.1
85
+ },
86
+ :feature_selection => nil
87
+ }
88
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
89
+ cv = CrossValidation.create model
90
+ refute_nil cv.r_squared
91
+ refute_nil cv.rmse
92
+ end
93
+
94
+ def test_nanoparticle_fingerprint_weighted_average_model_without_feature_selection
95
+ algorithms = {
96
+ :descriptors => {
97
+ :method => "fingerprint",
98
+ :type => "MP2D",
99
+ },
100
+ :similarity => {
101
+ :method => "Algorithm::Similarity.tanimoto",
102
+ :min => 0.1
103
+ },
104
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
105
+ :feature_selection => nil
106
+ }
107
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
108
+ cv = CrossValidation.create model
109
+ refute_nil cv.r_squared
110
+ refute_nil cv.rmse
111
+ end
112
+
113
+ def test_nanoparticle_fingerprint_model_with_feature_selection
114
+ algorithms = {
115
+ :descriptors => {
116
+ :method => "fingerprint",
117
+ :type => "MP2D",
118
+ },
119
+ :similarity => {
120
+ :method => "Algorithm::Similarity.tanimoto",
121
+ :min => 0.1
122
+ },
123
+ :feature_selection => {
124
+ :method => "Algorithm::FeatureSelection.correlation_filter",
125
+ },
126
+ }
127
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
128
+ cv = CrossValidation.create model
129
+ refute_nil cv.r_squared
130
+ refute_nil cv.rmse
131
+ end
132
+
133
+ end
@@ -0,0 +1,92 @@
1
+ require_relative "setup.rb"
2
+
3
+ class ValidationRegressionTest < MiniTest::Test
4
+ include OpenTox::Validation
5
+
6
+ # defaults
7
+
8
+ def test_default_regression_crossvalidation
9
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
10
+ model = Model::Lazar.create training_dataset: dataset
11
+ cv = RegressionCrossValidation.create model
12
+ assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
13
+ assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
14
+ assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
15
+ end
16
+
17
+ # parameters
18
+
19
+ def test_regression_crossvalidation_params
20
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
21
+ algorithms = {
22
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
23
+ :descriptors => { :type => "MACCS", },
24
+ :similarity => {:min => 0.7}
25
+ }
26
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
27
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
28
+ cv = RegressionCrossValidation.create model
29
+ cv.validation_ids.each do |vid|
30
+ model = Model::Lazar.find(Validation.find(vid).model_id)
31
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
32
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
33
+ refute_nil model.training_dataset_id
34
+ refute_equal dataset.id, model.training_dataset_id
35
+ end
36
+
37
+ refute_nil cv.rmse
38
+ refute_nil cv.mae
39
+ end
40
+
41
+ def test_physchem_regression_crossvalidation
42
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
43
+ model = Model::Lazar.create training_dataset:training_dataset
44
+ cv = RegressionCrossValidation.create model
45
+ refute_nil cv.rmse
46
+ refute_nil cv.mae
47
+ end
48
+
49
+ # LOO
50
+
51
+ def test_regression_loo_validation
52
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
53
+ model = Model::Lazar.create training_dataset: dataset
54
+ loo = RegressionLeaveOneOut.create model
55
+ assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.34"
56
+ end
57
+
58
+ def test_regression_loo_validation_with_feature_selection
59
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
60
+ algorithms = {
61
+ :descriptors => {
62
+ :method => "calculate_properties",
63
+ :features => PhysChem.openbabel_descriptors,
64
+ },
65
+ :similarity => {
66
+ :method => "Algorithm::Similarity.weighted_cosine",
67
+ :min => 0.5
68
+ },
69
+ :feature_selection => {
70
+ :method => "Algorithm::FeatureSelection.correlation_filter",
71
+ },
72
+ }
73
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
74
+ assert_raises OpenTox::BadRequestError do
75
+ loo = RegressionLeaveOneOut.create model
76
+ end
77
+ end
78
+
79
+ # repeated CV
80
+
81
+ def test_repeated_crossvalidation
82
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
83
+ model = Model::Lazar.create training_dataset: dataset
84
+ repeated_cv = RepeatedCrossValidation.create model
85
+ repeated_cv.crossvalidations.each do |cv|
86
+ #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.34"
87
+ #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
88
+ end
89
+ File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
90
+ end
91
+
92
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lazar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
@@ -9,98 +9,92 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-04-12 00:00:00.000000000 Z
12
+ date: 2016-12-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - "~>"
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
- version: '1.11'
20
+ version: '0'
21
21
  type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - "~>"
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
- version: '1.11'
27
+ version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: rest-client
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - "~>"
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: '1.8'
34
+ version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - "~>"
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
- version: '1.8'
41
+ version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: nokogiri
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - "~>"
46
+ - - ">="
47
47
  - !ruby/object:Gem::Version
48
- version: '1.6'
48
+ version: '0'
49
49
  type: :runtime
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - "~>"
53
+ - - ">="
54
54
  - !ruby/object:Gem::Version
55
- version: '1.6'
55
+ version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rserve-client
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
- - - "~>"
60
+ - - ">="
61
61
  - !ruby/object:Gem::Version
62
- version: '0.3'
62
+ version: '0'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - "~>"
67
+ - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: '0.3'
69
+ version: '0'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: mongoid
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - "~>"
74
+ - - ">="
75
75
  - !ruby/object:Gem::Version
76
- version: '5.0'
76
+ version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
- - - "~>"
81
+ - - ">="
82
82
  - !ruby/object:Gem::Version
83
- version: '5.0'
83
+ version: '0'
84
84
  - !ruby/object:Gem::Dependency
85
85
  name: openbabel
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
- - - "~>"
89
- - !ruby/object:Gem::Version
90
- version: '2.3'
91
88
  - - ">="
92
89
  - !ruby/object:Gem::Version
93
- version: 2.3.2.2
90
+ version: '0'
94
91
  type: :runtime
95
92
  prerelease: false
96
93
  version_requirements: !ruby/object:Gem::Requirement
97
94
  requirements:
98
- - - "~>"
99
- - !ruby/object:Gem::Version
100
- version: '2.3'
101
95
  - - ">="
102
96
  - !ruby/object:Gem::Version
103
- version: 2.3.2.2
97
+ version: '0'
104
98
  description: Libraries for lazy structure-activity relationships and read-across.
105
99
  email:
106
100
  - helma@in-silico.ch
@@ -129,11 +123,12 @@ files:
129
123
  - java/JoelibDescriptors.class
130
124
  - java/JoelibDescriptors.java
131
125
  - java/Rakefile
132
- - java/cdk-1.4.19.jar
126
+ - java/cdk-2.0-SNAPSHOT.jar
133
127
  - java/joelib2.jar
134
128
  - java/log4j.jar
135
129
  - lazar.gemspec
136
130
  - lib/algorithm.rb
131
+ - lib/caret.rb
137
132
  - lib/classification.rb
138
133
  - lib/compound.rb
139
134
  - lib/crossvalidation.rb
@@ -141,77 +136,72 @@ files:
141
136
  - lib/error.rb
142
137
  - lib/experiment.rb
143
138
  - lib/feature.rb
139
+ - lib/feature_selection.rb
140
+ - lib/import.rb
144
141
  - lib/lazar.rb
145
142
  - lib/leave-one-out-validation.rb
146
143
  - lib/model.rb
144
+ - lib/nanoparticle.rb
147
145
  - lib/opentox.rb
148
146
  - lib/overwrite.rb
149
147
  - lib/physchem.rb
150
148
  - lib/regression.rb
151
149
  - lib/rest-client-wrapper.rb
150
+ - lib/similarity.rb
151
+ - lib/substance.rb
152
+ - lib/train-test-validation.rb
152
153
  - lib/unique_descriptors.rb
154
+ - lib/validation-statistics.rb
153
155
  - lib/validation.rb
156
+ - scripts/mg2mmol.rb
157
+ - scripts/mirror-enm2test.rb
158
+ - scripts/mmol2-log10.rb
154
159
  - test/all.rb
155
- - test/classification.rb
156
160
  - test/compound.rb
157
- - test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
158
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
159
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
160
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
161
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
162
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
163
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
164
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
165
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
166
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
167
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
168
161
  - test/data/EPAFHM.csv
169
162
  - test/data/EPAFHM.medi.csv
163
+ - test/data/EPAFHM.medi_log10.csv
170
164
  - test/data/EPAFHM.mini.csv
171
- - test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
172
- - test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
165
+ - test/data/EPAFHM.mini_log10.csv
166
+ - test/data/EPAFHM_log10.csv
173
167
  - test/data/ISSCAN-multi.csv
174
- - test/data/LOAEL_log_mg_corrected_smiles.csv
175
- - test/data/LOAEL_log_mmol_corrected_smiles.csv
176
168
  - test/data/LOAEL_mmol_corrected_smiles.csv
177
169
  - test/data/acetaldehyde.sdf
178
170
  - test/data/batch_prediction.csv
179
171
  - test/data/batch_prediction_inchi_small.csv
180
172
  - test/data/batch_prediction_smiles_small.csv
181
- - test/data/boiling_points.ext.sdf
182
- - test/data/cpdb_100.csv
183
173
  - test/data/hamster_carcinogenicity.csv
184
174
  - test/data/hamster_carcinogenicity.json
185
175
  - test/data/hamster_carcinogenicity.mini.bool_float.csv
186
176
  - test/data/hamster_carcinogenicity.mini.bool_int.csv
187
177
  - test/data/hamster_carcinogenicity.mini.bool_string.csv
188
178
  - test/data/hamster_carcinogenicity.mini.csv
189
- - test/data/hamster_carcinogenicity.ntriples
190
- - test/data/hamster_carcinogenicity.sdf
191
- - test/data/hamster_carcinogenicity.xls
192
- - test/data/hamster_carcinogenicity.yaml
193
179
  - test/data/hamster_carcinogenicity_with_errors.csv
194
180
  - test/data/kazius.csv
195
181
  - test/data/loael.csv
182
+ - test/data/loael_log10.csv
196
183
  - test/data/multi_cell_call.csv
197
184
  - test/data/multi_cell_call_no_dup.csv
198
185
  - test/data/multicolumn.csv
199
186
  - test/data/rat_feature_dataset.csv
200
187
  - test/data/wrong_dataset.csv
201
- - test/dataset-long.rb
202
188
  - test/dataset.rb
203
189
  - test/default_environment.rb
204
190
  - test/descriptor.rb
205
191
  - test/error.rb
206
192
  - test/experiment.rb
207
193
  - test/feature.rb
208
- - test/lazar-long.rb
209
- - test/lazar-physchem-short.rb
210
- - test/prediction_models.rb
211
- - test/regression.rb
194
+ - test/gridfs.rb
195
+ - test/model-classification.rb
196
+ - test/model-nanoparticle.rb
197
+ - test/model-regression.rb
198
+ - test/model-validation.rb
199
+ - test/nanomaterial-model-validation.rb
212
200
  - test/setup.rb
213
201
  - test/test_environment.rb
214
- - test/validation.rb
202
+ - test/validation-classification.rb
203
+ - test/validation-nanoparticle.rb
204
+ - test/validation-regression.rb
215
205
  homepage: http://github.com/opentox/lazar
216
206
  licenses:
217
207
  - GPL-3.0
@@ -236,65 +226,4 @@ rubygems_version: 2.5.1
236
226
  signing_key:
237
227
  specification_version: 4
238
228
  summary: Lazar framework
239
- test_files:
240
- - test/all.rb
241
- - test/classification.rb
242
- - test/compound.rb
243
- - test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
244
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
245
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
246
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
247
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
248
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
249
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
250
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
251
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
252
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
253
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
254
- - test/data/EPAFHM.csv
255
- - test/data/EPAFHM.medi.csv
256
- - test/data/EPAFHM.mini.csv
257
- - test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
258
- - test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
259
- - test/data/ISSCAN-multi.csv
260
- - test/data/LOAEL_log_mg_corrected_smiles.csv
261
- - test/data/LOAEL_log_mmol_corrected_smiles.csv
262
- - test/data/LOAEL_mmol_corrected_smiles.csv
263
- - test/data/acetaldehyde.sdf
264
- - test/data/batch_prediction.csv
265
- - test/data/batch_prediction_inchi_small.csv
266
- - test/data/batch_prediction_smiles_small.csv
267
- - test/data/boiling_points.ext.sdf
268
- - test/data/cpdb_100.csv
269
- - test/data/hamster_carcinogenicity.csv
270
- - test/data/hamster_carcinogenicity.json
271
- - test/data/hamster_carcinogenicity.mini.bool_float.csv
272
- - test/data/hamster_carcinogenicity.mini.bool_int.csv
273
- - test/data/hamster_carcinogenicity.mini.bool_string.csv
274
- - test/data/hamster_carcinogenicity.mini.csv
275
- - test/data/hamster_carcinogenicity.ntriples
276
- - test/data/hamster_carcinogenicity.sdf
277
- - test/data/hamster_carcinogenicity.xls
278
- - test/data/hamster_carcinogenicity.yaml
279
- - test/data/hamster_carcinogenicity_with_errors.csv
280
- - test/data/kazius.csv
281
- - test/data/loael.csv
282
- - test/data/multi_cell_call.csv
283
- - test/data/multi_cell_call_no_dup.csv
284
- - test/data/multicolumn.csv
285
- - test/data/rat_feature_dataset.csv
286
- - test/data/wrong_dataset.csv
287
- - test/dataset-long.rb
288
- - test/dataset.rb
289
- - test/default_environment.rb
290
- - test/descriptor.rb
291
- - test/error.rb
292
- - test/experiment.rb
293
- - test/feature.rb
294
- - test/lazar-long.rb
295
- - test/lazar-physchem-short.rb
296
- - test/prediction_models.rb
297
- - test/regression.rb
298
- - test/setup.rb
299
- - test/test_environment.rb
300
- - test/validation.rb
229
+ test_files: []
@@ -1,41 +0,0 @@
1
- require_relative "setup.rb"
2
-
3
- class LazarClassificationTest < MiniTest::Test
4
-
5
- def test_lazar_classification
6
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
7
- model = Model::LazarClassification.create training_dataset
8
-
9
- [ {
10
- :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
11
- :prediction => "false",
12
- :confidence => 0.25281385281385277,
13
- :nr_neighbors => 11
14
- },{
15
- :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
16
- :prediction => "false",
17
- :confidence => 0.3639589577089577,
18
- :nr_neighbors => 14
19
- } ].each do |example|
20
- prediction = model.predict example[:compound]
21
- assert_equal example[:prediction], prediction[:value]
22
- #assert_equal example[:confidence], prediction[:confidence]
23
- #assert_equal example[:nr_neighbors], prediction[:neighbors].size
24
- end
25
-
26
- compound = Compound.from_smiles "CCO"
27
- prediction = model.predict compound
28
- assert_equal ["false"], prediction[:database_activities]
29
- assert_equal "true", prediction[:value]
30
-
31
- # make a dataset prediction
32
- compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
33
- prediction = model.predict compound_dataset
34
- assert_equal compound_dataset.compounds, prediction.compounds
35
-
36
- assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
37
- assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
38
- # cleanup
39
- [training_dataset,model,compound_dataset].each{|o| o.delete}
40
- end
41
- end