lazar 0.9.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (88) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -4
  3. data/README.md +5 -15
  4. data/VERSION +1 -1
  5. data/ext/lazar/extconf.rb +1 -1
  6. data/ext/lazar/rinstall.R +9 -7
  7. data/java/CdkDescriptorInfo.class +0 -0
  8. data/java/CdkDescriptorInfo.java +3 -2
  9. data/java/CdkDescriptors.class +0 -0
  10. data/java/CdkDescriptors.java +28 -28
  11. data/java/Rakefile +3 -3
  12. data/java/{cdk-1.4.19.jar → cdk-2.0-SNAPSHOT.jar} +0 -0
  13. data/lazar.gemspec +6 -7
  14. data/lib/algorithm.rb +2 -11
  15. data/lib/caret.rb +96 -0
  16. data/lib/classification.rb +14 -22
  17. data/lib/compound.rb +21 -87
  18. data/lib/crossvalidation.rb +80 -279
  19. data/lib/dataset.rb +105 -174
  20. data/lib/feature.rb +11 -18
  21. data/lib/feature_selection.rb +42 -0
  22. data/lib/import.rb +122 -0
  23. data/lib/lazar.rb +14 -4
  24. data/lib/leave-one-out-validation.rb +46 -192
  25. data/lib/model.rb +319 -128
  26. data/lib/nanoparticle.rb +98 -0
  27. data/lib/opentox.rb +7 -4
  28. data/lib/overwrite.rb +24 -3
  29. data/lib/physchem.rb +11 -10
  30. data/lib/regression.rb +7 -137
  31. data/lib/rest-client-wrapper.rb +0 -6
  32. data/lib/similarity.rb +65 -0
  33. data/lib/substance.rb +8 -0
  34. data/lib/train-test-validation.rb +69 -0
  35. data/lib/validation-statistics.rb +223 -0
  36. data/lib/validation.rb +17 -100
  37. data/scripts/mg2mmol.rb +17 -0
  38. data/scripts/mirror-enm2test.rb +4 -0
  39. data/scripts/mmol2-log10.rb +32 -0
  40. data/test/compound.rb +4 -94
  41. data/test/data/EPAFHM.medi_log10.csv +92 -0
  42. data/test/data/EPAFHM.mini_log10.csv +16 -0
  43. data/test/data/EPAFHM_log10.csv +581 -0
  44. data/test/data/loael_log10.csv +568 -0
  45. data/test/dataset.rb +195 -133
  46. data/test/descriptor.rb +27 -18
  47. data/test/error.rb +2 -2
  48. data/test/experiment.rb +4 -4
  49. data/test/feature.rb +2 -3
  50. data/test/gridfs.rb +10 -0
  51. data/test/model-classification.rb +106 -0
  52. data/test/model-nanoparticle.rb +128 -0
  53. data/test/model-regression.rb +171 -0
  54. data/test/model-validation.rb +19 -0
  55. data/test/nanomaterial-model-validation.rb +55 -0
  56. data/test/setup.rb +8 -4
  57. data/test/validation-classification.rb +67 -0
  58. data/test/validation-nanoparticle.rb +133 -0
  59. data/test/validation-regression.rb +92 -0
  60. metadata +50 -121
  61. data/test/classification.rb +0 -41
  62. data/test/data/CPDBAS_v5c_1547_29Apr2008part.sdf +0 -13553
  63. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv +0 -436
  64. data/test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv +0 -568
  65. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv +0 -87
  66. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv +0 -978
  67. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv +0 -1120
  68. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv +0 -1113
  69. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv +0 -850
  70. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv +0 -829
  71. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv +0 -1198
  72. data/test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv +0 -1505
  73. data/test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv +0 -581
  74. data/test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv +0 -1217
  75. data/test/data/LOAEL_log_mg_corrected_smiles.csv +0 -568
  76. data/test/data/LOAEL_log_mmol_corrected_smiles.csv +0 -568
  77. data/test/data/boiling_points.ext.sdf +0 -11460
  78. data/test/data/cpdb_100.csv +0 -101
  79. data/test/data/hamster_carcinogenicity.ntriples +0 -618
  80. data/test/data/hamster_carcinogenicity.sdf +0 -2805
  81. data/test/data/hamster_carcinogenicity.xls +0 -0
  82. data/test/data/hamster_carcinogenicity.yaml +0 -352
  83. data/test/dataset-long.rb +0 -114
  84. data/test/lazar-long.rb +0 -92
  85. data/test/lazar-physchem-short.rb +0 -31
  86. data/test/prediction_models.rb +0 -20
  87. data/test/regression.rb +0 -43
  88. data/test/validation.rb +0 -108
@@ -0,0 +1,133 @@
1
+ require_relative "setup.rb"
2
+
3
+ class NanoparticleValidationTest < MiniTest::Test
4
+ include OpenTox::Validation
5
+
6
+ def setup
7
+ @training_dataset = Dataset.where(:name => "Protein Corona Fingerprinting Predicts the Cellular Interaction of Gold and Silver Nanoparticles").first
8
+ @prediction_feature = @training_dataset.features.select{|f| f["name"] == 'log2(Net cell association)'}.first
9
+ end
10
+
11
+ def test_validate_default_nanoparticle_model
12
+ model = Model::Lazar.create training_dataset: @training_dataset, prediction_feature: @prediction_feature
13
+ cv = CrossValidation.create model
14
+ p cv.id
15
+ #File.open("tmp.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
16
+ refute_nil cv.r_squared
17
+ refute_nil cv.rmse
18
+ end
19
+
20
+ def test_validate_pls_pchem_model
21
+ algorithms = {
22
+ :descriptors => {
23
+ :method => "properties",
24
+ :categories => ["P-CHEM"]
25
+ },
26
+ :prediction => {:method => 'Algorithm::Caret.pls' },
27
+ :feature_selection => {
28
+ :method => "Algorithm::FeatureSelection.correlation_filter",
29
+ },
30
+ }
31
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
32
+ assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
33
+ cv = CrossValidation.create model
34
+ p cv.id
35
+ #File.open("tmp2.pdf","w+"){|f| f.puts cv.correlation_plot format:"pdf"}
36
+ refute_nil cv.r_squared
37
+ refute_nil cv.rmse
38
+ end
39
+
40
+ =begin
41
+ def test_validate_proteomics_pls_pchem_model
42
+ algorithms = {
43
+ :descriptors => {
44
+ :method => "properties",
45
+ :categories => ["Proteomics"]
46
+ },
47
+ :prediction => {:method => 'Algorithm::Caret.pls' },
48
+ :feature_selection => {
49
+ :method => "Algorithm::FeatureSelection.correlation_filter",
50
+ },
51
+ }
52
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
53
+ assert_equal "Algorithm::Caret.pls", model.algorithms[:prediction][:method]
54
+ cv = CrossValidation.create model
55
+ refute_nil cv.r_squared
56
+ refute_nil cv.rmse
57
+ end
58
+ =end
59
+
60
+ def test_validate_proteomics_pchem_default_model
61
+ algorithms = {
62
+ :descriptors => {
63
+ :method => "properties",
64
+ :categories => ["Proteomics","P-CHEM"]
65
+ },
66
+ :feature_selection => {
67
+ :method => "Algorithm::FeatureSelection.correlation_filter",
68
+ },
69
+ }
70
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
71
+ cv = CrossValidation.create model
72
+ refute_nil cv.r_squared
73
+ refute_nil cv.rmse
74
+ end
75
+
76
+ def test_nanoparticle_fingerprint_model_without_feature_selection
77
+ algorithms = {
78
+ :descriptors => {
79
+ :method => "fingerprint",
80
+ :type => "MP2D",
81
+ },
82
+ :similarity => {
83
+ :method => "Algorithm::Similarity.tanimoto",
84
+ :min => 0.1
85
+ },
86
+ :feature_selection => nil
87
+ }
88
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
89
+ cv = CrossValidation.create model
90
+ refute_nil cv.r_squared
91
+ refute_nil cv.rmse
92
+ end
93
+
94
+ def test_nanoparticle_fingerprint_weighted_average_model_without_feature_selection
95
+ algorithms = {
96
+ :descriptors => {
97
+ :method => "fingerprint",
98
+ :type => "MP2D",
99
+ },
100
+ :similarity => {
101
+ :method => "Algorithm::Similarity.tanimoto",
102
+ :min => 0.1
103
+ },
104
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
105
+ :feature_selection => nil
106
+ }
107
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
108
+ cv = CrossValidation.create model
109
+ refute_nil cv.r_squared
110
+ refute_nil cv.rmse
111
+ end
112
+
113
+ def test_nanoparticle_fingerprint_model_with_feature_selection
114
+ algorithms = {
115
+ :descriptors => {
116
+ :method => "fingerprint",
117
+ :type => "MP2D",
118
+ },
119
+ :similarity => {
120
+ :method => "Algorithm::Similarity.tanimoto",
121
+ :min => 0.1
122
+ },
123
+ :feature_selection => {
124
+ :method => "Algorithm::FeatureSelection.correlation_filter",
125
+ },
126
+ }
127
+ model = Model::Lazar.create prediction_feature: @prediction_feature, training_dataset: @training_dataset, algorithms: algorithms
128
+ cv = CrossValidation.create model
129
+ refute_nil cv.r_squared
130
+ refute_nil cv.rmse
131
+ end
132
+
133
+ end
@@ -0,0 +1,92 @@
1
+ require_relative "setup.rb"
2
+
3
+ class ValidationRegressionTest < MiniTest::Test
4
+ include OpenTox::Validation
5
+
6
+ # defaults
7
+
8
+ def test_default_regression_crossvalidation
9
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
10
+ model = Model::Lazar.create training_dataset: dataset
11
+ cv = RegressionCrossValidation.create model
12
+ assert cv.rmse < 1.5, "RMSE #{cv.rmse} should be smaller than 1.5, this may occur due to unfavorable training/test set splits"
13
+ assert cv.mae < 1.1, "MAE #{cv.mae} should be smaller than 1.1, this may occur due to unfavorable training/test set splits"
14
+ assert cv.percent_within_prediction_interval > 80, "Only #{cv.percent_within_prediction_interval.round(2)}% of measurement within prediction interval. This may occur due to unfavorable training/test set splits"
15
+ end
16
+
17
+ # parameters
18
+
19
+ def test_regression_crossvalidation_params
20
+ dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi_log10.csv"
21
+ algorithms = {
22
+ :prediction => { :method => "OpenTox::Algorithm::Regression.weighted_average" },
23
+ :descriptors => { :type => "MACCS", },
24
+ :similarity => {:min => 0.7}
25
+ }
26
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
27
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
28
+ cv = RegressionCrossValidation.create model
29
+ cv.validation_ids.each do |vid|
30
+ model = Model::Lazar.find(Validation.find(vid).model_id)
31
+ assert_equal algorithms[:descriptors][:type], model.algorithms[:descriptors][:type]
32
+ assert_equal algorithms[:similarity][:min], model.algorithms[:similarity][:min]
33
+ refute_nil model.training_dataset_id
34
+ refute_equal dataset.id, model.training_dataset_id
35
+ end
36
+
37
+ refute_nil cv.rmse
38
+ refute_nil cv.mae
39
+ end
40
+
41
+ def test_physchem_regression_crossvalidation
42
+ training_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
43
+ model = Model::Lazar.create training_dataset:training_dataset
44
+ cv = RegressionCrossValidation.create model
45
+ refute_nil cv.rmse
46
+ refute_nil cv.mae
47
+ end
48
+
49
+ # LOO
50
+
51
+ def test_regression_loo_validation
52
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
53
+ model = Model::Lazar.create training_dataset: dataset
54
+ loo = RegressionLeaveOneOut.create model
55
+ assert loo.r_squared > 0.34, "R^2 (#{loo.r_squared}) should be larger than 0.034"
56
+ end
57
+
58
+ def test_regression_loo_validation_with_feature_selection
59
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
60
+ algorithms = {
61
+ :descriptors => {
62
+ :method => "calculate_properties",
63
+ :features => PhysChem.openbabel_descriptors,
64
+ },
65
+ :similarity => {
66
+ :method => "Algorithm::Similarity.weighted_cosine",
67
+ :min => 0.5
68
+ },
69
+ :feature_selection => {
70
+ :method => "Algorithm::FeatureSelection.correlation_filter",
71
+ },
72
+ }
73
+ model = Model::Lazar.create training_dataset: dataset, algorithms: algorithms
74
+ assert_raises OpenTox::BadRequestError do
75
+ loo = RegressionLeaveOneOut.create model
76
+ end
77
+ end
78
+
79
+ # repeated CV
80
+
81
+ def test_repeated_crossvalidation
82
+ dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.medi_log10.csv")
83
+ model = Model::Lazar.create training_dataset: dataset
84
+ repeated_cv = RepeatedCrossValidation.create model
85
+ repeated_cv.crossvalidations.each do |cv|
86
+ #assert cv.r_squared > 0.34, "R^2 (#{cv.r_squared}) should be larger than 0.034"
87
+ #assert_operator cv.accuracy, :>, 0.7, "model accuracy < 0.7, this may happen by chance due to an unfavorable training/test set split"
88
+ end
89
+ File.open("tmp.png","w+"){|f| f.puts repeated_cv.correlation_plot}
90
+ end
91
+
92
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lazar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.9.3
4
+ version: 1.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
@@ -9,98 +9,92 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2016-04-12 00:00:00.000000000 Z
12
+ date: 2016-12-21 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
16
16
  requirement: !ruby/object:Gem::Requirement
17
17
  requirements:
18
- - - "~>"
18
+ - - ">="
19
19
  - !ruby/object:Gem::Version
20
- version: '1.11'
20
+ version: '0'
21
21
  type: :runtime
22
22
  prerelease: false
23
23
  version_requirements: !ruby/object:Gem::Requirement
24
24
  requirements:
25
- - - "~>"
25
+ - - ">="
26
26
  - !ruby/object:Gem::Version
27
- version: '1.11'
27
+ version: '0'
28
28
  - !ruby/object:Gem::Dependency
29
29
  name: rest-client
30
30
  requirement: !ruby/object:Gem::Requirement
31
31
  requirements:
32
- - - "~>"
32
+ - - ">="
33
33
  - !ruby/object:Gem::Version
34
- version: '1.8'
34
+ version: '0'
35
35
  type: :runtime
36
36
  prerelease: false
37
37
  version_requirements: !ruby/object:Gem::Requirement
38
38
  requirements:
39
- - - "~>"
39
+ - - ">="
40
40
  - !ruby/object:Gem::Version
41
- version: '1.8'
41
+ version: '0'
42
42
  - !ruby/object:Gem::Dependency
43
43
  name: nokogiri
44
44
  requirement: !ruby/object:Gem::Requirement
45
45
  requirements:
46
- - - "~>"
46
+ - - ">="
47
47
  - !ruby/object:Gem::Version
48
- version: '1.6'
48
+ version: '0'
49
49
  type: :runtime
50
50
  prerelease: false
51
51
  version_requirements: !ruby/object:Gem::Requirement
52
52
  requirements:
53
- - - "~>"
53
+ - - ">="
54
54
  - !ruby/object:Gem::Version
55
- version: '1.6'
55
+ version: '0'
56
56
  - !ruby/object:Gem::Dependency
57
57
  name: rserve-client
58
58
  requirement: !ruby/object:Gem::Requirement
59
59
  requirements:
60
- - - "~>"
60
+ - - ">="
61
61
  - !ruby/object:Gem::Version
62
- version: '0.3'
62
+ version: '0'
63
63
  type: :runtime
64
64
  prerelease: false
65
65
  version_requirements: !ruby/object:Gem::Requirement
66
66
  requirements:
67
- - - "~>"
67
+ - - ">="
68
68
  - !ruby/object:Gem::Version
69
- version: '0.3'
69
+ version: '0'
70
70
  - !ruby/object:Gem::Dependency
71
71
  name: mongoid
72
72
  requirement: !ruby/object:Gem::Requirement
73
73
  requirements:
74
- - - "~>"
74
+ - - ">="
75
75
  - !ruby/object:Gem::Version
76
- version: '5.0'
76
+ version: '0'
77
77
  type: :runtime
78
78
  prerelease: false
79
79
  version_requirements: !ruby/object:Gem::Requirement
80
80
  requirements:
81
- - - "~>"
81
+ - - ">="
82
82
  - !ruby/object:Gem::Version
83
- version: '5.0'
83
+ version: '0'
84
84
  - !ruby/object:Gem::Dependency
85
85
  name: openbabel
86
86
  requirement: !ruby/object:Gem::Requirement
87
87
  requirements:
88
- - - "~>"
89
- - !ruby/object:Gem::Version
90
- version: '2.3'
91
88
  - - ">="
92
89
  - !ruby/object:Gem::Version
93
- version: 2.3.2.2
90
+ version: '0'
94
91
  type: :runtime
95
92
  prerelease: false
96
93
  version_requirements: !ruby/object:Gem::Requirement
97
94
  requirements:
98
- - - "~>"
99
- - !ruby/object:Gem::Version
100
- version: '2.3'
101
95
  - - ">="
102
96
  - !ruby/object:Gem::Version
103
- version: 2.3.2.2
97
+ version: '0'
104
98
  description: Libraries for lazy structure-activity relationships and read-across.
105
99
  email:
106
100
  - helma@in-silico.ch
@@ -129,11 +123,12 @@ files:
129
123
  - java/JoelibDescriptors.class
130
124
  - java/JoelibDescriptors.java
131
125
  - java/Rakefile
132
- - java/cdk-1.4.19.jar
126
+ - java/cdk-2.0-SNAPSHOT.jar
133
127
  - java/joelib2.jar
134
128
  - java/log4j.jar
135
129
  - lazar.gemspec
136
130
  - lib/algorithm.rb
131
+ - lib/caret.rb
137
132
  - lib/classification.rb
138
133
  - lib/compound.rb
139
134
  - lib/crossvalidation.rb
@@ -141,77 +136,72 @@ files:
141
136
  - lib/error.rb
142
137
  - lib/experiment.rb
143
138
  - lib/feature.rb
139
+ - lib/feature_selection.rb
140
+ - lib/import.rb
144
141
  - lib/lazar.rb
145
142
  - lib/leave-one-out-validation.rb
146
143
  - lib/model.rb
144
+ - lib/nanoparticle.rb
147
145
  - lib/opentox.rb
148
146
  - lib/overwrite.rb
149
147
  - lib/physchem.rb
150
148
  - lib/regression.rb
151
149
  - lib/rest-client-wrapper.rb
150
+ - lib/similarity.rb
151
+ - lib/substance.rb
152
+ - lib/train-test-validation.rb
152
153
  - lib/unique_descriptors.rb
154
+ - lib/validation-statistics.rb
153
155
  - lib/validation.rb
156
+ - scripts/mg2mmol.rb
157
+ - scripts/mirror-enm2test.rb
158
+ - scripts/mmol2-log10.rb
154
159
  - test/all.rb
155
- - test/classification.rb
156
160
  - test/compound.rb
157
- - test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
158
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
159
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
160
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
161
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
162
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
163
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
164
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
165
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
166
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
167
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
168
161
  - test/data/EPAFHM.csv
169
162
  - test/data/EPAFHM.medi.csv
163
+ - test/data/EPAFHM.medi_log10.csv
170
164
  - test/data/EPAFHM.mini.csv
171
- - test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
172
- - test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
165
+ - test/data/EPAFHM.mini_log10.csv
166
+ - test/data/EPAFHM_log10.csv
173
167
  - test/data/ISSCAN-multi.csv
174
- - test/data/LOAEL_log_mg_corrected_smiles.csv
175
- - test/data/LOAEL_log_mmol_corrected_smiles.csv
176
168
  - test/data/LOAEL_mmol_corrected_smiles.csv
177
169
  - test/data/acetaldehyde.sdf
178
170
  - test/data/batch_prediction.csv
179
171
  - test/data/batch_prediction_inchi_small.csv
180
172
  - test/data/batch_prediction_smiles_small.csv
181
- - test/data/boiling_points.ext.sdf
182
- - test/data/cpdb_100.csv
183
173
  - test/data/hamster_carcinogenicity.csv
184
174
  - test/data/hamster_carcinogenicity.json
185
175
  - test/data/hamster_carcinogenicity.mini.bool_float.csv
186
176
  - test/data/hamster_carcinogenicity.mini.bool_int.csv
187
177
  - test/data/hamster_carcinogenicity.mini.bool_string.csv
188
178
  - test/data/hamster_carcinogenicity.mini.csv
189
- - test/data/hamster_carcinogenicity.ntriples
190
- - test/data/hamster_carcinogenicity.sdf
191
- - test/data/hamster_carcinogenicity.xls
192
- - test/data/hamster_carcinogenicity.yaml
193
179
  - test/data/hamster_carcinogenicity_with_errors.csv
194
180
  - test/data/kazius.csv
195
181
  - test/data/loael.csv
182
+ - test/data/loael_log10.csv
196
183
  - test/data/multi_cell_call.csv
197
184
  - test/data/multi_cell_call_no_dup.csv
198
185
  - test/data/multicolumn.csv
199
186
  - test/data/rat_feature_dataset.csv
200
187
  - test/data/wrong_dataset.csv
201
- - test/dataset-long.rb
202
188
  - test/dataset.rb
203
189
  - test/default_environment.rb
204
190
  - test/descriptor.rb
205
191
  - test/error.rb
206
192
  - test/experiment.rb
207
193
  - test/feature.rb
208
- - test/lazar-long.rb
209
- - test/lazar-physchem-short.rb
210
- - test/prediction_models.rb
211
- - test/regression.rb
194
+ - test/gridfs.rb
195
+ - test/model-classification.rb
196
+ - test/model-nanoparticle.rb
197
+ - test/model-regression.rb
198
+ - test/model-validation.rb
199
+ - test/nanomaterial-model-validation.rb
212
200
  - test/setup.rb
213
201
  - test/test_environment.rb
214
- - test/validation.rb
202
+ - test/validation-classification.rb
203
+ - test/validation-nanoparticle.rb
204
+ - test/validation-regression.rb
215
205
  homepage: http://github.com/opentox/lazar
216
206
  licenses:
217
207
  - GPL-3.0
@@ -236,65 +226,4 @@ rubygems_version: 2.5.1
236
226
  signing_key:
237
227
  specification_version: 4
238
228
  summary: Lazar framework
239
- test_files:
240
- - test/all.rb
241
- - test/classification.rb
242
- - test/compound.rb
243
- - test/data/CPDBAS_v5c_1547_29Apr2008part.sdf
244
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_mouse_TD50.csv
245
- - test/data/CPDBAS_v5d_cleaned/CPDBAS_v5d_20Nov2008_rat_TD50.csv
246
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Hamster.csv
247
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mouse.csv
248
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall.csv
249
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_MultiCellCall_no_duplicates.csv
250
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity.csv
251
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Mutagenicity_no_duplicates.csv
252
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_Rat.csv
253
- - test/data/CPDBAS_v5d_cleaned/DSSTox_Carcinogenic_Potency_DBS_SingleCellCall.csv
254
- - test/data/EPAFHM.csv
255
- - test/data/EPAFHM.medi.csv
256
- - test/data/EPAFHM.mini.csv
257
- - test/data/EPA_v4b_Fathead_Minnow_Acute_Toxicity_LC50_mmol.csv
258
- - test/data/FDA_v3b_Maximum_Recommended_Daily_Dose_mmol.csv
259
- - test/data/ISSCAN-multi.csv
260
- - test/data/LOAEL_log_mg_corrected_smiles.csv
261
- - test/data/LOAEL_log_mmol_corrected_smiles.csv
262
- - test/data/LOAEL_mmol_corrected_smiles.csv
263
- - test/data/acetaldehyde.sdf
264
- - test/data/batch_prediction.csv
265
- - test/data/batch_prediction_inchi_small.csv
266
- - test/data/batch_prediction_smiles_small.csv
267
- - test/data/boiling_points.ext.sdf
268
- - test/data/cpdb_100.csv
269
- - test/data/hamster_carcinogenicity.csv
270
- - test/data/hamster_carcinogenicity.json
271
- - test/data/hamster_carcinogenicity.mini.bool_float.csv
272
- - test/data/hamster_carcinogenicity.mini.bool_int.csv
273
- - test/data/hamster_carcinogenicity.mini.bool_string.csv
274
- - test/data/hamster_carcinogenicity.mini.csv
275
- - test/data/hamster_carcinogenicity.ntriples
276
- - test/data/hamster_carcinogenicity.sdf
277
- - test/data/hamster_carcinogenicity.xls
278
- - test/data/hamster_carcinogenicity.yaml
279
- - test/data/hamster_carcinogenicity_with_errors.csv
280
- - test/data/kazius.csv
281
- - test/data/loael.csv
282
- - test/data/multi_cell_call.csv
283
- - test/data/multi_cell_call_no_dup.csv
284
- - test/data/multicolumn.csv
285
- - test/data/rat_feature_dataset.csv
286
- - test/data/wrong_dataset.csv
287
- - test/dataset-long.rb
288
- - test/dataset.rb
289
- - test/default_environment.rb
290
- - test/descriptor.rb
291
- - test/error.rb
292
- - test/experiment.rb
293
- - test/feature.rb
294
- - test/lazar-long.rb
295
- - test/lazar-physchem-short.rb
296
- - test/prediction_models.rb
297
- - test/regression.rb
298
- - test/setup.rb
299
- - test/test_environment.rb
300
- - test/validation.rb
229
+ test_files: []
@@ -1,41 +0,0 @@
1
- require_relative "setup.rb"
2
-
3
- class LazarClassificationTest < MiniTest::Test
4
-
5
- def test_lazar_classification
6
- training_dataset = Dataset.from_csv_file File.join(DATA_DIR,"hamster_carcinogenicity.csv")
7
- model = Model::LazarClassification.create training_dataset
8
-
9
- [ {
10
- :compound => OpenTox::Compound.from_inchi("InChI=1S/C6H6/c1-2-4-6-5-3-1/h1-6H"),
11
- :prediction => "false",
12
- :confidence => 0.25281385281385277,
13
- :nr_neighbors => 11
14
- },{
15
- :compound => OpenTox::Compound.from_smiles("c1ccccc1NN"),
16
- :prediction => "false",
17
- :confidence => 0.3639589577089577,
18
- :nr_neighbors => 14
19
- } ].each do |example|
20
- prediction = model.predict example[:compound]
21
- assert_equal example[:prediction], prediction[:value]
22
- #assert_equal example[:confidence], prediction[:confidence]
23
- #assert_equal example[:nr_neighbors], prediction[:neighbors].size
24
- end
25
-
26
- compound = Compound.from_smiles "CCO"
27
- prediction = model.predict compound
28
- assert_equal ["false"], prediction[:database_activities]
29
- assert_equal "true", prediction[:value]
30
-
31
- # make a dataset prediction
32
- compound_dataset = OpenTox::Dataset.from_csv_file File.join(DATA_DIR,"EPAFHM.mini.csv")
33
- prediction = model.predict compound_dataset
34
- assert_equal compound_dataset.compounds, prediction.compounds
35
-
36
- assert_equal "Could not find similar compounds with experimental data in the training dataset.", prediction.data_entries[7][3]
37
- assert_equal "1 compounds have been removed from neighbors, because they have the same structure as the query compound.", prediction.data_entries[14][3]
38
- # cleanup
39
- [training_dataset,model,compound_dataset].each{|o| o.delete}
40
- end
41
- end