lazar 0.0.6 → 0.0.7
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/crossvalidation.rb +14 -3
- data/lib/dataset.rb +1 -1
- data/lib/lazar.rb +1 -1
- data/lib/{lazar-model.rb → model.rb} +10 -3
- data/lib/regression.rb +24 -0
- data/test/lazar-physchem-short.rb +1 -0
- data/test/lazar-regression.rb +27 -0
- data/test/validation.rb +3 -2
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56f51ab78b66037e55ff41d7515b0c4bc3876481
|
4
|
+
data.tar.gz: 893b5f4827406df36ff6abc186767889e4b2cb6c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0d402841c42990b7d2a3d8efcbb9c3c7e1839939ad61774a906d289d5a0c7a33277833827175eb006d922f13da24d7c489aaba5e9c25b967dc6ea18964d9333
|
7
|
+
data.tar.gz: 2242413832ffe15e2ec4bcbb8bf33a0fe126e365d163fe55c804bcd6dc3741ae6f0058dd3c39b7a70121a82e81586b190787dcce96fc504bc1e5aae32af3ec10
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
data/lib/crossvalidation.rb
CHANGED
@@ -2,11 +2,16 @@ module OpenTox
|
|
2
2
|
|
3
3
|
class CrossValidation
|
4
4
|
field :validation_ids, type: Array, default: []
|
5
|
+
field :model_id, type: BSON::ObjectId
|
5
6
|
field :folds, type: Integer
|
6
7
|
field :nr_instances, type: Integer
|
7
8
|
field :nr_unpredicted, type: Integer
|
8
9
|
field :predictions, type: Array
|
9
10
|
field :finished_at, type: Time
|
11
|
+
|
12
|
+
def time
|
13
|
+
finished_at - created_at
|
14
|
+
end
|
10
15
|
end
|
11
16
|
|
12
17
|
class ClassificationCrossValidation < CrossValidation
|
@@ -22,6 +27,7 @@ module OpenTox
|
|
22
27
|
|
23
28
|
def self.create model, n=10
|
24
29
|
cv = self.new
|
30
|
+
cv.save # set created_at
|
25
31
|
validation_ids = []
|
26
32
|
nr_instances = 0
|
27
33
|
nr_unpredicted = 0
|
@@ -64,6 +70,10 @@ module OpenTox
|
|
64
70
|
end
|
65
71
|
end
|
66
72
|
cv.update_attributes(
|
73
|
+
name: model.name,
|
74
|
+
model_id: model.id,
|
75
|
+
folds: n,
|
76
|
+
validation_ids: validation_ids,
|
67
77
|
nr_instances: nr_instances,
|
68
78
|
nr_unpredicted: nr_unpredicted,
|
69
79
|
accept_values: accept_values,
|
@@ -85,10 +95,8 @@ module OpenTox
|
|
85
95
|
#F measure carcinogen: 0.769, noncarcinogen: 0.348
|
86
96
|
end
|
87
97
|
|
88
|
-
class RegressionCrossValidation <
|
98
|
+
class RegressionCrossValidation < CrossValidation
|
89
99
|
|
90
|
-
field :validation_ids, type: Array, default: []
|
91
|
-
field :folds, type: Integer
|
92
100
|
field :rmse, type: Float
|
93
101
|
field :mae, type: Float
|
94
102
|
field :weighted_rmse, type: Float
|
@@ -96,6 +104,7 @@ module OpenTox
|
|
96
104
|
|
97
105
|
def self.create model, n=10
|
98
106
|
cv = self.new
|
107
|
+
cv.save # set created_at
|
99
108
|
validation_ids = []
|
100
109
|
nr_instances = 0
|
101
110
|
nr_unpredicted = 0
|
@@ -145,6 +154,8 @@ module OpenTox
|
|
145
154
|
rmse = Math.sqrt(rmse/n)
|
146
155
|
weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
|
147
156
|
cv.update_attributes(
|
157
|
+
name: model.name,
|
158
|
+
model_id: model.id,
|
148
159
|
folds: n,
|
149
160
|
validation_ids: validation_ids,
|
150
161
|
nr_instances: nr_instances,
|
data/lib/dataset.rb
CHANGED
@@ -49,7 +49,7 @@ module OpenTox
|
|
49
49
|
@data_entries = Marshal.load(data_entry_file.data)
|
50
50
|
bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
|
51
51
|
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
|
52
|
-
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries
|
52
|
+
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
|
53
53
|
$logger.debug "Retrieving data: #{Time.now-t}"
|
54
54
|
end
|
55
55
|
end
|
data/lib/lazar.rb
CHANGED
@@ -8,7 +8,7 @@ module OpenTox
|
|
8
8
|
include Mongoid::Timestamps
|
9
9
|
store_in collection: "models"
|
10
10
|
|
11
|
-
field :title, type: String
|
11
|
+
field :title, as: :name, type: String
|
12
12
|
field :creator, type: String, default: __FILE__
|
13
13
|
# datasets
|
14
14
|
field :training_dataset_id, type: BSON::ObjectId
|
@@ -156,9 +156,16 @@ module OpenTox
|
|
156
156
|
|
157
157
|
end
|
158
158
|
|
159
|
-
class PredictionModel
|
160
|
-
|
159
|
+
class PredictionModel
|
160
|
+
include OpenTox
|
161
|
+
include Mongoid::Document
|
162
|
+
include Mongoid::Timestamps
|
163
|
+
store_in collection: "models"
|
164
|
+
|
165
|
+
# TODO field Validations
|
161
166
|
field :endpoint, type: String
|
167
|
+
field :species, type: String
|
168
|
+
field :source, type: String
|
162
169
|
field :unit, type: String
|
163
170
|
field :model_id, type: BSON::ObjectId
|
164
171
|
field :crossvalidation_id, type: BSON::ObjectId
|
data/lib/regression.rb
CHANGED
@@ -34,6 +34,30 @@ module OpenTox
|
|
34
34
|
{:value => prediction,:confidence => confidence}
|
35
35
|
end
|
36
36
|
|
37
|
+
def self.weighted_average_with_relevant_fingerprints neighbors
|
38
|
+
weighted_sum = 0.0
|
39
|
+
sim_sum = 0.0
|
40
|
+
fingerprint_features = []
|
41
|
+
neighbors.each do |row|
|
42
|
+
n,sim,acts = row
|
43
|
+
neighbor = Compound.find n
|
44
|
+
fingerprint_features += neighbor.fp4
|
45
|
+
end
|
46
|
+
fingerprint_features.uniq!
|
47
|
+
p fingerprint_features
|
48
|
+
=begin
|
49
|
+
p n
|
50
|
+
acts.each do |act|
|
51
|
+
weighted_sum += sim*Math.log10(act)
|
52
|
+
sim_sum += sim
|
53
|
+
end
|
54
|
+
end
|
55
|
+
=end
|
56
|
+
confidence = sim_sum/neighbors.size.to_f
|
57
|
+
sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
|
58
|
+
{:value => prediction,:confidence => confidence}
|
59
|
+
end
|
60
|
+
|
37
61
|
# Local support vector regression from neighbors
|
38
62
|
# @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
|
39
63
|
# @return [Numeric] A prediction value.
|
@@ -2,6 +2,7 @@ require_relative "setup.rb"
|
|
2
2
|
|
3
3
|
class LazarPhyschemDescriptorTest < MiniTest::Test
|
4
4
|
def test_epafhm
|
5
|
+
skip "Physchem Regression not yet implemented."
|
5
6
|
# check available descriptors
|
6
7
|
@descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
|
7
8
|
assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
|
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class LazarRegressionTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_weighted_average
|
6
|
+
training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
|
7
|
+
model = Model::LazarRegression.create training_dataset
|
8
|
+
compound = Compound.from_smiles "CC(C)(C)CN"
|
9
|
+
prediction = model.predict compound
|
10
|
+
assert_equal 13.6, prediction[:value].round(1)
|
11
|
+
assert_equal 0.83, prediction[:confidence].round(2)
|
12
|
+
assert_equal 1, prediction[:neighbors].size
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_weighted_average_with_relevant_fingerprints
|
16
|
+
training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
|
17
|
+
model = Model::LazarRegression.create training_dataset
|
18
|
+
model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints")
|
19
|
+
compound = Compound.from_smiles "CC(C)(C)CN"
|
20
|
+
prediction = model.predict compound
|
21
|
+
p prediction
|
22
|
+
#assert_equal 13.6, prediction[:value].round(1)
|
23
|
+
#assert_equal 0.83, prediction[:confidence].round(2)
|
24
|
+
#assert_equal 1, prediction[:neighbors].size
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
data/test/validation.rb
CHANGED
@@ -4,10 +4,11 @@ class ValidationTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_fminer_crossvalidation
|
6
6
|
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
|
7
|
-
model = Model::LazarFminerClassification.create dataset
|
7
|
+
model = Model::LazarFminerClassification.create dataset
|
8
8
|
cv = ClassificationCrossValidation.create model
|
9
9
|
p cv.accuracy
|
10
10
|
p cv.weighted_accuracy
|
11
|
+
refute_empty cv.validation_ids
|
11
12
|
assert cv.accuracy > 0.8
|
12
13
|
assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
|
13
14
|
end
|
@@ -31,7 +32,7 @@ class ValidationTest < MiniTest::Test
|
|
31
32
|
p cv.weighted_rmse
|
32
33
|
p cv.mae
|
33
34
|
p cv.weighted_mae
|
34
|
-
|
35
|
+
#`inkview #{cv.plot}`
|
35
36
|
assert cv.rmse < 30, "RMSE > 30"
|
36
37
|
assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
|
37
38
|
assert cv.mae < 12
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lazar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-08-
|
12
|
+
date: 2015-08-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -122,8 +122,8 @@ files:
|
|
122
122
|
- lib/descriptor.rb
|
123
123
|
- lib/error.rb
|
124
124
|
- lib/feature.rb
|
125
|
-
- lib/lazar-model.rb
|
126
125
|
- lib/lazar.rb
|
126
|
+
- lib/model.rb
|
127
127
|
- lib/neighbor.rb
|
128
128
|
- lib/opentox.rb
|
129
129
|
- lib/overwrite.rb
|
@@ -184,6 +184,7 @@ files:
|
|
184
184
|
- test/lazar-fminer.rb
|
185
185
|
- test/lazar-long.rb
|
186
186
|
- test/lazar-physchem-short.rb
|
187
|
+
- test/lazar-regression.rb
|
187
188
|
- test/setup.rb
|
188
189
|
- test/validation.rb
|
189
190
|
homepage: http://github.com/opentox/lazar
|
@@ -262,5 +263,6 @@ test_files:
|
|
262
263
|
- test/lazar-fminer.rb
|
263
264
|
- test/lazar-long.rb
|
264
265
|
- test/lazar-physchem-short.rb
|
266
|
+
- test/lazar-regression.rb
|
265
267
|
- test/setup.rb
|
266
268
|
- test/validation.rb
|