lazar 0.0.6 → 0.0.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/VERSION +1 -1
- data/lib/crossvalidation.rb +14 -3
- data/lib/dataset.rb +1 -1
- data/lib/lazar.rb +1 -1
- data/lib/{lazar-model.rb → model.rb} +10 -3
- data/lib/regression.rb +24 -0
- data/test/lazar-physchem-short.rb +1 -0
- data/test/lazar-regression.rb +27 -0
- data/test/validation.rb +3 -2
- metadata +5 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 56f51ab78b66037e55ff41d7515b0c4bc3876481
|
4
|
+
data.tar.gz: 893b5f4827406df36ff6abc186767889e4b2cb6c
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b0d402841c42990b7d2a3d8efcbb9c3c7e1839939ad61774a906d289d5a0c7a33277833827175eb006d922f13da24d7c489aaba5e9c25b967dc6ea18964d9333
|
7
|
+
data.tar.gz: 2242413832ffe15e2ec4bcbb8bf33a0fe126e365d163fe55c804bcd6dc3741ae6f0058dd3c39b7a70121a82e81586b190787dcce96fc504bc1e5aae32af3ec10
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.6
|
1
|
+
0.0.7
|
data/lib/crossvalidation.rb
CHANGED
@@ -2,11 +2,16 @@ module OpenTox
|
|
2
2
|
|
3
3
|
class CrossValidation
|
4
4
|
field :validation_ids, type: Array, default: []
|
5
|
+
field :model_id, type: BSON::ObjectId
|
5
6
|
field :folds, type: Integer
|
6
7
|
field :nr_instances, type: Integer
|
7
8
|
field :nr_unpredicted, type: Integer
|
8
9
|
field :predictions, type: Array
|
9
10
|
field :finished_at, type: Time
|
11
|
+
|
12
|
+
def time
|
13
|
+
finished_at - created_at
|
14
|
+
end
|
10
15
|
end
|
11
16
|
|
12
17
|
class ClassificationCrossValidation < CrossValidation
|
@@ -22,6 +27,7 @@ module OpenTox
|
|
22
27
|
|
23
28
|
def self.create model, n=10
|
24
29
|
cv = self.new
|
30
|
+
cv.save # set created_at
|
25
31
|
validation_ids = []
|
26
32
|
nr_instances = 0
|
27
33
|
nr_unpredicted = 0
|
@@ -64,6 +70,10 @@ module OpenTox
|
|
64
70
|
end
|
65
71
|
end
|
66
72
|
cv.update_attributes(
|
73
|
+
name: model.name,
|
74
|
+
model_id: model.id,
|
75
|
+
folds: n,
|
76
|
+
validation_ids: validation_ids,
|
67
77
|
nr_instances: nr_instances,
|
68
78
|
nr_unpredicted: nr_unpredicted,
|
69
79
|
accept_values: accept_values,
|
@@ -85,10 +95,8 @@ module OpenTox
|
|
85
95
|
#F measure carcinogen: 0.769, noncarcinogen: 0.348
|
86
96
|
end
|
87
97
|
|
88
|
-
class RegressionCrossValidation <
|
98
|
+
class RegressionCrossValidation < CrossValidation
|
89
99
|
|
90
|
-
field :validation_ids, type: Array, default: []
|
91
|
-
field :folds, type: Integer
|
92
100
|
field :rmse, type: Float
|
93
101
|
field :mae, type: Float
|
94
102
|
field :weighted_rmse, type: Float
|
@@ -96,6 +104,7 @@ module OpenTox
|
|
96
104
|
|
97
105
|
def self.create model, n=10
|
98
106
|
cv = self.new
|
107
|
+
cv.save # set created_at
|
99
108
|
validation_ids = []
|
100
109
|
nr_instances = 0
|
101
110
|
nr_unpredicted = 0
|
@@ -145,6 +154,8 @@ module OpenTox
|
|
145
154
|
rmse = Math.sqrt(rmse/n)
|
146
155
|
weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
|
147
156
|
cv.update_attributes(
|
157
|
+
name: model.name,
|
158
|
+
model_id: model.id,
|
148
159
|
folds: n,
|
149
160
|
validation_ids: validation_ids,
|
150
161
|
nr_instances: nr_instances,
|
data/lib/dataset.rb
CHANGED
@@ -49,7 +49,7 @@ module OpenTox
|
|
49
49
|
@data_entries = Marshal.load(data_entry_file.data)
|
50
50
|
bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
|
51
51
|
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
|
52
|
-
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries
|
52
|
+
bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
|
53
53
|
$logger.debug "Retrieving data: #{Time.now-t}"
|
54
54
|
end
|
55
55
|
end
|
data/lib/lazar.rb
CHANGED
@@ -8,7 +8,7 @@ module OpenTox
|
|
8
8
|
include Mongoid::Timestamps
|
9
9
|
store_in collection: "models"
|
10
10
|
|
11
|
-
field :title, type: String
|
11
|
+
field :title, as: :name, type: String
|
12
12
|
field :creator, type: String, default: __FILE__
|
13
13
|
# datasets
|
14
14
|
field :training_dataset_id, type: BSON::ObjectId
|
@@ -156,9 +156,16 @@ module OpenTox
|
|
156
156
|
|
157
157
|
end
|
158
158
|
|
159
|
-
class PredictionModel
|
160
|
-
|
159
|
+
class PredictionModel
|
160
|
+
include OpenTox
|
161
|
+
include Mongoid::Document
|
162
|
+
include Mongoid::Timestamps
|
163
|
+
store_in collection: "models"
|
164
|
+
|
165
|
+
# TODO field Validations
|
161
166
|
field :endpoint, type: String
|
167
|
+
field :species, type: String
|
168
|
+
field :source, type: String
|
162
169
|
field :unit, type: String
|
163
170
|
field :model_id, type: BSON::ObjectId
|
164
171
|
field :crossvalidation_id, type: BSON::ObjectId
|
data/lib/regression.rb
CHANGED
@@ -34,6 +34,30 @@ module OpenTox
|
|
34
34
|
{:value => prediction,:confidence => confidence}
|
35
35
|
end
|
36
36
|
|
37
|
+
def self.weighted_average_with_relevant_fingerprints neighbors
|
38
|
+
weighted_sum = 0.0
|
39
|
+
sim_sum = 0.0
|
40
|
+
fingerprint_features = []
|
41
|
+
neighbors.each do |row|
|
42
|
+
n,sim,acts = row
|
43
|
+
neighbor = Compound.find n
|
44
|
+
fingerprint_features += neighbor.fp4
|
45
|
+
end
|
46
|
+
fingerprint_features.uniq!
|
47
|
+
p fingerprint_features
|
48
|
+
=begin
|
49
|
+
p n
|
50
|
+
acts.each do |act|
|
51
|
+
weighted_sum += sim*Math.log10(act)
|
52
|
+
sim_sum += sim
|
53
|
+
end
|
54
|
+
end
|
55
|
+
=end
|
56
|
+
confidence = sim_sum/neighbors.size.to_f
|
57
|
+
sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
|
58
|
+
{:value => prediction,:confidence => confidence}
|
59
|
+
end
|
60
|
+
|
37
61
|
# Local support vector regression from neighbors
|
38
62
|
# @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
|
39
63
|
# @return [Numeric] A prediction value.
|
data/test/lazar-physchem-short.rb
CHANGED
@@ -2,6 +2,7 @@ require_relative "setup.rb"
|
|
2
2
|
|
3
3
|
class LazarPhyschemDescriptorTest < MiniTest::Test
|
4
4
|
def test_epafhm
|
5
|
+
skip "Physchem Regression not yet implemented."
|
5
6
|
# check available descriptors
|
6
7
|
@descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
|
7
8
|
assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
|
data/test/lazar-regression.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require_relative "setup.rb"
|
2
|
+
|
3
|
+
class LazarRegressionTest < MiniTest::Test
|
4
|
+
|
5
|
+
def test_weighted_average
|
6
|
+
training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
|
7
|
+
model = Model::LazarRegression.create training_dataset
|
8
|
+
compound = Compound.from_smiles "CC(C)(C)CN"
|
9
|
+
prediction = model.predict compound
|
10
|
+
assert_equal 13.6, prediction[:value].round(1)
|
11
|
+
assert_equal 0.83, prediction[:confidence].round(2)
|
12
|
+
assert_equal 1, prediction[:neighbors].size
|
13
|
+
end
|
14
|
+
|
15
|
+
def test_weighted_average_with_relevant_fingerprints
|
16
|
+
training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
|
17
|
+
model = Model::LazarRegression.create training_dataset
|
18
|
+
model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints")
|
19
|
+
compound = Compound.from_smiles "CC(C)(C)CN"
|
20
|
+
prediction = model.predict compound
|
21
|
+
p prediction
|
22
|
+
#assert_equal 13.6, prediction[:value].round(1)
|
23
|
+
#assert_equal 0.83, prediction[:confidence].round(2)
|
24
|
+
#assert_equal 1, prediction[:neighbors].size
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
data/test/validation.rb
CHANGED
@@ -4,10 +4,11 @@ class ValidationTest < MiniTest::Test
|
|
4
4
|
|
5
5
|
def test_fminer_crossvalidation
|
6
6
|
dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
|
7
|
-
model = Model::LazarFminerClassification.create dataset
|
7
|
+
model = Model::LazarFminerClassification.create dataset
|
8
8
|
cv = ClassificationCrossValidation.create model
|
9
9
|
p cv.accuracy
|
10
10
|
p cv.weighted_accuracy
|
11
|
+
refute_empty cv.validation_ids
|
11
12
|
assert cv.accuracy > 0.8
|
12
13
|
assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
|
13
14
|
end
|
@@ -31,7 +32,7 @@ class ValidationTest < MiniTest::Test
|
|
31
32
|
p cv.weighted_rmse
|
32
33
|
p cv.mae
|
33
34
|
p cv.weighted_mae
|
34
|
-
|
35
|
+
#`inkview #{cv.plot}`
|
35
36
|
assert cv.rmse < 30, "RMSE > 30"
|
36
37
|
assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
|
37
38
|
assert cv.mae < 12
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: lazar
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.6
|
4
|
+
version: 0.0.7
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2015-08-
|
12
|
+
date: 2015-08-20 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -122,8 +122,8 @@ files:
|
|
122
122
|
- lib/descriptor.rb
|
123
123
|
- lib/error.rb
|
124
124
|
- lib/feature.rb
|
125
|
-
- lib/lazar-model.rb
|
126
125
|
- lib/lazar.rb
|
126
|
+
- lib/model.rb
|
127
127
|
- lib/neighbor.rb
|
128
128
|
- lib/opentox.rb
|
129
129
|
- lib/overwrite.rb
|
@@ -184,6 +184,7 @@ files:
|
|
184
184
|
- test/lazar-fminer.rb
|
185
185
|
- test/lazar-long.rb
|
186
186
|
- test/lazar-physchem-short.rb
|
187
|
+
- test/lazar-regression.rb
|
187
188
|
- test/setup.rb
|
188
189
|
- test/validation.rb
|
189
190
|
homepage: http://github.com/opentox/lazar
|
@@ -262,5 +263,6 @@ test_files:
|
|
262
263
|
- test/lazar-fminer.rb
|
263
264
|
- test/lazar-long.rb
|
264
265
|
- test/lazar-physchem-short.rb
|
266
|
+
- test/lazar-regression.rb
|
265
267
|
- test/setup.rb
|
266
268
|
- test/validation.rb
|