lazar 0.0.6 → 0.0.7

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d12695cc30f60a81485f2db0be93a14074a9f4bf
4
- data.tar.gz: 226d76ec4b778d08022a8af3a7b16b12bbc62c9d
3
+ metadata.gz: 56f51ab78b66037e55ff41d7515b0c4bc3876481
4
+ data.tar.gz: 893b5f4827406df36ff6abc186767889e4b2cb6c
5
5
  SHA512:
6
- metadata.gz: 00613fdfbc30f7353851ec82c24c4aeac75ced9324d795a82f7c473f48b4c883ec14a4a168dd4e5d5355809a28c9d9313f080ce44ec133413e0f2c529c4cc035
7
- data.tar.gz: b4d427c70b096ae0ae05e1bdde94119bc61518c3b3edd37ed919387c732d8d6a1e6e34735e1e300d33bf02dfff881acdd9c95d134d43e29b9fd576bc381472f0
6
+ metadata.gz: b0d402841c42990b7d2a3d8efcbb9c3c7e1839939ad61774a906d289d5a0c7a33277833827175eb006d922f13da24d7c489aaba5e9c25b967dc6ea18964d9333
7
+ data.tar.gz: 2242413832ffe15e2ec4bcbb8bf33a0fe126e365d163fe55c804bcd6dc3741ae6f0058dd3c39b7a70121a82e81586b190787dcce96fc504bc1e5aae32af3ec10
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
@@ -2,11 +2,16 @@ module OpenTox
2
2
 
3
3
  class CrossValidation
4
4
  field :validation_ids, type: Array, default: []
5
+ field :model_id, type: BSON::ObjectId
5
6
  field :folds, type: Integer
6
7
  field :nr_instances, type: Integer
7
8
  field :nr_unpredicted, type: Integer
8
9
  field :predictions, type: Array
9
10
  field :finished_at, type: Time
11
+
12
+ def time
13
+ finished_at - created_at
14
+ end
10
15
  end
11
16
 
12
17
  class ClassificationCrossValidation < CrossValidation
@@ -22,6 +27,7 @@ module OpenTox
22
27
 
23
28
  def self.create model, n=10
24
29
  cv = self.new
30
+ cv.save # set created_at
25
31
  validation_ids = []
26
32
  nr_instances = 0
27
33
  nr_unpredicted = 0
@@ -64,6 +70,10 @@ module OpenTox
64
70
  end
65
71
  end
66
72
  cv.update_attributes(
73
+ name: model.name,
74
+ model_id: model.id,
75
+ folds: n,
76
+ validation_ids: validation_ids,
67
77
  nr_instances: nr_instances,
68
78
  nr_unpredicted: nr_unpredicted,
69
79
  accept_values: accept_values,
@@ -85,10 +95,8 @@ module OpenTox
85
95
  #F measure carcinogen: 0.769, noncarcinogen: 0.348
86
96
  end
87
97
 
88
- class RegressionCrossValidation < Validation
98
+ class RegressionCrossValidation < CrossValidation
89
99
 
90
- field :validation_ids, type: Array, default: []
91
- field :folds, type: Integer
92
100
  field :rmse, type: Float
93
101
  field :mae, type: Float
94
102
  field :weighted_rmse, type: Float
@@ -96,6 +104,7 @@ module OpenTox
96
104
 
97
105
  def self.create model, n=10
98
106
  cv = self.new
107
+ cv.save # set created_at
99
108
  validation_ids = []
100
109
  nr_instances = 0
101
110
  nr_unpredicted = 0
@@ -145,6 +154,8 @@ module OpenTox
145
154
  rmse = Math.sqrt(rmse/n)
146
155
  weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
147
156
  cv.update_attributes(
157
+ name: model.name,
158
+ model_id: model.id,
148
159
  folds: n,
149
160
  validation_ids: validation_ids,
150
161
  nr_instances: nr_instances,
data/lib/dataset.rb CHANGED
@@ -49,7 +49,7 @@ module OpenTox
49
49
  @data_entries = Marshal.load(data_entry_file.data)
50
50
  bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
51
51
  bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
52
- bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries..first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
52
+ bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
53
53
  $logger.debug "Retrieving data: #{Time.now-t}"
54
54
  end
55
55
  end
data/lib/lazar.rb CHANGED
@@ -58,7 +58,7 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algor
58
58
  "algorithm.rb",
59
59
  "descriptor.rb",
60
60
  "bbrc.rb",
61
- "lazar-model.rb",
61
+ "model.rb",
62
62
  "similarity.rb",
63
63
  "neighbor.rb",
64
64
  "classification.rb",
@@ -8,7 +8,7 @@ module OpenTox
8
8
  include Mongoid::Timestamps
9
9
  store_in collection: "models"
10
10
 
11
- field :title, type: String
11
+ field :title, as: :name, type: String
12
12
  field :creator, type: String, default: __FILE__
13
13
  # datasets
14
14
  field :training_dataset_id, type: BSON::ObjectId
@@ -156,9 +156,16 @@ module OpenTox
156
156
 
157
157
  end
158
158
 
159
- class PredictionModel < Lazar
160
- field :category, type: String
159
+ class PredictionModel
160
+ include OpenTox
161
+ include Mongoid::Document
162
+ include Mongoid::Timestamps
163
+ store_in collection: "models"
164
+
165
+ # TODO field Validations
161
166
  field :endpoint, type: String
167
+ field :species, type: String
168
+ field :source, type: String
162
169
  field :unit, type: String
163
170
  field :model_id, type: BSON::ObjectId
164
171
  field :crossvalidation_id, type: BSON::ObjectId
data/lib/regression.rb CHANGED
@@ -34,6 +34,30 @@ module OpenTox
34
34
  {:value => prediction,:confidence => confidence}
35
35
  end
36
36
 
37
+ def self.weighted_average_with_relevant_fingerprints neighbors
38
+ weighted_sum = 0.0
39
+ sim_sum = 0.0
40
+ fingerprint_features = []
41
+ neighbors.each do |row|
42
+ n,sim,acts = row
43
+ neighbor = Compound.find n
44
+ fingerprint_features += neighbor.fp4
45
+ end
46
+ fingerprint_features.uniq!
47
+ p fingerprint_features
48
+ =begin
49
+ p n
50
+ acts.each do |act|
51
+ weighted_sum += sim*Math.log10(act)
52
+ sim_sum += sim
53
+ end
54
+ end
55
+ =end
56
+ confidence = sim_sum/neighbors.size.to_f
57
+ sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
58
+ {:value => prediction,:confidence => confidence}
59
+ end
60
+
37
61
  # Local support vector regression from neighbors
38
62
  # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
39
63
  # @return [Numeric] A prediction value.
@@ -2,6 +2,7 @@ require_relative "setup.rb"
2
2
 
3
3
  class LazarPhyschemDescriptorTest < MiniTest::Test
4
4
  def test_epafhm
5
+ skip "Physchem Regression not yet implemented."
5
6
  # check available descriptors
6
7
  @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
7
8
  assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
@@ -0,0 +1,27 @@
1
+ require_relative "setup.rb"
2
+
3
+ class LazarRegressionTest < MiniTest::Test
4
+
5
+ def test_weighted_average
6
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
7
+ model = Model::LazarRegression.create training_dataset
8
+ compound = Compound.from_smiles "CC(C)(C)CN"
9
+ prediction = model.predict compound
10
+ assert_equal 13.6, prediction[:value].round(1)
11
+ assert_equal 0.83, prediction[:confidence].round(2)
12
+ assert_equal 1, prediction[:neighbors].size
13
+ end
14
+
15
+ def test_weighted_average_with_relevant_fingerprints
16
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
17
+ model = Model::LazarRegression.create training_dataset
18
+ model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints")
19
+ compound = Compound.from_smiles "CC(C)(C)CN"
20
+ prediction = model.predict compound
21
+ p prediction
22
+ #assert_equal 13.6, prediction[:value].round(1)
23
+ #assert_equal 0.83, prediction[:confidence].round(2)
24
+ #assert_equal 1, prediction[:neighbors].size
25
+ end
26
+
27
+ end
data/test/validation.rb CHANGED
@@ -4,10 +4,11 @@ class ValidationTest < MiniTest::Test
4
4
 
5
5
  def test_fminer_crossvalidation
6
6
  dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
7
- model = Model::LazarFminerClassification.create dataset#, features
7
+ model = Model::LazarFminerClassification.create dataset
8
8
  cv = ClassificationCrossValidation.create model
9
9
  p cv.accuracy
10
10
  p cv.weighted_accuracy
11
+ refute_empty cv.validation_ids
11
12
  assert cv.accuracy > 0.8
12
13
  assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
13
14
  end
@@ -31,7 +32,7 @@ class ValidationTest < MiniTest::Test
31
32
  p cv.weighted_rmse
32
33
  p cv.mae
33
34
  p cv.weighted_mae
34
- `inkview #{cv.plot}`
35
+ #`inkview #{cv.plot}`
35
36
  assert cv.rmse < 30, "RMSE > 30"
36
37
  assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
37
38
  assert cv.mae < 12
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lazar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-08-19 00:00:00.000000000 Z
12
+ date: 2015-08-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -122,8 +122,8 @@ files:
122
122
  - lib/descriptor.rb
123
123
  - lib/error.rb
124
124
  - lib/feature.rb
125
- - lib/lazar-model.rb
126
125
  - lib/lazar.rb
126
+ - lib/model.rb
127
127
  - lib/neighbor.rb
128
128
  - lib/opentox.rb
129
129
  - lib/overwrite.rb
@@ -184,6 +184,7 @@ files:
184
184
  - test/lazar-fminer.rb
185
185
  - test/lazar-long.rb
186
186
  - test/lazar-physchem-short.rb
187
+ - test/lazar-regression.rb
187
188
  - test/setup.rb
188
189
  - test/validation.rb
189
190
  homepage: http://github.com/opentox/lazar
@@ -262,5 +263,6 @@ test_files:
262
263
  - test/lazar-fminer.rb
263
264
  - test/lazar-long.rb
264
265
  - test/lazar-physchem-short.rb
266
+ - test/lazar-regression.rb
265
267
  - test/setup.rb
266
268
  - test/validation.rb