lazar 0.0.6 → 0.0.7

This diff shows the changes between two publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d12695cc30f60a81485f2db0be93a14074a9f4bf
4
- data.tar.gz: 226d76ec4b778d08022a8af3a7b16b12bbc62c9d
3
+ metadata.gz: 56f51ab78b66037e55ff41d7515b0c4bc3876481
4
+ data.tar.gz: 893b5f4827406df36ff6abc186767889e4b2cb6c
5
5
  SHA512:
6
- metadata.gz: 00613fdfbc30f7353851ec82c24c4aeac75ced9324d795a82f7c473f48b4c883ec14a4a168dd4e5d5355809a28c9d9313f080ce44ec133413e0f2c529c4cc035
7
- data.tar.gz: b4d427c70b096ae0ae05e1bdde94119bc61518c3b3edd37ed919387c732d8d6a1e6e34735e1e300d33bf02dfff881acdd9c95d134d43e29b9fd576bc381472f0
6
+ metadata.gz: b0d402841c42990b7d2a3d8efcbb9c3c7e1839939ad61774a906d289d5a0c7a33277833827175eb006d922f13da24d7c489aaba5e9c25b967dc6ea18964d9333
7
+ data.tar.gz: 2242413832ffe15e2ec4bcbb8bf33a0fe126e365d163fe55c804bcd6dc3741ae6f0058dd3c39b7a70121a82e81586b190787dcce96fc504bc1e5aae32af3ec10
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.6
1
+ 0.0.7
@@ -2,11 +2,16 @@ module OpenTox
2
2
 
3
3
  class CrossValidation
4
4
  field :validation_ids, type: Array, default: []
5
+ field :model_id, type: BSON::ObjectId
5
6
  field :folds, type: Integer
6
7
  field :nr_instances, type: Integer
7
8
  field :nr_unpredicted, type: Integer
8
9
  field :predictions, type: Array
9
10
  field :finished_at, type: Time
11
+
12
+ def time
13
+ finished_at - created_at
14
+ end
10
15
  end
11
16
 
12
17
  class ClassificationCrossValidation < CrossValidation
@@ -22,6 +27,7 @@ module OpenTox
22
27
 
23
28
  def self.create model, n=10
24
29
  cv = self.new
30
+ cv.save # set created_at
25
31
  validation_ids = []
26
32
  nr_instances = 0
27
33
  nr_unpredicted = 0
@@ -64,6 +70,10 @@ module OpenTox
64
70
  end
65
71
  end
66
72
  cv.update_attributes(
73
+ name: model.name,
74
+ model_id: model.id,
75
+ folds: n,
76
+ validation_ids: validation_ids,
67
77
  nr_instances: nr_instances,
68
78
  nr_unpredicted: nr_unpredicted,
69
79
  accept_values: accept_values,
@@ -85,10 +95,8 @@ module OpenTox
85
95
  #F measure carcinogen: 0.769, noncarcinogen: 0.348
86
96
  end
87
97
 
88
- class RegressionCrossValidation < Validation
98
+ class RegressionCrossValidation < CrossValidation
89
99
 
90
- field :validation_ids, type: Array, default: []
91
- field :folds, type: Integer
92
100
  field :rmse, type: Float
93
101
  field :mae, type: Float
94
102
  field :weighted_rmse, type: Float
@@ -96,6 +104,7 @@ module OpenTox
96
104
 
97
105
  def self.create model, n=10
98
106
  cv = self.new
107
+ cv.save # set created_at
99
108
  validation_ids = []
100
109
  nr_instances = 0
101
110
  nr_unpredicted = 0
@@ -145,6 +154,8 @@ module OpenTox
145
154
  rmse = Math.sqrt(rmse/n)
146
155
  weighted_rmse = Math.sqrt(weighted_rmse/confidence_sum)
147
156
  cv.update_attributes(
157
+ name: model.name,
158
+ model_id: model.id,
148
159
  folds: n,
149
160
  validation_ids: validation_ids,
150
161
  nr_instances: nr_instances,
data/lib/dataset.rb CHANGED
@@ -49,7 +49,7 @@ module OpenTox
49
49
  @data_entries = Marshal.load(data_entry_file.data)
50
50
  bad_request_error "Data entries (#{data_entries_id}) are not a 2D-Array" unless @data_entries.is_a? Array and @data_entries.first.is_a? Array
51
51
  bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.size} rows, but dataset (#{id}) has #{compound_ids.size} compounds" unless @data_entries.size == compound_ids.size
52
- bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries..first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
52
+ bad_request_error "Data entries (#{data_entries_id}) have #{@data_entries.first.size} columns, but dataset (#{id}) has #{feature_ids.size} features" unless @data_entries.first.size == feature_ids.size
53
53
  $logger.debug "Retrieving data: #{Time.now-t}"
54
54
  end
55
55
  end
data/lib/lazar.rb CHANGED
@@ -58,7 +58,7 @@ CLASSES = ["Feature","Compound","Dataset","Validation","CrossValidation"]# Algor
58
58
  "algorithm.rb",
59
59
  "descriptor.rb",
60
60
  "bbrc.rb",
61
- "lazar-model.rb",
61
+ "model.rb",
62
62
  "similarity.rb",
63
63
  "neighbor.rb",
64
64
  "classification.rb",
@@ -8,7 +8,7 @@ module OpenTox
8
8
  include Mongoid::Timestamps
9
9
  store_in collection: "models"
10
10
 
11
- field :title, type: String
11
+ field :title, as: :name, type: String
12
12
  field :creator, type: String, default: __FILE__
13
13
  # datasets
14
14
  field :training_dataset_id, type: BSON::ObjectId
@@ -156,9 +156,16 @@ module OpenTox
156
156
 
157
157
  end
158
158
 
159
- class PredictionModel < Lazar
160
- field :category, type: String
159
+ class PredictionModel
160
+ include OpenTox
161
+ include Mongoid::Document
162
+ include Mongoid::Timestamps
163
+ store_in collection: "models"
164
+
165
+ # TODO field Validations
161
166
  field :endpoint, type: String
167
+ field :species, type: String
168
+ field :source, type: String
162
169
  field :unit, type: String
163
170
  field :model_id, type: BSON::ObjectId
164
171
  field :crossvalidation_id, type: BSON::ObjectId
data/lib/regression.rb CHANGED
@@ -34,6 +34,30 @@ module OpenTox
34
34
  {:value => prediction,:confidence => confidence}
35
35
  end
36
36
 
37
+ def self.weighted_average_with_relevant_fingerprints neighbors
38
+ weighted_sum = 0.0
39
+ sim_sum = 0.0
40
+ fingerprint_features = []
41
+ neighbors.each do |row|
42
+ n,sim,acts = row
43
+ neighbor = Compound.find n
44
+ fingerprint_features += neighbor.fp4
45
+ end
46
+ fingerprint_features.uniq!
47
+ p fingerprint_features
48
+ =begin
49
+ p n
50
+ acts.each do |act|
51
+ weighted_sum += sim*Math.log10(act)
52
+ sim_sum += sim
53
+ end
54
+ end
55
+ =end
56
+ confidence = sim_sum/neighbors.size.to_f
57
+ sim_sum == 0 ? prediction = nil : prediction = 10**(weighted_sum/sim_sum)
58
+ {:value => prediction,:confidence => confidence}
59
+ end
60
+
37
61
  # Local support vector regression from neighbors
38
62
  # @param [Hash] params Keys `:props, :activities, :sims, :min_train_performance` are required
39
63
  # @return [Numeric] A prediction value.
@@ -2,6 +2,7 @@ require_relative "setup.rb"
2
2
 
3
3
  class LazarPhyschemDescriptorTest < MiniTest::Test
4
4
  def test_epafhm
5
+ skip "Physchem Regression not yet implemented."
5
6
  # check available descriptors
6
7
  @descriptors = OpenTox::Algorithm::Descriptor::DESCRIPTORS.keys
7
8
  assert_equal 111,@descriptors.size,"wrong number of physchem descriptors"
@@ -0,0 +1,27 @@
1
+ require_relative "setup.rb"
2
+
3
+ class LazarRegressionTest < MiniTest::Test
4
+
5
+ def test_weighted_average
6
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
7
+ model = Model::LazarRegression.create training_dataset
8
+ compound = Compound.from_smiles "CC(C)(C)CN"
9
+ prediction = model.predict compound
10
+ assert_equal 13.6, prediction[:value].round(1)
11
+ assert_equal 0.83, prediction[:confidence].round(2)
12
+ assert_equal 1, prediction[:neighbors].size
13
+ end
14
+
15
+ def test_weighted_average_with_relevant_fingerprints
16
+ training_dataset = Dataset.from_csv_file "#{DATA_DIR}/EPAFHM.medi.csv"
17
+ model = Model::LazarRegression.create training_dataset
18
+ model.update(:prediction_algorithm => "OpenTox::Algorithm::Regression.weighted_average_with_relevant_fingerprints")
19
+ compound = Compound.from_smiles "CC(C)(C)CN"
20
+ prediction = model.predict compound
21
+ p prediction
22
+ #assert_equal 13.6, prediction[:value].round(1)
23
+ #assert_equal 0.83, prediction[:confidence].round(2)
24
+ #assert_equal 1, prediction[:neighbors].size
25
+ end
26
+
27
+ end
data/test/validation.rb CHANGED
@@ -4,10 +4,11 @@ class ValidationTest < MiniTest::Test
4
4
 
5
5
  def test_fminer_crossvalidation
6
6
  dataset = Dataset.from_csv_file "#{DATA_DIR}/hamster_carcinogenicity.csv"
7
- model = Model::LazarFminerClassification.create dataset#, features
7
+ model = Model::LazarFminerClassification.create dataset
8
8
  cv = ClassificationCrossValidation.create model
9
9
  p cv.accuracy
10
10
  p cv.weighted_accuracy
11
+ refute_empty cv.validation_ids
11
12
  assert cv.accuracy > 0.8
12
13
  assert cv.weighted_accuracy > cv.accuracy, "Weighted accuracy (#{cv.weighted_accuracy}) larger than unweighted accuracy(#{cv.accuracy}) "
13
14
  end
@@ -31,7 +32,7 @@ class ValidationTest < MiniTest::Test
31
32
  p cv.weighted_rmse
32
33
  p cv.mae
33
34
  p cv.weighted_mae
34
- `inkview #{cv.plot}`
35
+ #`inkview #{cv.plot}`
35
36
  assert cv.rmse < 30, "RMSE > 30"
36
37
  assert cv.weighted_rmse < cv.rmse, "Weighted RMSE (#{cv.weighted_rmse}) larger than unweighted RMSE(#{cv.rmse}) "
37
38
  assert cv.mae < 12
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: lazar
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - Christoph Helma, Martin Guetlein, Andreas Maunz, Micha Rautenberg, David Vorgrimmler,
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2015-08-19 00:00:00.000000000 Z
12
+ date: 2015-08-20 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: bundler
@@ -122,8 +122,8 @@ files:
122
122
  - lib/descriptor.rb
123
123
  - lib/error.rb
124
124
  - lib/feature.rb
125
- - lib/lazar-model.rb
126
125
  - lib/lazar.rb
126
+ - lib/model.rb
127
127
  - lib/neighbor.rb
128
128
  - lib/opentox.rb
129
129
  - lib/overwrite.rb
@@ -184,6 +184,7 @@ files:
184
184
  - test/lazar-fminer.rb
185
185
  - test/lazar-long.rb
186
186
  - test/lazar-physchem-short.rb
187
+ - test/lazar-regression.rb
187
188
  - test/setup.rb
188
189
  - test/validation.rb
189
190
  homepage: http://github.com/opentox/lazar
@@ -262,5 +263,6 @@ test_files:
262
263
  - test/lazar-fminer.rb
263
264
  - test/lazar-long.rb
264
265
  - test/lazar-physchem-short.rb
266
+ - test/lazar-regression.rb
265
267
  - test/setup.rb
266
268
  - test/validation.rb