data_modeler 0.3.0 → 0.3.3

checksums.yaml CHANGED
@@ -1,7 +1,7 @@
  ---
  SHA1:
- metadata.gz: 762480cf9239c43cfe81e82634a63b52b2ac1d28
- data.tar.gz: 7554ba11f59112d0dddc39f80ad1c4b897fb02a0
+ metadata.gz: 173d569d4d705b32ca166d444766651f94b4a98d
+ data.tar.gz: ec61342d6188533751c874549c6f55d346d02bcb
  SHA512:
- metadata.gz: 84547a8cf68c84f42a58aec83961b644315095aac87c33f30d2aa0cacfbd2f6b966b95ef060aae253866bdb2e4dd2f37bf29a662eaa5d2b14429d790ab68a03f
- data.tar.gz: 2501a06535f433c6a58b45ccbad701c37ebb2be6c062ee9a2d627fa2a9f34235aa20dd88c57738af9f0d5f1f03a6c18fd4aa5336428d2bf0260f6682c57e7127
+ metadata.gz: 3cb220eeb8f7349321d35adb07efe919c9a23e9e0fe0459ca411b87632dd89ce21a7e1e04d350f1527e9e0f83b9924445b2c155c6af8fb9b71caace5b2600301
+ data.tar.gz: 74c46dea839cf5c1f99346ae197b7c87d06815c1d8c1e3c61238fac0801b6e20dd288efbaf7026f2a5991052c823a0179f48054473cbce4a628b5ff74f8b198d
data/data_modeler.gemspec CHANGED
@@ -1,7 +1,7 @@
  # coding: utf-8
  lib = File.expand_path('../lib', __FILE__)
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
- require 'data_modeler/version'
+ require 'data_modeler/support'
 
  Gem::Specification.new do |spec|
  spec.name = "data_modeler"
data/lib/data_modeler/base.rb CHANGED
@@ -24,7 +24,7 @@ class DataModeler::Base
  @out_dir = prepare_output config[:results]
 
  @tset_gen = DataModeler::DatasetGen.new data, **opts_for(:datasetgen)
- @model = DataModeler::Model.from_conf **opts_for(:learner)
+ @model = DataModeler::Models.selector **opts_for(:learner)
  end
 
  # Main control: up to `nruns` (or until end of data) loop train-test-save
@@ -49,13 +49,18 @@ class DataModeler::Base
  end
 
  # Attribute reader for instance variable `@save_models`, ending in '?' since
- # it's a boolean value.
+ # it's a boolean value.
  # @return [true|false] value of instance variable @save_models
  # (false if nil/uninitialized)
  def save_models?
  @save_models || false
  end
 
+ # @return [String]
+ def to_s
+ config.to_s
+ end
+
  private
 
  # Loads the data in a Hash ready for `DatasetGen` (and `Dataset`)
@@ -97,13 +102,13 @@ class DataModeler::Base
  when :dataset
  { inputs: inputs,
  targets: targets,
- ntimes: config[:tset][:ntimes],
+ ninput_points: config[:tset][:ninput_points],
  tspread: config[:tset][:tspread],
  look_ahead: config[:tset][:look_ahead]
  }
  when :learner
  config[:learner].merge({
- ninputs: (config[:tset][:ntimes] * inputs.size),
+ ninputs: (config[:tset][:ninput_points] * inputs.size),
  noutputs: targets.size
  })
  else abort "Unrecognized `who`: '#{who}'"
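For orientation, a hedged sketch of the config sections that `opts_for` consumes after the `ntimes` → `ninput_points` rename; key names follow the hunks above, while the values and series keys are hypothetical:

    config = {
      tset:    { ninput_points: 3, tspread: 1, look_ahead: 1 },
      learner: { type: :fann, ngens: 500, hidden_layers: [10] }
    }
    inputs  = [:series_a, :series_b]   # hypothetical input series keys
    targets = [:series_c]              # hypothetical target series key
    # opts_for(:learner) then merges in the derived network sizes:
    # ninputs:  config[:tset][:ninput_points] * inputs.size   # => 6
    # noutputs: targets.size                                  # => 1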
data/lib/data_modeler/dataset/dataset.rb CHANGED
@@ -1,15 +1,12 @@
 
  # Build complex inputs and targets from the data to train the model.
- #
- # @note checks to validate if enough data is present (given `ntimes`, `tspread`
- # and `look_ahead`) should be done on the caller (typically `DatasetGen`)
  class DataModeler::Dataset
 
  attr_reader :data, :input_series, :target_series, :first_idx, :end_idx,
- :ntimes, :tspread, :look_ahead, :first_idx, :target_idx,
+ :ninput_points, :tspread, :look_ahead, :first_idx, :target_idx,
  :input_idxs, :nrows
 
- # @param data [Hash-like] the data, in an object that can be
+ # @param data [Hash] the data, in an object that can be
  # accessed by keys and return a time series per each key.
  # It is required to include and be sorted by a series named `time`,
  # and for all series to have equal length.
@@ -17,22 +14,22 @@ class DataModeler::Dataset
  # @param targets [Array] data key accessors for target series
  # @param first_idx [Integer] index where the dataset starts on data
  # @param end_idx [Integer] index where the dataset ends on data
- # @param ntimes [Integer] number of lines/times/datapoints to be
+ # @param ninput_points [Integer] number of lines/datapoints to be
  # used to construct the input
- # @param tspread [Numeric] distance (in `time`!) between the `ntimes`
- # lines/times/datapoints used to construct the input
+ # @param tspread [Numeric] distance (in `time`!) between the `ninput_points`
+ # lines/datapoints used to construct the input
  # @param look_ahead [Numeric] distance (in `time`!) between the
  # most recent line/time/datapoint used for the input and
  # the target -- i.e., how far ahead the model is trained to predict
  # @note we expect Datasets indices to be used with left inclusion but
  # right exclusion, i.e. targets are considered in the range `[from,to)`
- def initialize data, inputs:, targets:, first_idx:, end_idx:, ntimes:, tspread:, look_ahead:
+ def initialize data, inputs:, targets:, first_idx:, end_idx:, ninput_points:, tspread:, look_ahead:
  @data = data
  @input_series = inputs
  @target_series = targets
  @first_idx = first_idx
  @end_idx = end_idx
- @ntimes = ntimes
+ @ninput_points = ninput_points
  @nrows = data[:time].size
  @tspread = tspread
  @look_ahead = look_ahead
@@ -92,9 +89,9 @@ class DataModeler::Dataset
  to_a.transpose
  end
 
- # Overloaded comparison for easier testing
+ # Equality operator -- most useful in testing
  # @param other [Dataset] what needs comparing to
- # @return [void]
+ # @return [true|false]
  def == other
  self.class == other.class && # terminate check here if wrong class
  data.object_id == other.data.object_id && # both `data` point to same object
@@ -120,7 +117,7 @@ class DataModeler::Dataset
  def init_inputs
  if target_idx < end_idx
  # build list of incremental time buffers
- bufs = ntimes.times.collect { |n| look_ahead + n * tspread }
+ bufs = ninput_points.times.collect { |n| look_ahead + n * tspread }
  # reverse it and subtract from the target's time
  times = bufs.reverse.collect { |s| time(target_idx) - s }
  # now you have the list of times at which each pointer should point
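As a standalone illustration of the time arithmetic in `init_inputs` above (plain Ruby, values hypothetical, no gem required):

    ninput_points, tspread, look_ahead = 3, 2, 5
    target_time = 100
    bufs  = ninput_points.times.collect { |n| look_ahead + n * tspread }  # => [5, 7, 9]
    times = bufs.reverse.collect { |s| target_time - s }                  # => [91, 93, 95]
    # i.e. the inputs for the target at time 100 are sampled at times 91, 93 and 95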
data/lib/data_modeler/dataset/dataset_gen.rb CHANGED
@@ -1,27 +1,32 @@
 
  # Build train and test datasets for each run of the training.
  #
- # This diagram should help understanding how it works
- # (win is the input+look_ahead window for first training target)
- # ----------------------------------------> data (time)
- # |win|train1|t1| -> train starts after window, test after training
- # |train2|t2| -> train starts after window + 1 tset
- # |train3|t3| -> train starts after window + 2 tset
+ # Train and test sets are seen as moving windows on the data.
+ # Alignment is designed to provide continuous testing results over (most of) the data.
+ # The following diagram exemplifies this: the test sets `t1`, `t2` and `t3` are
+ # aligned such that their results can be plotted continuously against the observations.
+ # (b) is the amount of data covering the input+look_ahead window used for the first
+ # target.
+ # data: ------------------------------>  (time, datapoints)
+ # run1: (b)|train1|t1|        -> train starts after (b), test after training
+ # run2:       |train2|t2|     -> train starts after (b) + 1 tset
+ # run3:          |train3|t3|  -> train starts after (b) + 2 tset
  # Note how the test sets line up. This allows the testing results plots
- # to be continuous, no model is tested on data on which *itself* has been
- # trained, and all data is used multiple times
+ # to be continuous, while no model is tested on data on which _itself_ has been trained.
+ # All data is used multiple times, alternately both as train and test sets.
  class DataModeler::DatasetGen
 
  attr_reader :data, :ds_args, :first_idx, :train_size, :test_size, :nrows
 
- # @param data [Hash-like] the data, in an object that can be
+ # @param data [Hash] the data, in an object that can be
  # accessed by keys and return a time series per each key.
- # It is required to include and be sorted by a series named `time`,
+ # It is required to include (and be sorted by) a series named `:time`,
  # and for all series to have equal length.
- # @param ds_args [Hash] parameters for the Datasets: inputs, targets,
- # first_idx, end_idx, ntimes. Check class Dataset for details.
- # @train_size: how many points to predict for each training set
- # @test_size: how many points to predict for each test set
+ # @param ds_args [Hash] parameters hash for `Dataset`s initialization.
+ # Keys: `%i[inputs, targets, first_idx, end_idx, ninput_points]`.
+ # See `Dataset#initialize` for details.
+ # @param train_size [Integer] how many points to expose as targets in each training set
+ # @param test_size [Integer] how many points to expose as targets in each test set
  def initialize data, ds_args:, train_size:, test_size:, min_nruns: 1
  @data = data
  @ds_args = ds_args
@@ -36,22 +41,24 @@ class DataModeler::DatasetGen
 
  ### DATA ACCESS
 
- # Builds training set for the training
- # @param nrun [Integer] will build different train+test for each run
+ # Builds training sets for model training
+ # @param nrun [Integer] will build different trainset for each run
  # @return [Dataset]
  # @raise [NoDataLeft] when there's not enough data left for a full train+test
+ # @note train or test have no meaning alone, and train always comes first.
+ # Hence, `#train` checks if enough `data` is available for both `train`+`test`.
  def train nrun
  first = min_eligible_trg + (nrun-1) * test_size
  last = first + train_size
- # make sure there's enough data for both train and test
- raise NoDataLeft unless last + test_size < nrows
+ raise NoDataLeft unless last + test_size < nrows # make sure there's enough data
  DataModeler::Dataset.new data, ds_args.merge(first_idx: first, end_idx: last)
  end
 
- # Builds test set for the training
- # @param nrun [Integer] will build different train+test for each run
+ # Builds test sets for model testing
+ # @param nrun [Integer] will build different testset for each run
  # @return [Dataset]
- # @note we already checked pre-training there's enough data for the test too
+ # @note train or test have no meaning alone, and train always comes first.
+ # Hence, `#train` checks if enough `data` is available for both `train`+`test`.
  def test nrun
  first = min_eligible_trg + (nrun-1) * test_size + train_size
  last = first + test_size
@@ -62,13 +69,13 @@ class DataModeler::DatasetGen
 
  # TODO: @local_nrun is an ugly name, refactor it!
 
- # Returns the next pair [trainset, testset]
+ # Returns the next pair `[trainset, testset]`
  # @return [Array<Dataset, Dataset>]
  def peek
  [self.train(@local_nrun), self.test(@local_nrun)]
  end
 
- # Returns the next pair [trainset, testset] and increments the counter
+ # Returns the next pair `[trainset, testset]` and increments the counter
  # @return [Array<Dataset, Dataset>]
  def next
  peek.tap { @local_nrun += 1 }
@@ -106,7 +113,7 @@ class DataModeler::DatasetGen
  def min_eligible_trg
  @min_eligible_trg ||= idx( time(0) +
  # minimum time span required as input for the first target
- ds_args[:look_ahead] + (ds_args[:ntimes]-1) * ds_args[:tspread]
+ ds_args[:look_ahead] + (ds_args[:ninput_points]-1) * ds_args[:tspread]
  )
  end
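A quick sketch of the window alignment computed by `#train` and `#test` above (plain Ruby, sizes hypothetical):

    min_eligible_trg, train_size, test_size = 10, 50, 25
    (1..3).each do |nrun|
      train_first = min_eligible_trg + (nrun - 1) * test_size
      test_first  = train_first + train_size
      puts "run #{nrun}: train [#{train_first},#{train_first + train_size}) " \
           "test [#{test_first},#{test_first + test_size})"
    end
    # run 1: train [10,60) test [60,85)
    # run 2: train [35,85) test [85,110)
    # run 3: train [60,110) test [110,135)
    # consecutive test windows are contiguous, so test predictions can be plotted continuously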
data/lib/data_modeler/dataset/dataset_helper.rb → data/lib/data_modeler/dataset/helper.rb RENAMED
@@ -3,7 +3,7 @@ class DataModeler::Dataset
  module ConvertingTimeAndIndices
  # Returns the time for a given index
  # @param [Integer] idx row index
- # @return [kind_of_time]
+ # @return [type of `data[:time]`]
  def time idx
  data[:time][idx]
  end
@@ -25,10 +25,10 @@ class DataModeler::Dataset
  end
  end
 
- # Provides each (which can return an `Iterator`) and `to_a` based on `#next`
+ # Provides `#each` (which can return an `Iterator`) and `#to_a` based on `#next`
  module IteratingBasedOnNext
  # Yields on each [inputs, targets] pair.
- # @return [nil, Iterator] `block_given? ? nil : Iterator`
+ # @return [nil|Iterator] `block_given? ? nil : Iterator`
  def each
  reset_iteration
  return enum_for(:each) unless block_given?
@@ -36,8 +36,8 @@ class DataModeler::Dataset
  nil
  end
 
- # Yields on each [inputs, targets] pair, collecting the input.
- # @return [Array, Iterator] `block_given? ? nil : Iterator`
+ # Yields on each `[inputs, targets]` pair, collecting the input.
+ # @return [Array|Iterator] `block_given? ? nil : Iterator`
  def map
  reset_iteration
  return enum_for(:collect) unless block_given?
data/lib/data_modeler/models/fann.rb ADDED
@@ -0,0 +1,113 @@
+ require 'ruby-fann'
+
+ # Model the data using an artificial neural network, based on the
+ # Fast Artificial Neural Networks (FANN) implementation
+ class DataModeler::Models::FANN
+
+ attr_reader :fann_opts, :ngens, :fann, :algo, :actfn, :init_weights_range
+
+ # @param ngens [Integer] number of generations (repetitions) allotted for training
+ # @param hidden_layers [Array<Integer>] list of number of hidden neurons
+ # per each hidden layer in the network
+ # @param ninputs [Integer] number of inputs in the network
+ # @param noutputs [Integer] number of outputs in the network
+ # @param algo [:rprop, :rwg, ...] training algorithm
+ # @param actfn [:sigmoid, ...] activation function
+ # @param init_weights_range [Array<min_w, max_w>] minimum and maximum value for weight initialization range
+ def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil, init_weights_range: nil
+ @fann_opts = {
+ num_inputs: ninputs,
+ hidden_neurons: hidden_layers,
+ num_outputs: noutputs
+ }
+ @ngens = ngens
+ @algo = algo
+ @actfn = actfn
+ @init_weights_range = init_weights_range
+ reset
+ end
+
+ # Resets / initializes the model
+ # @return [void]
+ def reset
+ @fann = RubyFann::Standard.new fann_opts
+ if algo && algo != :rwg
+ fann.set_training_algorithm(algo)
+ end
+ if actfn
+ fann.set_activation_function_hidden(actfn)
+ fann.set_activation_function_output(actfn)
+ end
+ if init_weights_range
+ fann.randomize_weights(*init_weights_range.map(&method(:Float)))
+ end
+ end
+
+ # Trains the model for ngens on the trainset
+ # @param trainset [Hash<input: Array, target: Array>] training set
+ # @param ngens [Integer] number of training generations
+ # @return [void]
+ def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
+ # special case: not implemented in FANN
+ if algo == :rwg
+ return train_rwg(trainset, ngens,
+ report_interval: report_interval, desired_error: desired_error)
+ end
+ # TODO: optimize maybe?
+ inputs, targets = trainset.values
+ tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
+ # fann.init_weights tset # test this weights initialization
+
+ # params: train_data, max_epochs, report_interval, desired_error
+ fann.train_on_data(tset, ngens, report_interval, desired_error)
+ end
+
+ # Trains the model for ngens on the trainset using Random Weight Guessing
+ # @param trainset [Hash-like<input: Array, target: Array>] training set
+ # @param ngens [Integer] number of training generations
+ # @return [void]
+ def train_rwg trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
+ # TODO: use report_interval and desired_error
+ # initialize weight with random values in an interval [min_weight, max_weight]
+ # NOTE: if the RWG training is unsuccessful, this range is the first place to
+ # check to improve performance
+ fann.randomize_weights(*init_weights_range.map(&method(:Float)))
+ # test it on inputs
+ inputs, targets = trainset.values
+ outputs = test(inputs)
+ # calculate RMSE
+ rmse_fn = -> (outs) do
+ sq_err = outs.zip(targets).flat_map do |os,ts|
+ os.zip(ts).collect { |o,t| (t-o)**2 }
+ end
+ Math.sqrt(sq_err.reduce(:+) / sq_err.size)
+ end
+ rmse = rmse_fn.call(outputs)
+ # initialize best
+ best = [fann,rmse]
+ # rinse and repeat
+ ngens.times do
+ outputs = test(inputs)
+ rmse = rmse_fn.call(outputs)
+ (best = [fann,rmse]; puts rmse) if rmse < best.last
+ end
+ # expose the best to the interface
+ fann = best.first
+ end
+
+ # Tests the model on inputs.
+ # @param inputs [Array<Array<inputs>>] sequence of inputs for the model
+ # @return [Array<Array<outputs>>] outputs corresponding to each input
+ def test inputs
+ inputs.collect { |i| fann.run i }
+ end
+
+ # Saves the model
+ # @param filename [String/path] where to save the model
+ # @return [void]
+ def save filename
+ # can do filename check here...?
+ # TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
+ fann.save filename.to_s
+ end
+ end
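A hedged usage sketch of the new `DataModeler::Models::FANN` on toy XOR-style data; the network size, generation count and activation function are arbitrary choices for the example:

    require 'data_modeler'

    model = DataModeler::Models::FANN.new(ngens: 100, hidden_layers: [4],
                                          ninputs: 2, noutputs: 1, actfn: :sigmoid_symmetric)
    trainset = {
      input:  [[0.0, 0.0], [0.0, 1.0], [1.0, 0.0], [1.0, 1.0]],
      target: [[0.0], [1.0], [1.0], [0.0]]
    }
    model.train trainset                      # wraps RubyFann::TrainData + train_on_data
    p model.test([[0.0, 1.0], [1.0, 1.0]])    # => two one-element output arrays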
data/lib/data_modeler/models/selector.rb ADDED
@@ -0,0 +1,18 @@
+
+ # All models for the framework should belong to this module.
+ # Also includes a model selector for initialization from config.
+ module DataModeler::Models
+ # Returns a new `Model` based on the `type` of choice initialized
+ # with `opts` parameters
+ # @param type [Symbol] selects the type of `Model`
+ # @param opts [**Hash] the rest of the parameters will be passed
+ # to the model for its initialization
+ # @return [Model] an initialized `Model` of type `type`
+ def self.selector type:, **opts
+ case type
+ when :fann
+ FANN.new opts
+ else abort "Unrecognized model: #{type}"
+ end
+ end
+ end
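A hedged example of picking a model from a config-style hash via the new selector (parameter values hypothetical):

    require 'data_modeler'

    learner = { type: :fann, ngens: 200, hidden_layers: [6], ninputs: 4, noutputs: 1 }
    model = DataModeler::Models.selector **learner
    model.class  # => DataModeler::Models::FANN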
data/lib/data_modeler/support.rb ADDED
@@ -0,0 +1,41 @@
+
+ # Main gem module
+ module DataModeler
+
+ ### VERSION
+
+ # Version number
+ VERSION = "0.3.3"
+
+ ### HELPER FUNCTIONS
+
+ # Returns a standardized String ID from a (sequentially named) file
+ # @return [String]
+ # @note convenient method to have available in the config
+ def self.id_from_filename filename=__FILE__
+ format "%02d", Integer(filename[/_(\d+).rb$/,1])
+ end
+
+ # Returns an instance of the Base class
+ # @param config [Hash] Base class configuration
+ # @return [Base] initialized instance of Base class
+ def self.new config
+ DataModeler::Base.new config
+ end
+
+ ### EXCEPTIONS
+
+ class DataModeler::Dataset
+ # Exception: the requested `time` is not present in the data
+ class TimeNotFoundError < StandardError; end
+ end
+
+ class DataModeler::DatasetGen
+ # Exception: not enough `data` was provided for even a single train+test setup
+ class NotEnoughDataError < StandardError; end
+
+ # Exception: not enough `data` left to build another train+test
+ # @note subclassed from `StopIteration` -> it will break loops
+ class NoDataLeft < StopIteration; end
+ end
+ end
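A hedged example of the helpers now gathered in support.rb (the filename is hypothetical):

    require 'data_modeler'

    DataModeler.id_from_filename "config_run_7.rb"   # => "07"
    DataModeler::VERSION                             # => "0.3.3"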
data/lib/data_modeler.rb CHANGED
@@ -1,16 +1,13 @@
- # Helpers
- require "data_modeler/version"
- require "data_modeler/exceptions"
- require "data_modeler/helpers"
+ require "data_modeler/support"
 
  # Dataset
- require "data_modeler/dataset/dataset_helper"
+ require "data_modeler/dataset/helper"
  require "data_modeler/dataset/dataset"
  require "data_modeler/dataset/dataset_gen"
 
  # Models
- require "data_modeler/model"
- require "data_modeler/model/fann"
+ require "data_modeler/models/selector"
+ require "data_modeler/models/fann"
 
- # Modeler
+ # Framework core
  require "data_modeler/base"
metadata CHANGED
@@ -1,14 +1,14 @@
  --- !ruby/object:Gem::Specification
  name: data_modeler
  version: !ruby/object:Gem::Version
- version: 0.3.0
+ version: 0.3.3
  platform: ruby
  authors:
  - Giuseppe Cuccu
  autorequire:
  bindir: exe
  cert_chain: []
- date: 2017-05-16 00:00:00.000000000 Z
+ date: 2017-05-18 00:00:00.000000000 Z
  dependencies:
  - !ruby/object:Gem::Dependency
  name: ruby-fann
@@ -159,12 +159,10 @@ files:
  - lib/data_modeler/base.rb
  - lib/data_modeler/dataset/dataset.rb
  - lib/data_modeler/dataset/dataset_gen.rb
- - lib/data_modeler/dataset/dataset_helper.rb
- - lib/data_modeler/exceptions.rb
- - lib/data_modeler/helpers.rb
- - lib/data_modeler/model.rb
- - lib/data_modeler/model/fann.rb
- - lib/data_modeler/version.rb
+ - lib/data_modeler/dataset/helper.rb
+ - lib/data_modeler/models/fann.rb
+ - lib/data_modeler/models/selector.rb
+ - lib/data_modeler/support.rb
  homepage: https://github.com/giuse/data_modeler
  licenses:
  - MIT
data/lib/data_modeler/exceptions.rb DELETED
@@ -1,12 +0,0 @@
- class DataModeler::Dataset
- # Exception: the requested `time` is not present in the data
- class TimeNotFoundError < StandardError; end
- end
-
- class DataModeler::DatasetGen
- # Exception: the `data` is not sufficient for the training setup
- class NotEnoughDataError < StandardError; end
- # Exception: not enough `data` left to build another train+test
- # @note being subclassed from `StopIteration`, it will break loops
- class NoDataLeft < StopIteration; end
- end
data/lib/data_modeler/helpers.rb DELETED
@@ -1,17 +0,0 @@
-
- # Helper functions go here
- module DataModeler
- # Returns a standardized String ID from a (sequentially named) file
- # @return [String]
- # @note convenient method to have available in the config
- def self.id_from_filename filename=__FILE__
- format "%02d", Integer(filename[/_(\d+).rb$/,1])
- end
-
- # Returns an instance of the Base class
- # @param config [Hash] Base class configuration
- # @return [Base] initialized instance of Base class
- def self.new config
- DataModeler::Base.new config
- end
- end
data/lib/data_modeler/model/fann.rb DELETED
@@ -1,68 +0,0 @@
- require 'ruby-fann'
-
- # Model class based on Fast Artificial Neural Networks (FANN)
- class DataModeler::Model::FANN
-
- attr_reader :fann_opts, :ngens, :fann, :algo, :actfn
-
- # @param ngens [Integer] number of generations alloted for training
- # @param hidden_layers [Array<Integer>] list of number of hidden neurons
- # per each hidden layer in the network
- # @param ninputs [Integer] number of inputs of the network
- # @param noutputs [Integer] number of outputs of the network
- # @param algo [:incremental, :batch, :rprop, :quickprop] training algorithm
- # @param actfn [:sigmoid, ...] activation function
- def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil
- @fann_opts = {
- num_inputs: ninputs,
- hidden_neurons: hidden_layers,
- num_outputs: noutputs
- }
- @ngens = ngens
- @algo = algo
- @actfn = actfn
- reset
- end
-
- # Resets / initializes the model
- # @return [void]
- def reset
- @fann = RubyFann::Standard.new fann_opts
- fann.set_training_algorithm(algo) if algo
- if actfn
- fann.set_activation_function_hidden(actfn)
- fann.set_activation_function_output(actfn)
- end
- nil
- end
-
- # Trains the model for ngens on the trainset
- # @param trainset [Hash-like<input: Array, target: Array>] training set
- # @param ngens [Integer] number of training generations
- # @return [void]
- def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
- # TODO: optimize maybe?
- inputs, targets = trainset.values
- tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
- # fann.init_weights tset # test this weights initialization
-
- # params: train_data, max_epochs, report_interval, desired_error
- fann.train_on_data(tset, ngens, report_interval, desired_error)
- end
-
- # Tests the model on inputs.
- # @param inputs [Array<Array<inputs>>] sequence of inputs for the model
- # @return [Array<Array<outputs>>] outputs corresponding to each input
- def test inputs
- inputs.collect { |i| fann.run i }
- end
-
- # Save the model
- # @param filename [String/path] where to save the model
- # @return [void]
- def save filename
- # can do filename check here...?
- # TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
- fann.save filename.to_s
- end
- end
data/lib/data_modeler/model.rb DELETED
@@ -1,17 +0,0 @@
-
- # All models for the framework should belong to this module.
- # Also includes a model selector for initialization from config.
- module DataModeler::Model
- # Returns a new Model correctly initialized based on the `type` of choice
- # @param type [Symbol] which type of Model is chosen
- # @param opts [splatted Hash params] the rest of the parameters will be passed
- # to the model for initialization
- # @return [Model] a correctly initialized Model of type `type`
- def self.from_conf type:, **opts
- case type
- when :fann
- FANN.new opts
- else abort "Unrecognized model: #{type}"
- end
- end
- end
data/lib/data_modeler/version.rb DELETED
@@ -1,5 +0,0 @@
- # Main gem module
- module DataModeler
- # Version number
- VERSION = "0.3.0"
- end