data_modeler 0.3.0 → 0.3.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 762480cf9239c43cfe81e82634a63b52b2ac1d28
4
- data.tar.gz: 7554ba11f59112d0dddc39f80ad1c4b897fb02a0
3
+ metadata.gz: 173d569d4d705b32ca166d444766651f94b4a98d
4
+ data.tar.gz: ec61342d6188533751c874549c6f55d346d02bcb
5
5
  SHA512:
6
- metadata.gz: 84547a8cf68c84f42a58aec83961b644315095aac87c33f30d2aa0cacfbd2f6b966b95ef060aae253866bdb2e4dd2f37bf29a662eaa5d2b14429d790ab68a03f
7
- data.tar.gz: 2501a06535f433c6a58b45ccbad701c37ebb2be6c062ee9a2d627fa2a9f34235aa20dd88c57738af9f0d5f1f03a6c18fd4aa5336428d2bf0260f6682c57e7127
6
+ metadata.gz: 3cb220eeb8f7349321d35adb07efe919c9a23e9e0fe0459ca411b87632dd89ce21a7e1e04d350f1527e9e0f83b9924445b2c155c6af8fb9b71caace5b2600301
7
+ data.tar.gz: 74c46dea839cf5c1f99346ae197b7c87d06815c1d8c1e3c61238fac0801b6e20dd288efbaf7026f2a5991052c823a0179f48054473cbce4a628b5ff74f8b198d
data/data_modeler.gemspec CHANGED
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
2
  lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
- require 'data_modeler/version'
4
+ require 'data_modeler/support'
5
5
 
6
6
  Gem::Specification.new do |spec|
7
7
  spec.name = "data_modeler"
@@ -24,7 +24,7 @@ class DataModeler::Base
24
24
  @out_dir = prepare_output config[:results]
25
25
 
26
26
  @tset_gen = DataModeler::DatasetGen.new data, **opts_for(:datasetgen)
27
- @model = DataModeler::Model.from_conf **opts_for(:learner)
27
+ @model = DataModeler::Models.selector **opts_for(:learner)
28
28
  end
29
29
 
30
30
  # Main control: up to `nruns` (or until end of data) loop train-test-save
@@ -49,13 +49,18 @@ class DataModeler::Base
49
49
  end
50
50
 
51
51
  # Attribute reader for instance variable `@save_models`, ending in '?' since
52
- # it's a boolean value.
52
+ # it's a boolean value.
53
53
  # @return [true|false] value of instance variable @save_models
54
54
  # (false if nil/uninitialized)
55
55
  def save_models?
56
56
  @save_models || false
57
57
  end
58
58
 
59
+ # @return [String]
60
+ def to_s
61
+ config.to_s
62
+ end
63
+
59
64
  private
60
65
 
61
66
  # Loads the data in a Hash ready for `DatasetGen` (and `Dataset`)
@@ -97,13 +102,13 @@ class DataModeler::Base
97
102
  when :dataset
98
103
  { inputs: inputs,
99
104
  targets: targets,
100
- ntimes: config[:tset][:ntimes],
105
+ ninput_points: config[:tset][:ninput_points],
101
106
  tspread: config[:tset][:tspread],
102
107
  look_ahead: config[:tset][:look_ahead]
103
108
  }
104
109
  when :learner
105
110
  config[:learner].merge({
106
- ninputs: (config[:tset][:ntimes] * inputs.size),
111
+ ninputs: (config[:tset][:ninput_points] * inputs.size),
107
112
  noutputs: targets.size
108
113
  })
109
114
  else abort "Unrecognized `who`: '#{who}'"
@@ -1,15 +1,12 @@
1
1
 
2
2
  # Build complex inputs and targets from the data to train the model.
3
- #
4
- # @note checks to validate if enough data is present (given `ntimes`, `tspread`
5
- # and `look_ahead`) should be done on the caller (typically `DatasetGen`)
6
3
  class DataModeler::Dataset
7
4
 
8
5
  attr_reader :data, :input_series, :target_series, :first_idx, :end_idx,
9
- :ntimes, :tspread, :look_ahead, :first_idx, :target_idx,
6
+ :ninput_points, :tspread, :look_ahead, :first_idx, :target_idx,
10
7
  :input_idxs, :nrows
11
8
 
12
- # @param data [Hash-like] the data, in an object that can be
9
+ # @param data [Hash] the data, in an object that can be
13
10
  # accessed by keys and return a time series per each key.
14
11
  # It is required to include and be sorted by a series named `time`,
15
12
  # and for all series to have equal length.
@@ -17,22 +14,22 @@ class DataModeler::Dataset
17
14
  # @param targets [Array] data key accessors for target series
18
15
  # @param first_idx [Integer] index where the dataset starts on data
19
16
  # @param end_idx [Integer] index where the dataset ends on data
20
- # @param ntimes [Integer] number of lines/times/datapoints to be
17
+ # @param ninput_points [Integer] number of lines/datapoints to be
21
18
  # used to construct the input
22
- # @param tspread [Numeric] distance (in `time`!) between the `ntimes`
23
- # lines/times/datapoints used to construct the input
19
+ # @param tspread [Numeric] distance (in `time`!) between the `ninput_points`
20
+ # lines/datapoints used to construct the input
24
21
  # @param look_ahead [Numeric] distance (in `time`!) between the
25
22
  # most recent line/time/datapoint used for the input and
26
23
  # the target -- i.e., how far ahead the model is trained to predict
27
24
  # @note we expect Datasets indices to be used with left inclusion but
28
25
  # right exclusion, i.e. targets are considered in the range `[from,to)`
29
- def initialize data, inputs:, targets:, first_idx:, end_idx:, ntimes:, tspread:, look_ahead:
26
+ def initialize data, inputs:, targets:, first_idx:, end_idx:, ninput_points:, tspread:, look_ahead:
30
27
  @data = data
31
28
  @input_series = inputs
32
29
  @target_series = targets
33
30
  @first_idx = first_idx
34
31
  @end_idx = end_idx
35
- @ntimes = ntimes
32
+ @ninput_points = ninput_points
36
33
  @nrows = data[:time].size
37
34
  @tspread = tspread
38
35
  @look_ahead = look_ahead
@@ -92,9 +89,9 @@ class DataModeler::Dataset
92
89
  to_a.transpose
93
90
  end
94
91
 
95
- # Overloaded comparison for easier testing
92
+ # Equality operator -- most useful in testing
96
93
  # @param other [Dataset] what needs comparing to
97
- # @return [void]
94
+ # @return [true|false]
98
95
  def == other
99
96
  self.class == other.class && # terminate check here if wrong class
100
97
  data.object_id == other.data.object_id && # both `data` point to same object
@@ -120,7 +117,7 @@ class DataModeler::Dataset
120
117
  def init_inputs
121
118
  if target_idx < end_idx
122
119
  # build list of incremental time buffers
123
- bufs = ntimes.times.collect { |n| look_ahead + n * tspread }
120
+ bufs = ninput_points.times.collect { |n| look_ahead + n * tspread }
124
121
  # reverse it and subtract from the target's time
125
122
  times = bufs.reverse.collect { |s| time(target_idx) - s }
126
123
  # now you have the list of times at which each pointer should point
@@ -1,27 +1,32 @@
1
1
 
2
2
  # Build train and test datasets for each run of the training.
3
3
  #
4
- # This diagram should help understanding how it works
5
- # (win is the input+look_ahead window for first training target)
6
- # ----------------------------------------> data (time)
7
- # |win|train1|t1| -> train starts after window, test after training
8
- # |train2|t2| -> train starts after window + 1 tset
9
- # |train3|t3| -> train starts after window + 2 tset
4
+ # Train and test sets are seen as moving windows on the data.
5
+ # Alignment is designed to provide continuous testing results over (most of) the data.
6
+ # The following diagram exemplifies this: the test sets `t1`, `t2` and `t3` are
7
+ # aligned such that their results can be plotted continuously against the observations.
8
+ # (b) is the amount of data covering the input+look_ahead window used for the first
9
+ # target.
10
+ # data: ----------------------> (time, datapoints)
11
+ # run1: (b)|train1|t1| -> train starts after (b), test after training
12
+ # run2: |train2|t2| -> train starts after (b) + 1 tset
13
+ # run3: |train3|t3| -> train starts after (b) + 2 tset
10
14
  # Note how the test sets line up. This allows the testing results plots
11
- # to be continuous, no model is tested on data on which *itself* has been
12
- # trained, and all data is used multiple times
15
+ # to be continuous, while no model is tested on data on which _itself_ has been trained.
16
+ # All data is used multiple times, alternately both as train and test sets.
13
17
  class DataModeler::DatasetGen
14
18
 
15
19
  attr_reader :data, :ds_args, :first_idx, :train_size, :test_size, :nrows
16
20
 
17
- # @param data [Hash-like] the data, in an object that can be
21
+ # @param data [Hash] the data, in an object that can be
18
22
  # accessed by keys and return a time series per each key.
19
- # It is required to include and be sorted by a series named `time`,
23
+ # It is required to include (and be sorted by) a series named `:time`,
20
24
  # and for all series to have equal length.
21
- # @param ds_args [Hash] parameters for the Datasets: inputs, targets,
22
- # first_idx, end_idx, ntimes. Check class Dataset for details.
23
- # @train_size: how many points to predict for each training set
24
- # @test_size: how many points to predict for each test set
25
+ # @param ds_args [Hash] parameters hash for `Dataset`s initialization.
26
+ # Keys: `%i[inputs targets first_idx end_idx ninput_points]`.
27
+ # See `Dataset#initialize` for details.
28
+ # @param train_size [Integer] how many points to expose as targets in each training set
29
+ # @param test_size [Integer] how many points to expose as targets in each test set
25
30
  def initialize data, ds_args:, train_size:, test_size:, min_nruns: 1
26
31
  @data = data
27
32
  @ds_args = ds_args
@@ -36,22 +41,24 @@ class DataModeler::DatasetGen
36
41
 
37
42
  ### DATA ACCESS
38
43
 
39
- # Builds training set for the training
40
- # @param nrun [Integer] will build different train+test for each run
44
+ # Builds training sets for model training
45
+ # @param nrun [Integer] will build different trainset for each run
41
46
  # @return [Dataset]
42
47
  # @raise [NoDataLeft] when there's not enough data left for a full train+test
48
+ # @note train or test have no meaning alone, and train always comes first.
49
+ # Hence, `#train` checks if enough `data` is available for both `train`+`test`.
43
50
  def train nrun
44
51
  first = min_eligible_trg + (nrun-1) * test_size
45
52
  last = first + train_size
46
- # make sure there's enough data for both train and test
47
- raise NoDataLeft unless last + test_size < nrows
53
+ raise NoDataLeft unless last + test_size < nrows # make sure there's enough data
48
54
  DataModeler::Dataset.new data, ds_args.merge(first_idx: first, end_idx: last)
49
55
  end
50
56
 
51
- # Builds test set for the training
52
- # @param nrun [Integer] will build different train+test for each run
57
+ # Builds test sets for model testing
58
+ # @param nrun [Integer] will build different testset for each run
53
59
  # @return [Dataset]
54
- # @note we already checked pre-training there's enough data for the test too
60
+ # @note train or test have no meaning alone, and train always comes first.
61
+ # Hence, `#train` checks if enough `data` is available for both `train`+`test`.
55
62
  def test nrun
56
63
  first = min_eligible_trg + (nrun-1) * test_size + train_size
57
64
  last = first + test_size
@@ -62,13 +69,13 @@ class DataModeler::DatasetGen
62
69
 
63
70
  # TODO: @local_nrun is an ugly name, refactor it!
64
71
 
65
- # Returns the next pair [trainset, testset]
72
+ # Returns the next pair `[trainset, testset]`
66
73
  # @return [Array<Dataset, Dataset>]
67
74
  def peek
68
75
  [self.train(@local_nrun), self.test(@local_nrun)]
69
76
  end
70
77
 
71
- # Returns the next pair [trainset, testset] and increments the counter
78
+ # Returns the next pair `[trainset, testset]` and increments the counter
72
79
  # @return [Array<Dataset, Dataset>]
73
80
  def next
74
81
  peek.tap { @local_nrun += 1 }
@@ -106,7 +113,7 @@ class DataModeler::DatasetGen
106
113
  def min_eligible_trg
107
114
  @min_eligible_trg ||= idx( time(0) +
108
115
  # minimum time span required as input for the first target
109
- ds_args[:look_ahead] + (ds_args[:ntimes]-1) * ds_args[:tspread]
116
+ ds_args[:look_ahead] + (ds_args[:ninput_points]-1) * ds_args[:tspread]
110
117
  )
111
118
  end
112
119
 
@@ -3,7 +3,7 @@ class DataModeler::Dataset
3
3
  module ConvertingTimeAndIndices
4
4
  # Returns the time for a given index
5
5
  # @param [Integer] idx row index
6
- # @return [kind_of_time]
6
+ # @return [type of `data[:time]`]
7
7
  def time idx
8
8
  data[:time][idx]
9
9
  end
@@ -25,10 +25,10 @@ class DataModeler::Dataset
25
25
  end
26
26
  end
27
27
 
28
- # Provides each (which can return an `Iterator`) and `to_a` based on `#next`
28
+ # Provides `#each` (which can return an `Iterator`) and `#to_a` based on `#next`
29
29
  module IteratingBasedOnNext
30
30
  # Yields on each [inputs, targets] pair.
31
- # @return [nil, Iterator] `block_given? ? nil : Iterator`
31
+ # @return [nil|Iterator] `block_given? ? nil : Iterator`
32
32
  def each
33
33
  reset_iteration
34
34
  return enum_for(:each) unless block_given?
@@ -36,8 +36,8 @@ class DataModeler::Dataset
36
36
  nil
37
37
  end
38
38
 
39
- # Yields on each [inputs, targets] pair, collecting the input.
40
- # @return [Array, Iterator] `block_given? ? nil : Iterator`
39
+ # Yields on each `[inputs, targets]` pair, collecting the input.
40
+ # @return [Array|Iterator] `block_given? ? Array : Iterator`
41
41
  def map
42
42
  reset_iteration
43
43
  return enum_for(:collect) unless block_given?
@@ -0,0 +1,113 @@
1
+ require 'ruby-fann'
2
+
3
+ # Model the data using an artificial neural network, based on the
4
+ # Fast Artificial Neural Networks (FANN) implementation
5
+ class DataModeler::Models::FANN
6
+
7
+ attr_reader :fann_opts, :ngens, :fann, :algo, :actfn, :init_weights_range
8
+
9
+ # @param ngens [Integer] number of generations (repetitions) allotted for training
10
+ # @param hidden_layers [Array<Integer>] list of number of hidden neurons
11
+ # per each hidden layer in the network
12
+ # @param ninputs [Integer] number of inputs in the network
13
+ # @param noutputs [Integer] number of outputs in the network
14
+ # @param algo [:rprop, :rwg, ...] training algorithm
15
+ # @param actfn [:sigmoid, ...] activation function
16
+ # @param init_weights_range [Array<min_w, max_w>] minimum and maximum value for weight initialization range
17
+ def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil, init_weights_range: nil
18
+ @fann_opts = {
19
+ num_inputs: ninputs,
20
+ hidden_neurons: hidden_layers,
21
+ num_outputs: noutputs
22
+ }
23
+ @ngens = ngens
24
+ @algo = algo
25
+ @actfn = actfn
26
+ @init_weights_range = init_weights_range
27
+ reset
28
+ end
29
+
30
+ # Resets / initializes the model
31
+ # @return [void]
32
+ def reset
33
+ @fann = RubyFann::Standard.new fann_opts
34
+ if algo && algo != :rwg
35
+ fann.set_training_algorithm(algo)
36
+ end
37
+ if actfn
38
+ fann.set_activation_function_hidden(actfn)
39
+ fann.set_activation_function_output(actfn)
40
+ end
41
+ if init_weights_range
42
+ fann.randomize_weights(*init_weights_range.map(&method(:Float)))
43
+ end
44
+ end
45
+
46
+ # Trains the model for ngens on the trainset
47
+ # @param trainset [Hash<input: Array, target: Array>] training set
48
+ # @param ngens [Integer] number of training generations
49
+ # @return [void]
50
+ def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
51
+ # special case: not implemented in FANN
52
+ if algo == :rwg
53
+ return train_rwg(trainset, ngens,
54
+ report_interval: report_interval, desired_error: desired_error)
55
+ end
56
+ # TODO: optimize maybe?
57
+ inputs, targets = trainset.values
58
+ tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
59
+ # fann.init_weights tset # test this weights initialization
60
+
61
+ # params: train_data, max_epochs, report_interval, desired_error
62
+ fann.train_on_data(tset, ngens, report_interval, desired_error)
63
+ end
64
+
65
+ # Trains the model for ngens on the trainset using Random Weight Guessing
66
+ # @param trainset [Hash-like<input: Array, target: Array>] training set
67
+ # @param ngens [Integer] number of training generations
68
+ # @return [void]
69
+ def train_rwg trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
70
+ # TODO: use report_interval and desired_error
71
+ # initialize weight with random values in an interval [min_weight, max_weight]
72
+ # NOTE: if the RWG training is unsuccessful, this range is the first place to
73
+ # check to improve performance
74
+ fann.randomize_weights(*init_weights_range.map(&method(:Float)))
75
+ # test it on inputs
76
+ inputs, targets = trainset.values
77
+ outputs = test(inputs)
78
+ # calculate RMSE
79
+ rmse_fn = -> (outs) do
80
+ sq_err = outs.zip(targets).flat_map do |os,ts|
81
+ os.zip(ts).collect { |o,t| (t-o)**2 }
82
+ end
83
+ Math.sqrt(sq_err.reduce(:+) / sq_err.size)
84
+ end
85
+ rmse = rmse_fn.call(outputs)
86
+ # initialize best
87
+ best = [fann,rmse]
88
+ # rinse and repeat
89
+ ngens.times do
90
+ outputs = test(inputs)
91
+ rmse = rmse_fn.call(outputs)
92
+ (best = [fann,rmse]; puts rmse) if rmse < best.last
93
+ end
94
+ # expose the best to the interface
95
+ fann = best.first
96
+ end
97
+
98
+ # Tests the model on inputs.
99
+ # @param inputs [Array<Array<inputs>>] sequence of inputs for the model
100
+ # @return [Array<Array<outputs>>] outputs corresponding to each input
101
+ def test inputs
102
+ inputs.collect { |i| fann.run i }
103
+ end
104
+
105
+ # Saves the model
106
+ # @param filename [String/path] where to save the model
107
+ # @return [void]
108
+ def save filename
109
+ # can do filename check here...?
110
+ # TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
111
+ fann.save filename.to_s
112
+ end
113
+ end
@@ -0,0 +1,18 @@
1
+
2
+ # All models for the framework should belong to this module.
3
+ # Also includes a model selector for initialization from config.
4
+ module DataModeler::Models
5
+ # Returns a new `Model` based on the `type` of choice initialized
6
+ # with `opts` parameters
7
+ # @param type [Symbol] selects the type of `Model`
8
+ # @param opts [**Hash] the rest of the parameters will be passed
9
+ # to the model for its initialization
10
+ # @return [Model] an initialized `Model` of type `type`
11
+ def self.selector type:, **opts
12
+ case type
13
+ when :fann
14
+ FANN.new opts
15
+ else abort "Unrecognized model: #{type}"
16
+ end
17
+ end
18
+ end
@@ -0,0 +1,41 @@
1
+
2
+ # Main gem module
3
+ module DataModeler
4
+
5
+ ### VERSION
6
+
7
+ # Version number
8
+ VERSION = "0.3.3"
9
+
10
+ ### HELPER FUNCTIONS
11
+
12
+ # Returns a standardized String ID from a (sequentially named) file
13
+ # @return [String]
14
+ # @note convenient method to have available in the config
15
+ def self.id_from_filename filename=__FILE__
16
+ format "%02d", Integer(filename[/_(\d+).rb$/,1])
17
+ end
18
+
19
+ # Returns an instance of the Base class
20
+ # @param config [Hash] Base class configuration
21
+ # @return [Base] initialized instance of Base class
22
+ def self.new config
23
+ DataModeler::Base.new config
24
+ end
25
+
26
+ ### EXCEPTIONS
27
+
28
+ class DataModeler::Dataset
29
+ # Exception: the requested `time` is not present in the data
30
+ class TimeNotFoundError < StandardError; end
31
+ end
32
+
33
+ class DataModeler::DatasetGen
34
+ # Exception: not enough `data` was provided for even a single train+test setup
35
+ class NotEnoughDataError < StandardError; end
36
+
37
+ # Exception: not enough `data` left to build another train+test
38
+ # @note subclassed from `StopIteration` -> it will break loops
39
+ class NoDataLeft < StopIteration; end
40
+ end
41
+ end
data/lib/data_modeler.rb CHANGED
@@ -1,16 +1,13 @@
1
- # Helpers
2
- require "data_modeler/version"
3
- require "data_modeler/exceptions"
4
- require "data_modeler/helpers"
1
+ require "data_modeler/support"
5
2
 
6
3
  # Dataset
7
- require "data_modeler/dataset/dataset_helper"
4
+ require "data_modeler/dataset/helper"
8
5
  require "data_modeler/dataset/dataset"
9
6
  require "data_modeler/dataset/dataset_gen"
10
7
 
11
8
  # Models
12
- require "data_modeler/model"
13
- require "data_modeler/model/fann"
9
+ require "data_modeler/models/selector"
10
+ require "data_modeler/models/fann"
14
11
 
15
- # Modeler
12
+ # Framework core
16
13
  require "data_modeler/base"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: data_modeler
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Giuseppe Cuccu
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-05-16 00:00:00.000000000 Z
11
+ date: 2017-05-18 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: ruby-fann
@@ -159,12 +159,10 @@ files:
159
159
  - lib/data_modeler/base.rb
160
160
  - lib/data_modeler/dataset/dataset.rb
161
161
  - lib/data_modeler/dataset/dataset_gen.rb
162
- - lib/data_modeler/dataset/dataset_helper.rb
163
- - lib/data_modeler/exceptions.rb
164
- - lib/data_modeler/helpers.rb
165
- - lib/data_modeler/model.rb
166
- - lib/data_modeler/model/fann.rb
167
- - lib/data_modeler/version.rb
162
+ - lib/data_modeler/dataset/helper.rb
163
+ - lib/data_modeler/models/fann.rb
164
+ - lib/data_modeler/models/selector.rb
165
+ - lib/data_modeler/support.rb
168
166
  homepage: https://github.com/giuse/data_modeler
169
167
  licenses:
170
168
  - MIT
@@ -1,12 +0,0 @@
1
- class DataModeler::Dataset
2
- # Exception: the requested `time` is not present in the data
3
- class TimeNotFoundError < StandardError; end
4
- end
5
-
6
- class DataModeler::DatasetGen
7
- # Exception: the `data` is not sufficient for the training setup
8
- class NotEnoughDataError < StandardError; end
9
- # Exception: not enough `data` left to build another train+test
10
- # @note being subclassed from `StopIteration`, it will break loops
11
- class NoDataLeft < StopIteration; end
12
- end
@@ -1,17 +0,0 @@
1
-
2
- # Helper functions go here
3
- module DataModeler
4
- # Returns a standardized String ID from a (sequentially named) file
5
- # @return [String]
6
- # @note convenient method to have available in the config
7
- def self.id_from_filename filename=__FILE__
8
- format "%02d", Integer(filename[/_(\d+).rb$/,1])
9
- end
10
-
11
- # Returns an instance of the Base class
12
- # @param config [Hash] Base class configuration
13
- # @return [Base] initialized instance of Base class
14
- def self.new config
15
- DataModeler::Base.new config
16
- end
17
- end
@@ -1,68 +0,0 @@
1
- require 'ruby-fann'
2
-
3
- # Model class based on Fast Artificial Neural Networks (FANN)
4
- class DataModeler::Model::FANN
5
-
6
- attr_reader :fann_opts, :ngens, :fann, :algo, :actfn
7
-
8
- # @param ngens [Integer] number of generations alloted for training
9
- # @param hidden_layers [Array<Integer>] list of number of hidden neurons
10
- # per each hidden layer in the network
11
- # @param ninputs [Integer] number of inputs of the network
12
- # @param noutputs [Integer] number of outputs of the network
13
- # @param algo [:incremental, :batch, :rprop, :quickprop] training algorithm
14
- # @param actfn [:sigmoid, ...] activation function
15
- def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil
16
- @fann_opts = {
17
- num_inputs: ninputs,
18
- hidden_neurons: hidden_layers,
19
- num_outputs: noutputs
20
- }
21
- @ngens = ngens
22
- @algo = algo
23
- @actfn = actfn
24
- reset
25
- end
26
-
27
- # Resets / initializes the model
28
- # @return [void]
29
- def reset
30
- @fann = RubyFann::Standard.new fann_opts
31
- fann.set_training_algorithm(algo) if algo
32
- if actfn
33
- fann.set_activation_function_hidden(actfn)
34
- fann.set_activation_function_output(actfn)
35
- end
36
- nil
37
- end
38
-
39
- # Trains the model for ngens on the trainset
40
- # @param trainset [Hash-like<input: Array, target: Array>] training set
41
- # @param ngens [Integer] number of training generations
42
- # @return [void]
43
- def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
44
- # TODO: optimize maybe?
45
- inputs, targets = trainset.values
46
- tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
47
- # fann.init_weights tset # test this weights initialization
48
-
49
- # params: train_data, max_epochs, report_interval, desired_error
50
- fann.train_on_data(tset, ngens, report_interval, desired_error)
51
- end
52
-
53
- # Tests the model on inputs.
54
- # @param inputs [Array<Array<inputs>>] sequence of inputs for the model
55
- # @return [Array<Array<outputs>>] outputs corresponding to each input
56
- def test inputs
57
- inputs.collect { |i| fann.run i }
58
- end
59
-
60
- # Save the model
61
- # @param filename [String/path] where to save the model
62
- # @return [void]
63
- def save filename
64
- # can do filename check here...?
65
- # TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
66
- fann.save filename.to_s
67
- end
68
- end
@@ -1,17 +0,0 @@
1
-
2
- # All models for the framework should belong to this module.
3
- # Also includes a model selector for initialization from config.
4
- module DataModeler::Model
5
- # Returns a new Model correctly initialized based on the `type` of choice
6
- # @param type [Symbol] which type of Model is chosen
7
- # @param opts [splatted Hash params] the rest of the parameters will be passed
8
- # to the model for initialization
9
- # @return [Model] a correctly initialized Model of type `type`
10
- def self.from_conf type:, **opts
11
- case type
12
- when :fann
13
- FANN.new opts
14
- else abort "Unrecognized model: #{type}"
15
- end
16
- end
17
- end
@@ -1,5 +0,0 @@
1
- # Main gem module
2
- module DataModeler
3
- # Version number
4
- VERSION = "0.3.0"
5
- end