data_modeler 0.3.0 → 0.3.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/data_modeler.gemspec +1 -1
- data/lib/data_modeler/base.rb +9 -4
- data/lib/data_modeler/dataset/dataset.rb +10 -13
- data/lib/data_modeler/dataset/dataset_gen.rb +31 -24
- data/lib/data_modeler/dataset/{dataset_helper.rb → helper.rb} +5 -5
- data/lib/data_modeler/models/fann.rb +113 -0
- data/lib/data_modeler/models/selector.rb +18 -0
- data/lib/data_modeler/support.rb +41 -0
- data/lib/data_modeler.rb +5 -8
- metadata +6 -8
- data/lib/data_modeler/exceptions.rb +0 -12
- data/lib/data_modeler/helpers.rb +0 -17
- data/lib/data_modeler/model/fann.rb +0 -68
- data/lib/data_modeler/model.rb +0 -17
- data/lib/data_modeler/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 173d569d4d705b32ca166d444766651f94b4a98d
|
4
|
+
data.tar.gz: ec61342d6188533751c874549c6f55d346d02bcb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3cb220eeb8f7349321d35adb07efe919c9a23e9e0fe0459ca411b87632dd89ce21a7e1e04d350f1527e9e0f83b9924445b2c155c6af8fb9b71caace5b2600301
|
7
|
+
data.tar.gz: 74c46dea839cf5c1f99346ae197b7c87d06815c1d8c1e3c61238fac0801b6e20dd288efbaf7026f2a5991052c823a0179f48054473cbce4a628b5ff74f8b198d
|
data/data_modeler.gemspec
CHANGED
data/lib/data_modeler/base.rb
CHANGED
@@ -24,7 +24,7 @@ class DataModeler::Base
|
|
24
24
|
@out_dir = prepare_output config[:results]
|
25
25
|
|
26
26
|
@tset_gen = DataModeler::DatasetGen.new data, **opts_for(:datasetgen)
|
27
|
-
@model = DataModeler::
|
27
|
+
@model = DataModeler::Models.selector **opts_for(:learner)
|
28
28
|
end
|
29
29
|
|
30
30
|
# Main control: up to `nruns` (or until end of data) loop train-test-save
|
@@ -49,13 +49,18 @@ class DataModeler::Base
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Attribute reader for instance variable `@save_models`, ending in '?' since
|
52
|
-
#
|
52
|
+
# it's a boolean value.
|
53
53
|
# @return [true|false] value of instance variable @save_models
|
54
54
|
# (false if nil/uninitialized)
|
55
55
|
def save_models?
|
56
56
|
@save_models || false
|
57
57
|
end
|
58
58
|
|
59
|
+
# @return [String]
|
60
|
+
def to_s
|
61
|
+
config.to_s
|
62
|
+
end
|
63
|
+
|
59
64
|
private
|
60
65
|
|
61
66
|
# Loads the data in a Hash ready for `DatasetGen` (and `Dataset`)
|
@@ -97,13 +102,13 @@ class DataModeler::Base
|
|
97
102
|
when :dataset
|
98
103
|
{ inputs: inputs,
|
99
104
|
targets: targets,
|
100
|
-
|
105
|
+
ninput_points: config[:tset][:ninput_points],
|
101
106
|
tspread: config[:tset][:tspread],
|
102
107
|
look_ahead: config[:tset][:look_ahead]
|
103
108
|
}
|
104
109
|
when :learner
|
105
110
|
config[:learner].merge({
|
106
|
-
ninputs: (config[:tset][:
|
111
|
+
ninputs: (config[:tset][:ninput_points] * inputs.size),
|
107
112
|
noutputs: targets.size
|
108
113
|
})
|
109
114
|
else abort "Unrecognized `who`: '#{who}'"
|
@@ -1,15 +1,12 @@
|
|
1
1
|
|
2
2
|
# Build complex inputs and targets from the data to train the model.
|
3
|
-
#
|
4
|
-
# @note checks to validate if enough data is present (given `ntimes`, `tspread`
|
5
|
-
# and `look_ahead`) should be done on the caller (typically `DatasetGen`)
|
6
3
|
class DataModeler::Dataset
|
7
4
|
|
8
5
|
attr_reader :data, :input_series, :target_series, :first_idx, :end_idx,
|
9
|
-
:
|
6
|
+
:ninput_points, :tspread, :look_ahead, :first_idx, :target_idx,
|
10
7
|
:input_idxs, :nrows
|
11
8
|
|
12
|
-
# @param data [Hash
|
9
|
+
# @param data [Hash] the data, in an object that can be
|
13
10
|
# accessed by keys and return a time series per each key.
|
14
11
|
# It is required to include and be sorted by a series named `time`,
|
15
12
|
# and for all series to have equal length.
|
@@ -17,22 +14,22 @@ class DataModeler::Dataset
|
|
17
14
|
# @param targets [Array] data key accessors for target series
|
18
15
|
# @param first_idx [Integer] index where the dataset starts on data
|
19
16
|
# @param end_idx [Integer] index where the dataset ends on data
|
20
|
-
# @param
|
17
|
+
# @param ninput_points [Integer] number of lines/datapoints to be
|
21
18
|
# used to construct the input
|
22
|
-
# @param tspread [Numeric] distance (in `time`!) between the `
|
23
|
-
# lines/
|
19
|
+
# @param tspread [Numeric] distance (in `time`!) between the `ninput_points`
|
20
|
+
# lines/datapoints used to construct the input
|
24
21
|
# @param look_ahead [Numeric] distance (in `time`!) between the
|
25
22
|
# most recent line/time/datapoint used for the input and
|
26
23
|
# the target -- i.e., how far ahead the model is trained to predict
|
27
24
|
# @note we expect Datasets indices to be used with left inclusion but
|
28
25
|
# right exclusion, i.e. targets are considered in the range `[from,to)`
|
29
|
-
def initialize data, inputs:, targets:, first_idx:, end_idx:,
|
26
|
+
def initialize data, inputs:, targets:, first_idx:, end_idx:, ninput_points:, tspread:, look_ahead:
|
30
27
|
@data = data
|
31
28
|
@input_series = inputs
|
32
29
|
@target_series = targets
|
33
30
|
@first_idx = first_idx
|
34
31
|
@end_idx = end_idx
|
35
|
-
@
|
32
|
+
@ninput_points = ninput_points
|
36
33
|
@nrows = data[:time].size
|
37
34
|
@tspread = tspread
|
38
35
|
@look_ahead = look_ahead
|
@@ -92,9 +89,9 @@ class DataModeler::Dataset
|
|
92
89
|
to_a.transpose
|
93
90
|
end
|
94
91
|
|
95
|
-
#
|
92
|
+
# Equality operator -- most useful in testing
|
96
93
|
# @param other [Dataset] what needs comparing to
|
97
|
-
# @return [
|
94
|
+
# @return [true|false]
|
98
95
|
def == other
|
99
96
|
self.class == other.class && # terminate check here if wrong class
|
100
97
|
data.object_id == other.data.object_id && # both `data` point to same object
|
@@ -120,7 +117,7 @@ class DataModeler::Dataset
|
|
120
117
|
def init_inputs
|
121
118
|
if target_idx < end_idx
|
122
119
|
# build list of incremental time buffers
|
123
|
-
bufs =
|
120
|
+
bufs = ninput_points.times.collect { |n| look_ahead + n * tspread }
|
124
121
|
# reverse it and subtract from the target's time
|
125
122
|
times = bufs.reverse.collect { |s| time(target_idx) - s }
|
126
123
|
# now you have the list of times at which each pointer should point
|
@@ -1,27 +1,32 @@
|
|
1
1
|
|
2
2
|
# Build train and test datasets for each run of the training.
|
3
3
|
#
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
4
|
+
# Train and test sets are seen as moving windows on the data.
|
5
|
+
# Alignment is designed to provide continuous testing results over (most of) the data.
|
6
|
+
# The following diagram exemplifies this: the test sets `t1`, `t2` and `t3` are
|
7
|
+
# aligned such that their results can be plotted continuously against the observations.
|
8
|
+
# (b) is the amount of data covering the input+look_ahead window used for the first
|
9
|
+
# target.
|
10
|
+
# data: ----------------------> (time, datapoints)
|
11
|
+
# run1: (b)|train1|t1| -> train starts after (b), test after training
|
12
|
+
# run2: |train2|t2| -> train starts after (b) + 1 tset
|
13
|
+
# run3: |train3|t3| -> train starts after (b) + 2 tset
|
10
14
|
# Note how the test sets line up. This allows the testing results plots
|
11
|
-
# to be continuous, no model is tested on data on which
|
12
|
-
#
|
15
|
+
# to be continuous, while no model is tested on data on which _itself_ has been trained.
|
16
|
+
# All data is used multiple times, alternately both as train and test sets.
|
13
17
|
class DataModeler::DatasetGen
|
14
18
|
|
15
19
|
attr_reader :data, :ds_args, :first_idx, :train_size, :test_size, :nrows
|
16
20
|
|
17
|
-
# @param data [Hash
|
21
|
+
# @param data [Hash] the data, in an object that can be
|
18
22
|
# accessed by keys and return a time series per each key.
|
19
|
-
# It is required to include and be sorted by a series named
|
23
|
+
# It is required to include (and be sorted by) a series named `:time`,
|
20
24
|
# and for all series to have equal length.
|
21
|
-
# @param ds_args [Hash] parameters for
|
22
|
-
# first_idx, end_idx,
|
23
|
-
#
|
24
|
-
# @
|
25
|
+
# @param ds_args [Hash] parameters hash for `Dataset`s initialization.
|
26
|
+
# Keys: `%i[inputs targets first_idx end_idx ninput_points]`.
|
27
|
+
# See `Dataset#initialize` for details.
|
28
|
+
# @param train_size [Integer] how many points to expose as targets in each training set
|
29
|
+
# @param test_size [Integer] how many points to expose as targets in each test set
|
25
30
|
def initialize data, ds_args:, train_size:, test_size:, min_nruns: 1
|
26
31
|
@data = data
|
27
32
|
@ds_args = ds_args
|
@@ -36,22 +41,24 @@ class DataModeler::DatasetGen
|
|
36
41
|
|
37
42
|
### DATA ACCESS
|
38
43
|
|
39
|
-
# Builds training
|
40
|
-
# @param nrun [Integer] will build different
|
44
|
+
# Builds training sets for model training
|
45
|
+
# @param nrun [Integer] will build a different trainset for each run
|
41
46
|
# @return [Dataset]
|
42
47
|
# @raise [NoDataLeft] when there's not enough data left for a full train+test
|
48
|
+
# @note train or test have no meaning alone, and train always comes first.
|
49
|
+
# Hence, `#train` checks if enough `data` is available for both `train`+`test`.
|
43
50
|
def train nrun
|
44
51
|
first = min_eligible_trg + (nrun-1) * test_size
|
45
52
|
last = first + train_size
|
46
|
-
# make sure there's enough data
|
47
|
-
raise NoDataLeft unless last + test_size < nrows
|
53
|
+
raise NoDataLeft unless last + test_size < nrows # make sure there's enough data
|
48
54
|
DataModeler::Dataset.new data, ds_args.merge(first_idx: first, end_idx: last)
|
49
55
|
end
|
50
56
|
|
51
|
-
# Builds test
|
52
|
-
# @param nrun [Integer] will build different
|
57
|
+
# Builds test sets for model testing
|
58
|
+
# @param nrun [Integer] will build a different testset for each run
|
53
59
|
# @return [Dataset]
|
54
|
-
# @note
|
60
|
+
# @note train or test have no meaning alone, and train always comes first.
|
61
|
+
# Hence, `#train` checks if enough `data` is available for both `train`+`test`.
|
55
62
|
def test nrun
|
56
63
|
first = min_eligible_trg + (nrun-1) * test_size + train_size
|
57
64
|
last = first + test_size
|
@@ -62,13 +69,13 @@ class DataModeler::DatasetGen
|
|
62
69
|
|
63
70
|
# TODO: @local_nrun is an ugly name, refactor it!
|
64
71
|
|
65
|
-
# Returns the next pair [trainset, testset]
|
72
|
+
# Returns the next pair `[trainset, testset]`
|
66
73
|
# @return [Array<Dataset, Dataset>]
|
67
74
|
def peek
|
68
75
|
[self.train(@local_nrun), self.test(@local_nrun)]
|
69
76
|
end
|
70
77
|
|
71
|
-
# Returns the next pair [trainset, testset] and increments the counter
|
78
|
+
# Returns the next pair `[trainset, testset]` and increments the counter
|
72
79
|
# @return [Array<Dataset, Dataset>]
|
73
80
|
def next
|
74
81
|
peek.tap { @local_nrun += 1 }
|
@@ -106,7 +113,7 @@ class DataModeler::DatasetGen
|
|
106
113
|
def min_eligible_trg
|
107
114
|
@min_eligible_trg ||= idx( time(0) +
|
108
115
|
# minimum time span required as input for the first target
|
109
|
-
ds_args[:look_ahead] + (ds_args[:
|
116
|
+
ds_args[:look_ahead] + (ds_args[:ninput_points]-1) * ds_args[:tspread]
|
110
117
|
)
|
111
118
|
end
|
112
119
|
|
@@ -3,7 +3,7 @@ class DataModeler::Dataset
|
|
3
3
|
module ConvertingTimeAndIndices
|
4
4
|
# Returns the time for a given index
|
5
5
|
# @param [Integer] idx row index
|
6
|
-
# @return [
|
6
|
+
# @return [type of `data[:time]`]
|
7
7
|
def time idx
|
8
8
|
data[:time][idx]
|
9
9
|
end
|
@@ -25,10 +25,10 @@ class DataModeler::Dataset
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
-
# Provides each (which can return an `Iterator`) and
|
28
|
+
# Provides `#each` (which can return an `Iterator`) and `#to_a` based on `#next`
|
29
29
|
module IteratingBasedOnNext
|
30
30
|
# Yields on each [inputs, targets] pair.
|
31
|
-
# @return [nil
|
31
|
+
# @return [nil|Iterator] `block_given? ? nil : Iterator`
|
32
32
|
def each
|
33
33
|
reset_iteration
|
34
34
|
return enum_for(:each) unless block_given?
|
@@ -36,8 +36,8 @@ class DataModeler::Dataset
|
|
36
36
|
nil
|
37
37
|
end
|
38
38
|
|
39
|
-
# Yields on each [inputs, targets] pair, collecting the input.
|
40
|
-
# @return [Array
|
39
|
+
# Yields on each `[inputs, targets]` pair, collecting the input.
|
40
|
+
# @return [Array|Iterator] `block_given? ? Array : Iterator`
|
41
41
|
def map
|
42
42
|
reset_iteration
|
43
43
|
return enum_for(:collect) unless block_given?
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'ruby-fann'
|
2
|
+
|
3
|
+
# Model the data using an artificial neural network, based on the
|
4
|
+
# Fast Artificial Neural Networks (FANN) implementation
|
5
|
+
class DataModeler::Models::FANN
|
6
|
+
|
7
|
+
attr_reader :fann_opts, :ngens, :fann, :algo, :actfn, :init_weights_range
|
8
|
+
|
9
|
+
# @param ngens [Integer] number of generations (repetitions) allotted for training
|
10
|
+
# @param hidden_layers [Array<Integer>] list of number of hidden neurons
|
11
|
+
# per each hidden layer in the network
|
12
|
+
# @param ninputs [Integer] number of inputs in the network
|
13
|
+
# @param noutputs [Integer] number of outputs in the network
|
14
|
+
# @param algo [:rprop, :rwg, ...] training algorithm
|
15
|
+
# @param actfn [:sigmoid, ...] activation function
|
16
|
+
# @param init_weights_range [Array<min_w, max_w>] minimum and maximum value for weight initialization range
|
17
|
+
def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil, init_weights_range: nil
|
18
|
+
@fann_opts = {
|
19
|
+
num_inputs: ninputs,
|
20
|
+
hidden_neurons: hidden_layers,
|
21
|
+
num_outputs: noutputs
|
22
|
+
}
|
23
|
+
@ngens = ngens
|
24
|
+
@algo = algo
|
25
|
+
@actfn = actfn
|
26
|
+
@init_weights_range = init_weights_range
|
27
|
+
reset
|
28
|
+
end
|
29
|
+
|
30
|
+
# Resets / initializes the model
|
31
|
+
# @return [void]
|
32
|
+
def reset
|
33
|
+
@fann = RubyFann::Standard.new fann_opts
|
34
|
+
if algo && algo != :rwg
|
35
|
+
fann.set_training_algorithm(algo)
|
36
|
+
end
|
37
|
+
if actfn
|
38
|
+
fann.set_activation_function_hidden(actfn)
|
39
|
+
fann.set_activation_function_output(actfn)
|
40
|
+
end
|
41
|
+
if init_weights_range
|
42
|
+
fann.randomize_weights(*init_weights_range.map(&method(:Float)))
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Trains the model for ngens on the trainset
|
47
|
+
# @param trainset [Hash<input: Array, target: Array>] training set
|
48
|
+
# @param ngens [Integer] number of training generations
|
49
|
+
# @return [void]
|
50
|
+
def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
|
51
|
+
# special case: not implemented in FANN
|
52
|
+
if algo == :rwg
|
53
|
+
return train_rwg(trainset, ngens,
|
54
|
+
report_interval: report_interval, desired_error: desired_error)
|
55
|
+
end
|
56
|
+
# TODO: optimize maybe?
|
57
|
+
inputs, targets = trainset.values
|
58
|
+
tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
|
59
|
+
# fann.init_weights tset # test this weights initialization
|
60
|
+
|
61
|
+
# params: train_data, max_epochs, report_interval, desired_error
|
62
|
+
fann.train_on_data(tset, ngens, report_interval, desired_error)
|
63
|
+
end
|
64
|
+
|
65
|
+
# Trains the model for ngens on the trainset using Random Weight Guessing
|
66
|
+
# @param trainset [Hash-like<input: Array, target: Array>] training set
|
67
|
+
# @param ngens [Integer] number of training generations
|
68
|
+
# @return [void]
|
69
|
+
def train_rwg trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
|
70
|
+
# TODO: use report_interval and desired_error
|
71
|
+
# initialize weights with random values in an interval [min_weight, max_weight]
|
72
|
+
# NOTE: if the RWG training is unsuccessful, this range is the first place to
|
73
|
+
# check to improve performance
|
74
|
+
fann.randomize_weights(*init_weights_range.map(&method(:Float)))
|
75
|
+
# test it on inputs
|
76
|
+
inputs, targets = trainset.values
|
77
|
+
outputs = test(inputs)
|
78
|
+
# calculate RMSE
|
79
|
+
rmse_fn = -> (outs) do
|
80
|
+
sq_err = outs.zip(targets).flat_map do |os,ts|
|
81
|
+
os.zip(ts).collect { |o,t| (t-o)**2 }
|
82
|
+
end
|
83
|
+
Math.sqrt(sq_err.reduce(:+) / sq_err.size)
|
84
|
+
end
|
85
|
+
rmse = rmse_fn.call(outputs)
|
86
|
+
# initialize best
|
87
|
+
best = [fann,rmse]
|
88
|
+
# rinse and repeat
|
89
|
+
ngens.times do
|
90
|
+
outputs = test(inputs)
|
91
|
+
rmse = rmse_fn.call(outputs)
|
92
|
+
(best = [fann,rmse]; puts rmse) if rmse < best.last
|
93
|
+
end
|
94
|
+
# expose the best to the interface
|
95
|
+
fann = best.first
|
96
|
+
end
|
97
|
+
|
98
|
+
# Tests the model on inputs.
|
99
|
+
# @param inputs [Array<Array<inputs>>] sequence of inputs for the model
|
100
|
+
# @return [Array<Array<outputs>>] outputs corresponding to each input
|
101
|
+
def test inputs
|
102
|
+
inputs.collect { |i| fann.run i }
|
103
|
+
end
|
104
|
+
|
105
|
+
# Saves the model
|
106
|
+
# @param filename [String/path] where to save the model
|
107
|
+
# @return [void]
|
108
|
+
def save filename
|
109
|
+
# can do filename check here...?
|
110
|
+
# TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
|
111
|
+
fann.save filename.to_s
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
# All models for the framework should belong to this module.
|
3
|
+
# Also includes a model selector for initialization from config.
|
4
|
+
module DataModeler::Models
|
5
|
+
# Returns a new `Model` based on the `type` of choice initialized
|
6
|
+
# with `opts` parameters
|
7
|
+
# @param type [Symbol] selects the type of `Model`
|
8
|
+
# @param opts [**Hash] the rest of the parameters will be passed
|
9
|
+
# to the model for its initialization
|
10
|
+
# @return [Model] an initialized `Model` of type `type`
|
11
|
+
def self.selector type:, **opts
|
12
|
+
case type
|
13
|
+
when :fann
|
14
|
+
FANN.new opts
|
15
|
+
else abort "Unrecognized model: #{type}"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
# Main gem module
|
3
|
+
module DataModeler
|
4
|
+
|
5
|
+
### VERSION
|
6
|
+
|
7
|
+
# Version number
|
8
|
+
VERSION = "0.3.3"
|
9
|
+
|
10
|
+
### HELPER FUNCTIONS
|
11
|
+
|
12
|
+
# Returns a standardized String ID from a (sequentially named) file
|
13
|
+
# @return [String]
|
14
|
+
# @note convenient method to have available in the config
|
15
|
+
def self.id_from_filename filename=__FILE__
|
16
|
+
format "%02d", Integer(filename[/_(\d+).rb$/,1])
|
17
|
+
end
|
18
|
+
|
19
|
+
# Returns an instance of the Base class
|
20
|
+
# @param config [Hash] Base class configuration
|
21
|
+
# @return [Base] initialized instance of Base class
|
22
|
+
def self.new config
|
23
|
+
DataModeler::Base.new config
|
24
|
+
end
|
25
|
+
|
26
|
+
### EXCEPTIONS
|
27
|
+
|
28
|
+
class DataModeler::Dataset
|
29
|
+
# Exception: the requested `time` is not present in the data
|
30
|
+
class TimeNotFoundError < StandardError; end
|
31
|
+
end
|
32
|
+
|
33
|
+
class DataModeler::DatasetGen
|
34
|
+
# Exception: not enough `data` was provided for even a single train+test setup
|
35
|
+
class NotEnoughDataError < StandardError; end
|
36
|
+
|
37
|
+
# Exception: not enough `data` left to build another train+test
|
38
|
+
# @note subclassed from `StopIteration` -> it will break loops
|
39
|
+
class NoDataLeft < StopIteration; end
|
40
|
+
end
|
41
|
+
end
|
data/lib/data_modeler.rb
CHANGED
@@ -1,16 +1,13 @@
|
|
1
|
-
|
2
|
-
require "data_modeler/version"
|
3
|
-
require "data_modeler/exceptions"
|
4
|
-
require "data_modeler/helpers"
|
1
|
+
require "data_modeler/support"
|
5
2
|
|
6
3
|
# Dataset
|
7
|
-
require "data_modeler/dataset/
|
4
|
+
require "data_modeler/dataset/helper"
|
8
5
|
require "data_modeler/dataset/dataset"
|
9
6
|
require "data_modeler/dataset/dataset_gen"
|
10
7
|
|
11
8
|
# Models
|
12
|
-
require "data_modeler/
|
13
|
-
require "data_modeler/
|
9
|
+
require "data_modeler/models/selector"
|
10
|
+
require "data_modeler/models/fann"
|
14
11
|
|
15
|
-
#
|
12
|
+
# Framework core
|
16
13
|
require "data_modeler/base"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_modeler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Giuseppe Cuccu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-fann
|
@@ -159,12 +159,10 @@ files:
|
|
159
159
|
- lib/data_modeler/base.rb
|
160
160
|
- lib/data_modeler/dataset/dataset.rb
|
161
161
|
- lib/data_modeler/dataset/dataset_gen.rb
|
162
|
-
- lib/data_modeler/dataset/
|
163
|
-
- lib/data_modeler/
|
164
|
-
- lib/data_modeler/
|
165
|
-
- lib/data_modeler/
|
166
|
-
- lib/data_modeler/model/fann.rb
|
167
|
-
- lib/data_modeler/version.rb
|
162
|
+
- lib/data_modeler/dataset/helper.rb
|
163
|
+
- lib/data_modeler/models/fann.rb
|
164
|
+
- lib/data_modeler/models/selector.rb
|
165
|
+
- lib/data_modeler/support.rb
|
168
166
|
homepage: https://github.com/giuse/data_modeler
|
169
167
|
licenses:
|
170
168
|
- MIT
|
@@ -1,12 +0,0 @@
|
|
1
|
-
class DataModeler::Dataset
|
2
|
-
# Exception: the requested `time` is not present in the data
|
3
|
-
class TimeNotFoundError < StandardError; end
|
4
|
-
end
|
5
|
-
|
6
|
-
class DataModeler::DatasetGen
|
7
|
-
# Exception: the `data` is not sufficient for the training setup
|
8
|
-
class NotEnoughDataError < StandardError; end
|
9
|
-
# Exception: not enough `data` left to build another train+test
|
10
|
-
# @note being subclassed from `StopIteration`, it will break loops
|
11
|
-
class NoDataLeft < StopIteration; end
|
12
|
-
end
|
data/lib/data_modeler/helpers.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
# Helper functions go here
|
3
|
-
module DataModeler
|
4
|
-
# Returns a standardized String ID from a (sequentially named) file
|
5
|
-
# @return [String]
|
6
|
-
# @note convenient method to have available in the config
|
7
|
-
def self.id_from_filename filename=__FILE__
|
8
|
-
format "%02d", Integer(filename[/_(\d+).rb$/,1])
|
9
|
-
end
|
10
|
-
|
11
|
-
# Returns an instance of the Base class
|
12
|
-
# @param config [Hash] Base class configuration
|
13
|
-
# @return [Base] initialized instance of Base class
|
14
|
-
def self.new config
|
15
|
-
DataModeler::Base.new config
|
16
|
-
end
|
17
|
-
end
|
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'ruby-fann'
|
2
|
-
|
3
|
-
# Model class based on Fast Artificial Neural Networks (FANN)
|
4
|
-
class DataModeler::Model::FANN
|
5
|
-
|
6
|
-
attr_reader :fann_opts, :ngens, :fann, :algo, :actfn
|
7
|
-
|
8
|
-
# @param ngens [Integer] number of generations alloted for training
|
9
|
-
# @param hidden_layers [Array<Integer>] list of number of hidden neurons
|
10
|
-
# per each hidden layer in the network
|
11
|
-
# @param ninputs [Integer] number of inputs of the network
|
12
|
-
# @param noutputs [Integer] number of outputs of the network
|
13
|
-
# @param algo [:incremental, :batch, :rprop, :quickprop] training algorithm
|
14
|
-
# @param actfn [:sigmoid, ...] activation function
|
15
|
-
def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil
|
16
|
-
@fann_opts = {
|
17
|
-
num_inputs: ninputs,
|
18
|
-
hidden_neurons: hidden_layers,
|
19
|
-
num_outputs: noutputs
|
20
|
-
}
|
21
|
-
@ngens = ngens
|
22
|
-
@algo = algo
|
23
|
-
@actfn = actfn
|
24
|
-
reset
|
25
|
-
end
|
26
|
-
|
27
|
-
# Resets / initializes the model
|
28
|
-
# @return [void]
|
29
|
-
def reset
|
30
|
-
@fann = RubyFann::Standard.new fann_opts
|
31
|
-
fann.set_training_algorithm(algo) if algo
|
32
|
-
if actfn
|
33
|
-
fann.set_activation_function_hidden(actfn)
|
34
|
-
fann.set_activation_function_output(actfn)
|
35
|
-
end
|
36
|
-
nil
|
37
|
-
end
|
38
|
-
|
39
|
-
# Trains the model for ngens on the trainset
|
40
|
-
# @param trainset [Hash-like<input: Array, target: Array>] training set
|
41
|
-
# @param ngens [Integer] number of training generations
|
42
|
-
# @return [void]
|
43
|
-
def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
|
44
|
-
# TODO: optimize maybe?
|
45
|
-
inputs, targets = trainset.values
|
46
|
-
tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
|
47
|
-
# fann.init_weights tset # test this weights initialization
|
48
|
-
|
49
|
-
# params: train_data, max_epochs, report_interval, desired_error
|
50
|
-
fann.train_on_data(tset, ngens, report_interval, desired_error)
|
51
|
-
end
|
52
|
-
|
53
|
-
# Tests the model on inputs.
|
54
|
-
# @param inputs [Array<Array<inputs>>] sequence of inputs for the model
|
55
|
-
# @return [Array<Array<outputs>>] outputs corresponding to each input
|
56
|
-
def test inputs
|
57
|
-
inputs.collect { |i| fann.run i }
|
58
|
-
end
|
59
|
-
|
60
|
-
# Save the model
|
61
|
-
# @param filename [String/path] where to save the model
|
62
|
-
# @return [void]
|
63
|
-
def save filename
|
64
|
-
# can do filename check here...?
|
65
|
-
# TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
|
66
|
-
fann.save filename.to_s
|
67
|
-
end
|
68
|
-
end
|
data/lib/data_modeler/model.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
# All models for the framework should belong to this module.
|
3
|
-
# Also includes a model selector for initialization from config.
|
4
|
-
module DataModeler::Model
|
5
|
-
# Returns a new Model correctly initialized based on the `type` of choice
|
6
|
-
# @param type [Symbol] which type of Model is chosen
|
7
|
-
# @param opts [splatted Hash params] the rest of the parameters will be passed
|
8
|
-
# to the model for initialization
|
9
|
-
# @return [Model] a correctly initialized Model of type `type`
|
10
|
-
def self.from_conf type:, **opts
|
11
|
-
case type
|
12
|
-
when :fann
|
13
|
-
FANN.new opts
|
14
|
-
else abort "Unrecognized model: #{type}"
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|