data_modeler 0.3.0 → 0.3.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/data_modeler.gemspec +1 -1
- data/lib/data_modeler/base.rb +9 -4
- data/lib/data_modeler/dataset/dataset.rb +10 -13
- data/lib/data_modeler/dataset/dataset_gen.rb +31 -24
- data/lib/data_modeler/dataset/{dataset_helper.rb → helper.rb} +5 -5
- data/lib/data_modeler/models/fann.rb +113 -0
- data/lib/data_modeler/models/selector.rb +18 -0
- data/lib/data_modeler/support.rb +41 -0
- data/lib/data_modeler.rb +5 -8
- metadata +6 -8
- data/lib/data_modeler/exceptions.rb +0 -12
- data/lib/data_modeler/helpers.rb +0 -17
- data/lib/data_modeler/model/fann.rb +0 -68
- data/lib/data_modeler/model.rb +0 -17
- data/lib/data_modeler/version.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 173d569d4d705b32ca166d444766651f94b4a98d
|
4
|
+
data.tar.gz: ec61342d6188533751c874549c6f55d346d02bcb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 3cb220eeb8f7349321d35adb07efe919c9a23e9e0fe0459ca411b87632dd89ce21a7e1e04d350f1527e9e0f83b9924445b2c155c6af8fb9b71caace5b2600301
|
7
|
+
data.tar.gz: 74c46dea839cf5c1f99346ae197b7c87d06815c1d8c1e3c61238fac0801b6e20dd288efbaf7026f2a5991052c823a0179f48054473cbce4a628b5ff74f8b198d
|
data/data_modeler.gemspec
CHANGED
data/lib/data_modeler/base.rb
CHANGED
@@ -24,7 +24,7 @@ class DataModeler::Base
|
|
24
24
|
@out_dir = prepare_output config[:results]
|
25
25
|
|
26
26
|
@tset_gen = DataModeler::DatasetGen.new data, **opts_for(:datasetgen)
|
27
|
-
@model = DataModeler::
|
27
|
+
@model = DataModeler::Models.selector **opts_for(:learner)
|
28
28
|
end
|
29
29
|
|
30
30
|
# Main control: up to `nruns` (or until end of data) loop train-test-save
|
@@ -49,13 +49,18 @@ class DataModeler::Base
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Attribute reader for instance variable `@save_models`, ending in '?' since
|
52
|
-
#
|
52
|
+
# it's a boolean value.
|
53
53
|
# @return [true|false] value of instance variable @save_models
|
54
54
|
# (false if nil/uninitialized)
|
55
55
|
def save_models?
|
56
56
|
@save_models || false
|
57
57
|
end
|
58
58
|
|
59
|
+
# @return [String]
|
60
|
+
def to_s
|
61
|
+
config.to_s
|
62
|
+
end
|
63
|
+
|
59
64
|
private
|
60
65
|
|
61
66
|
# Loads the data in a Hash ready for `DatasetGen` (and `Dataset`)
|
@@ -97,13 +102,13 @@ class DataModeler::Base
|
|
97
102
|
when :dataset
|
98
103
|
{ inputs: inputs,
|
99
104
|
targets: targets,
|
100
|
-
|
105
|
+
ninput_points: config[:tset][:ninput_points],
|
101
106
|
tspread: config[:tset][:tspread],
|
102
107
|
look_ahead: config[:tset][:look_ahead]
|
103
108
|
}
|
104
109
|
when :learner
|
105
110
|
config[:learner].merge({
|
106
|
-
ninputs: (config[:tset][:
|
111
|
+
ninputs: (config[:tset][:ninput_points] * inputs.size),
|
107
112
|
noutputs: targets.size
|
108
113
|
})
|
109
114
|
else abort "Unrecognized `who`: '#{who}'"
|
@@ -1,15 +1,12 @@
|
|
1
1
|
|
2
2
|
# Build complex inputs and targets from the data to train the model.
|
3
|
-
#
|
4
|
-
# @note checks to validate if enough data is present (given `ntimes`, `tspread`
|
5
|
-
# and `look_ahead`) should be done on the caller (typically `DatasetGen`)
|
6
3
|
class DataModeler::Dataset
|
7
4
|
|
8
5
|
attr_reader :data, :input_series, :target_series, :first_idx, :end_idx,
|
9
|
-
:
|
6
|
+
:ninput_points, :tspread, :look_ahead, :first_idx, :target_idx,
|
10
7
|
:input_idxs, :nrows
|
11
8
|
|
12
|
-
# @param data [Hash
|
9
|
+
# @param data [Hash] the data, in an object that can be
|
13
10
|
# accessed by keys and return a time series per each key.
|
14
11
|
# It is required to include and be sorted by a series named `time`,
|
15
12
|
# and for all series to have equal length.
|
@@ -17,22 +14,22 @@ class DataModeler::Dataset
|
|
17
14
|
# @param targets [Array] data key accessors for target series
|
18
15
|
# @param first_idx [Integer] index where the dataset starts on data
|
19
16
|
# @param end_idx [Integer] index where the dataset ends on data
|
20
|
-
# @param
|
17
|
+
# @param ninput_points [Integer] number of lines/datapoints to be
|
21
18
|
# used to construct the input
|
22
|
-
# @param tspread [Numeric] distance (in `time`!) between the `
|
23
|
-
# lines/
|
19
|
+
# @param tspread [Numeric] distance (in `time`!) between the `ninput_points`
|
20
|
+
# lines/datapoints used to construct the input
|
24
21
|
# @param look_ahead [Numeric] distance (in `time`!) between the
|
25
22
|
# most recent line/time/datapoint used for the input and
|
26
23
|
# the target -- i.e., how far ahead the model is trained to predict
|
27
24
|
# @note we expect Datasets indices to be used with left inclusion but
|
28
25
|
# right exclusion, i.e. targets are considered in the range `[from,to)`
|
29
|
-
def initialize data, inputs:, targets:, first_idx:, end_idx:,
|
26
|
+
def initialize data, inputs:, targets:, first_idx:, end_idx:, ninput_points:, tspread:, look_ahead:
|
30
27
|
@data = data
|
31
28
|
@input_series = inputs
|
32
29
|
@target_series = targets
|
33
30
|
@first_idx = first_idx
|
34
31
|
@end_idx = end_idx
|
35
|
-
@
|
32
|
+
@ninput_points = ninput_points
|
36
33
|
@nrows = data[:time].size
|
37
34
|
@tspread = tspread
|
38
35
|
@look_ahead = look_ahead
|
@@ -92,9 +89,9 @@ class DataModeler::Dataset
|
|
92
89
|
to_a.transpose
|
93
90
|
end
|
94
91
|
|
95
|
-
#
|
92
|
+
# Equality operator -- most useful in testing
|
96
93
|
# @param other [Dataset] what needs comparing to
|
97
|
-
# @return [
|
94
|
+
# @return [true|false]
|
98
95
|
def == other
|
99
96
|
self.class == other.class && # terminate check here if wrong class
|
100
97
|
data.object_id == other.data.object_id && # both `data` point to same object
|
@@ -120,7 +117,7 @@ class DataModeler::Dataset
|
|
120
117
|
def init_inputs
|
121
118
|
if target_idx < end_idx
|
122
119
|
# build list of incremental time buffers
|
123
|
-
bufs =
|
120
|
+
bufs = ninput_points.times.collect { |n| look_ahead + n * tspread }
|
124
121
|
# reverse it and subtract from the target's time
|
125
122
|
times = bufs.reverse.collect { |s| time(target_idx) - s }
|
126
123
|
# now you have the list of times at which each pointer should point
|
@@ -1,27 +1,32 @@
|
|
1
1
|
|
2
2
|
# Build train and test datasets for each run of the training.
|
3
3
|
#
|
4
|
-
#
|
5
|
-
#
|
6
|
-
#
|
7
|
-
#
|
8
|
-
#
|
9
|
-
#
|
4
|
+
# Train and test sets are seen as moving windows on the data.
|
5
|
+
# Alignment is designed to provide continuous testing results over (most of) the data.
|
6
|
+
# The following diagram exemplifies this: the training sets `t1`, `t2` and `t3` are
|
7
|
+
# aligned such that their results can be plotted continuously against the observations.
|
8
|
+
# (b) is the amount of data covering for the input+look_ahead window used for the first
|
9
|
+
# target.
|
10
|
+
# data: ----------------------> (time, datapoints)
|
11
|
+
# run1: (b)|train1|t1| -> train starts after (b), test after training
|
12
|
+
# run2: |train2|t2| -> train starts after (b) + 1 tset
|
13
|
+
# run3: |train3|t3| -> train starts after (b) + 2 tset
|
10
14
|
# Note how the test sets line up. This allows the testing results plots
|
11
|
-
# to be continuous, no model is tested on data on which
|
12
|
-
#
|
15
|
+
# to be continuous, while no model is tested on data on which _itself_ has been trained.
|
16
|
+
# All data is used multiple times, alternately both as train and test sets.
|
13
17
|
class DataModeler::DatasetGen
|
14
18
|
|
15
19
|
attr_reader :data, :ds_args, :first_idx, :train_size, :test_size, :nrows
|
16
20
|
|
17
|
-
# @param data [Hash
|
21
|
+
# @param data [Hash] the data, in an object that can be
|
18
22
|
# accessed by keys and return a time series per each key.
|
19
|
-
# It is required to include and be sorted by a series named
|
23
|
+
# It is required to include (and be sorted by) a series named `:time`,
|
20
24
|
# and for all series to have equal length.
|
21
|
-
# @param ds_args [Hash] parameters for
|
22
|
-
# first_idx, end_idx,
|
23
|
-
#
|
24
|
-
# @
|
25
|
+
# @param ds_args [Hash] parameters hash for `Dataset`s initialization.
|
26
|
+
# Keys: `%i[inputs targets first_idx end_idx ninput_points]`.
|
27
|
+
# See `Dataset#initialize` for details.
|
28
|
+
# @param train_size [Integer] how many points to expose as targets in each training set
|
29
|
+
# @param test_size [Integer] how many points to expose as targets in each test set
|
25
30
|
def initialize data, ds_args:, train_size:, test_size:, min_nruns: 1
|
26
31
|
@data = data
|
27
32
|
@ds_args = ds_args
|
@@ -36,22 +41,24 @@ class DataModeler::DatasetGen
|
|
36
41
|
|
37
42
|
### DATA ACCESS
|
38
43
|
|
39
|
-
# Builds training
|
40
|
-
# @param nrun [Integer] will build different
|
44
|
+
# Builds training sets for model training
|
45
|
+
# @param nrun [Integer] will build different trainset for each run
|
41
46
|
# @return [Dataset]
|
42
47
|
# @raise [NoDataLeft] when there's not enough data left for a full train+test
|
48
|
+
# @note train or test have no meaning alone, and train always comes first.
|
49
|
+
# Hence, `#train` checks if enough `data` is available for both `train`+`test`.
|
43
50
|
def train nrun
|
44
51
|
first = min_eligible_trg + (nrun-1) * test_size
|
45
52
|
last = first + train_size
|
46
|
-
# make sure there's enough data
|
47
|
-
raise NoDataLeft unless last + test_size < nrows
|
53
|
+
raise NoDataLeft unless last + test_size < nrows # make sure there's enough data
|
48
54
|
DataModeler::Dataset.new data, ds_args.merge(first_idx: first, end_idx: last)
|
49
55
|
end
|
50
56
|
|
51
|
-
# Builds test
|
52
|
-
# @param nrun [Integer] will build different
|
57
|
+
# Builds test sets for model testing
|
58
|
+
# @param nrun [Integer] will build different testset for each run
|
53
59
|
# @return [Dataset]
|
54
|
-
# @note
|
60
|
+
# @note train or test have no meaning alone, and train always comes first.
|
61
|
+
# Hence, `#train` checks if enough `data` is available for both `train`+`test`.
|
55
62
|
def test nrun
|
56
63
|
first = min_eligible_trg + (nrun-1) * test_size + train_size
|
57
64
|
last = first + test_size
|
@@ -62,13 +69,13 @@ class DataModeler::DatasetGen
|
|
62
69
|
|
63
70
|
# TODO: @local_nrun is an ugly name, refactor it!
|
64
71
|
|
65
|
-
# Returns the next pair [trainset, testset]
|
72
|
+
# Returns the next pair `[trainset, testset]`
|
66
73
|
# @return [Array<Dataset, Dataset>]
|
67
74
|
def peek
|
68
75
|
[self.train(@local_nrun), self.test(@local_nrun)]
|
69
76
|
end
|
70
77
|
|
71
|
-
# Returns the next pair [trainset, testset] and increments the counter
|
78
|
+
# Returns the next pair `[trainset, testset]` and increments the counter
|
72
79
|
# @return [Array<Dataset, Dataset>]
|
73
80
|
def next
|
74
81
|
peek.tap { @local_nrun += 1 }
|
@@ -106,7 +113,7 @@ class DataModeler::DatasetGen
|
|
106
113
|
def min_eligible_trg
|
107
114
|
@min_eligible_trg ||= idx( time(0) +
|
108
115
|
# minimum time span required as input for the first target
|
109
|
-
ds_args[:look_ahead] + (ds_args[:
|
116
|
+
ds_args[:look_ahead] + (ds_args[:ninput_points]-1) * ds_args[:tspread]
|
110
117
|
)
|
111
118
|
end
|
112
119
|
|
@@ -3,7 +3,7 @@ class DataModeler::Dataset
|
|
3
3
|
module ConvertingTimeAndIndices
|
4
4
|
# Returns the time for a given index
|
5
5
|
# @param [Integer] idx row index
|
6
|
-
# @return [
|
6
|
+
# @return [type of `data[:time]`]
|
7
7
|
def time idx
|
8
8
|
data[:time][idx]
|
9
9
|
end
|
@@ -25,10 +25,10 @@ class DataModeler::Dataset
|
|
25
25
|
end
|
26
26
|
end
|
27
27
|
|
28
|
-
# Provides each (which can return an `Iterator`) and
|
28
|
+
# Provides `#each` (which can return an `Iterator`) and `#to_a` based on `#next`
|
29
29
|
module IteratingBasedOnNext
|
30
30
|
# Yields on each [inputs, targets] pair.
|
31
|
-
# @return [nil
|
31
|
+
# @return [nil|Iterator] `block_given? ? nil : Iterator`
|
32
32
|
def each
|
33
33
|
reset_iteration
|
34
34
|
return enum_for(:each) unless block_given?
|
@@ -36,8 +36,8 @@ class DataModeler::Dataset
|
|
36
36
|
nil
|
37
37
|
end
|
38
38
|
|
39
|
-
# Yields on each [inputs, targets] pair, collecting the input.
|
40
|
-
# @return [Array
|
39
|
+
# Yields on each `[inputs, targets]` pair, collecting the input.
|
40
|
+
# @return [Array|Iterator] `block_given? ? Array : Iterator`
|
41
41
|
def map
|
42
42
|
reset_iteration
|
43
43
|
return enum_for(:collect) unless block_given?
|
@@ -0,0 +1,113 @@
|
|
1
|
+
require 'ruby-fann'
|
2
|
+
|
3
|
+
# Model the data using an artificial neural network, based on the
|
4
|
+
# Fast Artificial Neural Networks (FANN) implementation
|
5
|
+
class DataModeler::Models::FANN
|
6
|
+
|
7
|
+
attr_reader :fann_opts, :ngens, :fann, :algo, :actfn, :init_weights_range
|
8
|
+
|
9
|
+
# @param ngens [Integer] number of generations (repetitions) allotted for training
|
10
|
+
# @param hidden_layers [Array<Integer>] list of number of hidden neurons
|
11
|
+
# per each hidden layer in the network
|
12
|
+
# @param ninputs [Integer] number of inputs in the network
|
13
|
+
# @param noutputs [Integer] number of outputs in the network
|
14
|
+
# @param algo [:rprop, :rwg, ...] training algorithm
|
15
|
+
# @param actfn [:sigmoid, ...] activation function
|
16
|
+
# @param init_weights_range [Array<min_w, max_w>] minimum and maximum value for weight initialization range
|
17
|
+
def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil, init_weights_range: nil
|
18
|
+
@fann_opts = {
|
19
|
+
num_inputs: ninputs,
|
20
|
+
hidden_neurons: hidden_layers,
|
21
|
+
num_outputs: noutputs
|
22
|
+
}
|
23
|
+
@ngens = ngens
|
24
|
+
@algo = algo
|
25
|
+
@actfn = actfn
|
26
|
+
@init_weights_range = init_weights_range
|
27
|
+
reset
|
28
|
+
end
|
29
|
+
|
30
|
+
# Resets / initializes the model
|
31
|
+
# @return [void]
|
32
|
+
def reset
|
33
|
+
@fann = RubyFann::Standard.new fann_opts
|
34
|
+
if algo && algo != :rwg
|
35
|
+
fann.set_training_algorithm(algo)
|
36
|
+
end
|
37
|
+
if actfn
|
38
|
+
fann.set_activation_function_hidden(actfn)
|
39
|
+
fann.set_activation_function_output(actfn)
|
40
|
+
end
|
41
|
+
if init_weights_range
|
42
|
+
fann.randomize_weights(*init_weights_range.map(&method(:Float)))
|
43
|
+
end
|
44
|
+
end
|
45
|
+
|
46
|
+
# Trains the model for ngens on the trainset
|
47
|
+
# @param trainset [Hash<input: Array, target: Array>] training set
|
48
|
+
# @param ngens [Integer] number of training generations
|
49
|
+
# @return [void]
|
50
|
+
def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
|
51
|
+
# special case: not implemented in FANN
|
52
|
+
if algo == :rwg
|
53
|
+
return train_rwg(trainset, ngens,
|
54
|
+
report_interval: report_interval, desired_error: desired_error)
|
55
|
+
end
|
56
|
+
# TODO: optimize maybe?
|
57
|
+
inputs, targets = trainset.values
|
58
|
+
tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
|
59
|
+
# fann.init_weights tset # test this weights initialization
|
60
|
+
|
61
|
+
# params: train_data, max_epochs, report_interval, desired_error
|
62
|
+
fann.train_on_data(tset, ngens, report_interval, desired_error)
|
63
|
+
end
|
64
|
+
|
65
|
+
# Trains the model for ngens on the trainset using Random Weight Guessing
|
66
|
+
# @param trainset [Hash-like<input: Array, target: Array>] training set
|
67
|
+
# @param ngens [Integer] number of training generations
|
68
|
+
# @return [void]
|
69
|
+
def train_rwg trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
|
70
|
+
# TODO: use report_interval and desired_error
|
71
|
+
# initialize weight with random values in an interval [min_weight, max_weight]
|
72
|
+
# NOTE: if the RWG training is unsuccessful, this range is the first place to
|
73
|
+
# check to improve performance
|
74
|
+
fann.randomize_weights(*init_weights_range.map(&method(:Float)))
|
75
|
+
# test it on inputs
|
76
|
+
inputs, targets = trainset.values
|
77
|
+
outputs = test(inputs)
|
78
|
+
# calculate RMSE
|
79
|
+
rmse_fn = -> (outs) do
|
80
|
+
sq_err = outs.zip(targets).flat_map do |os,ts|
|
81
|
+
os.zip(ts).collect { |o,t| (t-o)**2 }
|
82
|
+
end
|
83
|
+
Math.sqrt(sq_err.reduce(:+) / sq_err.size)
|
84
|
+
end
|
85
|
+
rmse = rmse_fn.call(outputs)
|
86
|
+
# initialize best
|
87
|
+
best = [fann,rmse]
|
88
|
+
# rinse and repeat
|
89
|
+
ngens.times do
|
90
|
+
outputs = test(inputs)
|
91
|
+
rmse = rmse_fn.call(outputs)
|
92
|
+
(best = [fann,rmse]; puts rmse) if rmse < best.last
|
93
|
+
end
|
94
|
+
# expose the best to the interface
|
95
|
+
fann = best.first
|
96
|
+
end
|
97
|
+
|
98
|
+
# Tests the model on inputs.
|
99
|
+
# @param inputs [Array<Array<inputs>>] sequence of inputs for the model
|
100
|
+
# @return [Array<Array<outputs>>] outputs corresponding to each input
|
101
|
+
def test inputs
|
102
|
+
inputs.collect { |i| fann.run i }
|
103
|
+
end
|
104
|
+
|
105
|
+
# Saves the model
|
106
|
+
# @param filename [String/path] where to save the model
|
107
|
+
# @return [void]
|
108
|
+
def save filename
|
109
|
+
# can do filename check here...?
|
110
|
+
# TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
|
111
|
+
fann.save filename.to_s
|
112
|
+
end
|
113
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
|
2
|
+
# All models for the framework should belong to this module.
|
3
|
+
# Also includes a model selector for initialization from config.
|
4
|
+
module DataModeler::Models
|
5
|
+
# Returns a new `Model` based on the `type` of choice initialized
|
6
|
+
# with `opts` parameters
|
7
|
+
# @param type [Symbol] selects the type of `Model`
|
8
|
+
# @param opts [**Hash] the rest of the parameters will be passed
|
9
|
+
# to the model for its initialization
|
10
|
+
# @return [Model] an initialized `Model` of type `type`
|
11
|
+
def self.selector type:, **opts
|
12
|
+
case type
|
13
|
+
when :fann
|
14
|
+
FANN.new opts
|
15
|
+
else abort "Unrecognized model: #{type}"
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
|
2
|
+
# Main gem module
|
3
|
+
module DataModeler
|
4
|
+
|
5
|
+
### VERSION
|
6
|
+
|
7
|
+
# Version number
|
8
|
+
VERSION = "0.3.3"
|
9
|
+
|
10
|
+
### HELPER FUNCTIONS
|
11
|
+
|
12
|
+
# Returns a standardized String ID from a (sequentially named) file
|
13
|
+
# @return [String]
|
14
|
+
# @note convenient method to have available in the config
|
15
|
+
def self.id_from_filename filename=__FILE__
|
16
|
+
format "%02d", Integer(filename[/_(\d+).rb$/,1])
|
17
|
+
end
|
18
|
+
|
19
|
+
# Returns an instance of the Base class
|
20
|
+
# @param config [Hash] Base class configuration
|
21
|
+
# @return [Base] initialized instance of Base class
|
22
|
+
def self.new config
|
23
|
+
DataModeler::Base.new config
|
24
|
+
end
|
25
|
+
|
26
|
+
### EXCEPTIONS
|
27
|
+
|
28
|
+
class DataModeler::Dataset
|
29
|
+
# Exception: the requested `time` is not present in the data
|
30
|
+
class TimeNotFoundError < StandardError; end
|
31
|
+
end
|
32
|
+
|
33
|
+
class DataModeler::DatasetGen
|
34
|
+
# Exception: not enough `data` was provided for even a single train+test setup
|
35
|
+
class NotEnoughDataError < StandardError; end
|
36
|
+
|
37
|
+
# Exception: not enough `data` left to build another train+test
|
38
|
+
# @note subclassed from `StopIteration` -> it will break loops
|
39
|
+
class NoDataLeft < StopIteration; end
|
40
|
+
end
|
41
|
+
end
|
data/lib/data_modeler.rb
CHANGED
@@ -1,16 +1,13 @@
|
|
1
|
-
|
2
|
-
require "data_modeler/version"
|
3
|
-
require "data_modeler/exceptions"
|
4
|
-
require "data_modeler/helpers"
|
1
|
+
require "data_modeler/support"
|
5
2
|
|
6
3
|
# Dataset
|
7
|
-
require "data_modeler/dataset/
|
4
|
+
require "data_modeler/dataset/helper"
|
8
5
|
require "data_modeler/dataset/dataset"
|
9
6
|
require "data_modeler/dataset/dataset_gen"
|
10
7
|
|
11
8
|
# Models
|
12
|
-
require "data_modeler/
|
13
|
-
require "data_modeler/
|
9
|
+
require "data_modeler/models/selector"
|
10
|
+
require "data_modeler/models/fann"
|
14
11
|
|
15
|
-
#
|
12
|
+
# Framework core
|
16
13
|
require "data_modeler/base"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: data_modeler
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.
|
4
|
+
version: 0.3.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Giuseppe Cuccu
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2017-05-
|
11
|
+
date: 2017-05-18 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: ruby-fann
|
@@ -159,12 +159,10 @@ files:
|
|
159
159
|
- lib/data_modeler/base.rb
|
160
160
|
- lib/data_modeler/dataset/dataset.rb
|
161
161
|
- lib/data_modeler/dataset/dataset_gen.rb
|
162
|
-
- lib/data_modeler/dataset/
|
163
|
-
- lib/data_modeler/
|
164
|
-
- lib/data_modeler/
|
165
|
-
- lib/data_modeler/
|
166
|
-
- lib/data_modeler/model/fann.rb
|
167
|
-
- lib/data_modeler/version.rb
|
162
|
+
- lib/data_modeler/dataset/helper.rb
|
163
|
+
- lib/data_modeler/models/fann.rb
|
164
|
+
- lib/data_modeler/models/selector.rb
|
165
|
+
- lib/data_modeler/support.rb
|
168
166
|
homepage: https://github.com/giuse/data_modeler
|
169
167
|
licenses:
|
170
168
|
- MIT
|
@@ -1,12 +0,0 @@
|
|
1
|
-
class DataModeler::Dataset
|
2
|
-
# Exception: the requested `time` is not present in the data
|
3
|
-
class TimeNotFoundError < StandardError; end
|
4
|
-
end
|
5
|
-
|
6
|
-
class DataModeler::DatasetGen
|
7
|
-
# Exception: the `data` is not sufficient for the training setup
|
8
|
-
class NotEnoughDataError < StandardError; end
|
9
|
-
# Exception: not enough `data` left to build another train+test
|
10
|
-
# @note being subclassed from `StopIteration`, it will break loops
|
11
|
-
class NoDataLeft < StopIteration; end
|
12
|
-
end
|
data/lib/data_modeler/helpers.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
# Helper functions go here
|
3
|
-
module DataModeler
|
4
|
-
# Returns a standardized String ID from a (sequentially named) file
|
5
|
-
# @return [String]
|
6
|
-
# @note convenient method to have available in the config
|
7
|
-
def self.id_from_filename filename=__FILE__
|
8
|
-
format "%02d", Integer(filename[/_(\d+).rb$/,1])
|
9
|
-
end
|
10
|
-
|
11
|
-
# Returns an instance of the Base class
|
12
|
-
# @param config [Hash] Base class configuration
|
13
|
-
# @return [Base] initialized instance of Base class
|
14
|
-
def self.new config
|
15
|
-
DataModeler::Base.new config
|
16
|
-
end
|
17
|
-
end
|
@@ -1,68 +0,0 @@
|
|
1
|
-
require 'ruby-fann'
|
2
|
-
|
3
|
-
# Model class based on Fast Artificial Neural Networks (FANN)
|
4
|
-
class DataModeler::Model::FANN
|
5
|
-
|
6
|
-
attr_reader :fann_opts, :ngens, :fann, :algo, :actfn
|
7
|
-
|
8
|
-
# @param ngens [Integer] number of generations alloted for training
|
9
|
-
# @param hidden_layers [Array<Integer>] list of number of hidden neurons
|
10
|
-
# per each hidden layer in the network
|
11
|
-
# @param ninputs [Integer] number of inputs of the network
|
12
|
-
# @param noutputs [Integer] number of outputs of the network
|
13
|
-
# @param algo [:incremental, :batch, :rprop, :quickprop] training algorithm
|
14
|
-
# @param actfn [:sigmoid, ...] activation function
|
15
|
-
def initialize ngens:, hidden_layers:, ninputs:, noutputs:, algo: nil, actfn: nil
|
16
|
-
@fann_opts = {
|
17
|
-
num_inputs: ninputs,
|
18
|
-
hidden_neurons: hidden_layers,
|
19
|
-
num_outputs: noutputs
|
20
|
-
}
|
21
|
-
@ngens = ngens
|
22
|
-
@algo = algo
|
23
|
-
@actfn = actfn
|
24
|
-
reset
|
25
|
-
end
|
26
|
-
|
27
|
-
# Resets / initializes the model
|
28
|
-
# @return [void]
|
29
|
-
def reset
|
30
|
-
@fann = RubyFann::Standard.new fann_opts
|
31
|
-
fann.set_training_algorithm(algo) if algo
|
32
|
-
if actfn
|
33
|
-
fann.set_activation_function_hidden(actfn)
|
34
|
-
fann.set_activation_function_output(actfn)
|
35
|
-
end
|
36
|
-
nil
|
37
|
-
end
|
38
|
-
|
39
|
-
# Trains the model for ngens on the trainset
|
40
|
-
# @param trainset [Hash-like<input: Array, target: Array>] training set
|
41
|
-
# @param ngens [Integer] number of training generations
|
42
|
-
# @return [void]
|
43
|
-
def train trainset, ngens=@ngens, report_interval: 1000, desired_error: 1e-10
|
44
|
-
# TODO: optimize maybe?
|
45
|
-
inputs, targets = trainset.values
|
46
|
-
tset = RubyFann::TrainData.new inputs: inputs, desired_outputs: targets
|
47
|
-
# fann.init_weights tset # test this weights initialization
|
48
|
-
|
49
|
-
# params: train_data, max_epochs, report_interval, desired_error
|
50
|
-
fann.train_on_data(tset, ngens, report_interval, desired_error)
|
51
|
-
end
|
52
|
-
|
53
|
-
# Tests the model on inputs.
|
54
|
-
# @param inputs [Array<Array<inputs>>] sequence of inputs for the model
|
55
|
-
# @return [Array<Array<outputs>>] outputs corresponding to each input
|
56
|
-
def test inputs
|
57
|
-
inputs.collect { |i| fann.run i }
|
58
|
-
end
|
59
|
-
|
60
|
-
# Save the model
|
61
|
-
# @param filename [String/path] where to save the model
|
62
|
-
# @return [void]
|
63
|
-
def save filename
|
64
|
-
# can do filename check here...?
|
65
|
-
# TODO: I'd like to have a kind of `to_s`, and do all the saving in the modeler...
|
66
|
-
fann.save filename.to_s
|
67
|
-
end
|
68
|
-
end
|
data/lib/data_modeler/model.rb
DELETED
@@ -1,17 +0,0 @@
|
|
1
|
-
|
2
|
-
# All models for the framework should belong to this module.
|
3
|
-
# Also includes a model selector for initialization from config.
|
4
|
-
module DataModeler::Model
|
5
|
-
# Returns a new Model correctly initialized based on the `type` of choice
|
6
|
-
# @param type [Symbol] which type of Model is chosen
|
7
|
-
# @param opts [splatted Hash params] the rest of the parameters will be passed
|
8
|
-
# to the model for initialization
|
9
|
-
# @return [Model] a correctly initialized Model of type `type`
|
10
|
-
def self.from_conf type:, **opts
|
11
|
-
case type
|
12
|
-
when :fann
|
13
|
-
FANN.new opts
|
14
|
-
else abort "Unrecognized model: #{type}"
|
15
|
-
end
|
16
|
-
end
|
17
|
-
end
|