CooCoo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +16 -0
- data/CooCoo.gemspec +47 -0
- data/Gemfile +4 -0
- data/Gemfile.lock +88 -0
- data/README.md +123 -0
- data/Rakefile +81 -0
- data/bin/cuda-dev-info +25 -0
- data/bin/cuda-free +28 -0
- data/bin/cuda-free-trend +7 -0
- data/bin/ffi-gen +267 -0
- data/bin/spec_runner_html.sh +42 -0
- data/bin/trainer +198 -0
- data/bin/trend-cost +13 -0
- data/examples/char-rnn.rb +405 -0
- data/examples/cifar/cifar.rb +94 -0
- data/examples/img-similarity.rb +201 -0
- data/examples/math_ops.rb +57 -0
- data/examples/mnist.rb +365 -0
- data/examples/mnist_classifier.rb +293 -0
- data/examples/mnist_dream.rb +214 -0
- data/examples/seeds.rb +268 -0
- data/examples/seeds_dataset.txt +210 -0
- data/examples/t10k-images-idx3-ubyte +0 -0
- data/examples/t10k-labels-idx1-ubyte +0 -0
- data/examples/train-images-idx3-ubyte +0 -0
- data/examples/train-labels-idx1-ubyte +0 -0
- data/ext/buffer/Rakefile +50 -0
- data/ext/buffer/buffer.pre.cu +727 -0
- data/ext/buffer/matrix.pre.cu +49 -0
- data/lib/CooCoo.rb +1 -0
- data/lib/coo-coo.rb +18 -0
- data/lib/coo-coo/activation_functions.rb +344 -0
- data/lib/coo-coo/consts.rb +5 -0
- data/lib/coo-coo/convolution.rb +298 -0
- data/lib/coo-coo/core_ext.rb +75 -0
- data/lib/coo-coo/cost_functions.rb +91 -0
- data/lib/coo-coo/cuda.rb +116 -0
- data/lib/coo-coo/cuda/device_buffer.rb +240 -0
- data/lib/coo-coo/cuda/device_buffer/ffi.rb +109 -0
- data/lib/coo-coo/cuda/error.rb +51 -0
- data/lib/coo-coo/cuda/host_buffer.rb +117 -0
- data/lib/coo-coo/cuda/runtime.rb +157 -0
- data/lib/coo-coo/cuda/vector.rb +315 -0
- data/lib/coo-coo/data_sources.rb +2 -0
- data/lib/coo-coo/data_sources/xournal.rb +25 -0
- data/lib/coo-coo/data_sources/xournal/bitmap_stream.rb +197 -0
- data/lib/coo-coo/data_sources/xournal/document.rb +377 -0
- data/lib/coo-coo/data_sources/xournal/loader.rb +144 -0
- data/lib/coo-coo/data_sources/xournal/renderer.rb +101 -0
- data/lib/coo-coo/data_sources/xournal/saver.rb +99 -0
- data/lib/coo-coo/data_sources/xournal/training_document.rb +78 -0
- data/lib/coo-coo/data_sources/xournal/training_document/constants.rb +15 -0
- data/lib/coo-coo/data_sources/xournal/training_document/document_maker.rb +89 -0
- data/lib/coo-coo/data_sources/xournal/training_document/document_reader.rb +105 -0
- data/lib/coo-coo/data_sources/xournal/training_document/example.rb +37 -0
- data/lib/coo-coo/data_sources/xournal/training_document/sets.rb +76 -0
- data/lib/coo-coo/debug.rb +8 -0
- data/lib/coo-coo/dot.rb +129 -0
- data/lib/coo-coo/drawing.rb +4 -0
- data/lib/coo-coo/drawing/cairo_canvas.rb +100 -0
- data/lib/coo-coo/drawing/canvas.rb +68 -0
- data/lib/coo-coo/drawing/chunky_canvas.rb +101 -0
- data/lib/coo-coo/drawing/sixel.rb +214 -0
- data/lib/coo-coo/enum.rb +17 -0
- data/lib/coo-coo/from_name.rb +58 -0
- data/lib/coo-coo/fully_connected_layer.rb +205 -0
- data/lib/coo-coo/generation_script.rb +38 -0
- data/lib/coo-coo/grapher.rb +140 -0
- data/lib/coo-coo/image.rb +286 -0
- data/lib/coo-coo/layer.rb +67 -0
- data/lib/coo-coo/layer_factory.rb +26 -0
- data/lib/coo-coo/linear_layer.rb +59 -0
- data/lib/coo-coo/math.rb +607 -0
- data/lib/coo-coo/math/abstract_vector.rb +121 -0
- data/lib/coo-coo/math/functions.rb +39 -0
- data/lib/coo-coo/math/interpolation.rb +7 -0
- data/lib/coo-coo/network.rb +264 -0
- data/lib/coo-coo/neuron.rb +112 -0
- data/lib/coo-coo/neuron_layer.rb +168 -0
- data/lib/coo-coo/option_parser.rb +18 -0
- data/lib/coo-coo/platform.rb +17 -0
- data/lib/coo-coo/progress_bar.rb +11 -0
- data/lib/coo-coo/recurrence/backend.rb +99 -0
- data/lib/coo-coo/recurrence/frontend.rb +101 -0
- data/lib/coo-coo/sequence.rb +187 -0
- data/lib/coo-coo/shell.rb +2 -0
- data/lib/coo-coo/temporal_network.rb +291 -0
- data/lib/coo-coo/trainer.rb +21 -0
- data/lib/coo-coo/trainer/base.rb +67 -0
- data/lib/coo-coo/trainer/batch.rb +82 -0
- data/lib/coo-coo/trainer/batch_stats.rb +27 -0
- data/lib/coo-coo/trainer/momentum_stochastic.rb +59 -0
- data/lib/coo-coo/trainer/stochastic.rb +47 -0
- data/lib/coo-coo/transformer.rb +272 -0
- data/lib/coo-coo/vector_layer.rb +194 -0
- data/lib/coo-coo/version.rb +3 -0
- data/lib/coo-coo/weight_deltas.rb +23 -0
- data/prototypes/convolution.rb +116 -0
- data/prototypes/linear_drop.rb +51 -0
- data/prototypes/recurrent_layers.rb +79 -0
- data/www/images/screamer.png +0 -0
- data/www/images/screamer.xcf +0 -0
- data/www/index.html +82 -0
- metadata +373 -0
|
@@ -0,0 +1,49 @@
|
|
|
1
|
+
#include <stdio.h>
|
|
2
|
+
#include <math.h>
|
|
3
|
+
|
|
4
|
+
#include "public.h"
|
|
5
|
+
#include "buffer.h"
|
|
6
|
+
#include "matrix.h"
|
|
7
|
+
|
|
8
|
+
#ifdef IN_PUBLIC
/* Row-addressable 2D view over a Buffer.
 *
 * matrix_new() fills `rows` so that rows[i] points at element (i * width)
 * of the wrapped buffer's data; the view stores no element data itself.
 */
typedef struct Matrix2D_s
{
  size_t width;    /* elements per row */
  size_t height;   /* number of rows, and number of entries in `rows` */
  Buffer buffer;   /* buffer whose data the rows point into */
  double **rows;   /* table of `height` row start pointers */
} *Matrix2D;
#endif
|
|
16
|
+
|
|
17
|
+
/* Creates a 2D view of `w` columns by `h` rows over the buffer `b`.
 *
 * Each entry of the view's row table points at the start of one row inside
 * b->data; no element data is copied.
 *
 * Returns NULL when `b` is NULL, when the requested shape needs more than
 * b->length elements, or when an allocation fails. Release the result with
 * matrix_free().
 *
 * NOTE(review): assumes b->length counts elements (doubles), matching the
 * pointer arithmetic on b->data below -- confirm against buffer.h.
 */
PUBLIC Matrix2D matrix_new(Buffer b, size_t w, size_t h)
{
  if(b == NULL) {
    return NULL;
  }

  /* Equivalent to `w * h > b->length`, but dividing cannot overflow.
   * A shape that exactly fills the buffer is valid.
   */
  if(w != 0 && h > b->length / w) {
    return NULL;
  }

  Matrix2D m = (Matrix2D)malloc(sizeof(*m));
  if(m == NULL) {
    return NULL;
  }

  m->width = w;
  m->height = h;
  m->buffer = b;

  /* The table holds pointers, so size it by the pointer type. */
  m->rows = (double **)malloc(sizeof(*m->rows) * h);
  if(m->rows == NULL && h != 0) {
    free(m);
    return NULL;
  }

  for(size_t i = 0; i < h; i++) {
    m->rows[i] = b->data + (i * w);
  }

  return m;
}
|
|
35
|
+
|
|
36
|
+
/* Releases a view created by matrix_new().
 *
 * Only the row table and the view struct are freed. The row pointers alias
 * the wrapped buffer's data (see matrix_new) and were never allocated on
 * their own, so they must not be passed to free(). The buffer itself is
 * left untouched and stays owned by the caller.
 *
 * Passing NULL is a no-op.
 */
PUBLIC void matrix_free(Matrix2D m)
{
  if(m == NULL) {
    return;
  }

  free(m->rows);
  m->rows = NULL;

  free(m);
}
|
|
45
|
+
|
|
46
|
+
/* Returns the Buffer that the view `m` was created over. */
PUBLIC Buffer matrix_buffer(Matrix2D m)
{
  Buffer wrapped = m->buffer;
  return wrapped;
}
|
data/lib/CooCoo.rb
ADDED
|
@@ -0,0 +1 @@
|
|
|
1
|
+
require 'coo-coo'
|
data/lib/coo-coo.rb
ADDED
|
@@ -0,0 +1,18 @@
|
|
|
1
|
+
require 'coo-coo/platform'
|
|
2
|
+
require 'coo-coo/consts'
|
|
3
|
+
require 'coo-coo/math'
|
|
4
|
+
require 'coo-coo/neuron'
|
|
5
|
+
require 'coo-coo/layer'
|
|
6
|
+
require 'coo-coo/network'
|
|
7
|
+
require 'coo-coo/sequence'
|
|
8
|
+
require 'coo-coo/temporal_network'
|
|
9
|
+
require 'coo-coo/convolution'
|
|
10
|
+
require 'coo-coo/recurrence/frontend'
|
|
11
|
+
require 'coo-coo/trainer'
|
|
12
|
+
require 'coo-coo/progress_bar'
|
|
13
|
+
require 'coo-coo/activation_functions'
|
|
14
|
+
require 'coo-coo/cost_functions'
|
|
15
|
+
require 'coo-coo/option_parser'
|
|
16
|
+
require 'coo-coo/generation_script'
|
|
17
|
+
require 'coo-coo/drawing'
|
|
18
|
+
require 'coo-coo/data_sources'
|
|
@@ -0,0 +1,344 @@
|
|
|
1
|
+
require 'singleton'
|
|
2
|
+
require 'coo-coo/from_name'
|
|
3
|
+
|
|
4
|
+
module CooCoo
|
|
5
|
+
# Activation functions are functions of a single variable used by some
|
|
6
|
+
# {Layer}s to introduce non-linearities into or to alter data from a
|
|
7
|
+
# previous layer.
|
|
8
|
+
#
|
|
9
|
+
# To get an activation function instance use the included {#from_name}.
|
|
10
|
+
# From there you can call the methods found on the {Identity} activation
|
|
11
|
+
# function on any activation function.
|
|
12
|
+
#
|
|
13
|
+
# To create a new activation function that can be used in stored networks,
|
|
14
|
+
# you must subclass {Identity} and call {ActivationFunctions.register}.
|
|
15
|
+
module ActivationFunctions
|
|
16
|
+
class << self
|
|
17
|
+
include FromName
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Root of the activation function hierarchy. On its own it is a
# pass-through: values go through a {Layer} unchanged.
class Identity
  include Singleton
  ActivationFunctions.register(self)

  # Class level calls that are not defined get relayed to the #instance.
  def self.method_missing(method_id, *arguments, &blk)
    instance.send(method_id, *arguments, &blk)
  end

  # The class name stripped of its namespace; usable in file names.
  # @return [String]
  def name
    self.class.name.split("::")[-1]
  end

  # @return [String] same as {#name}
  def to_s
    name
  end

  # Applies the function.
  # @param x [Numeric, Vector]
  # @return [Numeric, Vector] +x+ itself
  def call(x)
    x
  end

  # The slope of the function at +x+, which is one everywhere.
  # @param x [Numeric, Vector]
  # @param y [Numeric, Vector, nil] Optional result of an earlier #call.
  # @return [Float, Vector] +1.0+ for a number, otherwise a vector of ones
  #   built by the argument's class with the argument's size.
  def derivative(x, y = nil)
    reference = y || x
    return 1.0 if reference.kind_of?(Numeric)

    reference.class.ones(reference.size)
  end

  # Starting weights for a {Layer} that uses this function.
  # @param num_inputs [Integer] Number of inputs into the {Layer}
  # @param size [Integer] The size or number of outputs of the {Layer}.
  # @return [Vector] +num_inputs * size+ random weights, rescaled from
  #   the generator's range by +* 2 - 1+ and divided by the square root
  #   of +num_inputs+.
  def initial_weights(num_inputs, size)
    centered = CooCoo::Vector.rand(num_inputs * size) * 2.0 - 1.0
    centered / num_inputs.to_f.sqrt
  end

  # Starting bias for a {Layer}.
  # @param size [Integer] Number of bias elements to return.
  # @return [Vector] of ones
  def initial_bias(size)
    CooCoo::Vector.ones(size)
  end

  # Hook to move a {Network}'s inputs into the function's domain.
  # @param x [Vector]
  # @return [Vector] unchanged here
  def prep_input(x)
    x
  end

  # Hook to move a training target from +0..1+ into the range of the
  # function's output.
  # @param x [Vector]
  # @return [Vector] unchanged here
  def prep_output_target(x)
    x
  end
end
|
|
89
|
+
|
|
90
|
+
# The logistic sigmoid: 1 / (1 + e ** -x).
class Logistic < Identity
  ActivationFunctions.register(self)

  # @param x [Numeric, Vector]
  # @return [Numeric, Vector]
  def call(x)
    denominator = 1.0 + (-x).exp
    1.0 / denominator
  end

  # Slope expressed through the output: y * (1 - y).
  # @param x [Numeric, Vector]
  # @param y [Numeric, Vector, nil] Optional result of an earlier #call.
  def derivative(x, y = nil)
    out = y || call(x)
    out * (1.0 - out)
  end
end
|
|
102
|
+
|
|
103
|
+
# Hyperbolic tangent written as 2 / (1 + e ** (-2x)) - 1.
class TanH < Identity
  ActivationFunctions.register(self)

  # @param x [Numeric, Vector]
  # @return [Numeric, Vector]
  def call(x)
    doubled_sigmoid = 2.0 / (1.0 + (x * -2.0).exp)
    doubled_sigmoid - 1.0
  end

  # Slope expressed through the output: 1 - y * y.
  # @param x [Numeric, Vector]
  # @param y [Numeric, Vector, nil] Optional result of an earlier #call.
  def derivative(x, y = nil)
    out = y || call(x)
    1.0 - out * out
  end

  # @param size [Integer] Number of bias elements to return.
  # @return [Vector] of zeros
  def initial_bias(size)
    CooCoo::Vector.zeros(size)
  end

  # Min-max normalizes +arr+, then applies +* 2 - 1+ to the result.
  # @param arr [Vector]
  # @return [Vector]
  def prep_input(arr)
    normalized = arr.minmax_normalize(true)
    (normalized * 2.0) - 1.0
  end

  # Targets receive the same treatment as inputs.
  # @param arr [Vector]
  # @return [Vector]
  def prep_output_target(arr)
    prep_input(arr)
  end
end
|
|
127
|
+
|
|
128
|
+
# Rectified linear unit: positive values pass, everything else is zero.
class ReLU < Identity
  ActivationFunctions.register(self)

  # @param x [Numeric, Vector]
  # @return [Numeric, Vector]
  def call(x)
    mask = x > 0
    # A plain boolean means +x+ was a number; anything else is treated
    # as an element-wise mask to multiply with.
    case mask
    when FalseClass then 0.0
    when TrueClass then x
    else x * mask
    end
  end

  # @param x [Numeric, Vector]
  # @param y [Numeric, Vector, nil] Optional result of an earlier #call.
  # @return [Float, Vector] +0.0+ or +1.0+ for a number, otherwise the
  #   result of comparing the output against zero.
  def derivative(x, y = nil)
    out = y || call(x)
    mask = out > 0
    case mask
    when FalseClass then 0.0
    when TrueClass then 1.0
    else mask
    end
  end

  # @param num_inputs [Integer] Number of inputs into the {Layer}
  # @param size [Integer] The size or number of outputs of the {Layer}.
  # @return [Vector] random weights scaled by sqrt(2 / (num_inputs * size))
  def initial_weights(num_inputs, size)
    count = num_inputs * size
    scale = (2.0 / count.to_f).sqrt
    CooCoo::Vector.rand(count) * scale
  end
end
|
|
158
|
+
|
|
159
|
+
# A rectifier with separate multipliers for positive and non-positive
# values. Unlike its siblings, instances can be made with +new+ so the
# coefficients can be chosen.
class LeakyReLU < Identity
  ActivationFunctions.register(self)
  public_class_method :new

  attr_accessor :positive_coeff, :negative_coeff

  # @param pos [Numeric] multiplier used where the value is above zero
  # @param neg [Numeric] multiplier used where the value is zero or below
  def initialize(pos = 1.0, neg = 0.0001)
    @positive_coeff = pos.to_f
    @negative_coeff = neg.to_f
  end

  # @param x [Numeric, Vector]
  # @return [Numeric, Vector]
  def call(x)
    above = x > 0

    case above
    when FalseClass
      x * @negative_coeff
    when TrueClass
      x * @positive_coeff
    else
      # Element-wise: mask each side and scale it by its coefficient.
      below = x <= 0
      (x * above * @positive_coeff) + (x * below * @negative_coeff)
    end
  end

  # @param x [Numeric, Vector]
  # @param y [Numeric, Vector, nil] Optional result of an earlier #call.
  # @return [Float, Vector] the coefficient that applies to each value
  def derivative(x, y = nil)
    out = y || call(x)
    above = out > 0

    case above
    when FalseClass
      @negative_coeff
    when TrueClass
      @positive_coeff
    else
      below = out <= 0
      (above * @positive_coeff) + (below * @negative_coeff)
    end
  end

  # @param num_inputs [Integer] Number of inputs into the {Layer}
  # @param size [Integer] The size or number of outputs of the {Layer}.
  # @return [Vector] random weights scaled by sqrt(2 / (num_inputs * size))
  def initial_weights(num_inputs, size)
    count = num_inputs * size
    scale = (2.0 / count.to_f).sqrt
    CooCoo::Vector.rand(count) * scale
  end

  # Equal when +other+ is of this class and both coefficients match.
  def ==(other)
    return false unless other.kind_of?(self.class)

    positive_coeff == other.positive_coeff &&
      negative_coeff == other.negative_coeff
  end
end
|
|
207
|
+
|
|
208
|
+
# Computes the Softmax function given a {Vector}:
#   y_i = e ** x_i / sum(e ** x)
# @see https://deepnotes.io/softmax-crossentropy
# @see https://becominghuman.ai/back-propagation-is-very-simple-who-made-it-complicated-97b794c97e5c
class SoftMax < Identity
  ActivationFunctions.register(self)

  # @param x [Vector]
  # @return [Vector] the exponentials of +x+ divided by their sum
  def call(x)
    e = x.exp
    e / e.sum
  end

  # Element-wise derivative of each output with respect to its own input:
  #   y_i * (1 - y_i) = y_i * (sum(e ** x) - e ** x_i) / sum(e ** x)
  #
  # The exponentials, not the raw inputs, have to be subtracted from the
  # sum for the expression to equal +y * (1 - y)+.
  # @param x [Vector]
  # @param y [Vector, nil] Optional result of an earlier #call.
  # @return [Vector]
  def derivative(x, y = nil)
    y ||= call(x)
    e = x.exp
    s = e.sum
    y * (s - e) / s
  end
end
|
|
226
|
+
|
|
227
|
+
# Softmax with the largest element subtracted from every element
# beforehand so the exponentials cannot overflow:
#   y_i = e ** (x_i - max(x)) / sum(e ** (x - max(x)))
class ShiftedSoftMax < SoftMax
  ActivationFunctions.register(self)

  # @param x [Vector]
  # @return [Vector]
  def call(x)
    shifted = x - x.max
    super(shifted)
  end

  # @param x [Vector]
  # @param y [Vector, nil] Optional result of an earlier #call.
  # @return [Vector]
  def derivative(x, y = nil)
    shifted = x - x.max
    super(shifted, y)
  end
end
|
|
241
|
+
|
|
242
|
+
# Rescales values with the argument's +minmax_normalize+.
class MinMax < Identity
  ActivationFunctions.register(self)

  # @param x [Numeric, Vector]
  # @return [Numeric, Vector] the normalized values, or +x+ untouched
  #   when it has no +minmax_normalize+.
  def call(x)
    x.respond_to?(:minmax_normalize) ? x.minmax_normalize : x
  end

  # A constant slope of 1 / (max - min) for every element.
  # @param x [Vector]
  # @param y [Vector, nil] Optional result of an earlier #call; only its
  #   class and size are used.
  # @return [Vector]
  def derivative(x, y = nil)
    lowest, highest = x.minmax
    shape = y || x
    shape.class.new(shape.size, 1.0 / (highest - lowest))
  end

  # @param x [Vector]
  # @return [Vector]
  def prep_output_target(x)
    x.minmax_normalize(true)
  end
end
|
|
262
|
+
|
|
263
|
+
# A {MinMax} variant that copes with inputs whose values are all equal.
class ZeroSafeMinMax < Identity
  ActivationFunctions.register(self)

  # @param x [Numeric, Vector]
  # @return [Numeric, Vector] +x.minmax_normalize(true)+, or +x+ untouched
  #   when it has no +minmax_normalize+.
  def call(x)
    return x unless x.respond_to?(:minmax_normalize)

    x.minmax_normalize(true)
  end

  # A constant slope of 1 / (max - min), or +x.zero+ when max equals min.
  # @param x [Vector]
  # @param y [Vector, nil] Optional result of an earlier #call; only its
  #   class and size are used.
  # @return [Vector]
  def derivative(x, y = nil)
    lowest, highest = x.minmax
    spread = highest - lowest
    return x.zero if spread == 0.0

    shape = y || x
    shape.class.new(shape.size, 1.0 / spread)
  end

  # Targets are normalized the same way as activations.
  # @param x [Vector]
  # @return [Vector]
  def prep_output_target(x)
    call(x)
  end
end
|
|
289
|
+
|
|
290
|
+
# Applies the argument's +normalize+.
class Normalize < Identity
  ActivationFunctions.register(self)

  # @param x [Numeric, Vector]
  # @return the result of +x.normalize+, or of +x.coerce(0)+ when +x+
  #   has no +normalize+.
  def call(x)
    return x.coerce(0) unless x.respond_to?(:normalize)

    x.normalize
  end

  # Computes 1 / |x| - y * y / |x|.
  # @param x [Vector]
  # @param y [Vector, nil] Optional result of an earlier #call.
  def derivative(x, y = nil)
    length = x.magnitude()
    out = y || call(x)
    1.0 / length - out * out / length
  end

  # @param x [Vector]
  # @return [Vector]
  def prep_output_target(x)
    x.normalize
  end
end
|
|
311
|
+
|
|
312
|
+
# Like the {Normalize} but safe when the input has a magnitude of zero.
class ZeroSafeNormalize < Identity
  ActivationFunctions.register(self)

  # Divides +x+ by its magnitude.
  # @param x [Numeric, Vector]
  # @return +0.0+ when the magnitude is zero, +x / magnitude+ otherwise;
  #   the result of +x.coerce(0)+ when +x+ has no +normalize+.
  def call(x)
    if x.respond_to?(:normalize)
      m = x.magnitude
      if m == 0.0
        0.0
      else
        # Divide by the magnitude computed above; a bare +magnitude+ is
        # not defined on this object and would raise NameError.
        x / m
      end
    else
      x.coerce(0)
    end
  end

  # Computes 1 / |x| - y * y / |x|, or +0.0+ when the magnitude is zero.
  # @param x [Vector]
  # @param y [Vector, nil] Optional result of an earlier #call.
  def derivative(x, y = nil)
    mag = x.magnitude()
    if mag == 0.0
      0.0
    else
      y ||= call(x)
      1.0 / mag - y * y / mag
    end
  end

  # @param x [Vector]
  # @return [Vector]
  def prep_output_target(x)
    x.normalize
  end
end
|
|
343
|
+
end
|
|
344
|
+
end
|
|
@@ -0,0 +1,298 @@
|
|
|
1
|
+
require 'coo-coo/layer_factory'
|
|
2
|
+
|
|
3
|
+
module CooCoo
|
|
4
|
+
module Convolution
|
|
5
|
+
# Applies one +internal_layer+ to every box of a grid laid over a 2D
# input and stitches the per-box outputs into a single output vector.
#
# The grid has {#horizontal_span} by {#vertical_span} boxes. Box
# (grid_x, grid_y) is sliced from the input starting at
# (grid_x * horizontal_step, grid_y * vertical_step) and is
# +input_width+ by +input_height+ in size.
class BoxLayer
  LayerFactory.register_type(self)

  attr_reader :width
  attr_reader :height
  attr_reader :horizontal_step
  attr_reader :vertical_step
  attr_reader :input_width
  attr_reader :input_height
  attr_reader :int_output_width
  attr_reader :int_output_height
  attr_reader :internal_layer
  attr_reader :delta_accumulator

  # @param width [Integer] Width of the whole input.
  # @param height [Integer] Height of the whole input.
  # @param horizontal_step [Integer] Horizontal distance between box origins.
  # @param vertical_step [Integer] Vertical distance between box origins.
  # @param internal_layer The layer applied to every box.
  # @param input_width [Integer] Width of a box fed to +internal_layer+.
  # @param input_height [Integer] Height of a box fed to +internal_layer+.
  # @param int_output_width [Integer] Width of +internal_layer+'s output.
  # @param int_output_height [Integer] Height of +internal_layer+'s output.
  # @param update_weights_with [Symbol, String, nil] +:average+ or +:sum+;
  #   how the per-box weight deltas are combined. +nil+ means +:average+.
  # @raise [ArgumentError] when the box sizes disagree with
  #   +internal_layer+ or the accumulator is not supported.
  def initialize(width, height, horizontal_step, vertical_step, internal_layer, input_width, input_height, int_output_width, int_output_height, update_weights_with = :average)
    @internal_layer = internal_layer
    @width = width
    @height = height
    @horizontal_step = horizontal_step
    @vertical_step = vertical_step
    @input_width = input_width
    @input_height = input_height
    raise ArgumentError.new("Input size mismatch: #{input_width * input_height} is not #{internal_layer.num_inputs}") if internal_layer.num_inputs != (input_width * input_height)
    @int_output_width = int_output_width
    @int_output_height = int_output_height
    raise ArgumentError.new("Output size mismatch: #{int_output_width * int_output_height} is not #{internal_layer.size}") if internal_layer.size != (int_output_width * int_output_height)
    # Use the argument: the reader of the same name is still nil at this
    # point, which used to force :average no matter what was requested.
    # +to_sym+ lets a value that was stored as a string be accepted too.
    @delta_accumulator = (update_weights_with || :average).to_sym
    raise ArgumentError.new("Weights delta accumulator can only be averaged or summed") unless [ :average, :sum ].include?(@delta_accumulator)
  end

  # @return the activation function of the internal layer
  def activation_function
    internal_layer.activation_function
  end

  # @return [Integer] number of boxes per grid row
  def horizontal_span
    @horizontal_span ||= (@width / @horizontal_step.to_f).ceil
  end

  # @return [Integer] number of boxes per grid column
  def vertical_span
    @vertical_span ||= (@height / @vertical_step.to_f).ceil
  end

  # @return [Integer] number of elements in the whole input
  def num_inputs
    @width * @height
  end

  # @return [Integer] width of the stitched output
  def output_width
    (horizontal_span * int_output_width).to_i
  end

  # @return [Integer] height of the stitched output
  def output_height
    (vertical_span * int_output_height).to_i
  end

  # @return [Integer] number of elements in the stitched output
  def size
    output_height * output_width
  end

  # @return the neurons of the internal layer
  def neurons
    internal_layer.neurons
  end

  # Writes every box's result into one +w+ by +h+ vector.
  # @param outputs [Array<Array>] Per-box values indexed +[grid_y][grid_x]+.
  # @param w [Integer] Width of the combined vector.
  # @param h [Integer] Height of the combined vector.
  # @param inner_width [Integer] Width of a single box's values.
  # @return [Vector]
  def flatten_areas(outputs, w, h, inner_width)
    out = CooCoo::Vector.new(w * h)

    each_area do |grid_x, grid_y|
      area_output = outputs[grid_y][grid_x]
      gx = grid_x * w / horizontal_span.to_f
      gy = grid_y * h / vertical_span.to_f
      out.set2d!(w, area_output, inner_width, gx, gy)
    end

    out
  end

  # Runs the internal layer over every box.
  # @param input [Vector]
  # @param hidden_state [Hash] Holds this layer's per-box states under
  #   the key +self+; updated in place.
  # @return [Array] the stitched output and +hidden_state+
  def forward(input, hidden_state)
    hs = hidden_state[self] || Array.new
    outputs = each_area do |grid_x, grid_y|
      hs_index = (grid_y * horizontal_span + grid_x).to_i
      output, layer_hs = @internal_layer.forward(slice_input(input, grid_x, grid_y), hs[hs_index])
      hs[hs_index] = layer_hs
      output
    end
    hidden_state[self] = hs
    [ flatten_areas(outputs, horizontal_span * int_output_width, vertical_span * int_output_height, int_output_width), hidden_state ]
  end

  # Back propagates +errors+ through the internal layer, box by box.
  # @param input [Vector]
  # @param output [Vector] The stitched output of #forward.
  # @param errors [Vector] Errors laid out like +output+.
  # @param hidden_state [Hash] See #forward.
  # @return [Array] a {Sequence} of per-row {Sequence}s of deltas and
  #   +hidden_state+
  def backprop(input, output, errors, hidden_state)
    hs = hidden_state[self] || Array.new
    deltas = each_area do |grid_x, grid_y|
      hs_index = grid_y * horizontal_span + grid_x
      d, layer_hs = @internal_layer.backprop(slice_input(input, grid_x, grid_y), slice_output(output, grid_x, grid_y), slice_output(errors, grid_x, grid_y), hs[hs_index])
      hs[hs_index] = layer_hs
      d
    end
    hidden_state[self] = hs
    [ Sequence[deltas.collect { |d| Sequence[d] }], hidden_state ]
  end

  # Passes every box's deltas through the internal layer's
  # +transfer_error+ and stitches the results to the input's dimensions.
  # @param deltas Per-box deltas indexed +[grid_y][grid_x]+.
  # @return [Vector]
  def transfer_error(deltas)
    flatten_areas(each_area do |grid_x, grid_y|
                    @internal_layer.transfer_error(deltas[grid_y][grid_x]).to_a
                  end, width, height, input_width)
  end

  # Computes the weight deltas and applies them right away.
  # @return [self]
  def update_weights!(inputs, deltas)
    adjust_weights!(*weight_deltas(inputs, deltas))
  end

  # Hands +deltas+ to the internal layer.
  # @return [self]
  def adjust_weights!(deltas)
    @internal_layer.adjust_weights!(deltas)
    self
  end

  # Collects the internal layer's weight deltas for every box and
  # combines them with the configured {#delta_accumulator}.
  # @param inputs [Vector]
  # @param deltas Per-box deltas indexed +[grid_y][grid_x]+.
  def weight_deltas(inputs, deltas)
    #rate = rate / (@horizontal_span * @vertical_span).to_f
    d = []
    each_area do |grid_x, grid_y|
      # Only the first value of the internal layer's result is kept.
      delta, _hs = @internal_layer.
        weight_deltas(slice_input(inputs, grid_x, grid_y),
                      deltas[grid_y][grid_x])
      d << delta
    end

    Sequence[d].send(@delta_accumulator)
  end

  # Equal when +other+ is of this class and every setting, including
  # the internal layer, matches.
  def ==(other)
    other.kind_of?(self.class) &&
      width == other.width &&
      height == other.height &&
      horizontal_step == other.horizontal_step &&
      vertical_step == other.vertical_step &&
      input_width == other.input_width &&
      input_height == other.input_height &&
      int_output_width == other.int_output_width &&
      int_output_height == other.int_output_height &&
      internal_layer == other.internal_layer &&
      delta_accumulator == other.delta_accumulator
  end

  # @return [Hash] the settings needed by {.from_hash} to rebuild the layer
  def to_hash(network = nil)
    { type: self.class.to_s,
      width: @width,
      height: @height,
      horizontal_step: @horizontal_step,
      vertical_step: @vertical_step,
      input_width: @input_width,
      input_height: @input_height,
      int_output_width: @int_output_width,
      int_output_height: @int_output_height,
      delta_accumulator: @delta_accumulator,
      internal_layer: @internal_layer.to_hash(network)
    }
  end

  # Rebuilds a layer from the output of #to_hash.
  # @param h [Hash]
  # @raise [KeyError] when a required key is missing
  def self.from_hash(h, network = nil)
    self.new(h.fetch(:width), h.fetch(:height),
             h.fetch(:horizontal_step), h.fetch(:vertical_step),
             LayerFactory.from_hash(h.fetch(:internal_layer)),
             h.fetch(:input_width), h.fetch(:input_height),
             h.fetch(:int_output_width), h.fetch(:int_output_height),
             h.fetch(:delta_accumulator, :average))
  end

  #private

  # Visits every box of the grid, row by row.
  # @yield [grid_x, grid_y]
  # @return [Array<Array>] the block's results indexed +[grid_y][grid_x]+,
  #   or an Enumerator when no block is given.
  def each_area
    return to_enum(:each_area) unless block_given?

    vertical_span.to_i.times.collect do |grid_y|
      horizontal_span.to_i.times.collect do |grid_x|
        yield(grid_x, grid_y)
      end
    end
  end

  # Cuts box (grid_x, grid_y) out of the whole +input+; the trailing
  # +0.0+ is handed to +slice_2d+ as its last argument.
  def slice_input(input, grid_x, grid_y)
    origin_x = grid_x * @horizontal_step
    origin_y = grid_y * @vertical_step
    input.slice_2d(@width,
                   @height,
                   origin_x, origin_y,
                   @input_width, @input_height,
                   0.0)
  end

  # Cuts the part that belongs to box (grid_x, grid_y) out of a vector
  # laid out like the stitched output.
  def slice_output(output, grid_x, grid_y)
    origin_x = grid_x * @int_output_width
    origin_y = grid_y * @int_output_height
    output.slice_2d((horizontal_span * @int_output_width).to_i,
                    (vertical_span * @int_output_height).to_i,
                    origin_x, origin_y,
                    @int_output_width, @int_output_height,
                    0.0)
  end
end
|
|
206
|
+
end
|
|
207
|
+
end
|
|
208
|
+
|
|
209
|
+
# Demo: repeatedly trains a single BoxLayer on one fixed example and
# prints the error and the new output after every weight update.
#
# Environment variables: ACTIVATION (default "Logistic"), COST (default
# "MeanSquare"), RATE (default 0.3) and LOOPS (default 100).
if __FILE__ == $0
  require 'coo-coo/layer'
  require 'coo-coo/cost_functions'

  WIDTH = 16
  HEIGHT = 16
  X_STEP = 4
  Y_STEP = 4
  CONV_WIDTH = 4
  CONV_HEIGHT = 4
  CONV_OUT_WIDTH = 1
  CONV_OUT_HEIGHT = 1
  activation = CooCoo::ActivationFunctions.from_name(ENV.fetch('ACTIVATION', 'Logistic'))
  cost_function = CooCoo::CostFunctions.from_name(ENV.fetch('COST', 'MeanSquare'))

  inner_layer = CooCoo::Layer.new(CONV_WIDTH * CONV_HEIGHT, CONV_OUT_WIDTH * CONV_OUT_HEIGHT, activation)
  layer = CooCoo::Convolution::BoxLayer.new(WIDTH, HEIGHT, X_STEP, Y_STEP, inner_layer, CONV_WIDTH, CONV_HEIGHT, CONV_OUT_WIDTH, CONV_OUT_HEIGHT)

  INPUT_SIZE = layer.num_inputs
  OUT_WIDTH = layer.output_width
  OUT_HEIGHT = layer.output_height
  OUTPUT_SIZE = layer.size
  learning_rate = ENV.fetch('RATE', 0.3).to_f

  # The example: first and last element set, everything between is zero,
  # for both the input and the target.
  input = [ 1.0 ] + (INPUT_SIZE - 2).times.collect { 0.0 } + [ 1.0 ]
  input = CooCoo::Vector[input, INPUT_SIZE]
  target = CooCoo::Vector.zeros(OUTPUT_SIZE)
  target[0] = 1.0
  target[-1] = 1.0

  input = activation.prep_input(input)
  # Targets go through the activation's dedicated target hook, which is
  # not the same as prep_input for every activation function.
  target = activation.prep_output_target(target)

  #input = (input - 0.5) * 2.0
  #target = (target - 0.5) * 2.0

  # Prints a header line and returns +m+ rendered as rows of +width+
  # characters, one character per value chosen by the value's range.
  def matrix_image(m, width)
    puts("matrix image #{width}")
    s = m.to_a.each_slice(width).collect do |line|
      line.collect do |c|
        if c > 0.75
          '#'
        elsif c > 0.5
          'X'
        elsif c > 0.25
          'x'
        elsif c >= 0.0
          '.'
        elsif c >= -0.5
          '-'
        else
          '~'
        end
      end.join
    end.join("\n")
  end

  require 'benchmark'

  Benchmark.bm(3) do |bm|
    bm.report("loops") do
      ENV.fetch("LOOPS", 100).to_i.times do |i|
        puts("#{i}\n========\n")
        #puts("Inputs =\n#{matrix_image(input, WIDTH)}")
        output, hs = layer.forward(input, {})
        #puts("Output = #{output}\n#{matrix_image(output, OUT_WIDTH)}")
        err = cost_function.derivative(target, output)
        #puts("Target = #{target}\n#{matrix_image(target, OUT_WIDTH)}")
        #puts("Err = #{err}\n#{matrix_image(err * 10.0, OUT_WIDTH)}")
        puts("|Err| = #{err.magnitude} #{(err * err).magnitude}")
        deltas, hs = layer.backprop(input, output, err, hs)
        #puts("Deltas = #{deltas}\n#{matrix_image(deltas, OUT_WIDTH)}")
        xfer = layer.transfer_error(deltas)
        #puts("Xfer error = #{xfer}\n#{matrix_image(xfer, OUT_WIDTH)}")
        layer.update_weights!(input, deltas * learning_rate)
        #puts("Weights updated")
        output, hs = layer.forward(input, {})
        puts("New output = #{output}\n#{matrix_image(output, OUT_WIDTH)}")
      end
    end
  end

  # layer.each_area do |x, y|
  #   puts("#{x}, #{y}\t#{x * CONV_WIDTH}, #{y * CONV_HEIGHT}")
  #   puts(matrix_image(layer.slice_input(input, x, y), CONV_WIDTH))
  #   puts
  #   puts(matrix_image(layer.slice_output(target, x, y), CONV_OUT_WIDTH))
  #   puts
  # end
end
|