neuro 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +116 -0
- data/VERSION +1 -0
- data/examples/ocr.rb +125 -0
- data/ext/extconf.rb +6 -0
- data/ext/neuro.c +694 -0
- data/install.rb +22 -0
- data/lib/neuro/display.rb +433 -0
- data/tests/runner.rb +18 -0
- data/tests/test_even_odd.rb +69 -0
- data/tests/test_parity.rb +58 -0
- metadata +59 -0
data/Rakefile
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'rake/clean'
|
2
|
+
require 'rake/testtask'
|
3
|
+
require 'rake/gempackagetask'
|
4
|
+
require 'rbconfig'
|
5
|
+
# Mix the interpreter's build configuration into the top level so the tasks
# below can read CONFIG[...] directly. `Config` is the old name of this
# module and is no longer defined by current Ruby versions, so prefer
# RbConfig and fall back to Config only where RbConfig does not exist.
include(defined?(RbConfig) ? RbConfig : Config)
|
6
|
+
require 'find'
|
7
|
+
include Find
|
8
|
+
|
9
|
+
PKG_NAME = 'neuro'
|
10
|
+
PKG_VERSION = File.read('VERSION').chomp
|
11
|
+
PKG_FILES = FileList['**/*']
|
12
|
+
PKG_FILES.exclude('CVS')
|
13
|
+
PKG_FILES.exclude('pkg')
|
14
|
+
PKG_FILES.exclude(/\.dump$/)
|
15
|
+
|
16
|
+
task :default => [:test]
|
17
|
+
|
18
|
+
desc "Run unit tests"
|
19
|
+
task(:test => [:compile]) do
|
20
|
+
cd 'tests' do
|
21
|
+
ruby %{-I../ext runner.rb}
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
desc "Creating documentation"
|
26
|
+
task :doc do
|
27
|
+
sh 'rdoc -m Neuro -d -o doc ext/neuro.c'# lib/neuro/display.rb'
|
28
|
+
end
|
29
|
+
|
30
|
+
desc "Compiling library"
|
31
|
+
task :compile do
|
32
|
+
cd 'ext' do
|
33
|
+
ruby 'extconf.rb'
|
34
|
+
sh 'make'
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
desc "Installing library"
|
39
|
+
task(:install => [:test]) do
|
40
|
+
src = "ext/neuro.#{CONFIG['DLEXT']}"
|
41
|
+
filename = File.basename(src)
|
42
|
+
dst = File.join(CONFIG["sitelibdir"], filename)
|
43
|
+
install(src, dst, :verbose => true, :mode => 0644)
|
44
|
+
src = 'lib/neuro/display.rb'
|
45
|
+
filename = File.basename(src)
|
46
|
+
dst_dir = File.join(CONFIG["sitelibdir"], 'neuro')
|
47
|
+
mkdir_p dst_dir
|
48
|
+
dst = File.join(dst_dir, filename)
|
49
|
+
install(src, dst, :verbose => true, :mode => 0644)
|
50
|
+
end
|
51
|
+
|
52
|
+
task :clean do
|
53
|
+
find('.') do |f|
|
54
|
+
if f =~ /\.dump$/
|
55
|
+
rm f
|
56
|
+
end
|
57
|
+
end
|
58
|
+
cd 'ext' do
|
59
|
+
sh 'make distclean' rescue nil
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
spec = Gem::Specification.new do |s|
|
64
|
+
|
65
|
+
#### Basic information.
|
66
|
+
|
67
|
+
s.name = 'neuro'
|
68
|
+
s.version = PKG_VERSION
|
69
|
+
s.summary = "Neural Network Extension for Ruby"
|
70
|
+
s.description = <<EOF
|
71
|
+
A Ruby extension that provides a 2-Layer Back Propagation Neural Network, which
|
72
|
+
can be used to categorize datasets of arbitrary size.
|
73
|
+
EOF
|
74
|
+
|
75
|
+
#### Dependencies and requirements.
|
76
|
+
|
77
|
+
#s.add_dependency('log4r', '> 1.0.4')
|
78
|
+
#s.requirements << ""
|
79
|
+
|
80
|
+
s.files = PKG_FILES
|
81
|
+
|
82
|
+
#### C code extensions.
|
83
|
+
|
84
|
+
s.extensions << "ext/extconf.rb"
|
85
|
+
|
86
|
+
#### Load-time details: library and application (you will need one or both).
|
87
|
+
|
88
|
+
s.require_path = 'ext' # Use these for libraries.
|
89
|
+
s.autorequire = 'neuro'
|
90
|
+
|
91
|
+
#s.bindir = "bin" # Use these for applications.
|
92
|
+
#s.executables = ["foo.rb"]
|
93
|
+
#s.default_executable = "foo.rb"
|
94
|
+
|
95
|
+
#### Documentation and testing.
|
96
|
+
|
97
|
+
s.has_rdoc = true
|
98
|
+
s.extra_rdoc_files = [ 'ext/neuro.c' ]
|
99
|
+
s.rdoc_options << '--main' << 'Neuro'
|
100
|
+
s.test_files << 'tests/runner.rb'
|
101
|
+
|
102
|
+
#### Author and project details.
|
103
|
+
|
104
|
+
s.author = "Florian Frank"
|
105
|
+
s.email = "flori@ping.de"
|
106
|
+
s.homepage = "http://neuro.rubyforge.org"
|
107
|
+
s.rubyforge_project = "neuro"
|
108
|
+
end
|
109
|
+
|
110
|
+
Rake::GemPackageTask.new(spec) do |pkg|
|
111
|
+
pkg.package_files += PKG_FILES
|
112
|
+
pkg.need_tar = true
|
113
|
+
end
|
114
|
+
|
115
|
+
task :release => [ :clean, :compile, :package ]
|
116
|
+
# vim: set et sw=2 ts=2:
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.4.0
|
data/examples/ocr.rb
ADDED
@@ -0,0 +1,125 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'neuro'
|
4
|
+
require 'enumerator'
|
5
|
+
|
6
|
+
class OCR
|
7
|
+
include Neuro
|
8
|
+
|
9
|
+
# A single letter of the training alphabet: the character itself, its index
# in the alphabet (0 for 'A') and its 5x7 bitmap flattened row by row into an
# Array of Floats (1.0 = pixel set, -1.0 = pixel clear).
class Character
  def initialize(char, number, vector)
    @char, @number, @vector = char, number, vector
  end

  attr_reader :char, :number, :vector

  # Renders the bitmap as ASCII art, five pixels per line: '*' for a pixel
  # that is not negative, ' ' for a negative one. Every line ends in "\n".
  def to_s
    result = ''
    @vector.each_slice(5) do |row|
      row.each { |pixel| result << (pixel < 0 ? ' ' : '*') }
      result << "\n"
    end
    result
  end

  # Returns a copy with its own char and vector objects, so that changing
  # the copy's pixels leaves the receiver untouched.
  def dup
    self.class.new(@char.dup, @number, @vector.dup)
  end
end

# Glyphs for 'A'..'Z' laid out side by side. Each of the 7 strings is one
# pixel row; every letter occupies 5 columns followed by 1 separator column
# ('*' = pixel set, '_' = pixel clear).
CHAR_BTIMAP = [
  "_***__****__*****_****__*****_*****_*****_*___*_____*_____*_*___*_*_____*___*_****__*****_*****_*****_*****_*****_*****_*___*_*___*_*___*_*___*_*___*_*****_",
  "*___*_*___*_*_____*___*_*_____*_____*_____*___*_____*_____*_*___*_*_____**_**_*___*_*___*_*___*_*___*_*___*_*_______*___*___*_*___*_*___*_*___*_*___*_____*_",
  "*___*_*___*_*_____*___*_*_____*_____*_____*___*_____*_____*_*__*__*_____*_*_*_*___*_*___*_*___*_*___*_*___*_*_______*___*___*_*___*_*___*__*_*__*___*____*__",
  "*****_****__*_____*___*_****__****__*_***_*****_____*_____*_***___*_____*___*_*___*_*___*_*****_**__*_****__*****___*___*___*_*___*_*___*___*___*****___*___",
  "*___*_*___*_*_____*___*_*_____*_____*___*_*___*_____*_____*_*__*__*_____*___*_*___*_*___*_*_____*_*_*_*___*_____*___*___*___*_*___*_*_*_*__*_*______*__*____",
  "*___*_*___*_*_____*___*_*_____*_____*___*_*___*_____*_____*_*___*_*_____*___*_*___*_*___*_*_____*__**_*___*_____*___*___*___*__*_*__**_**_*___*_____*_*_____",
  "*___*_****__*****_****__*****_*_____*****_*___*_____*_*****_*___*_****__*___*_*___*_*****_*_____*****_*___*_*****___*____****___*___*___*_*___*******_*****_",
]

# One Character per letter, in alphabetical order, decoded from CHAR_BTIMAP.
CHARACTERS = []
('A'..'Z').each_with_index do |char, number|
  vector = []
  7.times do |j|
    c = CHAR_BTIMAP[j][6 * number, 5]
    # Compare one-character strings instead of bytes against the `?*`
    # literal: `?*` is an Integer on Ruby 1.8 but a String on 1.9 and later,
    # where the byte comparison never matched and every pixel became -1.0.
    vector += c.split(//).map { |x| x == '*' ? 1.0 : -1.0 }
  end
  CHARACTERS << Character.new(char, number, vector)
end
|
49
|
+
|
50
|
+
# Sets up @network. If a file named "<script name>.dump" exists in the
# current directory, the network is restored from it. Otherwise a new
# network with 5 * 7 inputs, 70 hidden nodes and 26 outputs is trained on
# CHARACTERS (progress is reported on STDERR) and then dumped to that file.
def initialize
  filename = File.basename($0) + '.dump'
  if File.exist?(filename)
    File.open(filename, 'rb') do |f|
      @network = Network.load(f)
    end
  else
    STDERR.puts "Wait a moment until the network has learned enough..."
    @network = Network.new(5 * 7, 70, 26)
    @network.debug = STDERR
    @network.debug_step = 100
    max_error = 1.0E-5
    eta = 0.2
    # Network#learn returns the number of learning steps it needed. Keep
    # making passes over the shuffled alphabet until a whole pass needs no
    # steps at all, i.e. every letter is already within max_error.
    loop do
      steps = 0
      CHARACTERS.sort_by { rand }.each do |character|
        steps += @network.learn(character.vector,
          make_result_vector(character.number), max_error, eta)
      end
      break if steps.zero?
    end
    STDERR.print "Dumping network (learned #{@network.learned} times)... "
    File.open(filename, 'wb') do |f|
      @network.dump(f)
    end
    STDERR.puts "done!"
  end
end
|
79
|
+
|
80
|
+
attr_reader :network
|
81
|
+
|
82
|
+
def make_result_vector(number)
|
83
|
+
Array.new(CHARACTERS.size) { |i| number == i ? 0.9 : 0.1 }
|
84
|
+
end
|
85
|
+
|
86
|
+
def vector_to_number(vector)
|
87
|
+
vector.enum_for(:each_with_index).max[1]
|
88
|
+
end
|
89
|
+
|
90
|
+
def vector_to_char(vector)
|
91
|
+
number = vector_to_number(vector)
|
92
|
+
CHARACTERS[number]
|
93
|
+
end
|
94
|
+
|
95
|
+
def categorize(scan_vector)
|
96
|
+
decision = @network.decide(scan_vector)
|
97
|
+
vector_to_char(decision)
|
98
|
+
end
|
99
|
+
|
100
|
+
# Returns a copy of the Character for the letter _character_ in which
# _percentage_ percent of the pixels (rounded to a whole number of pixels and
# limited to the range 0..all pixels) have been inverted. The entry in
# CHARACTERS itself is not modified.
#
# Raises ArgumentError if CHARACTERS has no entry for _character_.
def self.noisify(character, percentage)
  char = CHARACTERS.find { |c| c.char == character }
  # Fail with a clear message instead of a NoMethodError on nil further down.
  raise ArgumentError, "unknown character: #{character.inspect}" unless char
  copy = char.dup
  size = copy.vector.size
  pixels = (size * (percentage / 100.0)).round
  pixels = [[pixels, 0].max, size].min
  # Invert distinct pixels. Picking positions independently can hit the same
  # pixel twice, which flips it back and produces less noise than requested.
  (0...size).sort_by { rand }.first(pixels).each do |picked|
    copy.vector[picked] = copy.vector[picked] < 0.0 ? 1.0 : -1.0
  end
  copy
end
|
110
|
+
end
|
111
|
+
|
112
|
+
if $0 == __FILE__
|
113
|
+
ocr = OCR.new
|
114
|
+
loop do
|
115
|
+
puts "", "Input a character from 'A'-'Z': "
|
116
|
+
c = gets.chomp
|
117
|
+
c.tr!('a-z', 'A-Z')
|
118
|
+
break unless /^[A-Z]$/.match(c)
|
119
|
+
input_char = OCR.noisify(c, 5)
|
120
|
+
puts "Noisy Character:", input_char, ""
|
121
|
+
rec_char = ocr.categorize(input_char.vector)
|
122
|
+
puts "Understood '#{rec_char.char}':", rec_char
|
123
|
+
end
|
124
|
+
end
|
125
|
+
# vim: set et sw=2 ts=2:
|
data/ext/extconf.rb
ADDED
data/ext/neuro.c
ADDED
@@ -0,0 +1,694 @@
|
|
1
|
+
#include "ruby.h"
|
2
|
+
#include <assert.h>
|
3
|
+
#include <math.h>
|
4
|
+
|
5
|
+
/*
 * Converts the VALUE variable obj in place to a Ruby Float. A value that is
 * not a Float but responds to to_f is replaced by the result of calling
 * to_f. Afterwards the value is checked to really be a Float (a to_f method
 * may return anything), so Check_Type raises a TypeError otherwise and
 * RFLOAT() can safely be applied to obj after this macro.
 *
 * The do { ... } while (0) wrapper makes the expansion a single statement,
 * so it cannot capture the else branch of a surrounding if.
 */
#define CAST2FLOAT(obj)                                           \
    do {                                                          \
        if (TYPE(obj) != T_FLOAT && rb_respond_to(obj, id_to_f)) \
            obj = rb_funcall(obj, id_to_f, 0);                    \
        Check_Type(obj, T_FLOAT);                                 \
    } while (0)
|
10
|
+
#define SYM(x) ID2SYM(rb_intern(x))
|
11
|
+
#define feed \
|
12
|
+
feed2layer(network->input_size, network->hidden_size, \
|
13
|
+
network->hidden_layer, network->tmp_input); \
|
14
|
+
for (i = 0; i < network->hidden_size; i++) \
|
15
|
+
network->tmp_hidden[i] = network->hidden_layer[i]->output; \
|
16
|
+
feed2layer(network->hidden_size, network->output_size, \
|
17
|
+
network->output_layer, network->tmp_hidden)
|
18
|
+
#define DEFAULT_MAX_ITERATIONS 10000
|
19
|
+
#define DEFAULT_DEBUG_STEP 1000
|
20
|
+
|
21
|
+
static VALUE rb_mNeuro, rb_cNetwork, rb_cNeuroError;
|
22
|
+
static ID id_to_f, id_class, id_name;
|
23
|
+
|
24
|
+
/* Infrastructure */
|
25
|
+
|
26
|
+
typedef struct NodeStruct {
|
27
|
+
int number_weights;
|
28
|
+
double *weights;
|
29
|
+
double output;
|
30
|
+
} Node;
|
31
|
+
|
32
|
+
typedef struct NetworkStruct {
|
33
|
+
int input_size;
|
34
|
+
int hidden_size;
|
35
|
+
int output_size;
|
36
|
+
Node** hidden_layer;
|
37
|
+
Node** output_layer;
|
38
|
+
int learned;
|
39
|
+
int debug_step;
|
40
|
+
VALUE debug;
|
41
|
+
int max_iterations;
|
42
|
+
double *tmp_input;
|
43
|
+
double *tmp_hidden;
|
44
|
+
double *tmp_output;
|
45
|
+
} Network;
|
46
|
+
|
47
|
+
/* Node methods */
|
48
|
+
|
49
|
+
static Node *Node_create(int weights)
|
50
|
+
{
|
51
|
+
Node *node;
|
52
|
+
int i;
|
53
|
+
assert(weights > 0);
|
54
|
+
node = ALLOC(Node);
|
55
|
+
MEMZERO(node, Node, 1);
|
56
|
+
node->weights = ALLOC_N(double, weights);
|
57
|
+
node->number_weights = weights;
|
58
|
+
for (i = 0; i < weights; i++)
|
59
|
+
node->weights[i] = 0.5 - rand() / (float) RAND_MAX;
|
60
|
+
node->output = 0.0;
|
61
|
+
return node;
|
62
|
+
}
|
63
|
+
|
64
|
+
/*
 * Builds a Node from a Hash of the shape produced by Node_to_hash: the value
 * stored under :output has to be a Float and the value under :weights a
 * non-empty Array of Floats.
 *
 * Raises a TypeError (via Check_Type) for values of the wrong type and
 * rb_cNeuroError if the weights Array is empty. All validation happens
 * before the Node is allocated, so nothing is leaked when an exception is
 * raised, and RFLOAT() is never applied to a non-Float.
 */
static Node *Node_from_hash(VALUE hash)
{
    Node *node;
    VALUE weights = rb_hash_aref(hash, SYM("weights"));
    VALUE output = rb_hash_aref(hash, SYM("output"));
    VALUE weight;
    int i, len;

    Check_Type(output, T_FLOAT);
    Check_Type(weights, T_ARRAY);
    len = RARRAY(weights)->len;
    /* Node_create() asserts weights > 0; raise instead of aborting. */
    if (len <= 0) rb_raise(rb_cNeuroError, "node without weights");
    for (i = 0; i < len; i++) {
        weight = rb_ary_entry(weights, i);
        Check_Type(weight, T_FLOAT);
    }

    node = Node_create(len);
    node->output = RFLOAT(output)->value;
    for (i = 0; i < len; i++)
        node->weights[i] = RFLOAT(rb_ary_entry(weights, i))->value;
    return node;
}
|
79
|
+
|
80
|
+
static void Node_destroy(Node *node)
|
81
|
+
{
|
82
|
+
MEMZERO(node->weights, double, node->number_weights);
|
83
|
+
xfree(node->weights);
|
84
|
+
MEMZERO(node, Node, 1);
|
85
|
+
xfree(node);
|
86
|
+
}
|
87
|
+
|
88
|
+
static VALUE Node_to_hash(Node *node)
|
89
|
+
{
|
90
|
+
VALUE result = rb_hash_new(), weights = rb_ary_new2(node->number_weights);
|
91
|
+
int i;
|
92
|
+
rb_hash_aset(result, SYM("output"), rb_float_new(node->output));
|
93
|
+
for (i = 0; i < node->number_weights; i++)
|
94
|
+
rb_ary_store(weights, i, rb_float_new(node->weights[i]));
|
95
|
+
rb_hash_aset(result, SYM("weights"), weights);
|
96
|
+
return result;
|
97
|
+
}
|
98
|
+
|
99
|
+
/* Network methods */
|
100
|
+
|
101
|
+
static Network *Network_allocate()
|
102
|
+
{
|
103
|
+
Network *network = ALLOC(Network);
|
104
|
+
MEMZERO(network, Network, 1);
|
105
|
+
return network;
|
106
|
+
}
|
107
|
+
|
108
|
+
static void Network_init(Network *network, int input_size, int hidden_size,
|
109
|
+
int output_size, int learned)
|
110
|
+
{
|
111
|
+
if (input_size <= 0) rb_raise(rb_cNeuroError, "input_size <= 0");
|
112
|
+
if (hidden_size <= 0) rb_raise(rb_cNeuroError, "hidden_size <= 0");
|
113
|
+
if (output_size <= 0) rb_raise(rb_cNeuroError, "output_size <= 0");
|
114
|
+
if (learned < 0) rb_raise(rb_cNeuroError, "learned < 0");
|
115
|
+
network->input_size = input_size;
|
116
|
+
network->hidden_size = hidden_size;
|
117
|
+
network->output_size = output_size;
|
118
|
+
network->learned = learned;
|
119
|
+
network->hidden_layer = ALLOC_N(Node*, hidden_size);
|
120
|
+
network->output_layer = ALLOC_N(Node*, output_size);
|
121
|
+
network->debug = Qnil; /* Debugging switched off */
|
122
|
+
network->debug_step = DEFAULT_DEBUG_STEP;
|
123
|
+
network->max_iterations = DEFAULT_MAX_ITERATIONS;
|
124
|
+
network->tmp_input = ALLOC_N(double, input_size);
|
125
|
+
MEMZERO(network->tmp_input, double, network->input_size);
|
126
|
+
network->tmp_hidden = ALLOC_N(double, hidden_size);
|
127
|
+
MEMZERO(network->tmp_hidden, double, network->hidden_size);
|
128
|
+
network->tmp_output = ALLOC_N(double, output_size);
|
129
|
+
MEMZERO(network->tmp_output, double, network->output_size);
|
130
|
+
}
|
131
|
+
|
132
|
+
static void Network_init_weights(Network *network)
|
133
|
+
{
|
134
|
+
int i;
|
135
|
+
for (i = 0; i < network->hidden_size; i++)
|
136
|
+
network->hidden_layer[i] = Node_create(network->input_size);
|
137
|
+
for (i = 0; i < network->output_size; i++)
|
138
|
+
network->output_layer[i] = Node_create(network->hidden_size);
|
139
|
+
}
|
140
|
+
|
141
|
+
static void Network_debug_error(Network *network, int count, float error, float
|
142
|
+
max_error)
|
143
|
+
{
|
144
|
+
VALUE argv[5];
|
145
|
+
int argc = 5;
|
146
|
+
if (!NIL_P(network->debug)) {
|
147
|
+
argv[0] = rb_str_new2("%6u.\tcount = %u\terror = %e\tmax_error = %e\n");
|
148
|
+
argv[1] = INT2NUM(network->learned);
|
149
|
+
argv[2] = INT2NUM(count);
|
150
|
+
argv[3] = rb_float_new(error / 2.0);
|
151
|
+
argv[4] = rb_float_new(max_error / 2.0);
|
152
|
+
rb_io_write(network->debug, rb_f_sprintf(argc, argv));
|
153
|
+
}
|
154
|
+
}
|
155
|
+
|
156
|
+
static void Network_debug_bail_out(Network *network)
|
157
|
+
{
|
158
|
+
VALUE argv[2];
|
159
|
+
int argc = 2;
|
160
|
+
if (!NIL_P(network->debug)) {
|
161
|
+
argv[0] = rb_str_new2("Network didn't converge after %u iterations! => Bailing out!\n");
|
162
|
+
argv[1] = INT2NUM(network->max_iterations);
|
163
|
+
rb_io_write(network->debug, rb_f_sprintf(argc, argv));
|
164
|
+
}
|
165
|
+
}
|
166
|
+
|
167
|
+
static VALUE Network_to_hash(Network *network)
|
168
|
+
{
|
169
|
+
int i;
|
170
|
+
VALUE hidden_layer, output_layer, result = rb_hash_new();
|
171
|
+
|
172
|
+
rb_hash_aset(result, SYM("input_size"), INT2NUM(network->input_size));
|
173
|
+
rb_hash_aset(result, SYM("hidden_size"), INT2NUM(network->hidden_size));
|
174
|
+
rb_hash_aset(result, SYM("output_size"), INT2NUM(network->output_size));
|
175
|
+
hidden_layer = rb_ary_new2(network->hidden_size);
|
176
|
+
for (i = 0; i < network->hidden_size; i++)
|
177
|
+
rb_ary_store(hidden_layer, i, Node_to_hash(network->hidden_layer[i]));
|
178
|
+
rb_hash_aset(result, SYM("hidden_layer"), hidden_layer);
|
179
|
+
output_layer = rb_ary_new2(network->output_size);
|
180
|
+
for (i = 0; i < network->output_size; i++)
|
181
|
+
rb_ary_store(output_layer, i, Node_to_hash(network->output_layer[i]));
|
182
|
+
rb_hash_aset(result, SYM("output_layer"), output_layer);
|
183
|
+
rb_hash_aset(result, SYM("learned"), INT2NUM(network->learned));
|
184
|
+
return result;
|
185
|
+
}
|
186
|
+
|
187
|
+
/*
|
188
|
+
* Helper Functions
|
189
|
+
*/
|
190
|
+
|
191
|
+
/*
 * Copies the elements of the Ruby Array data into data_vector as C doubles.
 * Elements that are not Floats are converted with to_f if they respond to
 * it; a TypeError is raised for anything that cannot be turned into a Float.
 *
 * data_vector must have room for as many doubles as data has elements at the
 * time of the call. Exactly that many slots are written.
 */
static void transform_data(double *data_vector, VALUE data)
{
    /*
     * Use the length seen on entry. CAST2FLOAT may call to_f, which is
     * arbitrary Ruby code and could resize data. Re-reading the length on
     * every iteration would then let a grown Array write past the end of
     * data_vector. If the Array shrinks instead, rb_ary_entry returns nil
     * for the missing elements.
     */
    long i, len = RARRAY(data)->len;
    VALUE current;

    for (i = 0; i < len; i++) {
        current = rb_ary_entry(data, i);
        CAST2FLOAT(current);
        /* to_f may return anything; never apply RFLOAT to a non-Float. */
        Check_Type(current, T_FLOAT);
        data_vector[i] = RFLOAT(current)->value;
    }
}
|
201
|
+
|
202
|
+
static void feed2layer(int in_size, int out_size, Node **layer, double *data)
|
203
|
+
{
|
204
|
+
int i, j;
|
205
|
+
double sum;
|
206
|
+
for (i = 0; i < out_size; i++) {
|
207
|
+
sum = 0.0;
|
208
|
+
for (j = 0; j < in_size; j++)
|
209
|
+
sum += layer[i]->weights[j] * data[j];
|
210
|
+
layer[i]->output = 1.0 / (1.0 + exp(-sum));
|
211
|
+
/* sigmoid(sum), beta = 0.5 */
|
212
|
+
}
|
213
|
+
}
|
214
|
+
|
215
|
+
/*
|
216
|
+
* Ruby API
|
217
|
+
*/
|
218
|
+
|
219
|
+
/*
|
220
|
+
* call-seq: learn(data, desired, max_error, eta)
|
221
|
+
*
|
222
|
+
* The network should respond with the Array _desired_ (size == output_size),
|
223
|
+
* if it was given the Array _data_ (size == input_size). The learning process
|
224
|
+
* ends, if the resulting error sinks below _max_error_ and convergence is
|
225
|
+
* assumed. A lower _eta_ parameter leads to slower learning, because of low
|
226
|
+
* weight changes. A too high _eta_ can lead to wildly oscillating weights, and
|
227
|
+
* result in slower learning or no learning at all. The last two parameters
|
228
|
+
* should be chosen appropriately to the problem at hand. ;)
|
229
|
+
*
|
230
|
+
* The return value is an Integer value, that denotes the number of learning
|
231
|
+
* steps, which were necessary, to learn the _data_, or _max_iterations_, if
|
232
|
+
* the _data_ couldn't be learned.
|
233
|
+
*/
|
234
|
+
static VALUE rb_network_learn(VALUE self, VALUE data, VALUE desired, VALUE
|
235
|
+
max_error, VALUE eta)
|
236
|
+
{
|
237
|
+
Network *network;
|
238
|
+
double max_error_float, eta_float, error, sum,
|
239
|
+
*output_delta, *hidden_delta;
|
240
|
+
int i, j, count;
|
241
|
+
|
242
|
+
Data_Get_Struct(self, Network, network);
|
243
|
+
|
244
|
+
Check_Type(data, T_ARRAY);
|
245
|
+
if (RARRAY(data)->len != network->input_size)
|
246
|
+
rb_raise(rb_cNeuroError, "size of data != input_size");
|
247
|
+
transform_data(network->tmp_input, data);
|
248
|
+
|
249
|
+
Check_Type(desired, T_ARRAY);
|
250
|
+
if (RARRAY(desired)->len != network->output_size)
|
251
|
+
rb_raise(rb_cNeuroError, "size of desired != output_size");
|
252
|
+
transform_data(network->tmp_output, desired);
|
253
|
+
CAST2FLOAT(max_error);
|
254
|
+
max_error_float = RFLOAT(max_error)->value;
|
255
|
+
if (max_error_float <= 0) rb_raise(rb_cNeuroError, "max_error <= 0");
|
256
|
+
max_error_float *= 2.0;
|
257
|
+
CAST2FLOAT(eta);
|
258
|
+
eta_float = RFLOAT(eta)->value;
|
259
|
+
if (eta_float <= 0) rb_raise(rb_cNeuroError, "eta <= 0");
|
260
|
+
|
261
|
+
output_delta = ALLOCA_N(double, network->output_size);
|
262
|
+
hidden_delta = ALLOCA_N(double, network->hidden_size);
|
263
|
+
for(count = 0; count < network->max_iterations; count++) {
|
264
|
+
feed;
|
265
|
+
|
266
|
+
/* Compute output weight deltas and current error */
|
267
|
+
error = 0.0;
|
268
|
+
for (i = 0; i < network->output_size; i++) {
|
269
|
+
output_delta[i] = network->tmp_output[i] -
|
270
|
+
network->output_layer[i]->output;
|
271
|
+
error += output_delta[i] * output_delta[i];
|
272
|
+
output_delta[i] *= network->output_layer[i]->output *
|
273
|
+
(1.0 - network->output_layer[i]->output);
|
274
|
+
/* diff * (sigmoid' = 2 * output * beta * (1 - output)) */
|
275
|
+
|
276
|
+
}
|
277
|
+
|
278
|
+
if (count % network->debug_step == 0)
|
279
|
+
Network_debug_error(network, count, error, max_error_float);
|
280
|
+
|
281
|
+
/* Get out if error is below max_error ^ 2 */
|
282
|
+
if (error < max_error_float) goto CONVERGED;
|
283
|
+
|
284
|
+
/* Compute hidden weight deltas */
|
285
|
+
|
286
|
+
for (i = 0; i < network->hidden_size; i++) {
|
287
|
+
sum = 0.0;
|
288
|
+
for (j = 0; j < network->output_size; j++)
|
289
|
+
sum += output_delta[j] *
|
290
|
+
network->output_layer[j]->weights[i];
|
291
|
+
hidden_delta[i] = sum * network->hidden_layer[i]->output *
|
292
|
+
(1.0 - network->hidden_layer[i]->output);
|
293
|
+
/* sum * (sigmoid' = 2 * output * beta * (1 - output)) */
|
294
|
+
}
|
295
|
+
|
296
|
+
/* Adjust weights */
|
297
|
+
|
298
|
+
for (i = 0; i < network->output_size; i++)
|
299
|
+
for (j = 0; j < network->hidden_size; j++)
|
300
|
+
network->output_layer[i]->weights[j] +=
|
301
|
+
eta_float * output_delta[i] *
|
302
|
+
network->hidden_layer[j]->output;
|
303
|
+
|
304
|
+
for (i = 0; i < network->hidden_size; i++)
|
305
|
+
for (j = 0; j < network->input_size; j++)
|
306
|
+
network->hidden_layer[i]->weights[j] += eta_float *
|
307
|
+
hidden_delta[i] * network->tmp_input[j];
|
308
|
+
}
|
309
|
+
Network_debug_bail_out(network);
|
310
|
+
CONVERGED:
|
311
|
+
network->learned++;
|
312
|
+
return INT2NUM(count);
|
313
|
+
}
|
314
|
+
|
315
|
+
/*
|
316
|
+
* call-seq: decide(data)
|
317
|
+
*
|
318
|
+
* The network is given the Array _data_ (size has to be == input_size), and it
|
319
|
+
* responds with another Array (size == output_size) by returning it.
|
320
|
+
*/
|
321
|
+
static VALUE rb_network_decide(VALUE self, VALUE data)
|
322
|
+
{
|
323
|
+
Network *network;
|
324
|
+
VALUE result;
|
325
|
+
int i;
|
326
|
+
|
327
|
+
Data_Get_Struct(self, Network, network);
|
328
|
+
|
329
|
+
Check_Type(data, T_ARRAY);
|
330
|
+
if (RARRAY(data)->len != network->input_size)
|
331
|
+
rb_raise(rb_cNeuroError, "size of data != input_size");
|
332
|
+
transform_data(network->tmp_input, data);
|
333
|
+
feed;
|
334
|
+
result = rb_ary_new2(network->output_size);
|
335
|
+
for (i = 0; i < network->output_size; i++) {
|
336
|
+
rb_ary_store(result, i,
|
337
|
+
rb_float_new(network->output_layer[i]->output));
|
338
|
+
}
|
339
|
+
return result;
|
340
|
+
}
|
341
|
+
|
342
|
+
/*
|
343
|
+
* Returns the _input_size_ of this Network as an Integer. This is the number
|
344
|
+
* of weights, that are connected to the input of the hidden layer.
|
345
|
+
*/
|
346
|
+
static VALUE rb_network_input_size(VALUE self)
|
347
|
+
{
|
348
|
+
Network *network;
|
349
|
+
|
350
|
+
Data_Get_Struct(self, Network, network);
|
351
|
+
return INT2NUM(network->input_size);
|
352
|
+
}
|
353
|
+
|
354
|
+
/*
|
355
|
+
* Returns the _hidden_size_ of this Network as an Integer. This is the number of nodes in
|
356
|
+
* the hidden layer.
|
357
|
+
*/
|
358
|
+
static VALUE rb_network_hidden_size(VALUE self)
|
359
|
+
{
|
360
|
+
Network *network;
|
361
|
+
|
362
|
+
Data_Get_Struct(self, Network, network);
|
363
|
+
return INT2NUM(network->hidden_size);
|
364
|
+
}
|
365
|
+
|
366
|
+
/*
|
367
|
+
* Returns the _output_size_ of this Network as an Integer. This is the number
|
368
|
+
* of nodes in the output layer.
|
369
|
+
*/
|
370
|
+
static VALUE rb_network_output_size(VALUE self)
|
371
|
+
{
|
372
|
+
Network *network;
|
373
|
+
|
374
|
+
Data_Get_Struct(self, Network, network);
|
375
|
+
return INT2NUM(network->output_size);
|
376
|
+
}
|
377
|
+
|
378
|
+
/*
|
379
|
+
* Returns the number of calls to #learn as an integer.
|
380
|
+
*/
|
381
|
+
static VALUE rb_network_learned(VALUE self)
|
382
|
+
{
|
383
|
+
Network *network;
|
384
|
+
|
385
|
+
Data_Get_Struct(self, Network, network);
|
386
|
+
return INT2NUM(network->learned);
|
387
|
+
}
|
388
|
+
|
389
|
+
/*
|
390
|
+
* Returns nil, if debugging is switched off. Returns the IO object, that is
|
391
|
+
* used for debugging output, if debugging is switched on.
|
392
|
+
*/
|
393
|
+
static VALUE rb_network_debug(VALUE self)
|
394
|
+
{
|
395
|
+
Network *network;
|
396
|
+
|
397
|
+
Data_Get_Struct(self, Network, network);
|
398
|
+
return network->debug;
|
399
|
+
}
|
400
|
+
|
401
|
+
/*
|
402
|
+
* call-seq: debug=(io)
|
403
|
+
*
|
404
|
+
* Switches debugging on, if _io_ is an IO object. If it is nil,
|
405
|
+
* debugging is switched off.
|
406
|
+
*/
|
407
|
+
static VALUE rb_network_debug_set(VALUE self, VALUE io)
|
408
|
+
{
|
409
|
+
Network *network;
|
410
|
+
|
411
|
+
Data_Get_Struct(self, Network, network);
|
412
|
+
network->debug = io;
|
413
|
+
return io;
|
414
|
+
}
|
415
|
+
|
416
|
+
/*
|
417
|
+
* Returns the Integer number of steps, that are done during learning, before a
|
418
|
+
* debugging message is printed to #debug.
|
419
|
+
*/
|
420
|
+
static VALUE rb_network_debug_step(VALUE self)
|
421
|
+
{
|
422
|
+
Network *network;
|
423
|
+
|
424
|
+
Data_Get_Struct(self, Network, network);
|
425
|
+
return INT2NUM(network->debug_step);
|
426
|
+
}
|
427
|
+
|
428
|
+
/*
|
429
|
+
* call-seq: debug_step=(step)
|
430
|
+
*
|
431
|
+
* Sets the number of steps, that are done during learning, before a
|
432
|
+
* debugging message is printed to _step_. If _step_ is equal to or less than 0
|
433
|
+
* the default value (=1000) is set.
|
434
|
+
*/
|
435
|
+
static VALUE rb_network_debug_step_set(VALUE self, VALUE step)
|
436
|
+
{
|
437
|
+
Network *network;
|
438
|
+
|
439
|
+
Data_Get_Struct(self, Network, network);
|
440
|
+
Check_Type(step, T_FIXNUM);
|
441
|
+
network->debug_step = NUM2INT(step);
|
442
|
+
if (network->debug_step <= 0) network->debug_step = DEFAULT_DEBUG_STEP;
|
443
|
+
return step;
|
444
|
+
}
|
445
|
+
|
446
|
+
/*
|
447
|
+
* Returns the maximal number of iterations, that are done before #learn gives
|
448
|
+
* up and returns without having learned the given _data_.
|
449
|
+
*/
|
450
|
+
static VALUE rb_network_max_iterations(VALUE self)
|
451
|
+
{
|
452
|
+
Network *network;
|
453
|
+
|
454
|
+
Data_Get_Struct(self, Network, network);
|
455
|
+
return INT2NUM(network->max_iterations);
|
456
|
+
}
|
457
|
+
|
458
|
+
/*
|
459
|
+
* call-seq: max_iterations=(iterations)
|
460
|
+
*
|
461
|
+
* Sets the maximal number of iterations, that are done before #learn gives
|
462
|
+
* up and returns without having learned the given _data_, to _iterations_.
|
463
|
+
* If _iterations_ is equal to or less than 0, the default value (=10_000) is
|
464
|
+
* set.
|
465
|
+
*/
|
466
|
+
static VALUE rb_network_max_iterations_set(VALUE self, VALUE iterations)
|
467
|
+
{
|
468
|
+
Network *network;
|
469
|
+
|
470
|
+
Data_Get_Struct(self, Network, network);
|
471
|
+
Check_Type(iterations, T_FIXNUM);
|
472
|
+
network->max_iterations = NUM2INT(iterations);
|
473
|
+
if (network->max_iterations <= 0)
|
474
|
+
network->max_iterations = DEFAULT_MAX_ITERATIONS;
|
475
|
+
return iterations;
|
476
|
+
}
|
477
|
+
|
478
|
+
/*
|
479
|
+
* Returns the state of the network as a Hash.
|
480
|
+
*/
|
481
|
+
static VALUE rb_network_to_h(VALUE self)
|
482
|
+
{
|
483
|
+
Network *network;
|
484
|
+
|
485
|
+
Data_Get_Struct(self, Network, network);
|
486
|
+
return Network_to_hash(network);
|
487
|
+
}
|
488
|
+
|
489
|
+
|
490
|
+
/*
|
491
|
+
* Returns a short string for the network.
|
492
|
+
*/
|
493
|
+
static VALUE rb_network_to_s(VALUE self)
|
494
|
+
{
|
495
|
+
Network *network;
|
496
|
+
VALUE argv[5];
|
497
|
+
int argc = 5;
|
498
|
+
|
499
|
+
Data_Get_Struct(self, Network, network);
|
500
|
+
argv[0] = rb_str_new2("#<%s:%u,%u,%u>");
|
501
|
+
argv[1] = rb_funcall(self, id_class, 0, 0);
|
502
|
+
argv[1] = rb_funcall(argv[1], id_name, 0, 0);
|
503
|
+
argv[2] = INT2NUM(network->input_size);
|
504
|
+
argv[3] = INT2NUM(network->hidden_size);
|
505
|
+
argv[4] = INT2NUM(network->output_size);
|
506
|
+
return rb_f_sprintf(argc, argv);
|
507
|
+
}
|
508
|
+
|
509
|
+
/* Allocation and Construction */
|
510
|
+
|
511
|
+
static void rb_network_mark(Network *network)
|
512
|
+
{
|
513
|
+
if (!NIL_P(network->debug)) rb_gc_mark(network->debug);
|
514
|
+
}
|
515
|
+
|
516
|
+
static void rb_network_free(Network *network)
|
517
|
+
{
|
518
|
+
int i;
|
519
|
+
for (i = 0; i < network->hidden_size; i++)
|
520
|
+
Node_destroy(network->hidden_layer[i]);
|
521
|
+
MEMZERO(network->hidden_layer, Node*, network->hidden_size);
|
522
|
+
xfree(network->hidden_layer);
|
523
|
+
for (i = 0; i < network->output_size; i++)
|
524
|
+
Node_destroy(network->output_layer[i]);
|
525
|
+
MEMZERO(network->output_layer, Node*, network->output_size);
|
526
|
+
xfree(network->output_layer);
|
527
|
+
MEMZERO(network->tmp_input, double, network->input_size);
|
528
|
+
xfree(network->tmp_input);
|
529
|
+
MEMZERO(network->tmp_hidden, double, network->hidden_size);
|
530
|
+
xfree(network->tmp_hidden);
|
531
|
+
MEMZERO(network->tmp_output, double, network->output_size);
|
532
|
+
xfree(network->tmp_output);
|
533
|
+
MEMZERO(network, Network, 1);
|
534
|
+
xfree(network);
|
535
|
+
}
|
536
|
+
|
537
|
+
static VALUE rb_network_s_allocate(VALUE klass)
|
538
|
+
{
|
539
|
+
Network *network = Network_allocate();
|
540
|
+
return Data_Wrap_Struct(klass, rb_network_mark, rb_network_free, network);
|
541
|
+
}
|
542
|
+
|
543
|
+
/*
|
544
|
+
* call-seq: new(input_size, hidden_size, output_size)
|
545
|
+
*
|
546
|
+
* Returns a Neuro::Network instance of the given size specification.
|
547
|
+
*/
|
548
|
+
static VALUE rb_network_initialize(int argc, VALUE *argv, VALUE self)
|
549
|
+
{
|
550
|
+
Network *network;
|
551
|
+
VALUE input_size, hidden_size, output_size;
|
552
|
+
|
553
|
+
rb_scan_args(argc, argv, "3", &input_size, &hidden_size, &output_size);
|
554
|
+
Check_Type(input_size, T_FIXNUM);
|
555
|
+
Check_Type(hidden_size, T_FIXNUM);
|
556
|
+
Check_Type(output_size, T_FIXNUM);
|
557
|
+
Data_Get_Struct(self, Network, network);
|
558
|
+
Network_init(network, NUM2INT(input_size), NUM2INT(hidden_size),
|
559
|
+
NUM2INT(output_size), 0);
|
560
|
+
Network_init_weights(network);
|
561
|
+
return self;
|
562
|
+
}
|
563
|
+
|
564
|
+
/*
|
565
|
+
* Returns the serialized data for this Network instance for the Marshal
|
566
|
+
* module.
|
567
|
+
*/
|
568
|
+
static VALUE rb_network_dump(int argc, VALUE *argv, VALUE self)
|
569
|
+
{
|
570
|
+
VALUE port = Qnil, hash;
|
571
|
+
Network *network;
|
572
|
+
|
573
|
+
rb_scan_args(argc, argv, "01", &port);
|
574
|
+
Data_Get_Struct(self, Network, network);
|
575
|
+
hash = Network_to_hash(network);
|
576
|
+
return rb_marshal_dump(hash, port);
|
577
|
+
}
|
578
|
+
|
579
|
+
static VALUE
|
580
|
+
setup_layer_i(VALUE node_hash, VALUE pair_value)
|
581
|
+
{
|
582
|
+
VALUE *pair = (VALUE *) pair_value;
|
583
|
+
Node **layer = (Node **) pair[0];
|
584
|
+
int index = (int) pair[1];
|
585
|
+
Check_Type(node_hash, T_HASH);
|
586
|
+
layer[index] = Node_from_hash(node_hash);
|
587
|
+
pair[1] = (VALUE) 1 + index;
|
588
|
+
return Qnil;
|
589
|
+
}
|
590
|
+
|
591
|
+
/*
|
592
|
+
* call-seq: Neuro::Network.load(string)
|
593
|
+
*
|
594
|
+
* Creates a Network object plus state
|
595
|
+
* from the Marshal dumped string _string_, and returns it.
|
596
|
+
*/
|
597
|
+
static VALUE rb_network_load(VALUE klass, VALUE string)
|
598
|
+
{
|
599
|
+
VALUE input_size, hidden_size, output_size, learned,
|
600
|
+
hidden_layer, output_layer, pair[2];
|
601
|
+
Network *network;
|
602
|
+
VALUE hash = rb_marshal_load(string);
|
603
|
+
input_size = rb_hash_aref(hash, SYM("input_size"));
|
604
|
+
hidden_size = rb_hash_aref(hash, SYM("hidden_size"));
|
605
|
+
output_size = rb_hash_aref(hash, SYM("output_size"));
|
606
|
+
learned = rb_hash_aref(hash, SYM("learned"));
|
607
|
+
Check_Type(input_size, T_FIXNUM);
|
608
|
+
Check_Type(hidden_size, T_FIXNUM);
|
609
|
+
Check_Type(output_size, T_FIXNUM);
|
610
|
+
Check_Type(learned, T_FIXNUM);
|
611
|
+
network = Network_allocate();
|
612
|
+
Network_init(network, NUM2INT(input_size), NUM2INT(hidden_size),
|
613
|
+
NUM2INT(output_size), NUM2INT(learned));
|
614
|
+
hidden_layer = rb_hash_aref(hash, SYM("hidden_layer"));
|
615
|
+
output_layer = rb_hash_aref(hash, SYM("output_layer"));
|
616
|
+
Check_Type(hidden_layer, T_ARRAY);
|
617
|
+
Check_Type(output_layer, T_ARRAY);
|
618
|
+
pair[0] = (VALUE) network->hidden_layer;
|
619
|
+
pair[1] = (VALUE) 0;
|
620
|
+
rb_iterate(rb_each, hidden_layer, setup_layer_i, (VALUE) pair);
|
621
|
+
pair[0] = (VALUE) network->output_layer;
|
622
|
+
pair[1] = (VALUE) 0;
|
623
|
+
rb_iterate(rb_each, output_layer, setup_layer_i, (VALUE) pair);
|
624
|
+
return Data_Wrap_Struct(klass, NULL, rb_network_free, network);
|
625
|
+
}
|
626
|
+
|
627
|
+
/*
|
628
|
+
*
|
629
|
+
* = neuro - Neural Network Extension for Ruby
|
630
|
+
*
|
631
|
+
* == Description
|
632
|
+
*
|
633
|
+
* A Ruby extension that provides a 2-Layer Back Propagation Neural Network,
|
634
|
+
* which can be used to categorize datasets of arbitrary size.
|
635
|
+
*
|
636
|
+
* The network can be easily stored to or restored from the hard disk with
|
637
|
+
* the help of Ruby's Marshal facility.
|
638
|
+
*
|
639
|
+
* == Author
|
640
|
+
*
|
641
|
+
* Florian Frank <mailto:flori@ping.de>
|
642
|
+
*
|
643
|
+
* == License
|
644
|
+
*
|
645
|
+
* This is free software; you can redistribute it and/or modify it under
|
646
|
+
* the terms of the GNU General Public License Version 2 as published by
|
647
|
+
* the Free Software Foundation: http://www.gnu.org/copyleft/gpl.html
|
648
|
+
*
|
649
|
+
* == Download
|
650
|
+
*
|
651
|
+
* The latest version of <b>neuro</b> can be found at
|
652
|
+
*
|
653
|
+
* * http://rubyforge.org/frs/?group_id=554
|
654
|
+
*
|
655
|
+
* The homepage should be located at
|
656
|
+
*
|
657
|
+
* * http://neuro.rubyforge.org
|
658
|
+
*
|
659
|
+
* == Examples
|
660
|
+
*
|
661
|
+
* An example for optical character recognition can be found in the examples
|
662
|
+
* subdirectory. Don't forget to check out the tests subdirectory, which
|
663
|
+
* contains some additional examples.
|
664
|
+
*/
|
665
|
+
void Init_neuro()
|
666
|
+
{
|
667
|
+
rb_mNeuro = rb_define_module("Neuro");
|
668
|
+
rb_cNetwork = rb_define_class_under(rb_mNeuro, "Network", rb_cObject);
|
669
|
+
rb_cNeuroError = rb_define_class("NetworkError", rb_eStandardError);
|
670
|
+
rb_define_alloc_func(rb_cNetwork, rb_network_s_allocate);
|
671
|
+
rb_define_method(rb_cNetwork, "initialize", rb_network_initialize, -1);
|
672
|
+
rb_define_method(rb_cNetwork, "learn", rb_network_learn, 4);
|
673
|
+
rb_define_method(rb_cNetwork, "decide", rb_network_decide, 1);
|
674
|
+
rb_define_method(rb_cNetwork, "input_size", rb_network_input_size, 0);
|
675
|
+
rb_define_method(rb_cNetwork, "hidden_size", rb_network_hidden_size, 0);
|
676
|
+
rb_define_method(rb_cNetwork, "output_size", rb_network_output_size, 0);
|
677
|
+
rb_define_method(rb_cNetwork, "learned", rb_network_learned, 0);
|
678
|
+
rb_define_method(rb_cNetwork, "debug", rb_network_debug, 0);
|
679
|
+
rb_define_method(rb_cNetwork, "debug=", rb_network_debug_set, 1);
|
680
|
+
rb_define_method(rb_cNetwork, "debug_step", rb_network_debug_step, 0);
|
681
|
+
rb_define_method(rb_cNetwork, "debug_step=", rb_network_debug_step_set, 1);
|
682
|
+
rb_define_method(rb_cNetwork, "max_iterations", rb_network_max_iterations, 0);
|
683
|
+
rb_define_method(rb_cNetwork, "max_iterations=", rb_network_max_iterations_set, 1);
|
684
|
+
rb_define_method(rb_cNetwork, "_dump", rb_network_dump, -1);
|
685
|
+
rb_define_method(rb_cNetwork, "dump", rb_network_dump, -1);
|
686
|
+
rb_define_method(rb_cNetwork, "to_h", rb_network_to_h, 0);
|
687
|
+
rb_define_method(rb_cNetwork, "to_s", rb_network_to_s, 0);
|
688
|
+
rb_define_singleton_method(rb_cNetwork, "_load", rb_network_load, 1);
|
689
|
+
rb_define_singleton_method(rb_cNetwork, "load", rb_network_load, 1);
|
690
|
+
id_to_f = rb_intern("to_f");
|
691
|
+
id_class = rb_intern("class");
|
692
|
+
id_name = rb_intern("name");
|
693
|
+
}
|
694
|
+
/* vim: set cin sw=4 ts=4: */
|