newral 0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +278 -0
- data/Rakefile +10 -0
- data/lib/newral.rb +53 -0
- data/lib/newral/bayes.rb +39 -0
- data/lib/newral/classifier/dendogram.rb +68 -0
- data/lib/newral/classifier/k_means_cluster.rb +45 -0
- data/lib/newral/classifier/node.rb +58 -0
- data/lib/newral/classifier/node_distance.rb +19 -0
- data/lib/newral/data/base.rb +153 -0
- data/lib/newral/data/cluster.rb +37 -0
- data/lib/newral/data/cluster_set.rb +38 -0
- data/lib/newral/data/csv.rb +23 -0
- data/lib/newral/data/idx.rb +48 -0
- data/lib/newral/error_calculation.rb +28 -0
- data/lib/newral/functions/base.rb +102 -0
- data/lib/newral/functions/block.rb +34 -0
- data/lib/newral/functions/gaussian.rb +41 -0
- data/lib/newral/functions/line.rb +52 -0
- data/lib/newral/functions/polynomial.rb +48 -0
- data/lib/newral/functions/radial_basis_function_network.rb +54 -0
- data/lib/newral/functions/ricker_wavelet.rb +13 -0
- data/lib/newral/functions/vector.rb +59 -0
- data/lib/newral/genetic/tree.rb +70 -0
- data/lib/newral/graphs/a_star.rb +12 -0
- data/lib/newral/graphs/cheapest_first.rb +11 -0
- data/lib/newral/graphs/edge.rb +24 -0
- data/lib/newral/graphs/graph.rb +63 -0
- data/lib/newral/graphs/node.rb +11 -0
- data/lib/newral/graphs/path.rb +50 -0
- data/lib/newral/graphs/tree_search.rb +60 -0
- data/lib/newral/networks/backpropagation_network.rb +68 -0
- data/lib/newral/networks/layer.rb +28 -0
- data/lib/newral/networks/network.rb +146 -0
- data/lib/newral/networks/perceptron.rb +84 -0
- data/lib/newral/networks/sigmoid.rb +55 -0
- data/lib/newral/probability.rb +42 -0
- data/lib/newral/probability_set.rb +108 -0
- data/lib/newral/q_learning/base.rb +90 -0
- data/lib/newral/tools.rb +135 -0
- data/lib/newral/training/gradient_descent.rb +36 -0
- data/lib/newral/training/greedy.rb +36 -0
- data/lib/newral/training/hill_climbing.rb +77 -0
- data/lib/newral/training/linear_regression.rb +30 -0
- data/lib/newral/training/linear_regression_matrix.rb +32 -0
- metadata +147 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
module Newral
  module Classifier
    # Groups points into a fixed number of labelled clusters by repeatedly
    # assigning every point to the cluster with the nearest center and then
    # recomputing the centers.
    class KMeansCluster
      # points:         array of points to cluster
      # cluster_labels: one label per cluster to find
      # max_iterations: upper bound on assign/update rounds
      def initialize(points, cluster_labels: [:a, :b], max_iterations: 20)
        @points = points
        @cluster_labels = cluster_labels
        @max_iterations = max_iterations
      end

      # Runs the clustering and returns the resulting Newral::Data::ClusterSet.
      # Initial centers are points picked at random from the input, so results
      # may differ between runs.
      def process
        @cluster_set = Newral::Data::ClusterSet.new(cluster_labels: @cluster_labels)
        iteration = 0

        # seed every cluster with a randomly chosen input point
        seeds = @points.sample(@cluster_set.cluster_array.length)
        seeds.each_with_index do |seed, position|
          @cluster_set.cluster_array[position].center = seed
        end

        # keep going while at least one center moved and the budget allows it
        while @cluster_set.cluster_array.collect(&:moved).member?(true) && iteration < @max_iterations
          @cluster_set.clusters.each_value { |cluster| cluster.points = [] }

          # assign each point to the cluster whose center is closest
          @points.each do |point|
            closest = { cluster: 'none', distance: 99**99 }
            @cluster_set.clusters.each_value do |cluster|
              distance = Newral::Tools::euclidian_distance(cluster.center, point)
              next unless distance < closest[:distance]

              closest = { cluster: cluster, distance: distance }
            end
            closest[:cluster].add_point point
          end

          @cluster_set.update_centers
          iteration += 1
        end

        @cluster_set
      end
    end
  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Newral
  module Classifier
    # Tree node that either wraps a single point (stored as a Vector) or
    # groups other nodes. Its center is the point itself or the mean of the
    # centers of its direct sub nodes.
    class Node
      attr_writer :parent_node
      attr_accessor :center
      attr_reader :sub_nodes

      # sub_nodes:  array of nodes, or the coordinates of one point when
      #             from_point is true
      # from_point: treat sub_nodes as a raw point instead of a node list
      def initialize(sub_nodes, from_point: false)
        if from_point
          @sub_nodes = [Vector.elements(sub_nodes)]
          @center = Vector.elements(sub_nodes)
        else
          @sub_nodes = sub_nodes
          origin = Vector.elements([0] * sub_nodes.first.center.size)
          total = sub_nodes.inject(origin) { |sum, child| sum + child.center }
          @center = total / @sub_nodes.size.to_f
          # register this node as parent of every child
          @sub_nodes.each { |child| child.parent_node = self }
        end
        @parent_node = nil
      end

      # A node with a single sub node prints like that sub node; otherwise
      # the sub nodes are listed inside "=>( ... )".
      def to_s
        return @sub_nodes.first.to_s if @sub_nodes.size == 1

        "=>(#{@sub_nodes.collect(&:to_s).join(',')})"
      end

      # Collects the points below this node into one flat array.
      def flatten_points
        collected = @sub_nodes.collect do |child|
          next [child] unless child.kind_of?(Node)

          child.sub_nodes.size == 1 ? child.center : child.flatten_points
        end
        collected.flatten
      end

      # Builds a Data::Cluster holding all points below this node as arrays.
      def to_cluster
        Data::Cluster.new(points: flatten_points.collect(&:to_a))
      end
    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Newral
  module Classifier
    # Pairs two nodes with the euclidean distance between their centers and
    # orders such pairs by that distance.
    class NodeDistance
      attr_reader :node1, :node2, :distance

      # The distance is computed once, from the centers the nodes have at
      # construction time.
      def initialize(node1, node2)
        @node1, @node2 = node1, node2
        @distance = Newral::Tools.euclidian_distance(@node1.center, @node2.center)
      end

      # Compares by distance so collections of pairs can be sorted.
      def <=>(other)
        distance <=> other.distance
      end
    end
  end
end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
module Newral
  module Data
    module Errors
      class InputOutputSizeMismatch < ::StandardError; end
      class WrongPointDimension < ::StandardError; end
      class UnknownValue < ::StandardError; end
      class UnknownSet < ::StandardError; end
      class UnknownCategory < ::StandardError; end
      class DownSamplingImpossible < ::StandardError; end
    end

    # In-memory data set of parallel arrays: inputs, their outputs and
    # optional labels. Offers lookup, splitting, normalization and
    # downsampling helpers.
    class Base
      attr_accessor :outputs, :labels, :inputs

      # Raises Errors::InputOutputSizeMismatch when inputs and outputs differ
      # in length.
      def initialize(inputs: [], outputs: [], labels: [])
        @inputs = inputs
        @outputs = outputs
        @labels = labels
        raise Errors::InputOutputSizeMismatch unless @inputs.size == @outputs.size
      end

      # Appends one sample; output and label default to nil so the three
      # arrays stay aligned.
      def add_input(input, output: nil, label: nil)
        @labels << label
        @outputs << output
        @inputs << input
      end

      # Returns a slice of the inputs or outputs.
      # set:      :inputs or :outputs (anything else raises Errors::UnknownSet)
      # category: :all, :training (index 0 up to 70% of the size, inclusive),
      #           :validation (following entries up to 80%), :testing (rest);
      #           anything else raises Errors::UnknownCategory
      def sub_set(set: :inputs, category: :all)
        data = case set
               when :inputs then @inputs
               when :outputs then @outputs
               else
                 raise Errors::UnknownSet
               end

        training_end = (data.size.to_f * 0.7).to_i
        validation_end = (data.size.to_f * 0.8).to_i
        case category
        when :all then data
        when :training then data[0..training_end]
        when :validation then data[training_end + 1..validation_end]
        when :testing then data[validation_end + 1, data.size]
        else
          raise Errors::UnknownCategory, category.to_s
        end
      end

      # Looks up searched_value in search_objects and returns the entries of
      # return_objects at the matching positions. A candidate matches when it
      # equals the value or the value wrapped in a one-element array.
      # With only_first the first match (or nil) is returned, otherwise an
      # array of all matches.
      def values_for(searched_value, only_first: false, return_objects: [], search_objects: [])
        results = []
        search_objects.each_with_index do |candidate, idx|
          next unless searched_value == candidate || [searched_value] == candidate
          return return_objects[idx] if only_first

          results << return_objects[idx]
        end
        results unless only_first
      end

      # First output stored for the given input, or nil.
      def output_for_input(input)
        values_for input, search_objects: @inputs, return_objects: @outputs, only_first: true
      end

      # First label stored for the given input, or nil.
      def label_for_input(input)
        values_for input, search_objects: @inputs, return_objects: @labels, only_first: true
      end

      # All inputs stored for the given output.
      def inputs_for_output(output)
        values_for output, search_objects: @outputs, return_objects: @inputs
      end

      # Rescales every input column linearly so its minimum maps to
      # normalized_low and its maximum to normalized_high. Returns [] when
      # there are no inputs or the inputs are not arrays. A column whose
      # values are all equal is mapped to the middle of the target range
      # (it has no spread to scale).
      def normalized_inputs(normalized_high: 1, normalized_low: -1)
        return [] if @inputs.empty? || !@inputs.first.kind_of?(Array)

        columns = @inputs.first.size
        # start from -/+ infinity so negative-only columns are handled too
        # (Float::MIN is the smallest *positive* float, not the most negative)
        max_values = [-Float::INFINITY] * columns
        min_values = [Float::INFINITY] * columns
        @inputs.each do |input|
          input.each_with_index do |value, idx|
            max_values[idx] = value.to_f if value > max_values[idx]
            min_values[idx] = value.to_f if value < min_values[idx]
          end
        end

        target_span = normalized_high - normalized_low
        @inputs.collect do |input|
          input.each_with_index.collect do |value, idx|
            spread = max_values[idx] - min_values[idx]
            if spread.zero?
              normalized_low + target_span / 2.0
            else
              (value - min_values[idx]) / spread.to_f * target_span + normalized_low
            end
          end
        end
      end

      # Maps every distinct output (sorted) to an evenly spaced value between
      # normalized_low and normalized_high. The mapping is also stored in
      # @output_hash.
      def output_hash(normalized_high: 1, normalized_low: -1)
        keys = @outputs.uniq.sort
        steps = [keys.length - 1, 1].max
        @output_hash = keys.each_with_index.each_with_object({}) do |(key, idx), mapping|
          mapping[key] = normalized_low + ((normalized_high.to_f - normalized_low) * idx) / steps
        end
      end

      # Outputs translated through output_hash (default range).
      def output_normalized
        mapping = output_hash
        @outputs.collect { |output| mapping[output] }
      end

      # Makes outputs easier to use for neural networks by translating them
      # to one-hot vectors: with 3 possible outputs the results are
      # [1,0,0], [0,1,0] or [0,0,1], positions following the sorted outputs.
      def output_as_vector(category: :all)
        keys = output_hash.keys # computed once instead of once per element
        sub_set(set: :outputs, category: category).collect do |output|
          vector = [0] * keys.size
          vector[keys.index(output)] = 1
          vector
        end
      end

      # Counts how often each output occurs. Single-element outputs are keyed
      # by their value as a symbol, longer ones by their elements joined
      # with '-'.
      def count_outputs
        counts = {}
        @outputs.each do |output|
          key = output.size == 1 ? output.first.to_s.to_sym : output.join('-')
          counts[key] = (counts[key] || 0) + 1
        end
        counts
      end

      # Returns a new Base holding the inputs/outputs from index offset to
      # offset+limit (inclusive range, as before); labels are not copied.
      # An offset beyond the data yields an empty set instead of failing.
      # NOTE(review): the inclusive range returns limit+1 entries - confirm
      # whether callers rely on that before tightening it.
      def sample(offset: 0, limit: 100)
        range = offset..limit + offset
        Base.new(inputs: @inputs[range] || [], outputs: @outputs[range] || [])
      end

      # Replaces every input in place by a downsampled copy: each
      # width x height tile of the flat input (rows are width_of_line long)
      # becomes 1 when the tile's mean is above 0.5, otherwise 0.
      # Raises ArgumentError when width_of_line is missing and
      # Errors::DownSamplingImpossible when the input size is not a multiple
      # of the tile size.
      def downsample_input!(height: 1, width: 1, width_of_line: nil)
        return @inputs if @inputs.empty?
        raise ArgumentError, "width_of_line is required" if width_of_line.nil?
        raise Errors::DownSamplingImpossible unless @inputs.first.size % (width * height) == 0

        @inputs.collect! do |input|
          downsampled = []
          pos = 0
          while pos < input.size
            tile = []
            height.times do |h|
              start_pos = pos + (width_of_line * h)
              end_pos = pos + width + width_of_line * h - 1
              tile += input[start_pos..end_pos]
            end
            downsampled << (tile.inject(0) { |sum, e| sum + e } / tile.length.to_f > 0.5 ? 1 : 0)
            pos += width
            # at the end of a row skip the rows already consumed by this band of tiles
            pos += width_of_line * (height - 1) if (pos % width_of_line) == 0
          end
          downsampled
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Newral
  module Data
    # A labelled group of equally sized points together with its center.
    class Cluster
      attr_accessor :label, :points, :center, :moved

      # moved records whether the last center update changed the center;
      # it starts out true unless overridden.
      def initialize(label: nil, points: [], center: nil, moved: true)
        @label = label
        @points = points
        @point_size = points.size > 0 ? points.first.size : 1
        @moved = moved
        @center = center
      end

      # Adds a point and returns the cluster itself. The first point fixes
      # the expected dimension and becomes the center if none is set yet;
      # later points of another dimension raise Errors::WrongPointDimension.
      def add_point(point)
        if @points.size == 0
          @point_size = point.size
          @center ||= point
        elsif point.size != @point_size
          # all points must be of same dimension
          raise Errors::WrongPointDimension
        end
        @points << point
        self
      end

      # Moves the center to the mean of all points and notes in moved
      # whether it changed. Does nothing for an empty cluster.
      def update_center
        return unless @points.size > 0

        origin = Vector.elements([0] * points.first.size)
        total = @points.inject(origin) { |sum, point| sum + Vector.elements(point) }
        mean = (total / @points.size.to_f).to_a
        @moved = mean != @center
        @center = mean
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Newral
  module Data
    # Hash-backed collection of clusters keyed by label.
    class ClusterSet
      attr_accessor :clusters

      # Either wraps existing clusters (which are relabelled "cluster_0",
      # "cluster_1", ... in order) or creates one empty Cluster per label.
      def initialize(cluster_labels: [], clusters: nil)
        @clusters = {}
        if clusters
          clusters.each_with_index do |cluster, idx|
            cluster.label = "cluster_#{ idx }"
            @clusters[cluster.label] = cluster
          end
        else
          cluster_labels.each { |label| @clusters[label] = Cluster.new(label: label) }
        end
      end

      # Strings and symbols look a cluster up by label, anything else is
      # used as a position in cluster_array.
      def [](label)
        by_label = label.kind_of?(String) || label.kind_of?(Symbol)
        by_label ? @clusters[label] : cluster_array[label]
      end

      # The clusters without their labels, in insertion order.
      def cluster_array
        @clusters.values
      end

      # Asks every cluster to recompute its center.
      def update_centers
        @clusters.each_value(&:update_center)
      end

      # Hash of label => number of points in that cluster.
      def clusters_count
        @clusters.each_with_object({}) do |(label, cluster), counts|
          counts[label] = cluster.points.size
        end
      end
    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Newral
  module Data
    # the library file is lower-case; "CSV" only loads on case-insensitive filesystems
    require "csv"
    require "open-uri"

    # Data set loaded from a CSV file or URL. The last output_fields columns
    # of every row become the output, all columns before them the input.
    class Csv < Base
      # optional sign, optional integer part, optional dot, at least one digit
      NUMERIC_FIELD = /\A-?\d*\.?\d+\z/

      # file_name:     path or URL of the CSV source
      # output_fields: number of trailing columns that form the output
      def initialize(file_name: nil, output_fields: 1)
        @file_name = file_name
        @output_fields = output_fields
        super(inputs: [], outputs: [])
      end

      # Reads the source line by line and adds one sample per non-empty row.
      # Fields that look numeric are converted to Float, everything else
      # (including empty fields) is kept as parsed.
      def process
        open_source(@file_name) do |file|
          file.each_line do |line|
            fields = CSV.parse_line(line)
            next if fields.nil? || fields.empty? # blank line

            row = fields.collect { |field| numeric?(field) ? field.to_f : field }
            # Array#slice takes (start, length): everything before the output
            # columns is input, so no column is dropped in between
            split_at = row.size - @output_fields
            add_input(row.slice(0, split_at), output: row.slice(split_at, @output_fields))
          end
        end
      end

      private

      # True when the field is a string holding a plain decimal number.
      def numeric?(field)
        field.kind_of?(String) && !NUMERIC_FIELD.match(field).nil?
      end

      # Opens a local path or URL. Kernel#open stopped handling URLs in
      # Ruby 3.0, so URI.open is preferred wherever open-uri provides it.
      def open_source(name, *mode, &block)
        if URI.respond_to?(:open)
          URI.open(name, *mode, &block)
        else
          open(name, *mode, &block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Newral
  module Data
    require "open-uri"
    module Errors
      class UnexpectedEOF < StandardError; end
      class EOFExpected < StandardError; end
      class LabelsNotMatchingItems < StandardError; end
    end

    # Data set read from a pair of IDX files (images and labels), the format
    # used for the handwritten digit images at
    # http://yann.lecun.com/exdb/mnist/
    class Idx < Base
      # file_name:       path or URL of the image file
      # label_file_name: path or URL of the matching label file
      def initialize(file_name: nil, label_file_name: nil)
        @file_name = file_name
        @label_file_name = label_file_name
        super(inputs: [], outputs: [])
      end

      # Loads all images into inputs (one flat array of unsigned bytes per
      # image) and all labels into outputs.
      # Raises Errors::UnexpectedEOF when a file ends too early,
      # Errors::EOFExpected when data is left over and
      # Errors::LabelsNotMatchingItems when the two files disagree on the
      # number of entries.
      def process
        number_of_items = 0
        open_source(@file_name, 'rb') do |file|
          _magic, number_of_items = read_pair(file)
          width, height = read_pair(file)
          pixels = width * height
          number_of_items.times do
            raise Errors::UnexpectedEOF if file.eof?

            data = file.read(pixels)
            # a short read would otherwise unpack into trailing nils
            raise Errors::UnexpectedEOF if data.nil? || data.bytesize < pixels

            @inputs << data.unpack("C" * pixels)
          end
          raise Errors::EOFExpected unless file.eof?
        end

        open_source(@label_file_name, 'rb') do |file|
          _magic, number_of_labels = read_pair(file)
          raise Errors::LabelsNotMatchingItems unless number_of_labels == number_of_items

          number_of_items.times do
            raise Errors::UnexpectedEOF, "#{ @outputs.size } vs. #{ number_of_labels }" if file.eof?

            @outputs << file.read(1).unpack("c").first
          end
          raise Errors::EOFExpected, "#{ @outputs.size } #{file.read.size}" unless file.eof?
        end
      end

      private

      # Reads two big-endian 32 bit unsigned integers from the file.
      def read_pair(file)
        bytes = file.read(8)
        raise Errors::UnexpectedEOF if bytes.nil? || bytes.bytesize < 8

        bytes.unpack("NN")
      end

      # Opens a local path or URL. Kernel#open stopped handling URLs in
      # Ruby 3.0, so URI.open is preferred wherever open-uri provides it.
      def open_source(name, *mode, &block)
        if URI.respond_to?(:open)
          URI.open(name, *mode, &block)
        else
          open(name, *mode, &block)
        end
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Newral
  # Error measures comparing computed results with expected values. Both
  # arguments are arrays of equal length whose entries are numbers or arrays
  # of numbers.
  module ErrorCalculation
    class DimensionsNotMatching < StandardError; end

    # Sum of the squared differences between every result component and the
    # expected component at the same position.
    # Raises DimensionsNotMatching when the outer sizes differ.
    def self.sum_of_squares(results, expected)
      unless expected.size == results.size
        raise DimensionsNotMatching, "results: #{ results.size } expected: #{ expected.size }"
      end

      results.each_with_index.inject(0) do |total, (result, idx)|
        targets = Array(expected[idx])
        Array(result).each_with_index.inject(total) do |acc, (value, position)|
          acc + (value - targets[position])**2
        end
      end
    end

    # Square root of the mean squared error.
    def self.root_mean_square(results, expected)
      squares = sum_of_squares(results, expected)
      (squares.to_f / results.size)**0.5
    end

    # Sum of squares divided by the number of results.
    def self.mean_square(results, expected)
      squares = sum_of_squares(results, expected)
      squares.to_f / results.size
    end
  end
end
|