newral 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +278 -0
- data/Rakefile +10 -0
- data/lib/newral.rb +53 -0
- data/lib/newral/bayes.rb +39 -0
- data/lib/newral/classifier/dendogram.rb +68 -0
- data/lib/newral/classifier/k_means_cluster.rb +45 -0
- data/lib/newral/classifier/node.rb +58 -0
- data/lib/newral/classifier/node_distance.rb +19 -0
- data/lib/newral/data/base.rb +153 -0
- data/lib/newral/data/cluster.rb +37 -0
- data/lib/newral/data/cluster_set.rb +38 -0
- data/lib/newral/data/csv.rb +23 -0
- data/lib/newral/data/idx.rb +48 -0
- data/lib/newral/error_calculation.rb +28 -0
- data/lib/newral/functions/base.rb +102 -0
- data/lib/newral/functions/block.rb +34 -0
- data/lib/newral/functions/gaussian.rb +41 -0
- data/lib/newral/functions/line.rb +52 -0
- data/lib/newral/functions/polynomial.rb +48 -0
- data/lib/newral/functions/radial_basis_function_network.rb +54 -0
- data/lib/newral/functions/ricker_wavelet.rb +13 -0
- data/lib/newral/functions/vector.rb +59 -0
- data/lib/newral/genetic/tree.rb +70 -0
- data/lib/newral/graphs/a_star.rb +12 -0
- data/lib/newral/graphs/cheapest_first.rb +11 -0
- data/lib/newral/graphs/edge.rb +24 -0
- data/lib/newral/graphs/graph.rb +63 -0
- data/lib/newral/graphs/node.rb +11 -0
- data/lib/newral/graphs/path.rb +50 -0
- data/lib/newral/graphs/tree_search.rb +60 -0
- data/lib/newral/networks/backpropagation_network.rb +68 -0
- data/lib/newral/networks/layer.rb +28 -0
- data/lib/newral/networks/network.rb +146 -0
- data/lib/newral/networks/perceptron.rb +84 -0
- data/lib/newral/networks/sigmoid.rb +55 -0
- data/lib/newral/probability.rb +42 -0
- data/lib/newral/probability_set.rb +108 -0
- data/lib/newral/q_learning/base.rb +90 -0
- data/lib/newral/tools.rb +135 -0
- data/lib/newral/training/gradient_descent.rb +36 -0
- data/lib/newral/training/greedy.rb +36 -0
- data/lib/newral/training/hill_climbing.rb +77 -0
- data/lib/newral/training/linear_regression.rb +30 -0
- data/lib/newral/training/linear_regression_matrix.rb +32 -0
- metadata +147 -0
@@ -0,0 +1,45 @@
|
|
1
|
+
module Newral

  module Classifier

    # K-means clustering of a list of points.
    #
    # Every cluster is seeded with a randomly sampled point. Afterwards the
    # points are repeatedly assigned to the cluster with the nearest center
    # (as reported by Newral::Tools.euclidian_distance) and the centers are
    # recomputed, until no cluster reports a moved center or max_iterations
    # rounds have been run.
    class KMeansCluster

      # points::         Array of points to cluster
      # cluster_labels:: one label per cluster to find
      # max_iterations:: stop after this many assign/update rounds
      def initialize( points, cluster_labels: [:a,:b], max_iterations: 20 )
        @points = points
        @cluster_labels = cluster_labels
        @max_iterations = max_iterations
      end

      # Runs the clustering.
      #
      # Returns the Newral::Data::ClusterSet holding one cluster per label;
      # each cluster carries its assigned points and its center.
      def process
        @cluster_set = Newral::Data::ClusterSet.new( cluster_labels: @cluster_labels )
        clusters = @cluster_set.cluster_array

        # seed the clusters with distinct random points; when there are fewer
        # points than clusters the remaining clusters keep a nil center
        @points.sample( clusters.length ).each_with_index do |point,idx|
          clusters[ idx ].center = point
        end

        runs = 0
        while runs < @max_iterations && clusters.any?{ |cluster| cluster.moved == true }
          clusters.each{ |cluster| cluster.points = [] }

          # clusters without a center cannot attract points, skipping them
          # avoids handing nil to the distance calculation
          seeded = clusters.reject{ |cluster| cluster.center.nil? }

          # assign every point to its nearest cluster (first one wins on ties)
          @points.each do |point|
            nearest = seeded.min_by do |cluster|
              Newral::Tools.euclidian_distance( cluster.center, point )
            end
            nearest.add_point point
          end

          @cluster_set.update_centers
          runs += 1
        end
        @cluster_set
      end

    end

  end
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Newral

  module Classifier

    # Tree node that either wraps a single point or groups other nodes.
    # A grouping node's center is the arithmetic mean of its sub nodes' centers.
    class Node

      attr_writer :parent_node
      attr_accessor :center
      attr_reader :sub_nodes

      # sub_nodes::  the nodes to group, or the coordinates of one point
      #              when from_point is true
      # from_point:: treat sub_nodes as the coordinates of a single point
      def initialize( sub_nodes, from_point: false )
        @parent_node = nil

        if from_point
          @center = Vector.elements( sub_nodes )
          @sub_nodes = [Vector.elements( sub_nodes )]
          return
        end

        @sub_nodes = sub_nodes
        zero = Vector.elements( [0]*sub_nodes.first.center.size )
        total = sub_nodes.inject( zero ){ |sum,child| sum + child.center }
        @center = total/@sub_nodes.size.to_f
        # register this node as parent of everything it groups
        @sub_nodes.each{ |child| child.parent_node = self }
      end

      # A node with a single sub node prints as that sub node, a group
      # prints its members comma separated inside "=>( )".
      def to_s
        return @sub_nodes.first.to_s if @sub_nodes.size == 1

        members = @sub_nodes.collect{ |child| child.to_s }
        "=>(#{members.join(',')})"
      end

      # Collects everything below this node into one flat array.
      def flatten_points
        collected = @sub_nodes.collect do |child|
          next [child] unless child.kind_of?( Node )

          child.sub_nodes.size == 1 ? child.center : child.flatten_points
        end
        collected.flatten
      end

      # Builds a Data::Cluster from the flattened points (each converted with to_a).
      def to_cluster
        coordinates = flatten_points.collect{ |point| point.to_a }
        Data::Cluster.new( points: coordinates )
      end

    end
  end
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Newral

  module Classifier
    # Pairs two nodes with the distance between their centers as reported by
    # Newral::Tools.euclidian_distance. The distance is computed once, when
    # the pair is created.
    class NodeDistance
      attr_reader :node1, :node2, :distance

      def initialize( node1, node2 )
        @node1, @node2 = node1, node2
        @distance = Newral::Tools.euclidian_distance( @node1.center, @node2.center )
      end

      # Orders pairs by their distance.
      def <=>( other )
        distance <=> other.distance
      end

    end
  end

end
|
@@ -0,0 +1,153 @@
|
|
1
|
+
module Newral
  module Data
    module Errors
      class InputOutputSizeMismatch < ::StandardError; end
      class WrongPointDimension < ::StandardError; end
      class UnknownValue < ::StandardError; end
      class UnknownSet < ::StandardError; end
      class UnknownCategory < ::StandardError; end
      class DownSamplingImpossible < ::StandardError; end
    end

    # A data set held in three parallel arrays: inputs, the output belonging
    # to each input and an optional label per input.
    class Base
      attr_accessor :outputs, :labels, :inputs

      # Raises Errors::InputOutputSizeMismatch unless there is exactly one
      # output per input.
      def initialize( inputs: [], outputs: [], labels:[] )
        @inputs = inputs
        @outputs = outputs
        @labels = labels
        raise Errors::InputOutputSizeMismatch unless @inputs.size == @outputs.size
      end

      # Appends one input together with its output and label.
      def add_input( input, output: nil, label:nil )
        @labels << label
        @outputs << output
        @inputs << input
      end

      # Returns a slice of the inputs or outputs.
      #
      # set::      :inputs or :outputs (anything else raises Errors::UnknownSet)
      # category:: :all, :training (from the start up to and including the
      #            70% index), :validation (after that up to and including the
      #            80% index) or :testing (the remainder); anything else
      #            raises Errors::UnknownCategory
      def sub_set( set: :inputs, category: :all )
        data = case set
          when :inputs then @inputs
          when :outputs then @outputs
        else
          raise Errors::UnknownSet
        end

        training_end = ( data.size.to_f*0.7 ).to_i
        validation_end = ( data.size.to_f*0.8 ).to_i
        case category
          when :all then data
          when :training then data[0..training_end]
          when :validation then data[training_end+1..validation_end]
          when :testing then data[validation_end+1,data.size]
        else
          raise Errors::UnknownCategory, category.to_s
        end
      end

      # Looks up searched_value in search_objects and returns the entries of
      # return_objects found at the matching positions. A search object
      # matches when it equals searched_value or [searched_value].
      #
      # With only_first the first match is returned (nil when nothing
      # matches), otherwise an array with all matches.
      def values_for( searched_value, only_first: false, return_objects: [], search_objects: [] )
        results = []
        search_objects.each_with_index do |each_value,idx|
          next unless searched_value == each_value || [searched_value] == each_value
          return return_objects[idx] if only_first
          results << return_objects[idx]
        end
        results unless only_first
      end

      # First output stored for the given input.
      def output_for_input( input )
        values_for input, search_objects: @inputs, return_objects: @outputs, only_first: true
      end

      # First label stored for the given input.
      def label_for_input( input )
        values_for input, search_objects: @inputs, return_objects: @labels, only_first: true
      end

      # All inputs stored for the given output.
      def inputs_for_output( output )
        values_for output, search_objects: @outputs, return_objects: @inputs
      end

      # Rescales every input column linearly so that its smallest value maps
      # to normalized_low and its largest value to normalized_high.
      # Returns [] when there are no inputs or the inputs are not arrays.
      # A column whose values are all equal has no range to scale; its cells
      # are set to normalized_low.
      def normalized_inputs(normalized_high: 1, normalized_low:-1 )
        return [] if @inputs.size == 0 || !@inputs.first.kind_of?( Array )
        # start from the most negative float: Float::MIN is the smallest
        # positive float and would be wrong for all-negative columns
        max_values = [-Float::MAX]*@inputs.first.size
        min_values = [Float::MAX]*@inputs.first.size
        @inputs.each do |input|
          input.each_with_index do |value,idx|
            max_values[idx] = value.to_f if value > max_values[idx]
            min_values[idx] = value.to_f if value < min_values[idx]
          end
        end
        @inputs.collect do |input|
          row = [0]*input.size
          input.each_with_index do |value,idx|
            range = max_values[idx]-min_values[idx]
            row[idx] = if range == 0
              normalized_low.to_f
            else
              (value-min_values[idx])/range.to_f*(normalized_high-normalized_low)+normalized_low
            end
          end
          row
        end
      end

      # Maps every distinct output (in sorted order) to an evenly spaced
      # value between normalized_low and normalized_high.
      # The mapping is also stored in @output_hash.
      def output_hash( normalized_high: 1, normalized_low:-1 )
        keys = @outputs.uniq.sort
        steps = [keys.length-1,1].max
        mapping = {}
        keys.each_with_index do |key,idx|
          mapping[ key ] = normalized_low+((normalized_high.to_f-normalized_low)*idx)/steps
        end
        @output_hash = mapping
      end

      # The outputs translated through output_hash.
      def output_normalized
        hash = output_hash
        @outputs.collect{ |k| hash[k]}
      end

      # this will make it easier to use outputs for neural networks
      # as it translates them to vectors like [1,0,0]
      # if you have 3 possible outputs this will return [1,0,0],[0,1,0],[0,0,1]
      def output_as_vector( category: :all )
        # computed once, not once per row
        keys = output_hash.keys
        sub_set( set: :outputs, category: category ).collect do |k|
          vector = [0]*keys.size
          vector[ keys.index( k ) ] = 1
          vector
        end
      end

      # Counts how often each output occurs. An output with a single value
      # is keyed by that value as a symbol, an output with several values by
      # the values joined with '-'. Scalar outputs are treated like
      # one-element arrays.
      def count_outputs
        counts = {}
        @outputs.each do |output|
          values = Array( output )
          output_key = values.size == 1 ? values.first.to_s.to_sym : values.join('-')
          counts[output_key] = (counts[output_key] || 0) + 1
        end
        counts
      end

      # Returns a new Base with at most limit inputs/outputs starting at
      # offset (empty when offset lies beyond the data). Labels are not
      # carried over.
      def sample( offset:0,limit:100 )
        Base.new( inputs: @inputs[offset,limit] || [], outputs: @outputs[offset,limit] || [] )
      end

      # Replaces every input by a downsampled version: the input is read as
      # rows of width_of_line values, cut into tiles of width x height
      # values, and each tile becomes 1 when its average is above 0.5 and 0
      # otherwise.
      #
      # Raises ArgumentError without width_of_line and
      # Errors::DownSamplingImpossible when the input size is not a multiple
      # of width*height.
      def downsample_input!( height:1, width: 1, width_of_line: nil )
        raise ArgumentError, "width_of_line is required" if width_of_line.nil?
        return @inputs if @inputs.empty?
        raise Errors::DownSamplingImpossible unless @inputs.first.size % ( width*height ) == 0

        @inputs.collect! do |input|
          downsampled = []
          pos = 0
          while pos < input.size do
            # gather the tile whose top left corner sits at pos
            matrix = []
            height.times do |h|
              start_pos = pos+(width_of_line*h)
              end_pos = pos+width+(width_of_line)*h-1
              matrix = matrix+input[start_pos..end_pos]
            end
            downsampled << ( matrix.inject(0){|sum,e| sum+e }/matrix.length.to_f > 0.5 ? 1 : 0 )
            pos = pos+width
            # at the end of a line skip the lines already covered by this row of tiles
            pos=pos+width_of_line*(height-1) if (pos%width_of_line) == 0
          end
          downsampled
        end
      end
    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Newral
  module Data
    # A labelled group of points together with its center.
    class Cluster
      attr_accessor :label, :points, :center, :moved

      # moved:: whether the center changed during the last update_center call
      def initialize( label: nil, points: [], center: nil, moved: true )
        @label = label
        @points = points
        @point_size = points.size > 0 ? points.first.size : 1
        @moved = moved # did center move when updating it
        @center = center
      end

      # Adds a point and returns the cluster itself, so calls can be chained.
      # The first point fixes the dimension all further points must have
      # (Errors::WrongPointDimension otherwise) and becomes the center if
      # none is set yet.
      def add_point( point )
        if @points.size == 0
          @point_size = point.size
          @center ||= point
        else
          # all points must be of same dimension
          raise Errors::WrongPointDimension unless point.size == @point_size
        end
        @points << point
        self
      end

      # Moves the center to the mean of the points and records in moved
      # whether that changed the center. Returns the new center.
      #
      # A cluster without points keeps its center, so it is flagged as not
      # moved (a stale true would keep callers iterating) and nil is returned.
      def update_center
        if @points.empty?
          @moved = false
          return
        end

        origin = Vector.elements( [0]*@points.first.size )
        total = @points.inject( origin ){ |sum,point| sum + Vector.elements( point ) }
        new_center = ( total/@points.size.to_f ).to_a
        @moved = new_center != @center
        @center = new_center
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Newral
  module Data
    # Collection of clusters addressable by label or by position.
    class ClusterSet
      attr_accessor :clusters

      # cluster_labels:: creates one empty Cluster per label
      # clusters::       when given, these clusters are used instead and are
      #                  relabelled "cluster_0", "cluster_1", ...
      def initialize( cluster_labels: [], clusters: nil )
        @clusters = {}
        if clusters
          clusters.each_with_index do |cluster,position|
            cluster.label = "cluster_#{ position }"
            @clusters[ cluster.label ] = cluster
          end
        else
          cluster_labels.each do |label|
            @clusters[ label ] = Cluster.new( label: label )
          end
        end
      end

      # Strings and symbols are looked up as labels, everything else is
      # used as an index into cluster_array.
      def []( label )
        case label
        when String, Symbol then @clusters[ label ]
        else cluster_array[ label ]
        end
      end

      # The clusters without their labels.
      def cluster_array
        @clusters.values
      end

      # Asks every cluster to recompute its center.
      def update_centers
        @clusters.each_value{ |cluster| cluster.update_center }
      end

      # Hash of label => number of points in that cluster.
      def clusters_count
        counts = {}
        @clusters.each_pair{ |label,cluster| counts[ label ] = cluster.points.size }
        counts
      end

    end
  end
end
|
@@ -0,0 +1,23 @@
|
|
1
|
+
module Newral
  module Data
    # the library file is lower case; "CSV" cannot be loaded on case
    # sensitive file systems
    require "csv"
    require "open-uri"

    # Data set read from a CSV file or URL. The last output_fields columns
    # of every row become the output, all columns before them the input.
    class Csv < Base
      # optionally signed integer or decimal number
      NUMERIC_FIELD = /\A-?\d*\.?\d+\z/

      # file_name::     path or http(s)/ftp URL of the CSV data
      # output_fields:: number of trailing columns that form the output
      def initialize( file_name: nil, output_fields: 1 )
        @file_name = file_name
        @output_fields = output_fields
        super( inputs: [], outputs: [])
      end

      # Reads the source line by line and adds one input/output pair per
      # non-empty row. Numeric fields are converted to floats, all other
      # fields are kept as they are. Returns self.
      def process
        open_source do |file|
          file.each_line do |line|
            fields = CSV.parse_line( line )
            next if fields.nil? || fields.empty? # blank line

            row = fields.collect{ |field| NUMERIC_FIELD =~ field.to_s ? field.to_f : field }
            # slice takes a length: everything except the output columns
            add_input( row.slice(0,row.size-@output_fields), output: row.slice(row.size-@output_fields, row.size ))
          end
        end
        self
      end

      private

      # Opens @file_name for reading and yields the IO. URLs are opened
      # through open-uri, everything else is opened as a plain file so a
      # name starting with "|" is never run as a command.
      def open_source( &block )
        if @file_name.to_s =~ %r{\A(?:https?|ftp)://}i
          URI.open( @file_name, &block )
        else
          File.open( @file_name, &block )
        end
      end

    end
  end
end
|
@@ -0,0 +1,48 @@
|
|
1
|
+
module Newral
  module Data
    require "open-uri"
    module Errors
      class UnexpectedEOF < StandardError; end
      class EOFExpected < StandardError; end
      class LabelsNotMatchingItems < StandardError; end
    end

    # Data set read from a pair of IDX files: one with the items (stored as
    # inputs) and one with a label per item (stored as outputs).
    class Idx < Base
      # http://yann.lecun.com/exdb/mnist/
      # used for Handwritten Images
      #
      # file_name::       path or URL of the items file
      # label_file_name:: path or URL of the labels file
      def initialize( file_name: nil, label_file_name: nil )
        @file_name = file_name
        @label_file_name = label_file_name
        super( inputs: [], outputs: [])
      end

      # Reads both files and fills inputs and outputs. Returns self.
      #
      # Raises Errors::UnexpectedEOF when a file ends too early,
      # Errors::EOFExpected when data is left over after the announced
      # number of entries and Errors::LabelsNotMatchingItems when the two
      # files announce different counts.
      def process
        number_of_items = 0
        open_source( @file_name ) do |file|
          # header: two big-endian 32 bit integers, the first one is not used
          _magic,number_of_items = read_exactly( file, 8 ).unpack("NN")
          width,height = read_exactly( file, 8 ).unpack("NN")
          image_size = width*height
          number_of_items.times do
            raise Errors::UnexpectedEOF if file.eof?
            # one unsigned byte per value
            @inputs << read_exactly( file, image_size ).unpack("C*")
          end
          raise Errors::EOFExpected unless file.eof?
        end

        open_source( @label_file_name ) do |file|
          _magic,number_of_labels = read_exactly( file, 8 ).unpack("NN")
          raise Errors::LabelsNotMatchingItems unless number_of_labels==number_of_items
          number_of_items.times do
            raise Errors::UnexpectedEOF,"#{ @outputs.size } vs. #{ number_of_labels }" if file.eof?
            label = file.read(1).unpack("c").first
            @outputs << label
          end
          raise Errors::EOFExpected,"#{ @outputs.size } #{file.read.size}" unless file.eof?
        end

        self
      end

      private

      # Opens name for binary reading and yields the IO. URLs are opened
      # through open-uri, everything else as a plain file.
      def open_source( name, &block )
        if name.to_s =~ %r{\A(?:https?|ftp)://}i
          URI.open( name, 'rb', &block )
        else
          File.open( name, 'rb', &block )
        end
      end

      # Reads exactly length bytes, raising Errors::UnexpectedEOF when the
      # file holds fewer.
      def read_exactly( file, length )
        data = file.read( length )
        raise Errors::UnexpectedEOF if data.nil? || data.bytesize < length
        data
      end

    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Newral
  # Error measures comparing calculated results with expected values.
  # results and expected are arrays of the same length; every entry is
  # either a single number or an array of numbers.
  module ErrorCalculation
    class DimensionsNotMatching < StandardError ; end

    # Sum of the squared differences between results and expected.
    #
    # Raises DimensionsNotMatching when the two arrays differ in length or
    # when a result has more values than its expected counterpart. Expected
    # values beyond the length of the result are not looked at.
    def self.sum_of_squares( results, expected )
      raise DimensionsNotMatching, "results: #{ results.size } expected: #{ expected.size }" unless expected.size == results.size

      sum = 0
      results.each_with_index do |result,idx|
        values = Array( result )
        targets = Array( expected[idx] ) # once per result, not once per value
        if values.size > targets.size
          raise DimensionsNotMatching, "result #{ idx }: #{ values.size } expected: #{ targets.size }"
        end

        values.each_with_index do |value,value_idx|
          sum = sum+(value-targets[value_idx])**2
        end
      end
      sum
    end

    # Square root of mean_square.
    def self.root_mean_square( results, expected )
      sum = sum_of_squares( results, expected )
      (sum.to_f/results.size)**0.5
    end

    # sum_of_squares divided by the number of results.
    def self.mean_square( results, expected )
      sum = sum_of_squares( results, expected )
      sum.to_f/results.size
    end

  end
end
|