newral 0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +21 -0
  4. data/README.md +278 -0
  5. data/Rakefile +10 -0
  6. data/lib/newral.rb +53 -0
  7. data/lib/newral/bayes.rb +39 -0
  8. data/lib/newral/classifier/dendogram.rb +68 -0
  9. data/lib/newral/classifier/k_means_cluster.rb +45 -0
  10. data/lib/newral/classifier/node.rb +58 -0
  11. data/lib/newral/classifier/node_distance.rb +19 -0
  12. data/lib/newral/data/base.rb +153 -0
  13. data/lib/newral/data/cluster.rb +37 -0
  14. data/lib/newral/data/cluster_set.rb +38 -0
  15. data/lib/newral/data/csv.rb +23 -0
  16. data/lib/newral/data/idx.rb +48 -0
  17. data/lib/newral/error_calculation.rb +28 -0
  18. data/lib/newral/functions/base.rb +102 -0
  19. data/lib/newral/functions/block.rb +34 -0
  20. data/lib/newral/functions/gaussian.rb +41 -0
  21. data/lib/newral/functions/line.rb +52 -0
  22. data/lib/newral/functions/polynomial.rb +48 -0
  23. data/lib/newral/functions/radial_basis_function_network.rb +54 -0
  24. data/lib/newral/functions/ricker_wavelet.rb +13 -0
  25. data/lib/newral/functions/vector.rb +59 -0
  26. data/lib/newral/genetic/tree.rb +70 -0
  27. data/lib/newral/graphs/a_star.rb +12 -0
  28. data/lib/newral/graphs/cheapest_first.rb +11 -0
  29. data/lib/newral/graphs/edge.rb +24 -0
  30. data/lib/newral/graphs/graph.rb +63 -0
  31. data/lib/newral/graphs/node.rb +11 -0
  32. data/lib/newral/graphs/path.rb +50 -0
  33. data/lib/newral/graphs/tree_search.rb +60 -0
  34. data/lib/newral/networks/backpropagation_network.rb +68 -0
  35. data/lib/newral/networks/layer.rb +28 -0
  36. data/lib/newral/networks/network.rb +146 -0
  37. data/lib/newral/networks/perceptron.rb +84 -0
  38. data/lib/newral/networks/sigmoid.rb +55 -0
  39. data/lib/newral/probability.rb +42 -0
  40. data/lib/newral/probability_set.rb +108 -0
  41. data/lib/newral/q_learning/base.rb +90 -0
  42. data/lib/newral/tools.rb +135 -0
  43. data/lib/newral/training/gradient_descent.rb +36 -0
  44. data/lib/newral/training/greedy.rb +36 -0
  45. data/lib/newral/training/hill_climbing.rb +77 -0
  46. data/lib/newral/training/linear_regression.rb +30 -0
  47. data/lib/newral/training/linear_regression_matrix.rb +32 -0
  48. metadata +147 -0
@@ -0,0 +1,45 @@
1
module Newral
  module Classifier
    # Groups points into a fixed number of clusters by repeatedly assigning
    # every point to the cluster with the closest center and then
    # recomputing the centers.
    class KMeansCluster
      # points:         array of points to cluster
      # cluster_labels: one label per cluster that should be found
      # max_iterations: stop after this many assign/update rounds at the latest
      def initialize(points, cluster_labels: [:a, :b], max_iterations: 20)
        @points = points
        @cluster_labels = cluster_labels
        @max_iterations = max_iterations
      end

      # Runs the clustering and returns the Newral::Data::ClusterSet whose
      # clusters hold the assigned points and their centers.
      def process
        @cluster_set = Newral::Data::ClusterSet.new(cluster_labels: @cluster_labels)
        rounds = 0
        seed_centers

        # keep going while at least one center still moved in the last round
        while center_moved? && rounds < @max_iterations
          @cluster_set.clusters.each_value { |cluster| cluster.points = [] }
          @points.each { |point| nearest_cluster(point).add_point(point) }
          @cluster_set.update_centers
          rounds += 1
        end
        @cluster_set
      end

      private

      # Uses randomly sampled points as the initial cluster centers.
      def seed_centers
        targets = @cluster_set.cluster_array
        @points.sample(targets.length).each_with_index do |point, position|
          targets[position].center = point
        end
      end

      # True when any cluster reports that its center moved.
      def center_moved?
        @cluster_set.cluster_array.any? { |cluster| cluster.moved == true }
      end

      # Returns the cluster whose center is closest to the point; the first
      # cluster wins on equal distances.
      def nearest_cluster(point)
        best = 'none'
        best_distance = 99**99
        @cluster_set.clusters.each_value do |candidate|
          distance = Newral::Tools::euclidian_distance(candidate.center, point)
          next unless distance < best_distance
          best = candidate
          best_distance = distance
        end
        best
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
module Newral
  module Classifier
    # Node of a cluster tree. A node is either built from a single point
    # (from_point: true) or from a list of child nodes, in which case its
    # center is the mean of the children's centers.
    class Node
      attr_writer :parent_node
      attr_accessor :center
      attr_reader :sub_nodes

      # sub_nodes:  list of child nodes, or the coordinates of one point
      # from_point: when true, sub_nodes is taken as the coordinates of a
      #             single point and wrapped in a Vector
      def initialize(sub_nodes, from_point: false)
        if from_point
          @sub_nodes = [Vector.elements(sub_nodes)]
          @center = Vector.elements(sub_nodes)
        else
          @sub_nodes = sub_nodes
          origin = Vector.elements([0] * sub_nodes.first.center.size)
          total = sub_nodes.inject(origin) { |sum, child| sum + child.center }
          @center = total / @sub_nodes.size.to_f
          # every child learns which node now contains it
          @sub_nodes.each { |child| child.parent_node = self }
        end
        @parent_node = nil
      end

      # A node with a single child prints as that child; otherwise the
      # children are listed as "=>(child,child,...)".
      def to_s
        return @sub_nodes.first.to_s if @sub_nodes.size == 1

        "=>(#{@sub_nodes.map(&:to_s).join(',')})"
      end

      # Collects the points below this node into one flat array.
      def flatten_points
        gathered = @sub_nodes.map do |child|
          next [child] unless child.kind_of?(Node)

          child.sub_nodes.size == 1 ? child.center : child.flatten_points
        end
        gathered.flatten
      end

      # Builds a Data::Cluster holding all points below this node as arrays.
      def to_cluster
        Data::Cluster.new(points: flatten_points.map { |point| point.to_a })
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Newral
  module Classifier
    # Pair of nodes together with the distance between their centers, as
    # computed by Newral::Tools.euclidian_distance when the pair is created.
    class NodeDistance
      attr_reader :node1, :node2, :distance

      def initialize(node1, node2)
        @node1, @node2 = node1, node2
        @distance = Newral::Tools.euclidian_distance(node1.center, node2.center)
      end

      # Orders pairs by their distance, so a list of pairs can be sorted.
      def <=>(other)
        distance <=> other.distance
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
module Newral
  module Data
    module Errors
      class InputOutputSizeMismatch < ::StandardError; end
      class WrongPointDimension < ::StandardError; end
      class UnknownValue < ::StandardError; end
      class UnknownSet < ::StandardError; end
      class UnknownCategory < ::StandardError; end
      class DownSamplingImpossible < ::StandardError; end
    end

    # Container for a data set made of three parallel arrays: inputs,
    # outputs and labels. Offers lookups, train/validation/test slicing and
    # helpers that normalize inputs and outputs.
    class Base
      attr_accessor :outputs, :labels, :inputs

      # Raises Errors::InputOutputSizeMismatch unless inputs and outputs
      # have the same number of entries.
      def initialize(inputs: [], outputs: [], labels: [])
        @inputs = inputs
        @outputs = outputs
        @labels = labels
        raise Errors::InputOutputSizeMismatch unless @inputs.size == @outputs.size
      end

      # Appends one row (input with its output and label).
      def add_input(input, output: nil, label: nil)
        @labels << label
        @outputs << output
        @inputs << input
      end

      # Returns a slice of the inputs or outputs.
      #
      # set:      :inputs or :outputs, anything else raises Errors::UnknownSet
      # category: :all        - everything
      #           :training   - index 0 up to and including (size*0.7).to_i
      #           :validation - the following rows up to and including (size*0.8).to_i
      #           :testing    - all remaining rows
      #           anything else raises Errors::UnknownCategory
      def sub_set(set: :inputs, category: :all)
        data = case set
               when :inputs then @inputs
               when :outputs then @outputs
               else raise Errors::UnknownSet
               end

        training_end = (data.size.to_f * 0.7).to_i
        validation_end = (data.size.to_f * 0.8).to_i
        case category
        when :all then data
        when :training then data[0..training_end]
        when :validation then data[(training_end + 1)..validation_end]
        when :testing then data[validation_end + 1, data.size]
        else raise Errors::UnknownCategory, category.to_s
        end
      end

      # Looks up searched_value in search_objects and returns the entries of
      # return_objects found at the matching positions. A search entry
      # matches when it equals searched_value or [searched_value].
      #
      # only_first: return just the first match (nil when nothing matches)
      #             instead of an array of all matches
      def values_for(searched_value, only_first: false, return_objects: [], search_objects: [])
        results = []
        search_objects.each_with_index do |each_value, idx|
          next unless searched_value == each_value || [searched_value] == each_value
          return return_objects[idx] if only_first

          results << return_objects[idx]
        end
        results unless only_first
      end

      # First output stored for the given input, nil if there is none.
      def output_for_input(input)
        values_for input, search_objects: @inputs, return_objects: @outputs, only_first: true
      end

      # First label stored for the given input, nil if there is none.
      def label_for_input(input)
        values_for input, search_objects: @inputs, return_objects: @labels, only_first: true
      end

      # All inputs stored for the given output.
      def inputs_for_output(output)
        values_for output, search_objects: @outputs, return_objects: @inputs
      end

      # Rescales every input column linearly so that its smallest value maps
      # to normalized_low and its largest to normalized_high.
      # Returns [] when there are no inputs or the inputs are not arrays.
      # A column in which all values are equal maps to normalized_low.
      def normalized_inputs(normalized_high: 1, normalized_low: -1)
        return [] if @inputs.size == 0 || !@inputs.first.kind_of?(Array)

        columns = @inputs.first.size
        # Float::MIN is the smallest *positive* float, so it must not be used
        # as the start value for a maximum search: negative columns would
        # never replace it.
        max_values = [-Float::INFINITY] * columns
        min_values = [Float::INFINITY] * columns
        @inputs.each do |input|
          input.each_with_index do |value, idx|
            max_values[idx] = value.to_f if value > max_values[idx]
            min_values[idx] = value.to_f if value < min_values[idx]
          end
        end

        target_span = normalized_high - normalized_low
        @inputs.collect do |input|
          input.each_with_index.map do |value, idx|
            range = max_values[idx] - min_values[idx]
            # constant column: avoid 0.0/0.0 (NaN)
            next normalized_low.to_f if range.zero?

            (value - min_values[idx]) / range.to_f * target_span + normalized_low
          end
        end
      end

      # Maps every distinct output (in sorted order) to an evenly spaced
      # value between normalized_low and normalized_high.
      def output_hash(normalized_high: 1, normalized_low: -1)
        keys = @outputs.uniq.sort
        steps = [keys.length - 1, 1].max
        new_hash = {}
        keys.each_with_index do |key, idx|
          new_hash[key] = normalized_low + ((normalized_high.to_f - normalized_low) * idx) / steps
        end
        @output_hash = new_hash
      end

      # The outputs translated through output_hash (default range -1..1).
      def output_normalized
        hash = output_hash
        @outputs.collect { |k| hash[k] }
      end

      # this will make it easier to use outputs for neural networks
      # as it translates them to vectors like [1,0,0]
      # if you have 3 possible outputs this will return [1,0,0],[0,1,0],[0,0,1]
      def output_as_vector(category: :all)
        # compute the key order once instead of once per row
        keys = output_hash.keys
        positions = {}
        keys.each_with_index { |key, idx| positions[key] = idx }
        sub_set(set: :outputs, category: category).collect do |k|
          vector = [0] * keys.size
          vector[positions[k]] = 1
          vector
        end
      end

      # Counts how often each output occurs. Outputs must respond to size,
      # first and join; single-element outputs are keyed by their element as
      # a symbol, longer ones by their elements joined with '-'.
      def count_outputs
        @outputs.each_with_object({}) do |output, counts|
          output_key = output.size == 1 ? output.first.to_s.to_sym : output.join('-')
          counts[output_key] = (counts[output_key] || 0) + 1
        end
      end

      # Returns a new Base with at most limit rows starting at offset.
      # Labels are not copied. An offset past the end gives an empty set.
      def sample(offset: 0, limit: 100)
        # [offset, limit] takes exactly limit rows; the range form
        # offset..limit+offset would take one row too many
        Base.new(inputs: @inputs[offset, limit] || [], outputs: @outputs[offset, limit] || [])
      end

      # Shrinks every input in place: the input is read as an image stored
      # row by row with width_of_line values per row, and each block of
      # width x height values is replaced by 1 when its mean is above 0.5
      # and by 0 otherwise.
      #
      # Raises ArgumentError when width_of_line is missing and
      # Errors::DownSamplingImpossible when the input size is not a multiple
      # of width*height.
      def downsample_input!(height: 1, width: 1, width_of_line: nil)
        raise ArgumentError, 'width_of_line is required' if width_of_line.nil?
        return @inputs if @inputs.empty?
        raise Errors::DownSamplingImpossible unless @inputs.first.size % (width * height) == 0

        @inputs.collect! do |input|
          downsampled = []
          pos = 0
          while pos < input.size
            matrix = []
            height.times do |h|
              start_pos = pos + (width_of_line * h)
              end_pos = pos + width + (width_of_line * h) - 1
              matrix += input[start_pos..end_pos]
            end
            downsampled << (matrix.inject(0) { |sum, e| sum + e } / matrix.length.to_f > 0.5 ? 1 : 0)
            pos += width
            # at the end of a line skip the rows already consumed by this band
            pos += width_of_line * (height - 1) if (pos % width_of_line) == 0
          end
          downsampled
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Newral
  module Data
    # A labelled group of points of equal dimension together with a center.
    class Cluster
      attr_accessor :label, :points, :center, :moved

      # moved tells whether the center changed during the last update_center
      def initialize(label: nil, points: [], center: nil, moved: true)
        @label = label
        @points = points
        @point_size = points.empty? ? 1 : points.first.size
        @moved = moved
        @center = center
      end

      # Adds a point and returns the cluster itself so calls can be chained.
      # The first point of an empty cluster fixes the expected dimension and
      # becomes the center if none is set yet.
      # Raises Errors::WrongPointDimension when a later point has a
      # different size.
      def add_point(point)
        if @points.empty?
          @point_size = point.size
          @center ||= point
        elsif point.size != @point_size
          # all points must be of same dimension
          raise Errors::WrongPointDimension
        end
        @points << point
        self
      end

      # Moves the center to the mean of all points and records in moved
      # whether that changed it. A cluster without points keeps its center
      # and is marked as not moved; nil is returned in that case.
      def update_center
        if @points.empty?
          # nothing to average, so the center cannot have moved; leaving the
          # flag untouched would keep reporting a stale "moved" forever
          @moved = false
          return
        end

        origin = Vector.elements([0] * @points.first.size)
        total = @points.inject(origin) { |sum, point| sum + Vector.elements(point) }
        new_center = (total / @points.size.to_f).to_a
        @moved = new_center != @center
        @center = new_center
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module Newral
  module Data
    # Collection of clusters stored in a hash keyed by cluster label.
    class ClusterSet
      attr_accessor :clusters

      # clusters:       existing clusters; each one is relabelled
      #                 "cluster_<position>" and stored under that label
      # cluster_labels: used when no clusters are given; an empty Cluster is
      #                 created for every label
      def initialize(cluster_labels: [], clusters: nil)
        @clusters = {}
        if clusters
          clusters.each_with_index do |cluster, position|
            cluster.label = "cluster_#{ position }"
            @clusters[cluster.label] = cluster
          end
        else
          cluster_labels.each { |label| @clusters[label] = Cluster.new(label: label) }
        end
      end

      # Strings and symbols look a cluster up by label, anything else is
      # used as a position in cluster_array.
      def [](label)
        case label
        when String, Symbol then @clusters[label]
        else cluster_array[label]
        end
      end

      # The clusters as an array, in insertion order of the hash.
      def cluster_array
        @clusters.values
      end

      # Asks every cluster to recompute its center.
      def update_centers
        @clusters.each_value { |cluster| cluster.update_center }
      end

      # Hash of label => number of points in that cluster.
      def clusters_count
        @clusters.each_with_object({}) do |(label, cluster), counts|
          counts[label] = cluster.points.size
        end
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
# the library file is csv.rb; require "CSV" only works on case-insensitive
# file systems
require "csv"
require "open-uri"

module Newral
  module Data
    # Data set read from a CSV file or URL. The last output_fields columns
    # of every row become the output, all columns before them the input.
    class Csv < Base
      # fields matching this are converted to Float; everything else is kept
      # as the string read from the file
      NUMERIC_FIELD = /\A-?\d*\.?\d+\z/

      # file_name:     path or http(s)/ftp URL of the CSV source
      # output_fields: number of trailing columns that form the output
      def initialize(file_name: nil, output_fields: 1)
        @file_name = file_name
        @output_fields = output_fields
        super(inputs: [], outputs: [])
      end

      # Reads the source line by line and adds one input/output pair per
      # non-empty line.
      def process
        each_source_line do |line|
          fields = CSV.parse_line(line)
          next if fields.nil? || fields.empty?

          row = fields.collect { |field| numeric?(field) ? field.to_f : field }
          split_at = row.size - @output_fields
          # slice takes (start, length): the input is everything in front of
          # the output columns
          add_input(row.slice(0, split_at), output: row.slice(split_at, row.size))
        end
      end

      private

      # Empty CSV fields are parsed as nil and are left untouched.
      def numeric?(field)
        field.is_a?(String) && !(field =~ NUMERIC_FIELD).nil?
      end

      # Yields every line of the source. URLs are opened through open-uri,
      # everything else as a plain file; Kernel#open is avoided because it
      # runs names starting with "|" as shell commands.
      def each_source_line(&block)
        if @file_name.to_s =~ %r{\A(?:https?|ftp)://}i
          URI.parse(@file_name.to_s).open { |io| io.each_line(&block) }
        else
          File.open(@file_name) { |io| io.each_line(&block) }
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
require "open-uri"

module Newral
  module Data
    module Errors
      class UnexpectedEOF < StandardError; end
      class EOFExpected < StandardError; end
      class LabelsNotMatchingItems < StandardError; end
    end

    # Data set read from a pair of IDX files (image file + label file).
    # http://yann.lecun.com/exdb/mnist/
    # used for Handwritten Images
    class Idx < Base
      # file_name:       path or URL of the image file
      # label_file_name: path or URL of the matching label file
      def initialize(file_name: nil, label_file_name: nil)
        @file_name = file_name
        @label_file_name = label_file_name
        super(inputs: [], outputs: [])
      end

      # Reads all images into inputs (one array of byte values per image)
      # and all labels into outputs.
      #
      # Raises Errors::UnexpectedEOF when a file ends too early,
      # Errors::EOFExpected when data is left after the announced number of
      # items and Errors::LabelsNotMatchingItems when the label file
      # announces a different item count than the image file.
      def process
        number_of_items = 0
        open_source(@file_name) do |file|
          # header: magic number, item count, then the two image dimensions
          # (all 32 bit big endian)
          _magic, number_of_items = read_exactly(file, 8).unpack("NN")
          rows, columns = read_exactly(file, 8).unpack("NN")
          pixels = rows * columns
          number_of_items.times do
            raise Errors::UnexpectedEOF if file.eof?

            @inputs << read_exactly(file, pixels).unpack("C*")
          end
          raise Errors::EOFExpected unless file.eof?
        end

        open_source(@label_file_name) do |file|
          _magic, number_of_labels = read_exactly(file, 8).unpack("NN")
          raise Errors::LabelsNotMatchingItems unless number_of_labels == number_of_items

          number_of_items.times do
            raise Errors::UnexpectedEOF, "#{ @outputs.size } vs. #{ number_of_labels }" if file.eof?

            # labels are unsigned bytes
            @outputs << read_exactly(file, 1).unpack("C").first
          end
          raise Errors::EOFExpected, "#{ @outputs.size } #{file.read.size}" unless file.eof?
        end
      end

      private

      # Reads exactly length bytes; a shorter read means the file is truncated.
      def read_exactly(io, length)
        chunk = io.read(length)
        raise Errors::UnexpectedEOF if chunk.nil? || chunk.bytesize < length

        chunk
      end

      # Opens a URL through open-uri or a local file, in binary mode.
      # Kernel#open is avoided because it runs names starting with "|" as
      # shell commands.
      def open_source(source, &block)
        if source.to_s =~ %r{\A(?:https?|ftp)://}i
          URI.parse(source.to_s).open('rb', &block)
        else
          File.open(source, 'rb', &block)
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Newral
  # Error measures comparing computed results with expected values.
  # Both arguments are lists with one entry per sample; an entry may be a
  # single number or a list of numbers.
  module ErrorCalculation
    class DimensionsNotMatching < StandardError; end

    # Sum of the squared differences over all values of all samples.
    # Raises DimensionsNotMatching when the two lists differ in length.
    def self.sum_of_squares(results, expected)
      unless expected.size == results.size
        raise DimensionsNotMatching, "results: #{ results.size } expected: #{ expected.size }"
      end

      results.each_with_index.inject(0) do |total, (result, row)|
        targets = Array(expected[row])
        Array(result).each_with_index.inject(total) do |running, (value, column)|
          running + (value - targets[column])**2
        end
      end
    end

    # Square root of mean_square.
    def self.root_mean_square(results, expected)
      mean_square(results, expected)**0.5
    end

    # sum_of_squares divided by the number of samples.
    def self.mean_square(results, expected)
      sum_of_squares(results, expected).to_f / results.size
    end
  end
end