newral 0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48) hide show
  1. checksums.yaml +7 -0
  2. data/Gemfile +4 -0
  3. data/LICENSE +21 -0
  4. data/README.md +278 -0
  5. data/Rakefile +10 -0
  6. data/lib/newral.rb +53 -0
  7. data/lib/newral/bayes.rb +39 -0
  8. data/lib/newral/classifier/dendogram.rb +68 -0
  9. data/lib/newral/classifier/k_means_cluster.rb +45 -0
  10. data/lib/newral/classifier/node.rb +58 -0
  11. data/lib/newral/classifier/node_distance.rb +19 -0
  12. data/lib/newral/data/base.rb +153 -0
  13. data/lib/newral/data/cluster.rb +37 -0
  14. data/lib/newral/data/cluster_set.rb +38 -0
  15. data/lib/newral/data/csv.rb +23 -0
  16. data/lib/newral/data/idx.rb +48 -0
  17. data/lib/newral/error_calculation.rb +28 -0
  18. data/lib/newral/functions/base.rb +102 -0
  19. data/lib/newral/functions/block.rb +34 -0
  20. data/lib/newral/functions/gaussian.rb +41 -0
  21. data/lib/newral/functions/line.rb +52 -0
  22. data/lib/newral/functions/polynomial.rb +48 -0
  23. data/lib/newral/functions/radial_basis_function_network.rb +54 -0
  24. data/lib/newral/functions/ricker_wavelet.rb +13 -0
  25. data/lib/newral/functions/vector.rb +59 -0
  26. data/lib/newral/genetic/tree.rb +70 -0
  27. data/lib/newral/graphs/a_star.rb +12 -0
  28. data/lib/newral/graphs/cheapest_first.rb +11 -0
  29. data/lib/newral/graphs/edge.rb +24 -0
  30. data/lib/newral/graphs/graph.rb +63 -0
  31. data/lib/newral/graphs/node.rb +11 -0
  32. data/lib/newral/graphs/path.rb +50 -0
  33. data/lib/newral/graphs/tree_search.rb +60 -0
  34. data/lib/newral/networks/backpropagation_network.rb +68 -0
  35. data/lib/newral/networks/layer.rb +28 -0
  36. data/lib/newral/networks/network.rb +146 -0
  37. data/lib/newral/networks/perceptron.rb +84 -0
  38. data/lib/newral/networks/sigmoid.rb +55 -0
  39. data/lib/newral/probability.rb +42 -0
  40. data/lib/newral/probability_set.rb +108 -0
  41. data/lib/newral/q_learning/base.rb +90 -0
  42. data/lib/newral/tools.rb +135 -0
  43. data/lib/newral/training/gradient_descent.rb +36 -0
  44. data/lib/newral/training/greedy.rb +36 -0
  45. data/lib/newral/training/hill_climbing.rb +77 -0
  46. data/lib/newral/training/linear_regression.rb +30 -0
  47. data/lib/newral/training/linear_regression_matrix.rb +32 -0
  48. metadata +147 -0
@@ -0,0 +1,45 @@
1
module Newral
  module Classifier
    # K-means clustering: repeatedly assigns every point to the cluster with
    # the nearest center and then lets the cluster set recompute its centers.
    class KMeansCluster
      # points:         array of points to cluster
      # cluster_labels: one label per cluster that should be found
      # max_iterations: stop after this many assign/update rounds at the latest
      def initialize( points, cluster_labels: [:a,:b], max_iterations: 20 )
        @points = points
        @cluster_labels = cluster_labels
        @max_iterations = max_iterations
      end

      # Runs the clustering and returns the Newral::Data::ClusterSet that was
      # built; its clusters are keyed by the given cluster labels.
      def process
        @cluster_set = Newral::Data::ClusterSet.new( cluster_labels: @cluster_labels )

        # seed each cluster center with a randomly sampled point
        seeds = @points.sample( @cluster_set.cluster_array.length )
        seeds.each_with_index do |seed, position|
          @cluster_set.cluster_array[ position ].center = seed
        end

        iteration = 0
        # keep going while at least one cluster reports a moved center
        while @cluster_set.cluster_array.any? { |cluster| cluster.moved == true } && iteration < @max_iterations
          # start every round with empty clusters
          @cluster_set.clusters.each_value do |cluster|
            cluster.points = []
          end

          # hand each point to the cluster whose center is closest
          @points.each do |point|
            nearest = 'none'
            shortest = 99**99
            @cluster_set.clusters.each_value do |cluster|
              distance = Newral::Tools::euclidian_distance( cluster.center, point )
              next unless distance < shortest

              nearest = cluster
              shortest = distance
            end
            nearest.add_point point
          end

          @cluster_set.update_centers
          iteration += 1
        end

        @cluster_set
      end
    end
  end
end
@@ -0,0 +1,58 @@
1
module Newral
  module Classifier
    # Node of a cluster hierarchy. A node is either built from a single point
    # (from_point: true) or from a list of sub nodes, in which case its center
    # is the mean of the sub nodes' centers.
    class Node
      attr_writer :parent_node
      attr_accessor :center
      attr_reader :sub_nodes

      # sub_nodes:  array of nodes, or the coordinates of one point when
      #             from_point is true
      # from_point: treat sub_nodes as the coordinates of a single point
      def initialize( sub_nodes, from_point: false )
        @parent_node = nil

        if from_point
          # a leaf keeps its point both as only sub node and as center
          @sub_nodes = [Vector.elements( sub_nodes )]
          @center = Vector.elements( sub_nodes )
          return
        end

        @sub_nodes = sub_nodes
        origin = Vector.elements( [0] * sub_nodes.first.center.size )
        total = sub_nodes.inject( origin ) { |sum, node| sum + node.center }
        @center = total / @sub_nodes.size.to_f
        @sub_nodes.each { |node| node.parent_node = self }
      end

      # String form: a node with a single sub node prints that sub node,
      # otherwise the sub nodes are listed inside "=>(...)".
      def to_s
        return @sub_nodes.first.to_s if @sub_nodes.size == 1

        "=>(#{ @sub_nodes.map( &:to_s ).join( ',' ) })"
      end

      # Collects the points below this node into one flat array.
      def flatten_points
        collected = @sub_nodes.map do |node|
          case node
          when Node
            # nodes with exactly one sub node contribute their center
            node.sub_nodes.size == 1 ? node.center : node.flatten_points
          else
            [node]
          end
        end
        collected.flatten
      end

      # Builds a Data::Cluster from all points below this node.
      def to_cluster
        Data::Cluster.new( points: flatten_points.map( &:to_a ) )
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Newral
  module Classifier
    # Holds a pair of nodes together with the distance between their centers,
    # so that pairs can be ordered by that distance.
    class NodeDistance
      attr_reader :node1, :node2, :distance

      # node1, node2: objects responding to #center
      def initialize( node1, node2 )
        @node1, @node2 = node1, node2
        @distance = Newral::Tools.euclidian_distance( @node1.center, @node2.center )
      end

      # Orders two pairs by their stored distance.
      def <=>( other )
        distance <=> other.distance
      end
    end
  end
end
@@ -0,0 +1,153 @@
1
module Newral
  module Data
    # Error classes raised by the data containers.
    module Errors
      class InputOutputSizeMismatch < ::StandardError; end
      class WrongPointDimension < ::StandardError; end
      class UnknownValue < ::StandardError; end
      class UnknownSet < ::StandardError; end
      class UnknownCategory < ::StandardError; end
      class DownSamplingImpossible < ::StandardError; end
    end

    # In-memory data set made of parallel arrays: inputs, outputs and labels.
    class Base
      attr_accessor :outputs, :labels, :inputs

      # inputs/outputs must have the same number of entries, otherwise
      # Errors::InputOutputSizeMismatch is raised. labels are not checked.
      def initialize( inputs: [], outputs: [], labels: [] )
        @inputs = inputs
        @outputs = outputs
        @labels = labels
        raise Errors::InputOutputSizeMismatch unless @inputs.size == @outputs.size
      end

      # Appends one entry to inputs, outputs and labels.
      def add_input( input, output: nil, label: nil )
        @labels << label
        @outputs << output
        @inputs << input
      end

      # Returns a slice of the inputs or outputs.
      #   set:      :inputs or :outputs (anything else raises Errors::UnknownSet)
      #   category: :all, :training, :validation or :testing
      #             (anything else raises Errors::UnknownCategory)
      # With n entries the slices are: training = index 0 up to and including
      # (n*0.7).to_i, validation = the following indices up to and including
      # (n*0.8).to_i, testing = everything after that.
      def sub_set( set: :inputs, category: :all )
        data = case set
               when :inputs then @inputs
               when :outputs then @outputs
               else
                 raise Errors::UnknownSet
               end

        case category
        when :all then data
        when :training then data[0..(data.size.to_f*0.7).to_i]
        when :validation then data[(data.size.to_f*0.7).to_i+1..(data.size.to_f*0.8).to_i]
        when :testing then data[(data.size.to_f*0.8).to_i+1, data.size]
        else
          raise Errors::UnknownCategory, category.to_s
        end
      end

      # Looks up searched_value in search_objects and returns the entries of
      # return_objects found at the matching positions. An entry matches when
      # it equals searched_value or [searched_value].
      # With only_first: true the first match is returned (nil if none),
      # otherwise an array of all matches.
      def values_for( searched_value, only_first: false, return_objects: [], search_objects: [] )
        results = []
        search_objects.each_with_index do |each_value, idx|
          next unless searched_value == each_value || [searched_value] == each_value
          return return_objects[idx] if only_first

          results << return_objects[idx]
        end
        results unless only_first
      end

      # First output stored for the given input (nil if unknown).
      def output_for_input( input )
        values_for input, search_objects: @inputs, return_objects: @outputs, only_first: true
      end

      # First label stored for the given input (nil if unknown).
      def label_for_input( input )
        values_for input, search_objects: @inputs, return_objects: @labels, only_first: true
      end

      # All inputs stored for the given output.
      def inputs_for_output( output )
        values_for output, search_objects: @outputs, return_objects: @inputs
      end

      # Min-max scales every input column into normalized_low..normalized_high.
      # Returns [] when there are no inputs or the inputs are not arrays.
      # A column in which all values are equal maps to normalized_low.
      def normalized_inputs( normalized_high: 1, normalized_low: -1 )
        return [] if @inputs.size == 0 || !@inputs.first.kind_of?( Array )

        columns = @inputs.first.size
        # Start from -/+ infinity. Float::MIN is the smallest POSITIVE float
        # and would be a wrong starting maximum for all-negative columns.
        max_values = [-Float::INFINITY] * columns
        min_values = [Float::INFINITY] * columns
        @inputs.each do |input|
          input.each_with_index do |value, idx|
            max_values[idx] = value.to_f if value > max_values[idx]
            min_values[idx] = value.to_f if value < min_values[idx]
          end
        end

        span = normalized_high - normalized_low
        @inputs.collect do |input|
          input.each_with_index.collect do |value, idx|
            range = max_values[idx] - min_values[idx]
            if range.zero?
              # constant column: avoid 0/0 (NaN)
              normalized_low.to_f
            else
              ( value - min_values[idx] ) / range * span + normalized_low
            end
          end
        end
      end

      # Maps every distinct output (in sorted order) to an evenly spaced value
      # between normalized_low and normalized_high. The mapping is also kept
      # in @output_hash.
      def output_hash( normalized_high: 1, normalized_low: -1 )
        keys = @outputs.uniq.sort
        steps = [keys.length - 1, 1].max
        new_hash = {}
        keys.each_with_index do |key, idx|
          new_hash[key] = normalized_low + ( ( normalized_high.to_f - normalized_low ) * idx ) / steps
        end
        @output_hash = new_hash
      end

      # Outputs translated through output_hash.
      def output_normalized
        hash = output_hash
        @outputs.collect { |k| hash[k] }
      end

      # this will make it easier to use outputs for neural networks
      # as it translates them to vectors like [1,0,0]
      # if you have 3 possible outputs this will return [1,0,0],[0,1,0],[0,0,1]
      def output_as_vector( category: :all )
        # compute the key list once instead of once per output
        keys = output_hash.keys
        sub_set( set: :outputs, category: category ).collect do |k|
          vector = [0] * keys.size
          vector[keys.index( k )] = 1
          vector
        end
      end

      # Counts how often each output occurs. Single-valued outputs are keyed
      # by a symbol, multi-valued outputs by their values joined with '-'.
      # Scalar outputs (e.g. Integer labels) are treated like one-element arrays.
      def count_outputs
        counts = {}
        @outputs.each do |output|
          values = Array( output )
          output_key = values.size == 1 ? values.first.to_s.to_sym : values.join( '-' )
          counts[output_key] = ( counts[output_key] || 0 ) + 1
        end
        counts
      end

      # Returns a new Base holding at most `limit` entries starting at `offset`.
      def sample( offset: 0, limit: 100 )
        # start/length slicing returns exactly `limit` entries; an inclusive
        # range offset..offset+limit would return one too many
        Base.new(
          inputs: @inputs[offset, limit] || [],
          outputs: @outputs[offset, limit] || [],
          labels: @labels[offset, limit] || []
        )
      end

      # Shrinks every input in place by replacing each block of width x height
      # values with 1 when the block's mean is above 0.5 and with 0 otherwise.
      #   width_of_line: number of values per line of the input (required)
      # Raises Errors::DownSamplingImpossible when the input size is not a
      # multiple of width*height, ArgumentError when width_of_line is missing.
      # NOTE(review): the position stepping presumably expects width to divide
      # width_of_line evenly — confirm with callers.
      def downsample_input!( height: 1, width: 1, width_of_line: nil )
        return @inputs if @inputs.empty?

        unless @inputs.first.size % ( width*height ) == 0
          raise Errors::DownSamplingImpossible,
                "input size #{ @inputs.first.size } is not a multiple of #{ width*height }"
        end
        raise ArgumentError, "width_of_line is required" if width_of_line.nil?

        @inputs.collect! do |input|
          downsampled = []
          pos = 0
          while pos < input.size
            matrix = []
            # gather the block: `width` values from each of `height` lines
            height.times do |h|
              start_pos = pos + ( width_of_line*h )
              end_pos = pos + width + ( width_of_line*h ) - 1
              matrix = matrix + input[start_pos..end_pos]
            end
            downsampled << ( matrix.inject( 0 ) { |sum, e| sum + e } / matrix.length.to_f > 0.5 ? 1 : 0 )
            pos = pos + width
            # at the end of a line skip the lines already consumed by the block
            pos = pos + width_of_line*( height - 1 ) if ( pos % width_of_line ) == 0
          end
          downsampled
        end
      end
    end
  end
end
@@ -0,0 +1,37 @@
1
module Newral
  module Data
    # A labelled group of points with a center.
    class Cluster
      attr_accessor :label, :points, :center, :moved

      # label:  identifier of the cluster
      # points: initial points (the given array is used as-is, not copied)
      # center: initial center, may be nil
      # moved:  whether the center counts as moved before the first update
      def initialize( label: nil, points: [], center: nil, moved: true )
        @label = label
        @points = points
        @point_size = points.size > 0 ? points.first.size : 1
        @moved = moved # did center move when updating it
        @center = center
      end

      # Adds a point and returns self. The first point defines the dimension
      # and, when no center is set yet, becomes the center. Later points of a
      # different dimension raise Errors::WrongPointDimension.
      def add_point( point )
        if @points.size == 0
          @point_size = point.size
          @center ||= point
        else
          # compare with the stored points themselves; a cached size would be
          # stale after `points=` replaced the array
          @point_size = @points.first.size
          unless point.size == @point_size
            raise Errors::WrongPointDimension, "expected #{ @point_size } dimensions, got #{ point.size }"
          end
        end
        @points << point
        self
      end

      # Moves the center to the mean of all points and records in `moved`
      # whether the center changed. A cluster without points keeps its center
      # and therefore counts as not moved.
      def update_center
        if @points.size == 0
          @moved = false
          return
        end

        new_center = Vector.elements( [0]*@points.first.size )
        @points.each do |point|
          new_center = new_center + Vector.elements( point )
        end
        new_center = ( new_center/@points.size.to_f ).to_a
        @moved = new_center != @center
        @center = new_center
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
module Newral
  module Data
    # Collection of clusters, kept in a hash keyed by cluster label.
    class ClusterSet
      attr_accessor :clusters

      # clusters:       existing clusters; each one is relabelled
      #                 "cluster_<position>" and stored under that label
      # cluster_labels: used when no clusters are given; one new Cluster is
      #                 created per label
      def initialize( cluster_labels: [], clusters: nil )
        @clusters = {}
        if clusters
          clusters.each_with_index do |cluster, position|
            cluster.label = "cluster_#{ position }"
            @clusters[cluster.label] = cluster
          end
        else
          cluster_labels.each do |label|
            @clusters[label] = Cluster.new( label: label )
          end
        end
      end

      # Looks a cluster up by label (String or Symbol) or by position.
      def []( label )
        case label
        when String, Symbol then @clusters[ label ]
        else cluster_array[ label ]
        end
      end

      # The clusters as an array, without their labels.
      def cluster_array
        @clusters.values
      end

      # Asks every cluster to recompute its center.
      def update_centers
        @clusters.each_value( &:update_center )
      end

      # Hash of label => number of points in that cluster.
      def clusters_count
        counts = {}
        @clusters.each_pair do |label, cluster|
          counts[label] = cluster.points.size
        end
        counts
      end
    end
  end
end
@@ -0,0 +1,23 @@
1
module Newral
  module Data
    # the library file is named "csv"; requiring "CSV" fails on
    # case-sensitive file systems
    require "csv"
    require "open-uri"

    # Loads a data set from a CSV file or URL. The last `output_fields`
    # columns of every row become the output, the columns before them the input.
    class Csv < Base
      # Fields that look like plain (optionally negative) decimal numbers are
      # converted to Float, everything else is kept as read.
      NUMERIC_FIELD = /\A-?\d*\.?\d+\z/

      # file_name:     path or http/https/ftp URL of the CSV data
      # output_fields: number of trailing columns that form the output
      def initialize( file_name: nil, output_fields: 1 )
        @file_name = file_name
        @output_fields = output_fields
        super( inputs: [], outputs: [] )
      end

      # Reads the source line by line and adds one input/output pair per row.
      # Blank lines are skipped.
      def process
        open_source do |file|
          file.each_line do |line|
            fields = CSV.parse_line( line )
            next if fields.nil? || fields.empty?

            # empty CSV fields are nil, hence to_s before matching
            row = fields.collect { |field| field.to_s =~ NUMERIC_FIELD ? field.to_f : field }
            # every column before the output columns belongs to the input
            split_at = row.size - @output_fields
            add_input( row.slice( 0, split_at ), output: row.slice( split_at, row.size ) )
          end
        end
      end

      private

      # Opens @file_name for reading and yields the IO. URLs go through
      # open-uri, everything else through File.open, so a file name can never
      # be interpreted as a "|command" the way Kernel#open would.
      def open_source( &block )
        if @file_name.to_s =~ %r{\A(?:https?|ftp)://}i
          # URI.open is the supported entry point on current Rubies; older
          # open-uri versions only patch Kernel#open
          URI.respond_to?( :open ) ? URI.open( @file_name, &block ) : open( @file_name, &block )
        else
          File.open( @file_name, &block )
        end
      end
    end
  end
end
@@ -0,0 +1,48 @@
1
module Newral
  module Data
    require "open-uri"

    module Errors
      class UnexpectedEOF < StandardError; end
      class EOFExpected < StandardError; end
      class LabelsNotMatchingItems < StandardError; end
    end

    # Loads images and their labels from a pair of IDX files.
    class Idx < Base
      # http://yann.lecun.com/exdb/mnist/
      # used for Handwritten Images
      #
      # file_name:       path or URL of the image file
      # label_file_name: path or URL of the matching label file
      def initialize( file_name: nil, label_file_name: nil )
        @file_name = file_name
        @label_file_name = label_file_name
        super( inputs: [], outputs: [] )
      end

      # Reads all images into inputs (one array of byte values per image) and
      # all labels into outputs.
      # Raises Errors::UnexpectedEOF when a file ends too early,
      # Errors::EOFExpected when data is left over and
      # Errors::LabelsNotMatchingItems when the label count differs from the
      # image count.
      def process
        number_of_items = read_images
        read_labels( number_of_items )
      end

      private

      # Reads the image file into @inputs and returns the number of images.
      def read_images
        open_binary( @file_name ) do |file|
          _magic, number_of_items = read_integers( file, 2 )
          # two dimension fields; only their product (values per image) is used
          dimension_a, dimension_b = read_integers( file, 2 )
          image_size = dimension_a * dimension_b
          number_of_items.times do
            raise Errors::UnexpectedEOF if file.eof?

            data = file.read( image_size )
            # a short read means the last image is truncated
            raise Errors::UnexpectedEOF if data.nil? || data.bytesize < image_size

            @inputs << data.unpack( "C*" )
          end
          raise Errors::EOFExpected unless file.eof?

          number_of_items
        end
      end

      # Reads the label file into @outputs.
      def read_labels( number_of_items )
        open_binary( @label_file_name ) do |file|
          _magic, number_of_labels = read_integers( file, 2 )
          raise Errors::LabelsNotMatchingItems unless number_of_labels == number_of_items

          number_of_items.times do
            raise Errors::UnexpectedEOF, "#{ @outputs.size } vs. #{ number_of_labels }" if file.eof?

            # unsigned byte, so values above 127 do not turn negative
            @outputs << file.read( 1 ).unpack( "C" ).first
          end
          raise Errors::EOFExpected, "#{ @outputs.size } #{file.read.size}" unless file.eof?
        end
      end

      # Reads `count` 32-bit big-endian unsigned integers.
      def read_integers( file, count )
        header = file.read( 4*count )
        raise Errors::UnexpectedEOF if header.nil? || header.bytesize < 4*count

        header.unpack( "N"*count )
      end

      # Opens a path or http/https/ftp URL in binary mode and yields the IO.
      # Local names go through File.open so that a name can never be
      # interpreted as a "|command" the way Kernel#open would.
      def open_binary( name, &block )
        if name.to_s =~ %r{\A(?:https?|ftp)://}i
          # URI.open is the supported entry point on current Rubies; older
          # open-uri versions only patch Kernel#open
          URI.respond_to?( :open ) ? URI.open( name, 'rb', &block ) : open( name, 'rb', &block )
        else
          File.open( name, 'rb', &block )
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
module Newral
  # Error measures comparing computed results with expected values.
  module ErrorCalculation
    class DimensionsNotMatching < StandardError ; end

    # Sum of the squared differences between results and expected.
    # Entries may be scalars or arrays; arrays are compared element by element.
    # Raises DimensionsNotMatching when both lists differ in length.
    def self.sum_of_squares( results, expected )
      unless expected.size == results.size
        raise DimensionsNotMatching, "results: #{ results.size } expected: #{ expected.size }"
      end

      results.each_with_index.inject( 0 ) do |total, ( result, position )|
        wanted = Array( expected[position] )
        Array( result ).each_with_index.inject( total ) do |subtotal, ( value, inner )|
          subtotal + ( value - wanted[inner] )**2
        end
      end
    end

    # Square root of the mean squared error (mean taken over results.size).
    def self.root_mean_square( results, expected )
      ( sum_of_squares( results, expected ).to_f / results.size )**0.5
    end

    # Sum of squares divided by results.size.
    def self.mean_square( results, expected )
      sum_of_squares( results, expected ).to_f / results.size
    end
  end
end