newral 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/Gemfile +4 -0
- data/LICENSE +21 -0
- data/README.md +278 -0
- data/Rakefile +10 -0
- data/lib/newral.rb +53 -0
- data/lib/newral/bayes.rb +39 -0
- data/lib/newral/classifier/dendogram.rb +68 -0
- data/lib/newral/classifier/k_means_cluster.rb +45 -0
- data/lib/newral/classifier/node.rb +58 -0
- data/lib/newral/classifier/node_distance.rb +19 -0
- data/lib/newral/data/base.rb +153 -0
- data/lib/newral/data/cluster.rb +37 -0
- data/lib/newral/data/cluster_set.rb +38 -0
- data/lib/newral/data/csv.rb +23 -0
- data/lib/newral/data/idx.rb +48 -0
- data/lib/newral/error_calculation.rb +28 -0
- data/lib/newral/functions/base.rb +102 -0
- data/lib/newral/functions/block.rb +34 -0
- data/lib/newral/functions/gaussian.rb +41 -0
- data/lib/newral/functions/line.rb +52 -0
- data/lib/newral/functions/polynomial.rb +48 -0
- data/lib/newral/functions/radial_basis_function_network.rb +54 -0
- data/lib/newral/functions/ricker_wavelet.rb +13 -0
- data/lib/newral/functions/vector.rb +59 -0
- data/lib/newral/genetic/tree.rb +70 -0
- data/lib/newral/graphs/a_star.rb +12 -0
- data/lib/newral/graphs/cheapest_first.rb +11 -0
- data/lib/newral/graphs/edge.rb +24 -0
- data/lib/newral/graphs/graph.rb +63 -0
- data/lib/newral/graphs/node.rb +11 -0
- data/lib/newral/graphs/path.rb +50 -0
- data/lib/newral/graphs/tree_search.rb +60 -0
- data/lib/newral/networks/backpropagation_network.rb +68 -0
- data/lib/newral/networks/layer.rb +28 -0
- data/lib/newral/networks/network.rb +146 -0
- data/lib/newral/networks/perceptron.rb +84 -0
- data/lib/newral/networks/sigmoid.rb +55 -0
- data/lib/newral/probability.rb +42 -0
- data/lib/newral/probability_set.rb +108 -0
- data/lib/newral/q_learning/base.rb +90 -0
- data/lib/newral/tools.rb +135 -0
- data/lib/newral/training/gradient_descent.rb +36 -0
- data/lib/newral/training/greedy.rb +36 -0
- data/lib/newral/training/hill_climbing.rb +77 -0
- data/lib/newral/training/linear_regression.rb +30 -0
- data/lib/newral/training/linear_regression_matrix.rb +32 -0
- metadata +147 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 281bfa9ba74cbd19d659513e73eae475f6f1ec7e
|
4
|
+
data.tar.gz: 99a0085f6bc6a1b6a38237dc330110950df04f19
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 63af6de490824297c3561b6e1c77f1681d099e17e38323fbd15080b4447a08abe8fd83c257a8e1c2936e08591260798895cf526af2288dda188ad12490b3e19d
|
7
|
+
data.tar.gz: 786ff86c34e25ad4bbbb4cfd322c0d40efa39308778ab18efd6641bb3126e62e3ad6af02ecc608bc2225fcbb66519d15645e8a603d744e7feb9110d61a8401f1
|
data/Gemfile
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2017 ExistsAndIs1
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
13
|
+
copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
21
|
+
SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,278 @@
|
|
1
|
+
# Newral
|
2
|
+
|
3
|
+
I recently started to learn about AI.
|
4
|
+
Of course there are great libraries out there but I wanted to have something that makes it easy to test the different concepts to really understand them.
|
5
|
+
I also wanted a playground to easily see how well different approaches work for different data sets.
|
6
|
+
I chose the name newral because it's for newbies trying out neural networks and other AI-related concepts.
|
7
|
+
|
8
|
+
In the implementation I tried to write as little code as possible and used classes trying to avoid "array index hell".
|
9
|
+
So the data structures are in no way tuned for efficiency, rather I tried to make clear what actually is going on.
|
10
|
+
For every concept there should be at least one test to show it in action.
|
11
|
+
|
12
|
+
|
13
|
+
Everything is still quite early stages but there are a lot of things you can do already
|
14
|
+
|
15
|
+
* Training Functions
|
16
|
+
* Hill Climbing
|
17
|
+
* Greedy
|
18
|
+
* Gradient Descent
|
19
|
+
|
20
|
+
* K-Means Clustering
|
21
|
+
* K-Nearest Neighbour
|
22
|
+
|
23
|
+
* Neural Networks
|
24
|
+
* Easily define simple ones often used in Tutorials
|
25
|
+
* Backpropagation
|
26
|
+
|
27
|
+
* Graphs
|
28
|
+
* Tree Search
|
29
|
+
* Cheapest First
|
30
|
+
* A Star
|
31
|
+
|
32
|
+
* Q-Learning
|
33
|
+
Teach the computer to play Tic-Tac-Toe (or other simple games)
|
34
|
+
|
35
|
+
I must say that this is really a total side project for me, so don't expect lots of updates or bugfixes.
|
36
|
+
Wherever I thought of it, I added links to the tutorials or websites I used (which will explain the theory much better than I ever could).
|
37
|
+
Please check out the tests where there are a few examples of possible use cases.
|
38
|
+
|
39
|
+
Stuff still in even earlier stages
|
40
|
+
* everything in genetic folder
|
41
|
+
* bayes / probability
|
42
|
+
|
43
|
+
|
44
|
+
So lets do some basic stuff
|
45
|
+
|
46
|
+
## Error Calculation
|
47
|
+
|
48
|
+
lets assume we have 3 calculated results by our function and 3 expected outputs
|
49
|
+
```ruby
|
50
|
+
current = [1,2,3]
|
51
|
+
expected = [2,4,6]
|
52
|
+
|
53
|
+
# so what's the error?
|
54
|
+
|
55
|
+
Newral::ErrorCalculation.mean_square( current, expected )
|
56
|
+
```
|
57
|
+
|
58
|
+
same thing for vectors
|
59
|
+
```ruby
|
60
|
+
current = [
|
61
|
+
[1,2,3],
|
62
|
+
[3,9,16]
|
63
|
+
|
64
|
+
]
|
65
|
+
|
66
|
+
expected = [
|
67
|
+
[2,4,6],
|
68
|
+
[4,8,9]
|
69
|
+
]
|
70
|
+
|
71
|
+
Newral::ErrorCalculation.mean_square( current, expected )
|
72
|
+
```
|
73
|
+
|
74
|
+
## Classifiers
|
75
|
+
|
76
|
+
```ruby
|
77
|
+
points = [
|
78
|
+
[1,1],[2,2],[4,4],
|
79
|
+
[10,9],[11,12],[13,7]
|
80
|
+
].shuffle
|
81
|
+
|
82
|
+
n= Newral::Classifier::KMeansCluster.new( points, cluster_labels:[:cows,:elefants] ).process
|
83
|
+
n.clusters[:elefants].points
|
84
|
+
n.clusters[:cows].points
|
85
|
+
|
86
|
+
n=Newral::Classifier::Dendogram.new( points ).process
|
87
|
+
n.to_s
|
88
|
+
```
|
89
|
+
|
90
|
+
## Neural Networks
|
91
|
+
|
92
|
+
### create some neurons
|
93
|
+
|
94
|
+
```ruby
|
95
|
+
perceptron = Newral::Networks::Perceptron.new(weights: [-2,-2],bias: 3) # look its a NAND gate
|
96
|
+
perceptron.update_with_vector [1,1]
|
97
|
+
|
98
|
+
sigmoid = Newral::Networks::Sigmoid.new(weights: [-2,-2],bias: 3) # sigmoids are much cooler
|
99
|
+
sigmoid.update_with_vector [1,1]
|
100
|
+
```
|
101
|
+
|
102
|
+
### create a basic network
|
103
|
+
```ruby
|
104
|
+
network = Newral::Networks::Network.define do
|
105
|
+
add_layer "input" do
|
106
|
+
add_neuron 'a', weights: [-2,-2],bias: 3, type: 'perceptron'
|
107
|
+
add_neuron 'b', weights: [-2,-2],bias: 3, type: 'perceptron'
|
108
|
+
end
|
109
|
+
add_layer "output" do
|
110
|
+
add_neuron 'c', weights: [-2,-2],bias: 3, type: 'perceptron'
|
111
|
+
end
|
112
|
+
|
113
|
+
connect from:'a', to:'c'
|
114
|
+
connect from:'b', to:'c'
|
115
|
+
end
|
116
|
+
|
117
|
+
network.update_with_vector [1,1]
|
118
|
+
```
|
119
|
+
|
120
|
+
### create a network and perform backpropagation
|
121
|
+
```ruby
|
122
|
+
inputs = [
|
123
|
+
[0.05,0.1]
|
124
|
+
]
|
125
|
+
outputs = [
|
126
|
+
[0.01,0.99]
|
127
|
+
]
|
128
|
+
network = Newral::Networks::BackpropagationNetwork.new( number_of_hidden: 2, number_of_outputs: 2)
|
129
|
+
network.set_weights_and_bias( layer: 'hidden', weights:[[0.15,0.2],[0.25,0.3]],bias:[0.35,0.35])
|
130
|
+
network.set_weights_and_bias( layer: 'output', weights:[[0.4,0.45],[0.5,0.55]], bias:[0.6,0.6])
|
131
|
+
network.calculate_error( input: inputs, output: outputs ) # stupid network
|
132
|
+
1000.times do
|
133
|
+
network.train input: inputs , output:outputs
|
134
|
+
end
|
135
|
+
|
136
|
+
network.calculate_error( input: inputs, output: outputs ) # look it learned
|
137
|
+
```
|
138
|
+
|
139
|
+
|
140
|
+
|
141
|
+
## Load some data
|
142
|
+
|
143
|
+
load the IRIS data set (Hello World of AI) located in test folder
|
144
|
+
```ruby
|
145
|
+
data = Newral::Data::Csv.new(file_name:File.expand_path('../test/fixtures/IRIS.csv',__FILE__))
|
146
|
+
data.process
|
147
|
+
cluster_set = Newral::Classifier::KMeansCluster.new( data.inputs, cluster_labels: data.output_hash.keys ).process
|
148
|
+
cluster_set.clusters.length # There are 3 different types
|
149
|
+
```
|
150
|
+
|
151
|
+
```ruby
|
152
|
+
data = Newral::Data::Csv.new(file_name:File.expand_path('../test/fixtures/IRIS.csv',__FILE__))
|
153
|
+
data.process
|
154
|
+
|
155
|
+
network = Newral::Networks::BackpropagationNetwork.new( number_of_inputs: data.inputs.first.size, number_of_hidden: data.inputs.first.size, number_of_outputs: data.output_hash.keys.size )
|
156
|
+
network.calculate_error( input: data.inputs, output: data.output_as_vector ) # using a network with random weights
|
157
|
+
100.times do
|
158
|
+
network.train( input: data.inputs, output: data.output_as_vector ) # Hard training is the key to success in any neural nets life
|
159
|
+
end
|
160
|
+
network.calculate_error( input: data.inputs, output: data.output_as_vector ) # hey it now knows flowers better than me!
|
161
|
+
```
|
162
|
+
|
163
|
+
Of course we don't want overfitting, so we should train and test on different data sets
|
164
|
+
|
165
|
+
```ruby
|
166
|
+
data = Newral::Data::Csv.new(file_name:File.expand_path('../test/fixtures/IRIS.csv',__FILE__))
|
167
|
+
data.process
|
168
|
+
|
169
|
+
network = Newral::Networks::BackpropagationNetwork.new( number_of_inputs: data.inputs.first.size, number_of_hidden: data.inputs.first.size, number_of_outputs: data.output_hash.keys.size )
|
170
|
+
network.calculate_error( input: data.sub_set(set: :inputs, category: :validation ), output: data.output_as_vector( category: :testing ) )
|
171
|
+
|
172
|
+
100.times do
|
173
|
+
network.train( input: data.sub_set(set: :inputs, category: :training ), output: data.output_as_vector( category: :training ) )
|
174
|
+
end
|
175
|
+
|
176
|
+
network.calculate_error( input: data.sub_set(set: :inputs, category: :validation ), output: data.output_as_vector( category: :testing ) )
|
177
|
+
|
178
|
+
```
|
179
|
+
|
180
|
+
here comes the heavy stuff for this little library, load the MNIST data set (60000 images with 28*28 pixels).
|
181
|
+
You can read more about MNIST http://yann.lecun.com/exdb/mnist/
|
182
|
+
```ruby
|
183
|
+
data = Newral::Data::Idx.new( file_name:'~/Downloads/train-images-idx3-ubyte', label_file_name:'~/Downloads/train-labels-idx1-ubyte')
|
184
|
+
data.process
|
185
|
+
|
186
|
+
sample_data = data.sample( limit:100 )
|
187
|
+
sample_data.downsample_input!( width:2,height:2,width_of_line:28 ) # create less resolution pictures
|
188
|
+
|
189
|
+
sample_data2 = data.sample( limit:100, offset:100 ) # a 2nd sample
|
190
|
+
sample_data2.downsample_input!( width:2,height:2,width_of_line:28 )
|
191
|
+
|
192
|
+
|
193
|
+
network = Newral::Networks::BackpropagationNetwork.new( number_of_inputs: sample_data.inputs.first.size, number_of_hidden: sample_data.inputs.first.size, number_of_outputs: sample_data.output_hash.keys.size )
|
194
|
+
|
195
|
+
# let's compare the error of a random network vs. a trained one
|
196
|
+
network.calculate_error( input: sample_data2.inputs, output: sample_data2.output_as_vector )
|
197
|
+
|
198
|
+
# use first sample to train
|
199
|
+
network.train( input: sample_data.inputs, output: sample_data.output_as_vector )
|
200
|
+
|
201
|
+
# now calculate the error of untrained sample
|
202
|
+
# it should still go down
|
203
|
+
network.calculate_error( input: sample_data2.inputs, output: sample_data2.output_as_vector )
|
204
|
+
|
205
|
+
```
|
206
|
+
|
207
|
+
|
208
|
+
## use a tree Search to find the fastest path from Arad to Bucharest
|
209
|
+
```ruby
|
210
|
+
edges,nodes,node_locations = setup_bulgarian_map # find this in the test folder
|
211
|
+
g = Newral::Graphs::Graph.new
|
212
|
+
g.add_nodes nodes
|
213
|
+
g.add_edges edges
|
214
|
+
t=Newral::Graphs::CheapestFirst.new( graph: g, start_node: 'Arad', end_node:'Bucharest')
|
215
|
+
path = t.run
|
216
|
+
path.cost
|
217
|
+
```
|
218
|
+
|
219
|
+
## Use QLearning to play Tic Tac Toe
|
220
|
+
|
221
|
+
as we know good players will always reach a draw
|
222
|
+
|
223
|
+
```ruby
|
224
|
+
require './test/games/tic_tac_toe_game'
|
225
|
+
|
226
|
+
game = TicTacToeGame.new # ( width: 8, height: 6, in_a_row: 4 )
|
227
|
+
player1 = Newral::QLearning::Base.new( game: game, id: 0 )
|
228
|
+
player2 = Newral::QLearning::Base.new( game: game, id: 1 )
|
229
|
+
# training
|
230
|
+
1000.times do
|
231
|
+
game.run
|
232
|
+
game.reset
|
233
|
+
end
|
234
|
+
game.reset( reset_score: 1 )
|
235
|
+
player1.set_epsilon 1 # stop doing random moves, we know the game
|
236
|
+
player2.set_epsilon 1
|
237
|
+
|
238
|
+
game.run # => its a draw
|
239
|
+
```
|
240
|
+
|
241
|
+
## Use Training Algorithms to best approximate data with a function
|
242
|
+
Many typical functions suited for such approximations are already there
|
243
|
+
```ruby
|
244
|
+
f= Newral::Functions::Vector.new vector: [1,6], bias:1
|
245
|
+
f.calculate [4,7] # 4*1+6*7+1 => 47
|
246
|
+
|
247
|
+
|
248
|
+
f = Newral::Functions::Polynomial.new factors: [2,5,1]
|
249
|
+
f.calculate 2 # 2*(2**2)+5*2+1 => 19
|
250
|
+
```
|
251
|
+
|
252
|
+
First, let's use a basic polynomial function
|
253
|
+
```ruby
|
254
|
+
input = [2,4,8]
|
255
|
+
output = [4,16,64] # best function is x**2, let's see if our training algorithms find it
|
256
|
+
g=Newral::Training::Greedy.new( input: input, output: output, klass: Newral::Functions::Polynomial )
|
257
|
+
g.process
|
258
|
+
g.best_function.calculate_error( input: input, output: output )
|
259
|
+
|
260
|
+
h=Newral::Training::HillClimbing.new( input: input, output: output, klass: Newral::Functions::Polynomial, start_function: g.best_function )
|
261
|
+
h.process
|
262
|
+
h.best_function.calculate_error( input: input, output: output )
|
263
|
+
|
264
|
+
# Gradient descent with error gradient approximation function
|
265
|
+
d=Newral::Training::GradientDescent.new( input: input, output: output, klass: Newral::Functions::Polynomial )
|
266
|
+
d.process
|
267
|
+
d.best_function.calculate_error( input: input, output: output )
|
268
|
+
```
|
269
|
+
|
270
|
+
now lets use a Vector
|
271
|
+
```ruby
|
272
|
+
|
273
|
+
input = [[1,2],[2,4]]
|
274
|
+
output=[3,7]
|
275
|
+
g=Newral::Training::GradientDescent.new( input: input, output: output, klass: Newral::Functions::Vector )
|
276
|
+
g.process
|
277
|
+
g.best_function.calculate_error( input: input, output: output )
|
278
|
+
```
|
data/Rakefile
ADDED
data/lib/newral.rb
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
module Newral
|
2
|
+
require "matrix"
|
3
|
+
require "nmatrix"
|
4
|
+
require "newral/tools"
|
5
|
+
|
6
|
+
require "newral/data/base"
|
7
|
+
require "newral/data/csv"
|
8
|
+
require "newral/data/idx"
|
9
|
+
require "newral/data/cluster"
|
10
|
+
require "newral/data/cluster_set"
|
11
|
+
|
12
|
+
require "newral/error_calculation"
|
13
|
+
|
14
|
+
require "newral/networks/perceptron"
|
15
|
+
require "newral/networks/sigmoid"
|
16
|
+
require "newral/networks/layer"
|
17
|
+
require "newral/networks/network"
|
18
|
+
require "newral/networks/backpropagation_network"
|
19
|
+
|
20
|
+
# require "newral/probability"
|
21
|
+
require "newral/probability_set"
|
22
|
+
# require "newral/bayes"
|
23
|
+
|
24
|
+
require "newral/classifier/node"
|
25
|
+
require "newral/classifier/node_distance"
|
26
|
+
require "newral/classifier/dendogram"
|
27
|
+
require "newral/classifier/k_means_cluster"
|
28
|
+
|
29
|
+
require "newral/functions/base"
|
30
|
+
require "newral/functions/line"
|
31
|
+
require "newral/functions/vector"
|
32
|
+
require "newral/functions/block"
|
33
|
+
require "newral/functions/polynomial"
|
34
|
+
require "newral/functions/gaussian"
|
35
|
+
require "newral/functions/ricker_wavelet"
|
36
|
+
require "newral/functions/radial_basis_function_network"
|
37
|
+
|
38
|
+
require "newral/training/greedy"
|
39
|
+
require "newral/training/hill_climbing"
|
40
|
+
require "newral/training/linear_regression"
|
41
|
+
require "newral/training/linear_regression_matrix"
|
42
|
+
require "newral/training/gradient_descent"
|
43
|
+
require "newral/q_learning/base"
|
44
|
+
|
45
|
+
require "newral/genetic/tree"
|
46
|
+
require "newral/graphs/node"
|
47
|
+
require "newral/graphs/edge"
|
48
|
+
require "newral/graphs/graph"
|
49
|
+
require "newral/graphs/path"
|
50
|
+
require "newral/graphs/tree_search"
|
51
|
+
require "newral/graphs/cheapest_first"
|
52
|
+
require "newral/graphs/a_star"
|
53
|
+
end
|
data/lib/newral/bayes.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
module Newral
  # Keeps a table of probabilities addressed by string keys and tries to derive
  # missing entries from the ones already stored.
  #
  # Key conventions, as used by #compute:
  # * "A"   - a plain event
  # * "!A"  - the negation of "A"
  # * "A|B" - "A" given "B"
  #
  # NOTE(review): the values are expected to be Newral::Probability objects
  # (built in #add_probability). That class is not visible here; this code
  # relies on it responding to `key`, `*`, `/`, `+` and, presumably, an
  # overridden `!` that yields the complement - confirm against probability.rb.
  class Bayes
    attr_reader :theorem, :probabilities

    # @param theorem stored unchanged and exposed through #theorem
    def initialize(theorem)
      @theorem = theorem
      @probabilities = {}
    end

    # Wraps the arguments in a Probability and stores it under that object's key.
    #
    # @param key         passed through to Probability.new
    # @param probability passed through to Probability.new
    # @param apriori     passed through to Probability.new (defaults to nil)
    # @return the newly created Probability
    def add_probability(key, probability, apriori: nil)
      probability = Probability.new(key, probability, apriori: apriori)
      @probabilities[probability.key] = probability
    end

    # Looks up or derives the probability registered for +key+.
    #
    # Resolution order:
    # 1. a stored entry for +key+;
    # 2. for "!X", the negation of the stored entry for "X";
    # 3. for "X|Y", `compute("Y|X") * compute("X") / compute("Y")`;
    # 4. otherwise, the first stored "key|Y" entry (where Y contains no "!")
    #    is used to evaluate
    #    `compute("key|Y") * compute("Y") + compute("key|!Y") * compute("!Y")`.
    #
    # A derived result is cached under its own `key`.
    #
    # @param key [String]
    # @return the resolved probability, or nil when nothing could be resolved
    #   (a "not found" line is printed in that case). Previously this path
    #   raised NoMethodError because `key` was called on nil.
    def compute(key)
      probability =
        if @probabilities[key]
          @probabilities[key]
        elsif key.start_with?("!") && @probabilities[key.sub("!", "")]
          !@probabilities[key.sub("!", '')]
        elsif key.include?("|")
          # From here on `key` is the event and `apriori` the condition.
          key, apriori = key.split("|")
          compute("#{apriori}|#{key}") * compute(key) / compute(apriori)
        else
          apriori = @probabilities.keys.find do |candidate|
            candidate.split("|")[0] == key && !candidate.split("|")[1].match('!')
          end
          if apriori
            apriori = apriori.split("|")[1]
            compute("#{key}|#{apriori}") * compute(apriori) +
              compute("#{key}|!#{apriori}") * compute("!#{apriori}")
          else
            puts "not found #{key}"
          end
        end

      # Nothing resolved: do not try to cache, just report the miss to the caller.
      return unless probability

      @probabilities[probability.key] = probability
      probability
    end
  end
end
|
@@ -0,0 +1,68 @@
|
|
1
|
+
module Newral
  module Classifier
    # Bottom-up clustering: every point starts as its own node, and the two
    # closest nodes are repeatedly merged into a new parent node.
    #
    # Collaborators (defined elsewhere in the library, not visible here):
    # * Node         - built from a point (`from_point: true`) or from an array
    #                  of two nodes; must accept `parent_node=` and respond to
    #                  `to_s` / `to_cluster`.
    # * NodeDistance - built from two nodes; must expose `node1`, `node2`,
    #                  `distance` and be sortable.
    class Dendogram
      attr_reader :nodes, :max_distance, :distances

      # @param points            collection of points; each becomes one Node
      # @param max_runs          upper bound on the number of merges in #process
      # @param abort_at_distance merging stops once the smallest remaining
      #   distance, divided by the largest initial distance, reaches this value
      def initialize(points, max_runs: 100, abort_at_distance: 0.5)
        @distances = []
        @abort_at_distance = abort_at_distance
        @max_runs = max_runs
        @nodes = points.map { |point| Node.new(point, from_point: true) }
      end

      # Runs the merge loop and returns self so calls can be chained.
      #
      # With fewer than two nodes there is nothing to merge; the method then
      # returns immediately and #max_distance stays nil (this used to raise
      # NoMethodError). The distance list is rebuilt on every call, so running
      # #process more than once no longer piles up duplicate entries.
      #
      # @return [Dendogram] self
      def process
        @distances = []
        @nodes.each { |node| calculate_distances(node) }
        return self if @distances.empty?

        @distances.sort!
        @max_distance = @distances.last.distance

        runs = 0
        # `size > 2`: the initial list holds each pair in both directions, so
        # two remaining entries correspond to a single remaining pair.
        while @distances.size > 2 &&
              @distances.first.distance / @max_distance < @abort_at_distance &&
              runs < @max_runs
          closest = @distances.first
          combine_nodes(closest.node1, closest.node2)
          runs += 1
        end
        self
      end

      # Appends the distance from +node+ to every other current node.
      def calculate_distances(node)
        @nodes.each do |other_node|
          @distances << NodeDistance.new(node, other_node) unless node == other_node
        end
      end

      # Replaces +node1+ and +node2+ by a new parent node and refreshes the
      # sorted distance list accordingly.
      def combine_nodes(node1, node2)
        new_node = Node.new([node1, node2])
        node1.parent_node = new_node
        node2.parent_node = new_node

        # Drop the merged nodes and every distance that refers to either one.
        @nodes = @nodes.reject { |node| node == node1 || node == node2 }
        @distances = @distances.reject do |distance|
          distance.node1 == node1 || distance.node1 == node2 ||
            distance.node2 == node2 || distance.node2 == node1
        end

        @nodes << new_node
        calculate_distances(new_node)
        @distances.sort!
      end

      # @return [String] the string forms of the current nodes joined by " / "
      def to_s
        @nodes.map(&:to_s).join(" / ")
      end

      # @return [Data::ClusterSet] built from each node's `to_cluster`
      def to_cluster_set
        clusters = @nodes.map(&:to_cluster)
        Data::ClusterSet.new(clusters: clusters)
      end
    end
  end
end
|