ai4r 1.13 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +174 -0
- data/examples/classifiers/hyperpipes_data.csv +14 -0
- data/examples/classifiers/hyperpipes_example.rb +22 -0
- data/examples/classifiers/ib1_example.rb +12 -0
- data/examples/classifiers/id3_example.rb +15 -10
- data/examples/classifiers/id3_graphviz_example.rb +17 -0
- data/examples/classifiers/logistic_regression_example.rb +11 -0
- data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
- data/examples/classifiers/naive_bayes_example.rb +12 -13
- data/examples/classifiers/one_r_example.rb +27 -0
- data/examples/classifiers/parameter_tutorial.rb +29 -0
- data/examples/classifiers/prism_nominal_example.rb +15 -0
- data/examples/classifiers/prism_numeric_example.rb +21 -0
- data/examples/classifiers/simple_linear_regression_example.rb +14 -11
- data/examples/classifiers/zero_and_one_r_example.rb +34 -0
- data/examples/classifiers/zero_one_r_data.csv +8 -0
- data/examples/clusterers/clusterer_example.rb +40 -34
- data/examples/clusterers/dbscan_example.rb +17 -0
- data/examples/clusterers/dendrogram_example.rb +17 -0
- data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
- data/examples/clusterers/kmeans_custom_example.rb +26 -0
- data/examples/genetic_algorithm/bitstring_example.rb +41 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
- data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
- data/examples/neural_network/backpropagation_example.rb +48 -48
- data/examples/neural_network/hopfield_example.rb +45 -0
- data/examples/neural_network/patterns_with_base_noise.rb +39 -39
- data/examples/neural_network/patterns_with_noise.rb +41 -39
- data/examples/neural_network/train_epochs_callback.rb +25 -0
- data/examples/neural_network/training_patterns.rb +39 -39
- data/examples/neural_network/transformer_text_classification.rb +78 -0
- data/examples/neural_network/xor_example.rb +23 -22
- data/examples/reinforcement/q_learning_example.rb +10 -0
- data/examples/som/som_data.rb +155 -152
- data/examples/som/som_multi_node_example.rb +12 -13
- data/examples/som/som_single_example.rb +12 -15
- data/examples/transformer/decode_classifier_example.rb +68 -0
- data/examples/transformer/deterministic_example.rb +10 -0
- data/examples/transformer/seq2seq_example.rb +16 -0
- data/lib/ai4r/classifiers/classifier.rb +24 -16
- data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
- data/lib/ai4r/classifiers/ib1.rb +122 -32
- data/lib/ai4r/classifiers/id3.rb +524 -145
- data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
- data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
- data/lib/ai4r/classifiers/one_r.rb +112 -44
- data/lib/ai4r/classifiers/prism.rb +167 -76
- data/lib/ai4r/classifiers/random_forest.rb +72 -0
- data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
- data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
- data/lib/ai4r/classifiers/votes.rb +57 -0
- data/lib/ai4r/classifiers/zero_r.rb +71 -30
- data/lib/ai4r/clusterers/average_linkage.rb +46 -27
- data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
- data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
- data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
- data/lib/ai4r/clusterers/clusterer.rb +29 -14
- data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
- data/lib/ai4r/clusterers/dbscan.rb +134 -0
- data/lib/ai4r/clusterers/diana.rb +75 -49
- data/lib/ai4r/clusterers/k_means.rb +270 -135
- data/lib/ai4r/clusterers/median_linkage.rb +49 -33
- data/lib/ai4r/clusterers/single_linkage.rb +196 -88
- data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
- data/lib/ai4r/data/data_set.rb +223 -103
- data/lib/ai4r/data/parameterizable.rb +31 -25
- data/lib/ai4r/data/proximity.rb +62 -62
- data/lib/ai4r/data/statistics.rb +46 -35
- data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
- data/lib/ai4r/experiment/split.rb +39 -0
- data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
- data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
- data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
- data/lib/ai4r/neural_network/activation_functions.rb +37 -0
- data/lib/ai4r/neural_network/backpropagation.rb +399 -134
- data/lib/ai4r/neural_network/hopfield.rb +175 -58
- data/lib/ai4r/neural_network/transformer.rb +194 -0
- data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
- data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
- data/lib/ai4r/reinforcement/q_learning.rb +51 -0
- data/lib/ai4r/search/a_star.rb +76 -0
- data/lib/ai4r/search/bfs.rb +50 -0
- data/lib/ai4r/search/dfs.rb +50 -0
- data/lib/ai4r/search/mcts.rb +118 -0
- data/lib/ai4r/search.rb +12 -0
- data/lib/ai4r/som/distance_metrics.rb +29 -0
- data/lib/ai4r/som/layer.rb +28 -17
- data/lib/ai4r/som/node.rb +61 -32
- data/lib/ai4r/som/som.rb +158 -41
- data/lib/ai4r/som/two_phase_layer.rb +21 -25
- data/lib/ai4r/version.rb +3 -0
- data/lib/ai4r.rb +57 -28
- metadata +79 -109
- data/README.rdoc +0 -39
- data/test/classifiers/hyperpipes_test.rb +0 -84
- data/test/classifiers/ib1_test.rb +0 -78
- data/test/classifiers/id3_test.rb +0 -220
- data/test/classifiers/multilayer_perceptron_test.rb +0 -79
- data/test/classifiers/naive_bayes_test.rb +0 -43
- data/test/classifiers/one_r_test.rb +0 -62
- data/test/classifiers/prism_test.rb +0 -85
- data/test/classifiers/simple_linear_regression_test.rb +0 -37
- data/test/classifiers/zero_r_test.rb +0 -50
- data/test/clusterers/average_linkage_test.rb +0 -51
- data/test/clusterers/bisecting_k_means_test.rb +0 -66
- data/test/clusterers/centroid_linkage_test.rb +0 -53
- data/test/clusterers/complete_linkage_test.rb +0 -57
- data/test/clusterers/diana_test.rb +0 -69
- data/test/clusterers/k_means_test.rb +0 -167
- data/test/clusterers/median_linkage_test.rb +0 -53
- data/test/clusterers/single_linkage_test.rb +0 -122
- data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
- data/test/clusterers/ward_linkage_test.rb +0 -53
- data/test/clusterers/weighted_average_linkage_test.rb +0 -53
- data/test/data/data_set_test.rb +0 -104
- data/test/data/proximity_test.rb +0 -87
- data/test/data/statistics_test.rb +0 -65
- data/test/experiment/classifier_evaluator_test.rb +0 -76
- data/test/genetic_algorithm/chromosome_test.rb +0 -57
- data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
- data/test/neural_network/backpropagation_test.rb +0 -82
- data/test/neural_network/hopfield_test.rb +0 -72
- data/test/som/som_test.rb +0 -97
@@ -1,167 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://www.ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/k_means'
|
12
|
-
|
13
|
-
class KMeansTest < Test::Unit::TestCase
|
14
|
-
|
15
|
-
include Ai4r::Clusterers
|
16
|
-
include Ai4r::Data
|
17
|
-
|
18
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
19
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
20
|
-
|
21
|
-
# k-means will generate an empty cluster with this data and initial centroid assignment
|
22
|
-
@@empty_cluster_data = [[-0.1, 0], [0, 0], [0.1, 0], [-0.1, 10], [0.1, 10], [0.2, 10]]
|
23
|
-
@@empty_centroid_indices = [0,1,2]
|
24
|
-
|
25
|
-
def test_build
|
26
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
27
|
-
clusterer = KMeans.new.build(data_set, 4)
|
28
|
-
#draw_map(clusterer)
|
29
|
-
# Verify that all 4 clusters are created
|
30
|
-
assert_equal 4, clusterer.clusters.length
|
31
|
-
assert_equal 4, clusterer.centroids.length
|
32
|
-
# The addition of all instances of every cluster must be equal to
|
33
|
-
# the number of data points
|
34
|
-
total_length = 0
|
35
|
-
clusterer.clusters.each do |cluster|
|
36
|
-
total_length += cluster.data_items.length
|
37
|
-
end
|
38
|
-
assert_equal @@data.length, total_length
|
39
|
-
# Data inside clusters must be the same as original data
|
40
|
-
clusterer.clusters.each do |cluster|
|
41
|
-
cluster.data_items.each do |data_item|
|
42
|
-
assert @@data.include?(data_item)
|
43
|
-
end
|
44
|
-
end
|
45
|
-
end
|
46
|
-
|
47
|
-
def test_build_and_eliminate_empty_clusters
|
48
|
-
data_set = DataSet.new(:data_items => @@empty_cluster_data, :data_labels => ["X", "Y"])
|
49
|
-
# :eliminate is the :on_empty default, so we don't need to pass it as a parameter for it
|
50
|
-
clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices}).build(data_set, @@empty_centroid_indices.size)
|
51
|
-
|
52
|
-
# Verify that one cluster was eliminated
|
53
|
-
assert_equal @@empty_centroid_indices.size - 1, clusterer.clusters.length
|
54
|
-
assert_equal @@empty_centroid_indices.size - 1, clusterer.centroids.length
|
55
|
-
|
56
|
-
# The addition of all instances of every cluster must be equal to
|
57
|
-
# the number of data points
|
58
|
-
total_length = 0
|
59
|
-
clusterer.clusters.each do |cluster|
|
60
|
-
total_length += cluster.data_items.length
|
61
|
-
end
|
62
|
-
assert_equal @@empty_cluster_data.length, total_length
|
63
|
-
# Data inside clusters must be the same as original data
|
64
|
-
clusterer.clusters.each do |cluster|
|
65
|
-
cluster.data_items.each do |data_item|
|
66
|
-
assert @@empty_cluster_data.include?(data_item)
|
67
|
-
end
|
68
|
-
end
|
69
|
-
end
|
70
|
-
|
71
|
-
def test_eval
|
72
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
73
|
-
clusterer = KMeans.new.build(data_set, 4)
|
74
|
-
item = [10,0]
|
75
|
-
cluster_index = clusterer.eval(item)
|
76
|
-
# Must return a valid cluster index [0-3]
|
77
|
-
assert cluster_index >= 0 && cluster_index < 4
|
78
|
-
# Distance to cluster centroid must be less than distance to any other
|
79
|
-
# centroid
|
80
|
-
min_distance = clusterer.distance(clusterer.centroids[cluster_index], item)
|
81
|
-
clusterer.centroids.each do |centroid|
|
82
|
-
assert clusterer.distance(centroid, item) >= min_distance
|
83
|
-
end
|
84
|
-
end
|
85
|
-
|
86
|
-
def test_distance
|
87
|
-
clusterer = KMeans.new
|
88
|
-
# By default, distance returns the euclidean distance to the power of 2
|
89
|
-
assert_equal 2385, clusterer.distance(
|
90
|
-
[1, 10, "Chicago", 2],
|
91
|
-
[10, 10, "London", 50])
|
92
|
-
|
93
|
-
# Ensure default distance raises error for nil argument
|
94
|
-
exception = assert_raise(TypeError) {clusterer.distance([1, 10], [nil, nil])}
|
95
|
-
assert_equal("nil can't be coerced into Fixnum", exception.message)
|
96
|
-
|
97
|
-
# Test new distance definition
|
98
|
-
manhattan_distance = lambda do |a, b|
|
99
|
-
dist = 0.0
|
100
|
-
a.each_index do |index|
|
101
|
-
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
102
|
-
dist = dist + (a[index]-b[index]).abs
|
103
|
-
end
|
104
|
-
end
|
105
|
-
dist
|
106
|
-
end
|
107
|
-
clusterer.set_parameters({:distance_function => manhattan_distance})
|
108
|
-
assert_equal 57, clusterer.distance(
|
109
|
-
[1, 10, "Chicago", 2],
|
110
|
-
[10, 10, "London", 50])
|
111
|
-
end
|
112
|
-
|
113
|
-
def test_max_iterations
|
114
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
115
|
-
clusterer = KMeans.new.
|
116
|
-
set_parameters({:max_iterations=>1}).
|
117
|
-
build(data_set, 4)
|
118
|
-
assert_equal 1, clusterer.iterations
|
119
|
-
end
|
120
|
-
|
121
|
-
def test_centroid_indices
|
122
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
123
|
-
# centroid_indices need not be specified:
|
124
|
-
KMeans.new.build(data_set, 4)
|
125
|
-
# centroid_indices can be specified:
|
126
|
-
KMeans.new.set_parameters({:centroid_indices=>[0,1,2,3]}).build(data_set, 4)
|
127
|
-
# raises exception if number of clusters differs from length of centroid_indices:
|
128
|
-
exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>[0,1,2,3]}).build(data_set, 2)}
|
129
|
-
assert_equal('Length of centroid indices array differs from the specified number of clusters', exception.message)
|
130
|
-
# raises exception for bad centroid index:
|
131
|
-
exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>[0,1,2,@@data.size+10]}).build(data_set, 4)}
|
132
|
-
assert_equal("Invalid centroid index #{@@data.size+10}", exception.message)
|
133
|
-
end
|
134
|
-
|
135
|
-
def test_on_empty
|
136
|
-
data_set = DataSet.new(:data_items => @@empty_cluster_data, :data_labels => ["X", "Y"])
|
137
|
-
clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices}).build(data_set, @@empty_centroid_indices.size)
|
138
|
-
# Verify that one cluster was eliminated
|
139
|
-
assert_equal @@empty_centroid_indices.size - 1, clusterer.clusters.length
|
140
|
-
# Verify that eliminate is the on_empty default
|
141
|
-
assert_equal 'eliminate', clusterer.on_empty
|
142
|
-
# Verify that invalid on_empty option throws an argument error
|
143
|
-
exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'ldkfje'}).build(data_set, @@empty_centroid_indices.size)}
|
144
|
-
assert_equal("Invalid value for on_empty", exception.message)
|
145
|
-
# Verify that on_empty option 'terminate' raises an error when an empty cluster arises
|
146
|
-
exception = assert_raise(TypeError) {KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'terminate'}).build(data_set, @@empty_centroid_indices.size)}
|
147
|
-
assert_equal("nil can't be coerced into Float", exception.message)
|
148
|
-
clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'random'}).build(data_set, @@empty_centroid_indices.size)
|
149
|
-
# Verify that cluster was not eliminated
|
150
|
-
assert_equal @@empty_centroid_indices.size, clusterer.clusters.length
|
151
|
-
clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'outlier'}).build(data_set, @@empty_centroid_indices.size)
|
152
|
-
# Verify that cluster was not eliminated
|
153
|
-
assert_equal @@empty_centroid_indices.size, clusterer.clusters.length
|
154
|
-
end
|
155
|
-
|
156
|
-
private
|
157
|
-
def draw_map(clusterer)
|
158
|
-
map = Array.new(11) {Array.new(11, 0)}
|
159
|
-
clusterer.clusters.each_index do |i|
|
160
|
-
clusterer.clusters[i].data_items.each do |point|
|
161
|
-
map[point.first][point.last]=(i+1)
|
162
|
-
end
|
163
|
-
end
|
164
|
-
map.each { |row| puts row.inspect}
|
165
|
-
end
|
166
|
-
end
|
167
|
-
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/median_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::MedianLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::MedianLinkage.send(:public,
|
40
|
-
*Ai4r::Clusterers::MedianLinkage.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::MedianLinkage.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_equal 92.25, clusterer.linkage_distance(0,1,2)
|
49
|
-
assert_equal 15.25, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
@@ -1,122 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/single_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::SingleLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
|
15
|
-
end
|
16
|
-
|
17
|
-
class SingleLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
SingleLinkage.send(:public, *SingleLinkage.protected_instance_methods)
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_build
|
43
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
44
|
-
clusterer.build(DataSet.new(:data_items => @@data), 4)
|
45
|
-
#draw_map(clusterer)
|
46
|
-
assert_equal 4, clusterer.clusters.length
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_eval
|
50
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
51
|
-
clusterer.build(DataSet.new(:data_items => @@data), 4)
|
52
|
-
assert_equal 2, clusterer.eval([0,8])
|
53
|
-
assert_equal 0, clusterer.eval([8,0])
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_create_distance_matrix
|
57
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
58
|
-
clusterer.create_distance_matrix(DataSet.new(:data_items => @@data))
|
59
|
-
assert clusterer.distance_matrix
|
60
|
-
clusterer.distance_matrix.each_with_index do |row, row_index|
|
61
|
-
assert_equal row_index+1, row.length
|
62
|
-
end
|
63
|
-
assert_equal @@expected_distance_matrix, clusterer.distance_matrix
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_read_distance_matrix
|
67
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
68
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
69
|
-
assert_equal 9.0, clusterer.read_distance_matrix(3, 2)
|
70
|
-
assert_equal 9.0, clusterer.read_distance_matrix(2, 3)
|
71
|
-
assert_equal 0, clusterer.read_distance_matrix(5, 5)
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_linkage_distance
|
75
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
76
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
77
|
-
assert_equal 89, clusterer.linkage_distance(0,1,2)
|
78
|
-
assert_equal 1, clusterer.linkage_distance(4,2,5)
|
79
|
-
end
|
80
|
-
|
81
|
-
def test_get_closest_clusters
|
82
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
83
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
84
|
-
assert_equal [1,0], clusterer.get_closest_clusters([[0,1], [3,4]])
|
85
|
-
assert_equal [2,1], clusterer.get_closest_clusters([[3,4], [0,1], [5,6]])
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_create_initial_index_clusters
|
89
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
90
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
91
|
-
index_clusters = clusterer.create_initial_index_clusters
|
92
|
-
assert_equal @@data.length, index_clusters.length
|
93
|
-
assert_equal 0, index_clusters.first.first
|
94
|
-
assert_equal @@data.length-1, index_clusters.last.first
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_merge_clusters
|
98
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
99
|
-
clusters = clusterer.merge_clusters(1,2, [[1,2],[3,4],[5,6]])
|
100
|
-
assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
|
101
|
-
clusters = clusterer.merge_clusters(2,1, [[1,2],[3,4],[5,6]])
|
102
|
-
assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
|
103
|
-
end
|
104
|
-
|
105
|
-
def test_distance_between_item_and_cluster
|
106
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
107
|
-
assert_equal 8.0, clusterer.distance_between_item_and_cluster([1,2],
|
108
|
-
DataSet.new(:data_items => [[3,4],[5,6]]))
|
109
|
-
end
|
110
|
-
|
111
|
-
private
|
112
|
-
def draw_map(clusterer)
|
113
|
-
map = Array.new(11) {Array.new(11, 0)}
|
114
|
-
clusterer.clusters.each_index do |i|
|
115
|
-
clusterer.clusters[i].data_items.each do |point|
|
116
|
-
map[point.first][point.last]=(i+1)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
map.each { |row| puts row.inspect}
|
120
|
-
end
|
121
|
-
|
122
|
-
end
|
@@ -1,81 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.rubyforge.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/ward_linkage_hierarchical'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::WardLinkageHierarchical
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::WardLinkageHierarchicalTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::WardLinkageHierarchical.send(:public,
|
40
|
-
*Ai4r::Clusterers::WardLinkageHierarchical.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_in_delta 123.4166, clusterer.linkage_distance(0,1,2), 0.0001
|
49
|
-
assert_equal 27.75, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_cluster_tree
|
53
|
-
clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new
|
54
|
-
clusterer.build(DataSet.new(:data_items => @@data), 1)
|
55
|
-
assert_equal @@data.length, clusterer.cluster_tree.length
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_cluster_tree_limit
|
59
|
-
depth = 5
|
60
|
-
clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new(5)
|
61
|
-
clusterer.build(DataSet.new(:data_items => @@data), 1)
|
62
|
-
assert_equal 5, clusterer.cluster_tree.length
|
63
|
-
end
|
64
|
-
|
65
|
-
def test_cluster_tree_first_length
|
66
|
-
depth = 5
|
67
|
-
clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new(5)
|
68
|
-
clusterer.build(DataSet.new(:data_items => @@data), 1)
|
69
|
-
assert_equal 1, clusterer.cluster_tree.first.length
|
70
|
-
end
|
71
|
-
|
72
|
-
def test_cluster_tree_last_length
|
73
|
-
depth = 5
|
74
|
-
clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new(5)
|
75
|
-
clusterer.build(DataSet.new(:data_items => @@data), 1)
|
76
|
-
assert_equal 5, clusterer.cluster_tree.last.length
|
77
|
-
end
|
78
|
-
|
79
|
-
|
80
|
-
end
|
81
|
-
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/ward_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::WardLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::WardLinkage.send(:public,
|
40
|
-
*Ai4r::Clusterers::WardLinkage.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::WardLinkage.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_in_delta 123.4166, clusterer.linkage_distance(0,1,2), 0.0001
|
49
|
-
assert_equal 27.75, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/weighted_average_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::WeightedAverageLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::WeightedAverageLinkage.send(:public,
|
40
|
-
*Ai4r::Clusterers::WeightedAverageLinkage.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::WeightedAverageLinkage.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_equal 93.5, clusterer.linkage_distance(0,1,2)
|
49
|
-
assert_equal 37.5, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
data/test/data/data_set_test.rb
DELETED
@@ -1,104 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://www.ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/data/data_set'
|
12
|
-
|
13
|
-
module Ai4r
|
14
|
-
module Data
|
15
|
-
class DataSetTest < Test::Unit::TestCase
|
16
|
-
|
17
|
-
def test_load_csv_with_labels
|
18
|
-
set = DataSet.new.load_csv_with_labels("#{File.dirname(__FILE__)}/data_set.csv")
|
19
|
-
assert_equal 120, set.data_items.length
|
20
|
-
assert_equal ["zone", "rooms", "size", "price"], set.data_labels
|
21
|
-
assert_equal ["Moron Sur (GBA)","2","[28 m2 - 39 m2]","[29K-35K]"], set.data_items.first
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_parse_csv_with_labels
|
25
|
-
set = DataSet.new.parse_csv_with_labels("#{File.dirname(__FILE__)}/data_set.csv")
|
26
|
-
assert_equal 120, set.data_items.length
|
27
|
-
assert_equal ["zone", "rooms", "size", "price"], set.data_labels
|
28
|
-
assert_equal ["Moron Sur (GBA)",2.0,"[28 m2 - 39 m2]","[29K-35K]"], set.data_items.first
|
29
|
-
end
|
30
|
-
|
31
|
-
def test_build_domains
|
32
|
-
domains = [ Set.new(["New York", "Chicago"]),
|
33
|
-
Set.new(["M", "F"]),
|
34
|
-
[5, 85],
|
35
|
-
Set.new(["Y", "N"]) ]
|
36
|
-
data = [ [ "New York", "M", 23, "Y"],
|
37
|
-
[ "Chicago", "M", 85, "Y"],
|
38
|
-
[ "New York", "F", 32, "Y"],
|
39
|
-
[ "New York", "M", 5, "N"],
|
40
|
-
[ "Chicago", "M", 15, "N"],
|
41
|
-
[ "Chicago", "F", 45, "Y"] ]
|
42
|
-
labels = ["city", "gender", "age", "result"]
|
43
|
-
set = DataSet.new({:data_items => data, :data_labels => labels})
|
44
|
-
assert_equal domains, set.build_domains
|
45
|
-
assert_equal domains[0], set.build_domain("city")
|
46
|
-
assert_equal domains[1], set.build_domain(1)
|
47
|
-
assert_equal domains[2], set.build_domain("age")
|
48
|
-
assert_equal domains[3], set.build_domain("result")
|
49
|
-
end
|
50
|
-
|
51
|
-
def test_set_data_labels
|
52
|
-
labels = ["A", "B"]
|
53
|
-
set = DataSet.new.set_data_labels(labels)
|
54
|
-
assert_equal labels, set.data_labels
|
55
|
-
set = DataSet.new(:data_labels => labels)
|
56
|
-
assert_equal labels, set.data_labels
|
57
|
-
set = DataSet.new(:data_items => [[ 1, 2, 3]])
|
58
|
-
assert_raise(ArgumentError) { set.set_data_labels(labels) }
|
59
|
-
end
|
60
|
-
|
61
|
-
def test_set_data_items
|
62
|
-
items = [ [ "New York", "M", "Y"],
|
63
|
-
[ "Chicago", "M", "Y"],
|
64
|
-
[ "New York", "F", "Y"],
|
65
|
-
[ "New York", "M", "N"],
|
66
|
-
[ "Chicago", "M", "N"],
|
67
|
-
[ "Chicago", "F", "Y"] ]
|
68
|
-
set = DataSet.new.set_data_items(items)
|
69
|
-
assert_equal items, set.data_items
|
70
|
-
assert_equal 3, set.data_labels.length
|
71
|
-
items << items.first[0..-2]
|
72
|
-
assert_raise(ArgumentError) { set.set_data_items(items) }
|
73
|
-
assert_raise(ArgumentError) { set.set_data_items(nil) }
|
74
|
-
assert_raise(ArgumentError) { set.set_data_items([1]) }
|
75
|
-
end
|
76
|
-
|
77
|
-
def test_get_mean_or_mode
|
78
|
-
items = [ [ "New York", 25, "Y"],
|
79
|
-
[ "New York", 55, "Y"],
|
80
|
-
[ "Chicago", 23, "Y"],
|
81
|
-
[ "Boston", 23, "N"],
|
82
|
-
[ "Chicago", 12, "N"],
|
83
|
-
[ "Chicago", 87, "Y"] ]
|
84
|
-
set = DataSet.new.set_data_items(items)
|
85
|
-
assert_equal ["Chicago", 37.5, "Y"], set.get_mean_or_mode
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_index
|
89
|
-
items = [ [ "New York", 25, "Y"],
|
90
|
-
[ "New York", 55, "Y"],
|
91
|
-
[ "Chicago", 23, "Y"],
|
92
|
-
[ "Boston", 23, "N"],
|
93
|
-
[ "Chicago", 12, "N"],
|
94
|
-
[ "Chicago", 87, "Y"] ]
|
95
|
-
set = DataSet.new.set_data_items(items)
|
96
|
-
assert_equal set.data_labels, set[0].data_labels
|
97
|
-
assert_equal [[ "New York", 25, "Y"]], set[0].data_items
|
98
|
-
assert_equal [[ "Chicago", 23, "Y"],[ "Boston", 23, "N"]], set[2..3].data_items
|
99
|
-
assert_equal items[1..-1], set[1..-1].data_items
|
100
|
-
end
|
101
|
-
|
102
|
-
end
|
103
|
-
end
|
104
|
-
end
|