ai4r 1.12 → 2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/README.md +174 -0
- data/examples/classifiers/hyperpipes_data.csv +14 -0
- data/examples/classifiers/hyperpipes_example.rb +22 -0
- data/examples/classifiers/ib1_example.rb +12 -0
- data/examples/classifiers/id3_example.rb +15 -10
- data/examples/classifiers/id3_graphviz_example.rb +17 -0
- data/examples/classifiers/logistic_regression_example.rb +11 -0
- data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
- data/examples/classifiers/naive_bayes_example.rb +12 -13
- data/examples/classifiers/one_r_example.rb +27 -0
- data/examples/classifiers/parameter_tutorial.rb +29 -0
- data/examples/classifiers/prism_nominal_example.rb +15 -0
- data/examples/classifiers/prism_numeric_example.rb +21 -0
- data/examples/classifiers/simple_linear_regression_example.csv +159 -0
- data/examples/classifiers/simple_linear_regression_example.rb +18 -0
- data/examples/classifiers/zero_and_one_r_example.rb +34 -0
- data/examples/classifiers/zero_one_r_data.csv +8 -0
- data/examples/clusterers/clusterer_example.rb +62 -0
- data/examples/clusterers/dbscan_example.rb +17 -0
- data/examples/clusterers/dendrogram_example.rb +17 -0
- data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
- data/examples/clusterers/kmeans_custom_example.rb +26 -0
- data/examples/genetic_algorithm/bitstring_example.rb +41 -0
- data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
- data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
- data/examples/neural_network/backpropagation_example.rb +49 -48
- data/examples/neural_network/hopfield_example.rb +45 -0
- data/examples/neural_network/patterns_with_base_noise.rb +39 -39
- data/examples/neural_network/patterns_with_noise.rb +41 -39
- data/examples/neural_network/train_epochs_callback.rb +25 -0
- data/examples/neural_network/training_patterns.rb +39 -39
- data/examples/neural_network/transformer_text_classification.rb +78 -0
- data/examples/neural_network/xor_example.rb +23 -22
- data/examples/reinforcement/q_learning_example.rb +10 -0
- data/examples/som/som_data.rb +155 -152
- data/examples/som/som_multi_node_example.rb +12 -13
- data/examples/som/som_single_example.rb +12 -15
- data/examples/transformer/decode_classifier_example.rb +68 -0
- data/examples/transformer/deterministic_example.rb +10 -0
- data/examples/transformer/seq2seq_example.rb +16 -0
- data/lib/ai4r/classifiers/classifier.rb +24 -16
- data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
- data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
- data/lib/ai4r/classifiers/ib1.rb +122 -32
- data/lib/ai4r/classifiers/id3.rb +527 -144
- data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
- data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
- data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
- data/lib/ai4r/classifiers/one_r.rb +112 -44
- data/lib/ai4r/classifiers/prism.rb +167 -76
- data/lib/ai4r/classifiers/random_forest.rb +72 -0
- data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
- data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
- data/lib/ai4r/classifiers/votes.rb +57 -0
- data/lib/ai4r/classifiers/zero_r.rb +71 -30
- data/lib/ai4r/clusterers/average_linkage.rb +46 -27
- data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
- data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
- data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
- data/lib/ai4r/clusterers/clusterer.rb +28 -24
- data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
- data/lib/ai4r/clusterers/dbscan.rb +134 -0
- data/lib/ai4r/clusterers/diana.rb +75 -49
- data/lib/ai4r/clusterers/k_means.rb +309 -72
- data/lib/ai4r/clusterers/median_linkage.rb +49 -33
- data/lib/ai4r/clusterers/single_linkage.rb +196 -88
- data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
- data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
- data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
- data/lib/ai4r/data/data_set.rb +229 -100
- data/lib/ai4r/data/parameterizable.rb +31 -25
- data/lib/ai4r/data/proximity.rb +72 -50
- data/lib/ai4r/data/statistics.rb +46 -35
- data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
- data/lib/ai4r/experiment/split.rb +39 -0
- data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
- data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
- data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
- data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
- data/lib/ai4r/neural_network/activation_functions.rb +37 -0
- data/lib/ai4r/neural_network/backpropagation.rb +419 -143
- data/lib/ai4r/neural_network/hopfield.rb +175 -58
- data/lib/ai4r/neural_network/transformer.rb +194 -0
- data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
- data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
- data/lib/ai4r/reinforcement/q_learning.rb +51 -0
- data/lib/ai4r/search/a_star.rb +76 -0
- data/lib/ai4r/search/bfs.rb +50 -0
- data/lib/ai4r/search/dfs.rb +50 -0
- data/lib/ai4r/search/mcts.rb +118 -0
- data/lib/ai4r/search.rb +12 -0
- data/lib/ai4r/som/distance_metrics.rb +29 -0
- data/lib/ai4r/som/layer.rb +28 -17
- data/lib/ai4r/som/node.rb +61 -32
- data/lib/ai4r/som/som.rb +158 -41
- data/lib/ai4r/som/two_phase_layer.rb +21 -25
- data/lib/ai4r/version.rb +3 -0
- data/lib/ai4r.rb +58 -27
- metadata +117 -106
- data/README.rdoc +0 -44
- data/test/classifiers/hyperpipes_test.rb +0 -84
- data/test/classifiers/ib1_test.rb +0 -78
- data/test/classifiers/id3_test.rb +0 -208
- data/test/classifiers/multilayer_perceptron_test.rb +0 -79
- data/test/classifiers/naive_bayes_test.rb +0 -43
- data/test/classifiers/one_r_test.rb +0 -62
- data/test/classifiers/prism_test.rb +0 -85
- data/test/classifiers/zero_r_test.rb +0 -50
- data/test/clusterers/average_linkage_test.rb +0 -51
- data/test/clusterers/bisecting_k_means_test.rb +0 -66
- data/test/clusterers/centroid_linkage_test.rb +0 -53
- data/test/clusterers/complete_linkage_test.rb +0 -57
- data/test/clusterers/diana_test.rb +0 -69
- data/test/clusterers/k_means_test.rb +0 -100
- data/test/clusterers/median_linkage_test.rb +0 -53
- data/test/clusterers/single_linkage_test.rb +0 -122
- data/test/clusterers/ward_linkage_test.rb +0 -53
- data/test/clusterers/weighted_average_linkage_test.rb +0 -53
- data/test/data/data_set_test.rb +0 -96
- data/test/data/proximity_test.rb +0 -81
- data/test/data/statistics_test.rb +0 -65
- data/test/experiment/classifier_evaluator_test.rb +0 -76
- data/test/genetic_algorithm/chromosome_test.rb +0 -57
- data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
- data/test/neural_network/backpropagation_test.rb +0 -82
- data/test/neural_network/hopfield_test.rb +0 -72
- data/test/som/som_test.rb +0 -97
@@ -1,100 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://www.ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/k_means'
|
12
|
-
|
13
|
-
class KMeansTest < Test::Unit::TestCase
|
14
|
-
|
15
|
-
include Ai4r::Clusterers
|
16
|
-
include Ai4r::Data
|
17
|
-
|
18
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
19
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
20
|
-
|
21
|
-
def test_build
|
22
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
23
|
-
clusterer = KMeans.new.build(data_set, 4)
|
24
|
-
#draw_map(clusterer)
|
25
|
-
# Verify that all 4 clusters are created
|
26
|
-
assert_equal 4, clusterer.clusters.length
|
27
|
-
assert_equal 4, clusterer.centroids.length
|
28
|
-
# The addition of all instances of every cluster must be equal than
|
29
|
-
# the number of data points
|
30
|
-
total_length = 0
|
31
|
-
clusterer.clusters.each do |cluster|
|
32
|
-
total_length += cluster.data_items.length
|
33
|
-
end
|
34
|
-
assert_equal @@data.length, total_length
|
35
|
-
# Data inside clusters must be the same as orifinal data
|
36
|
-
clusterer.clusters.each do |cluster|
|
37
|
-
cluster.data_items.each do |data_item|
|
38
|
-
assert @@data.include?(data_item)
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_eval
|
44
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
45
|
-
clusterer = KMeans.new.build(data_set, 4)
|
46
|
-
item = [10,0]
|
47
|
-
cluster_index = clusterer.eval(item)
|
48
|
-
# Must return a valid cluster index [0-3]
|
49
|
-
assert cluster_index >= 0 && cluster_index < 4
|
50
|
-
# Distance to cluster centroid must be less than distance to any other
|
51
|
-
# centroid
|
52
|
-
min_distance = clusterer.distance(clusterer.centroids[cluster_index], item)
|
53
|
-
clusterer.centroids.each do |centroid|
|
54
|
-
assert clusterer.distance(centroid, item) >= min_distance
|
55
|
-
end
|
56
|
-
end
|
57
|
-
|
58
|
-
def test_distance
|
59
|
-
clusterer = KMeans.new
|
60
|
-
# By default, distance returns the eucledian distance to the power of 2
|
61
|
-
assert_equal 2385, clusterer.distance(
|
62
|
-
[1, 10, "Chicago", 2],
|
63
|
-
[10, 10, "London", 50])
|
64
|
-
# Test new distance definition
|
65
|
-
manhattan_distance = lambda do |a, b|
|
66
|
-
dist = 0.0
|
67
|
-
a.each_index do |index|
|
68
|
-
if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
|
69
|
-
dist = dist + (a[index]-b[index]).abs
|
70
|
-
end
|
71
|
-
end
|
72
|
-
dist
|
73
|
-
end
|
74
|
-
clusterer.set_parameters({:distance_function => manhattan_distance})
|
75
|
-
assert_equal 57, clusterer.distance(
|
76
|
-
[1, 10, "Chicago", 2],
|
77
|
-
[10, 10, "London", 50])
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_max_iterations
|
81
|
-
data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
|
82
|
-
clusterer = KMeans.new.
|
83
|
-
set_parameters({:max_iterations=>1}).
|
84
|
-
build(data_set, 4)
|
85
|
-
assert_equal 1, clusterer.iterations
|
86
|
-
end
|
87
|
-
|
88
|
-
private
|
89
|
-
def draw_map(clusterer)
|
90
|
-
map = Array.new(11) {Array.new(11, 0)}
|
91
|
-
clusterer.clusters.each_index do |i|
|
92
|
-
clusterer.clusters[i].data_items.each do |point|
|
93
|
-
map[point.first][point.last]=(i+1)
|
94
|
-
end
|
95
|
-
end
|
96
|
-
map.each { |row| puts row.inspect}
|
97
|
-
end
|
98
|
-
|
99
|
-
end
|
100
|
-
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/median_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::MedianLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::MedianLinkage.send(:public,
|
40
|
-
*Ai4r::Clusterers::MedianLinkage.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::MedianLinkage.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_equal 92.25, clusterer.linkage_distance(0,1,2)
|
49
|
-
assert_equal 15.25, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
@@ -1,122 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/single_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::SingleLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
|
15
|
-
end
|
16
|
-
|
17
|
-
class SingleLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
SingleLinkage.send(:public, *SingleLinkage.protected_instance_methods)
|
40
|
-
end
|
41
|
-
|
42
|
-
def test_build
|
43
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
44
|
-
clusterer.build(DataSet.new(:data_items => @@data), 4)
|
45
|
-
#draw_map(clusterer)
|
46
|
-
assert_equal 4, clusterer.clusters.length
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_eval
|
50
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
51
|
-
clusterer.build(DataSet.new(:data_items => @@data), 4)
|
52
|
-
assert_equal 2, clusterer.eval([0,8])
|
53
|
-
assert_equal 0, clusterer.eval([8,0])
|
54
|
-
end
|
55
|
-
|
56
|
-
def test_create_distance_matrix
|
57
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
58
|
-
clusterer.create_distance_matrix(DataSet.new(:data_items => @@data))
|
59
|
-
assert clusterer.distance_matrix
|
60
|
-
clusterer.distance_matrix.each_with_index do |row, row_index|
|
61
|
-
assert_equal row_index+1, row.length
|
62
|
-
end
|
63
|
-
assert_equal @@expected_distance_matrix, clusterer.distance_matrix
|
64
|
-
end
|
65
|
-
|
66
|
-
def test_read_distance_matrix
|
67
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
68
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
69
|
-
assert_equal 9.0, clusterer.read_distance_matrix(3, 2)
|
70
|
-
assert_equal 9.0, clusterer.read_distance_matrix(2, 3)
|
71
|
-
assert_equal 0, clusterer.read_distance_matrix(5, 5)
|
72
|
-
end
|
73
|
-
|
74
|
-
def test_linkage_distance
|
75
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
76
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
77
|
-
assert_equal 89, clusterer.linkage_distance(0,1,2)
|
78
|
-
assert_equal 1, clusterer.linkage_distance(4,2,5)
|
79
|
-
end
|
80
|
-
|
81
|
-
def test_get_closest_clusters
|
82
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
83
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
84
|
-
assert_equal [1,0], clusterer.get_closest_clusters([[0,1], [3,4]])
|
85
|
-
assert_equal [2,1], clusterer.get_closest_clusters([[3,4], [0,1], [5,6]])
|
86
|
-
end
|
87
|
-
|
88
|
-
def test_create_initial_index_clusters
|
89
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
90
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
91
|
-
index_clusters = clusterer.create_initial_index_clusters
|
92
|
-
assert_equal @@data.length, index_clusters.length
|
93
|
-
assert_equal 0, index_clusters.first.first
|
94
|
-
assert_equal @@data.length-1, index_clusters.last.first
|
95
|
-
end
|
96
|
-
|
97
|
-
def test_merge_clusters
|
98
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
99
|
-
clusters = clusterer.merge_clusters(1,2, [[1,2],[3,4],[5,6]])
|
100
|
-
assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
|
101
|
-
clusters = clusterer.merge_clusters(2,1, [[1,2],[3,4],[5,6]])
|
102
|
-
assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
|
103
|
-
end
|
104
|
-
|
105
|
-
def test_distance_between_item_and_cluster
|
106
|
-
clusterer = Ai4r::Clusterers::SingleLinkage.new
|
107
|
-
assert_equal 8.0, clusterer.distance_between_item_and_cluster([1,2],
|
108
|
-
DataSet.new(:data_items => [[3,4],[5,6]]))
|
109
|
-
end
|
110
|
-
|
111
|
-
private
|
112
|
-
def draw_map(clusterer)
|
113
|
-
map = Array.new(11) {Array.new(11, 0)}
|
114
|
-
clusterer.clusters.each_index do |i|
|
115
|
-
clusterer.clusters[i].data_items.each do |point|
|
116
|
-
map[point.first][point.last]=(i+1)
|
117
|
-
end
|
118
|
-
end
|
119
|
-
map.each { |row| puts row.inspect}
|
120
|
-
end
|
121
|
-
|
122
|
-
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/ward_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::WardLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::WardLinkage.send(:public,
|
40
|
-
*Ai4r::Clusterers::WardLinkage.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::WardLinkage.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_in_delta 123.4166, clusterer.linkage_distance(0,1,2), 0.0001
|
49
|
-
assert_equal 27.75, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
@@ -1,53 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens (implementation)
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/clusterers/weighted_average_linkage'
|
12
|
-
|
13
|
-
class Ai4r::Clusterers::WeightedAverageLinkage
|
14
|
-
attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
|
15
|
-
end
|
16
|
-
|
17
|
-
class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
|
18
|
-
|
19
|
-
include Ai4r::Clusterers
|
20
|
-
include Ai4r::Data
|
21
|
-
|
22
|
-
@@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
|
23
|
-
[1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
|
24
|
-
|
25
|
-
@@expected_distance_matrix = [
|
26
|
-
[98.0],
|
27
|
-
[89.0, 5.0],
|
28
|
-
[68.0, 26.0, 9.0],
|
29
|
-
[74.0, 4.0, 1.0, 10.0],
|
30
|
-
[0.0, 98.0, 89.0, 68.0, 74.0],
|
31
|
-
[81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
|
32
|
-
[8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
|
33
|
-
[100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
|
34
|
-
[68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
|
35
|
-
[49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
|
36
|
-
[2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
|
37
|
-
|
38
|
-
def setup
|
39
|
-
Ai4r::Clusterers::WeightedAverageLinkage.send(:public,
|
40
|
-
*Ai4r::Clusterers::WeightedAverageLinkage.protected_instance_methods)
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_linkage_distance
|
44
|
-
clusterer = Ai4r::Clusterers::WeightedAverageLinkage.new
|
45
|
-
clusterer.data_set = DataSet.new :data_items => @@data
|
46
|
-
clusterer.index_clusters = clusterer.create_initial_index_clusters
|
47
|
-
clusterer.distance_matrix = @@expected_distance_matrix
|
48
|
-
assert_equal 93.5, clusterer.linkage_distance(0,1,2)
|
49
|
-
assert_equal 37.5, clusterer.linkage_distance(4,2,5)
|
50
|
-
end
|
51
|
-
|
52
|
-
end
|
53
|
-
|
data/test/data/data_set_test.rb
DELETED
@@ -1,96 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://www.ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/data/data_set'
|
12
|
-
|
13
|
-
module Ai4r
|
14
|
-
module Data
|
15
|
-
class DataSetTest < Test::Unit::TestCase
|
16
|
-
|
17
|
-
def test_load_csv_with_labels
|
18
|
-
set = DataSet.new.load_csv_with_labels("#{File.dirname(__FILE__)}/data_set.csv")
|
19
|
-
assert_equal 120, set.data_items.length
|
20
|
-
assert_equal ["zone", "rooms", "size", "price"], set.data_labels
|
21
|
-
end
|
22
|
-
|
23
|
-
def test_build_domains
|
24
|
-
domains = [ Set.new(["New York", "Chicago"]),
|
25
|
-
Set.new(["M", "F"]),
|
26
|
-
[5, 85],
|
27
|
-
Set.new(["Y", "N"]) ]
|
28
|
-
data = [ [ "New York", "M", 23, "Y"],
|
29
|
-
[ "Chicago", "M", 85, "Y"],
|
30
|
-
[ "New York", "F", 32, "Y"],
|
31
|
-
[ "New York", "M", 5, "N"],
|
32
|
-
[ "Chicago", "M", 15, "N"],
|
33
|
-
[ "Chicago", "F", 45, "Y"] ]
|
34
|
-
labels = ["city", "gender", "age", "result"]
|
35
|
-
set = DataSet.new({:data_items => data, :data_labels => labels})
|
36
|
-
assert_equal domains, set.build_domains
|
37
|
-
assert_equal domains[0], set.build_domain("city")
|
38
|
-
assert_equal domains[1], set.build_domain(1)
|
39
|
-
assert_equal domains[2], set.build_domain("age")
|
40
|
-
assert_equal domains[3], set.build_domain("result")
|
41
|
-
end
|
42
|
-
|
43
|
-
def test_set_data_labels
|
44
|
-
labels = ["A", "B"]
|
45
|
-
set = DataSet.new.set_data_labels(labels)
|
46
|
-
assert_equal labels, set.data_labels
|
47
|
-
set = DataSet.new(:data_labels => labels)
|
48
|
-
assert_equal labels, set.data_labels
|
49
|
-
set = DataSet.new(:data_items => [[ 1, 2, 3]])
|
50
|
-
assert_raise(ArgumentError) { set.set_data_labels(labels) }
|
51
|
-
end
|
52
|
-
|
53
|
-
def test_set_data_items
|
54
|
-
items = [ [ "New York", "M", "Y"],
|
55
|
-
[ "Chicago", "M", "Y"],
|
56
|
-
[ "New York", "F", "Y"],
|
57
|
-
[ "New York", "M", "N"],
|
58
|
-
[ "Chicago", "M", "N"],
|
59
|
-
[ "Chicago", "F", "Y"] ]
|
60
|
-
set = DataSet.new.set_data_items(items)
|
61
|
-
assert_equal items, set.data_items
|
62
|
-
assert_equal 3, set.data_labels.length
|
63
|
-
items << items.first[0..-2]
|
64
|
-
assert_raise(ArgumentError) { set.set_data_items(items) }
|
65
|
-
assert_raise(ArgumentError) { set.set_data_items(nil) }
|
66
|
-
assert_raise(ArgumentError) { set.set_data_items([1]) }
|
67
|
-
end
|
68
|
-
|
69
|
-
def test_get_mean_or_mode
|
70
|
-
items = [ [ "New York", 25, "Y"],
|
71
|
-
[ "New York", 55, "Y"],
|
72
|
-
[ "Chicago", 23, "Y"],
|
73
|
-
[ "Boston", 23, "N"],
|
74
|
-
[ "Chicago", 12, "N"],
|
75
|
-
[ "Chicago", 87, "Y"] ]
|
76
|
-
set = DataSet.new.set_data_items(items)
|
77
|
-
assert_equal ["Chicago", 37.5, "Y"], set.get_mean_or_mode
|
78
|
-
end
|
79
|
-
|
80
|
-
def test_index
|
81
|
-
items = [ [ "New York", 25, "Y"],
|
82
|
-
[ "New York", 55, "Y"],
|
83
|
-
[ "Chicago", 23, "Y"],
|
84
|
-
[ "Boston", 23, "N"],
|
85
|
-
[ "Chicago", 12, "N"],
|
86
|
-
[ "Chicago", 87, "Y"] ]
|
87
|
-
set = DataSet.new.set_data_items(items)
|
88
|
-
assert_equal set.data_labels, set[0].data_labels
|
89
|
-
assert_equal [[ "New York", 25, "Y"]], set[0].data_items
|
90
|
-
assert_equal [[ "Chicago", 23, "Y"],[ "Boston", 23, "N"]], set[2..3].data_items
|
91
|
-
assert_equal items[1..-1], set[1..-1].data_items
|
92
|
-
end
|
93
|
-
|
94
|
-
end
|
95
|
-
end
|
96
|
-
end
|
data/test/data/proximity_test.rb
DELETED
@@ -1,81 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://www.ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/data/proximity'
|
12
|
-
|
13
|
-
module Ai4r
|
14
|
-
module Data
|
15
|
-
class ProximityTest < Test::Unit::TestCase
|
16
|
-
|
17
|
-
@@delta = 0.0001
|
18
|
-
@@data1 = [rand*10, rand*10, rand*-10]
|
19
|
-
@@data2 = [rand*10, rand*-10, rand*10]
|
20
|
-
|
21
|
-
def test_squared_euclidean_distance
|
22
|
-
assert_equal 0, Proximity.squared_euclidean_distance(@@data1, @@data1)
|
23
|
-
assert_equal Proximity.squared_euclidean_distance(@@data1, @@data2),
|
24
|
-
Proximity.squared_euclidean_distance(@@data2, @@data1)
|
25
|
-
assert 0 <= Proximity.squared_euclidean_distance(@@data1, @@data1)
|
26
|
-
assert_equal 2, Proximity.squared_euclidean_distance([1,1], [2,2])
|
27
|
-
assert_equal 9, Proximity.squared_euclidean_distance([3], [0])
|
28
|
-
end
|
29
|
-
|
30
|
-
def test_euclidean_distance
|
31
|
-
assert_equal 0, Proximity.euclidean_distance(@@data1, @@data1)
|
32
|
-
assert_equal Proximity.euclidean_distance(@@data1, @@data2),
|
33
|
-
Proximity.euclidean_distance(@@data2, @@data1)
|
34
|
-
assert 0 <= Proximity.euclidean_distance(@@data1, @@data1)
|
35
|
-
assert_equal Math.sqrt(2), Proximity.euclidean_distance([1,1], [2,2])
|
36
|
-
assert_equal 3, Proximity.euclidean_distance([3], [0])
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_manhattan_distance
|
40
|
-
assert_equal 0, Proximity.manhattan_distance(@@data1, @@data1)
|
41
|
-
assert_equal Proximity.manhattan_distance(@@data1, @@data2),
|
42
|
-
Proximity.manhattan_distance(@@data2, @@data1)
|
43
|
-
assert 0 <= Proximity.manhattan_distance(@@data1, @@data1)
|
44
|
-
assert_equal 2, Proximity.manhattan_distance([1,1], [2,2])
|
45
|
-
assert_equal 9, Proximity.manhattan_distance([1,10], [2,2])
|
46
|
-
assert_equal 3, Proximity.manhattan_distance([3], [0])
|
47
|
-
end
|
48
|
-
|
49
|
-
def test_sup_distance
|
50
|
-
assert_equal 0, Proximity.sup_distance(@@data1, @@data1)
|
51
|
-
assert_equal Proximity.sup_distance(@@data1, @@data2),
|
52
|
-
Proximity.sup_distance(@@data2, @@data1)
|
53
|
-
assert 0 <= Proximity.sup_distance(@@data1, @@data1)
|
54
|
-
assert_equal 1, Proximity.sup_distance([1,1], [2,2])
|
55
|
-
assert_equal 8, Proximity.sup_distance([1,10], [2,2])
|
56
|
-
assert_equal 3, Proximity.sup_distance([3], [0])
|
57
|
-
end
|
58
|
-
|
59
|
-
def test_hamming_distance
|
60
|
-
assert_equal 0, Proximity.hamming_distance(@@data1, @@data1)
|
61
|
-
assert_equal Proximity.hamming_distance(@@data1, @@data2),
|
62
|
-
Proximity.hamming_distance(@@data2, @@data1)
|
63
|
-
assert 0 <= Proximity.hamming_distance(@@data1, @@data1)
|
64
|
-
assert_equal 1, Proximity.hamming_distance([1,1], [0,1])
|
65
|
-
assert_equal 2, Proximity.hamming_distance([1,10], [2,2])
|
66
|
-
assert_equal 1, Proximity.hamming_distance([3], [0])
|
67
|
-
end
|
68
|
-
|
69
|
-
def test_simple_matching_distance
|
70
|
-
assert_equal 0, Proximity.simple_matching_distance(@@data1, @@data1)
|
71
|
-
assert_equal Proximity.simple_matching_distance(@@data1, @@data2),
|
72
|
-
Proximity.simple_matching_distance(@@data2, @@data1)
|
73
|
-
assert 0 <= Proximity.simple_matching_distance(@@data1, @@data1)
|
74
|
-
assert_equal 1, Proximity.simple_matching_distance([1,2], [0,1])
|
75
|
-
assert_equal 1.0/0, Proximity.simple_matching_distance([1,10], [2,2])
|
76
|
-
assert_equal 1.0/0, Proximity.simple_matching_distance([3], [0])
|
77
|
-
end
|
78
|
-
|
79
|
-
end
|
80
|
-
end
|
81
|
-
end
|
@@ -1,65 +0,0 @@
|
|
1
|
-
# Author:: Sergio Fierens
|
2
|
-
# License:: MPL 1.1
|
3
|
-
# Project:: ai4r
|
4
|
-
# Url:: http://www.ai4r.org/
|
5
|
-
#
|
6
|
-
# You can redistribute it and/or modify it under the terms of
|
7
|
-
# the Mozilla Public License version 1.1 as published by the
|
8
|
-
# Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
|
9
|
-
|
10
|
-
require 'test/unit'
|
11
|
-
require 'ai4r/data/statistics'
|
12
|
-
|
13
|
-
module Ai4r
|
14
|
-
module Data
|
15
|
-
class StatisticsTest < Test::Unit::TestCase
|
16
|
-
|
17
|
-
DELTA = 0.00001
|
18
|
-
|
19
|
-
def setup
|
20
|
-
@data_set = DataSet.new.
|
21
|
-
parse_csv "#{File.dirname(__FILE__)}/statistics_data_set.csv"
|
22
|
-
end
|
23
|
-
|
24
|
-
def test_mean
|
25
|
-
assert_equal 2, Statistics.mean(@data_set, 1)
|
26
|
-
assert_equal 2.502, Statistics.mean(@data_set, 0)
|
27
|
-
end
|
28
|
-
|
29
|
-
def test_variance
|
30
|
-
assert_equal 0, Statistics.variance(@data_set, 1)
|
31
|
-
assert_in_delta 4.47302, Statistics.variance(@data_set, 0), DELTA
|
32
|
-
end
|
33
|
-
|
34
|
-
def test_standard_deviation
|
35
|
-
assert_equal 0, Statistics.standard_deviation(@data_set, 1)
|
36
|
-
assert_in_delta 2.11495, Statistics.standard_deviation(@data_set, 0), DELTA
|
37
|
-
end
|
38
|
-
|
39
|
-
def test_mode
|
40
|
-
items = [ [ "New York", 25, "Y"],
|
41
|
-
[ "New York", 55, "Y"],
|
42
|
-
[ "Chicago", 23, "Y"],
|
43
|
-
[ "Boston", 23, "N"],
|
44
|
-
[ "Chicago", 12, "N"],
|
45
|
-
[ "Chicago", 87, "Y"] ]
|
46
|
-
set = DataSet.new.set_data_items(items)
|
47
|
-
assert_equal "Chicago", Statistics.mode(set,0)
|
48
|
-
assert_equal 23, Statistics.mode(set,1)
|
49
|
-
assert_equal "Y", Statistics.mode(set,2)
|
50
|
-
end
|
51
|
-
|
52
|
-
def test_min
|
53
|
-
assert_equal 2, Statistics.min(@data_set, 1)
|
54
|
-
assert_equal 1, Statistics.min(@data_set, 0)
|
55
|
-
end
|
56
|
-
|
57
|
-
def test_max
|
58
|
-
assert_equal 2, Statistics.max(@data_set, 1)
|
59
|
-
assert_equal 6, Statistics.max(@data_set, 0)
|
60
|
-
assert_equal 3.7, Statistics.max(@data_set, 2)
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
64
|
-
end
|
65
|
-
end
|