ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
@@ -1,167 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/k_means'
12
-
13
- class KMeansTest < Test::Unit::TestCase
14
-
15
- include Ai4r::Clusterers
16
- include Ai4r::Data
17
-
18
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
19
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
20
-
21
- # k-means will generate an empty cluster with this data and initial centroid assignment
22
- @@empty_cluster_data = [[-0.1, 0], [0, 0], [0.1, 0], [-0.1, 10], [0.1, 10], [0.2, 10]]
23
- @@empty_centroid_indices = [0,1,2]
24
-
25
- def test_build
26
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
27
- clusterer = KMeans.new.build(data_set, 4)
28
- #draw_map(clusterer)
29
- # Verify that all 4 clusters are created
30
- assert_equal 4, clusterer.clusters.length
31
- assert_equal 4, clusterer.centroids.length
32
- # The addition of all instances of every cluster must be equal to
33
- # the number of data points
34
- total_length = 0
35
- clusterer.clusters.each do |cluster|
36
- total_length += cluster.data_items.length
37
- end
38
- assert_equal @@data.length, total_length
39
- # Data inside clusters must be the same as original data
40
- clusterer.clusters.each do |cluster|
41
- cluster.data_items.each do |data_item|
42
- assert @@data.include?(data_item)
43
- end
44
- end
45
- end
46
-
47
- def test_build_and_eliminate_empty_clusters
48
- data_set = DataSet.new(:data_items => @@empty_cluster_data, :data_labels => ["X", "Y"])
49
- # :eliminate is the :on_empty default, so we don't need to pass it as a parameter for it
50
- clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices}).build(data_set, @@empty_centroid_indices.size)
51
-
52
- # Verify that one cluster was eliminated
53
- assert_equal @@empty_centroid_indices.size - 1, clusterer.clusters.length
54
- assert_equal @@empty_centroid_indices.size - 1, clusterer.centroids.length
55
-
56
- # The addition of all instances of every cluster must be equal to
57
- # the number of data points
58
- total_length = 0
59
- clusterer.clusters.each do |cluster|
60
- total_length += cluster.data_items.length
61
- end
62
- assert_equal @@empty_cluster_data.length, total_length
63
- # Data inside clusters must be the same as original data
64
- clusterer.clusters.each do |cluster|
65
- cluster.data_items.each do |data_item|
66
- assert @@empty_cluster_data.include?(data_item)
67
- end
68
- end
69
- end
70
-
71
- def test_eval
72
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
73
- clusterer = KMeans.new.build(data_set, 4)
74
- item = [10,0]
75
- cluster_index = clusterer.eval(item)
76
- # Must return a valid cluster index [0-3]
77
- assert cluster_index >= 0 && cluster_index < 4
78
- # Distance to cluster centroid must be less than distance to any other
79
- # centroid
80
- min_distance = clusterer.distance(clusterer.centroids[cluster_index], item)
81
- clusterer.centroids.each do |centroid|
82
- assert clusterer.distance(centroid, item) >= min_distance
83
- end
84
- end
85
-
86
- def test_distance
87
- clusterer = KMeans.new
88
- # By default, distance returns the euclidean distance to the power of 2
89
- assert_equal 2385, clusterer.distance(
90
- [1, 10, "Chicago", 2],
91
- [10, 10, "London", 50])
92
-
93
- # Ensure default distance raises error for nil argument
94
- exception = assert_raise(TypeError) {clusterer.distance([1, 10], [nil, nil])}
95
- assert_equal("nil can't be coerced into Fixnum", exception.message)
96
-
97
- # Test new distance definition
98
- manhattan_distance = lambda do |a, b|
99
- dist = 0.0
100
- a.each_index do |index|
101
- if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
102
- dist = dist + (a[index]-b[index]).abs
103
- end
104
- end
105
- dist
106
- end
107
- clusterer.set_parameters({:distance_function => manhattan_distance})
108
- assert_equal 57, clusterer.distance(
109
- [1, 10, "Chicago", 2],
110
- [10, 10, "London", 50])
111
- end
112
-
113
- def test_max_iterations
114
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
115
- clusterer = KMeans.new.
116
- set_parameters({:max_iterations=>1}).
117
- build(data_set, 4)
118
- assert_equal 1, clusterer.iterations
119
- end
120
-
121
- def test_centroid_indices
122
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
123
- # centroid_indices need not be specified:
124
- KMeans.new.build(data_set, 4)
125
- # centroid_indices can be specified:
126
- KMeans.new.set_parameters({:centroid_indices=>[0,1,2,3]}).build(data_set, 4)
127
- # raises exception if number of clusters differs from length of centroid_indices:
128
- exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>[0,1,2,3]}).build(data_set, 2)}
129
- assert_equal('Length of centroid indices array differs from the specified number of clusters', exception.message)
130
- # raises exception for bad centroid index:
131
- exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>[0,1,2,@@data.size+10]}).build(data_set, 4)}
132
- assert_equal("Invalid centroid index #{@@data.size+10}", exception.message)
133
- end
134
-
135
- def test_on_empty
136
- data_set = DataSet.new(:data_items => @@empty_cluster_data, :data_labels => ["X", "Y"])
137
- clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices}).build(data_set, @@empty_centroid_indices.size)
138
- # Verify that one cluster was eliminated
139
- assert_equal @@empty_centroid_indices.size - 1, clusterer.clusters.length
140
- # Verify that eliminate is the on_empty default
141
- assert_equal 'eliminate', clusterer.on_empty
142
- # Verify that invalid on_empty option throws an argument error
143
- exception = assert_raise(ArgumentError) {KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'ldkfje'}).build(data_set, @@empty_centroid_indices.size)}
144
- assert_equal("Invalid value for on_empty", exception.message)
145
- # Verify that on_empty option 'terminate' raises an error when an empty cluster arises
146
- exception = assert_raise(TypeError) {KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'terminate'}).build(data_set, @@empty_centroid_indices.size)}
147
- assert_equal("nil can't be coerced into Float", exception.message)
148
- clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'random'}).build(data_set, @@empty_centroid_indices.size)
149
- # Verify that cluster was not eliminated
150
- assert_equal @@empty_centroid_indices.size, clusterer.clusters.length
151
- clusterer = KMeans.new.set_parameters({:centroid_indices=>@@empty_centroid_indices, :on_empty=>'outlier'}).build(data_set, @@empty_centroid_indices.size)
152
- # Verify that cluster was not eliminated
153
- assert_equal @@empty_centroid_indices.size, clusterer.clusters.length
154
- end
155
-
156
- private
157
- def draw_map(clusterer)
158
- map = Array.new(11) {Array.new(11, 0)}
159
- clusterer.clusters.each_index do |i|
160
- clusterer.clusters[i].data_items.each do |point|
161
- map[point.first][point.last]=(i+1)
162
- end
163
- end
164
- map.each { |row| puts row.inspect}
165
- end
166
- end
167
-
@@ -1,53 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/median_linkage'
12
-
13
- class Ai4r::Clusterers::MedianLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::MedianLinkage.send(:public,
40
- *Ai4r::Clusterers::MedianLinkage.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::MedianLinkage.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_equal 92.25, clusterer.linkage_distance(0,1,2)
49
- assert_equal 15.25, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- end
53
-
@@ -1,122 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/single_linkage'
12
-
13
- class Ai4r::Clusterers::SingleLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
15
- end
16
-
17
- class SingleLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- SingleLinkage.send(:public, *SingleLinkage.protected_instance_methods)
40
- end
41
-
42
- def test_build
43
- clusterer = Ai4r::Clusterers::SingleLinkage.new
44
- clusterer.build(DataSet.new(:data_items => @@data), 4)
45
- #draw_map(clusterer)
46
- assert_equal 4, clusterer.clusters.length
47
- end
48
-
49
- def test_eval
50
- clusterer = Ai4r::Clusterers::SingleLinkage.new
51
- clusterer.build(DataSet.new(:data_items => @@data), 4)
52
- assert_equal 2, clusterer.eval([0,8])
53
- assert_equal 0, clusterer.eval([8,0])
54
- end
55
-
56
- def test_create_distance_matrix
57
- clusterer = Ai4r::Clusterers::SingleLinkage.new
58
- clusterer.create_distance_matrix(DataSet.new(:data_items => @@data))
59
- assert clusterer.distance_matrix
60
- clusterer.distance_matrix.each_with_index do |row, row_index|
61
- assert_equal row_index+1, row.length
62
- end
63
- assert_equal @@expected_distance_matrix, clusterer.distance_matrix
64
- end
65
-
66
- def test_read_distance_matrix
67
- clusterer = Ai4r::Clusterers::SingleLinkage.new
68
- clusterer.distance_matrix = @@expected_distance_matrix
69
- assert_equal 9.0, clusterer.read_distance_matrix(3, 2)
70
- assert_equal 9.0, clusterer.read_distance_matrix(2, 3)
71
- assert_equal 0, clusterer.read_distance_matrix(5, 5)
72
- end
73
-
74
- def test_linkage_distance
75
- clusterer = Ai4r::Clusterers::SingleLinkage.new
76
- clusterer.distance_matrix = @@expected_distance_matrix
77
- assert_equal 89, clusterer.linkage_distance(0,1,2)
78
- assert_equal 1, clusterer.linkage_distance(4,2,5)
79
- end
80
-
81
- def test_get_closest_clusters
82
- clusterer = Ai4r::Clusterers::SingleLinkage.new
83
- clusterer.distance_matrix = @@expected_distance_matrix
84
- assert_equal [1,0], clusterer.get_closest_clusters([[0,1], [3,4]])
85
- assert_equal [2,1], clusterer.get_closest_clusters([[3,4], [0,1], [5,6]])
86
- end
87
-
88
- def test_create_initial_index_clusters
89
- clusterer = Ai4r::Clusterers::SingleLinkage.new
90
- clusterer.data_set = DataSet.new :data_items => @@data
91
- index_clusters = clusterer.create_initial_index_clusters
92
- assert_equal @@data.length, index_clusters.length
93
- assert_equal 0, index_clusters.first.first
94
- assert_equal @@data.length-1, index_clusters.last.first
95
- end
96
-
97
- def test_merge_clusters
98
- clusterer = Ai4r::Clusterers::SingleLinkage.new
99
- clusters = clusterer.merge_clusters(1,2, [[1,2],[3,4],[5,6]])
100
- assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
101
- clusters = clusterer.merge_clusters(2,1, [[1,2],[3,4],[5,6]])
102
- assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
103
- end
104
-
105
- def test_distance_between_item_and_cluster
106
- clusterer = Ai4r::Clusterers::SingleLinkage.new
107
- assert_equal 8.0, clusterer.distance_between_item_and_cluster([1,2],
108
- DataSet.new(:data_items => [[3,4],[5,6]]))
109
- end
110
-
111
- private
112
- def draw_map(clusterer)
113
- map = Array.new(11) {Array.new(11, 0)}
114
- clusterer.clusters.each_index do |i|
115
- clusterer.clusters[i].data_items.each do |point|
116
- map[point.first][point.last]=(i+1)
117
- end
118
- end
119
- map.each { |row| puts row.inspect}
120
- end
121
-
122
- end
@@ -1,81 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.rubyforge.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require File.dirname(__FILE__) + '/../../lib/ai4r/clusterers/ward_linkage_hierarchical'
12
-
13
- class Ai4r::Clusterers::WardLinkageHierarchical
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::WardLinkageHierarchicalTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::WardLinkageHierarchical.send(:public,
40
- *Ai4r::Clusterers::WardLinkageHierarchical.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_in_delta 123.4166, clusterer.linkage_distance(0,1,2), 0.0001
49
- assert_equal 27.75, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- def test_cluster_tree
53
- clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new
54
- clusterer.build(DataSet.new(:data_items => @@data), 1)
55
- assert_equal @@data.length, clusterer.cluster_tree.length
56
- end
57
-
58
- def test_cluster_tree_limit
59
- depth = 5
60
- clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new(5)
61
- clusterer.build(DataSet.new(:data_items => @@data), 1)
62
- assert_equal 5, clusterer.cluster_tree.length
63
- end
64
-
65
- def test_cluster_tree_first_length
66
- depth = 5
67
- clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new(5)
68
- clusterer.build(DataSet.new(:data_items => @@data), 1)
69
- assert_equal 1, clusterer.cluster_tree.first.length
70
- end
71
-
72
- def test_cluster_tree_last_length
73
- depth = 5
74
- clusterer = Ai4r::Clusterers::WardLinkageHierarchical.new(5)
75
- clusterer.build(DataSet.new(:data_items => @@data), 1)
76
- assert_equal 5, clusterer.cluster_tree.last.length
77
- end
78
-
79
-
80
- end
81
-
@@ -1,53 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/ward_linkage'
12
-
13
- class Ai4r::Clusterers::WardLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::WardLinkage.send(:public,
40
- *Ai4r::Clusterers::WardLinkage.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::WardLinkage.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_in_delta 123.4166, clusterer.linkage_distance(0,1,2), 0.0001
49
- assert_equal 27.75, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- end
53
-
@@ -1,53 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/weighted_average_linkage'
12
-
13
- class Ai4r::Clusterers::WeightedAverageLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::WeightedAverageLinkage.send(:public,
40
- *Ai4r::Clusterers::WeightedAverageLinkage.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::WeightedAverageLinkage.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_equal 93.5, clusterer.linkage_distance(0,1,2)
49
- assert_equal 37.5, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- end
53
-
@@ -1,104 +0,0 @@
1
- # Author:: Sergio Fierens
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/data/data_set'
12
-
13
- module Ai4r
14
- module Data
15
- class DataSetTest < Test::Unit::TestCase
16
-
17
- def test_load_csv_with_labels
18
- set = DataSet.new.load_csv_with_labels("#{File.dirname(__FILE__)}/data_set.csv")
19
- assert_equal 120, set.data_items.length
20
- assert_equal ["zone", "rooms", "size", "price"], set.data_labels
21
- assert_equal ["Moron Sur (GBA)","2","[28 m2 - 39 m2]","[29K-35K]"], set.data_items.first
22
- end
23
-
24
- def test_parse_csv_with_labels
25
- set = DataSet.new.parse_csv_with_labels("#{File.dirname(__FILE__)}/data_set.csv")
26
- assert_equal 120, set.data_items.length
27
- assert_equal ["zone", "rooms", "size", "price"], set.data_labels
28
- assert_equal ["Moron Sur (GBA)",2.0,"[28 m2 - 39 m2]","[29K-35K]"], set.data_items.first
29
- end
30
-
31
- def test_build_domains
32
- domains = [ Set.new(["New York", "Chicago"]),
33
- Set.new(["M", "F"]),
34
- [5, 85],
35
- Set.new(["Y", "N"]) ]
36
- data = [ [ "New York", "M", 23, "Y"],
37
- [ "Chicago", "M", 85, "Y"],
38
- [ "New York", "F", 32, "Y"],
39
- [ "New York", "M", 5, "N"],
40
- [ "Chicago", "M", 15, "N"],
41
- [ "Chicago", "F", 45, "Y"] ]
42
- labels = ["city", "gender", "age", "result"]
43
- set = DataSet.new({:data_items => data, :data_labels => labels})
44
- assert_equal domains, set.build_domains
45
- assert_equal domains[0], set.build_domain("city")
46
- assert_equal domains[1], set.build_domain(1)
47
- assert_equal domains[2], set.build_domain("age")
48
- assert_equal domains[3], set.build_domain("result")
49
- end
50
-
51
- def test_set_data_labels
52
- labels = ["A", "B"]
53
- set = DataSet.new.set_data_labels(labels)
54
- assert_equal labels, set.data_labels
55
- set = DataSet.new(:data_labels => labels)
56
- assert_equal labels, set.data_labels
57
- set = DataSet.new(:data_items => [[ 1, 2, 3]])
58
- assert_raise(ArgumentError) { set.set_data_labels(labels) }
59
- end
60
-
61
- def test_set_data_items
62
- items = [ [ "New York", "M", "Y"],
63
- [ "Chicago", "M", "Y"],
64
- [ "New York", "F", "Y"],
65
- [ "New York", "M", "N"],
66
- [ "Chicago", "M", "N"],
67
- [ "Chicago", "F", "Y"] ]
68
- set = DataSet.new.set_data_items(items)
69
- assert_equal items, set.data_items
70
- assert_equal 3, set.data_labels.length
71
- items << items.first[0..-2]
72
- assert_raise(ArgumentError) { set.set_data_items(items) }
73
- assert_raise(ArgumentError) { set.set_data_items(nil) }
74
- assert_raise(ArgumentError) { set.set_data_items([1]) }
75
- end
76
-
77
- def test_get_mean_or_mode
78
- items = [ [ "New York", 25, "Y"],
79
- [ "New York", 55, "Y"],
80
- [ "Chicago", 23, "Y"],
81
- [ "Boston", 23, "N"],
82
- [ "Chicago", 12, "N"],
83
- [ "Chicago", 87, "Y"] ]
84
- set = DataSet.new.set_data_items(items)
85
- assert_equal ["Chicago", 37.5, "Y"], set.get_mean_or_mode
86
- end
87
-
88
- def test_index
89
- items = [ [ "New York", 25, "Y"],
90
- [ "New York", 55, "Y"],
91
- [ "Chicago", 23, "Y"],
92
- [ "Boston", 23, "N"],
93
- [ "Chicago", 12, "N"],
94
- [ "Chicago", 87, "Y"] ]
95
- set = DataSet.new.set_data_items(items)
96
- assert_equal set.data_labels, set[0].data_labels
97
- assert_equal [[ "New York", 25, "Y"]], set[0].data_items
98
- assert_equal [[ "Chicago", 23, "Y"],[ "Boston", 23, "N"]], set[2..3].data_items
99
- assert_equal items[1..-1], set[1..-1].data_items
100
- end
101
-
102
- end
103
- end
104
- end