ai4r 1.12 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (128) hide show
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.csv +159 -0
  16. data/examples/classifiers/simple_linear_regression_example.rb +18 -0
  17. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  18. data/examples/classifiers/zero_one_r_data.csv +8 -0
  19. data/examples/clusterers/clusterer_example.rb +62 -0
  20. data/examples/clusterers/dbscan_example.rb +17 -0
  21. data/examples/clusterers/dendrogram_example.rb +17 -0
  22. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  23. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  24. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  25. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  26. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  27. data/examples/neural_network/backpropagation_example.rb +49 -48
  28. data/examples/neural_network/hopfield_example.rb +45 -0
  29. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  30. data/examples/neural_network/patterns_with_noise.rb +41 -39
  31. data/examples/neural_network/train_epochs_callback.rb +25 -0
  32. data/examples/neural_network/training_patterns.rb +39 -39
  33. data/examples/neural_network/transformer_text_classification.rb +78 -0
  34. data/examples/neural_network/xor_example.rb +23 -22
  35. data/examples/reinforcement/q_learning_example.rb +10 -0
  36. data/examples/som/som_data.rb +155 -152
  37. data/examples/som/som_multi_node_example.rb +12 -13
  38. data/examples/som/som_single_example.rb +12 -15
  39. data/examples/transformer/decode_classifier_example.rb +68 -0
  40. data/examples/transformer/deterministic_example.rb +10 -0
  41. data/examples/transformer/seq2seq_example.rb +16 -0
  42. data/lib/ai4r/classifiers/classifier.rb +24 -16
  43. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  44. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  45. data/lib/ai4r/classifiers/ib1.rb +122 -32
  46. data/lib/ai4r/classifiers/id3.rb +527 -144
  47. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  48. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  49. data/lib/ai4r/classifiers/naive_bayes.rb +112 -48
  50. data/lib/ai4r/classifiers/one_r.rb +112 -44
  51. data/lib/ai4r/classifiers/prism.rb +167 -76
  52. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  53. data/lib/ai4r/classifiers/simple_linear_regression.rb +143 -0
  54. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  55. data/lib/ai4r/classifiers/votes.rb +57 -0
  56. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  57. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  58. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  59. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  60. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  61. data/lib/ai4r/clusterers/clusterer.rb +28 -24
  62. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  63. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  64. data/lib/ai4r/clusterers/diana.rb +75 -49
  65. data/lib/ai4r/clusterers/k_means.rb +309 -72
  66. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  67. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  68. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  69. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +63 -0
  70. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  71. data/lib/ai4r/data/data_set.rb +229 -100
  72. data/lib/ai4r/data/parameterizable.rb +31 -25
  73. data/lib/ai4r/data/proximity.rb +72 -50
  74. data/lib/ai4r/data/statistics.rb +46 -35
  75. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  76. data/lib/ai4r/experiment/split.rb +39 -0
  77. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  78. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  79. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  80. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  81. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  82. data/lib/ai4r/neural_network/backpropagation.rb +419 -143
  83. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  84. data/lib/ai4r/neural_network/transformer.rb +194 -0
  85. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  86. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  87. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  88. data/lib/ai4r/search/a_star.rb +76 -0
  89. data/lib/ai4r/search/bfs.rb +50 -0
  90. data/lib/ai4r/search/dfs.rb +50 -0
  91. data/lib/ai4r/search/mcts.rb +118 -0
  92. data/lib/ai4r/search.rb +12 -0
  93. data/lib/ai4r/som/distance_metrics.rb +29 -0
  94. data/lib/ai4r/som/layer.rb +28 -17
  95. data/lib/ai4r/som/node.rb +61 -32
  96. data/lib/ai4r/som/som.rb +158 -41
  97. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  98. data/lib/ai4r/version.rb +3 -0
  99. data/lib/ai4r.rb +58 -27
  100. metadata +117 -106
  101. data/README.rdoc +0 -44
  102. data/test/classifiers/hyperpipes_test.rb +0 -84
  103. data/test/classifiers/ib1_test.rb +0 -78
  104. data/test/classifiers/id3_test.rb +0 -208
  105. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  106. data/test/classifiers/naive_bayes_test.rb +0 -43
  107. data/test/classifiers/one_r_test.rb +0 -62
  108. data/test/classifiers/prism_test.rb +0 -85
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -100
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_test.rb +0 -53
  119. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  120. data/test/data/data_set_test.rb +0 -96
  121. data/test/data/proximity_test.rb +0 -81
  122. data/test/data/statistics_test.rb +0 -65
  123. data/test/experiment/classifier_evaluator_test.rb +0 -76
  124. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  125. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  126. data/test/neural_network/backpropagation_test.rb +0 -82
  127. data/test/neural_network/hopfield_test.rb +0 -72
  128. data/test/som/som_test.rb +0 -97
@@ -1,100 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/k_means'
12
-
13
- class KMeansTest < Test::Unit::TestCase
14
-
15
- include Ai4r::Clusterers
16
- include Ai4r::Data
17
-
18
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
19
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
20
-
21
- def test_build
22
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
23
- clusterer = KMeans.new.build(data_set, 4)
24
- #draw_map(clusterer)
25
- # Verify that all 4 clusters are created
26
- assert_equal 4, clusterer.clusters.length
27
- assert_equal 4, clusterer.centroids.length
28
- # The sum of the item counts of every cluster must be equal to
29
- # the number of data points
30
- total_length = 0
31
- clusterer.clusters.each do |cluster|
32
- total_length += cluster.data_items.length
33
- end
34
- assert_equal @@data.length, total_length
35
- # Data inside clusters must be the same as original data
36
- clusterer.clusters.each do |cluster|
37
- cluster.data_items.each do |data_item|
38
- assert @@data.include?(data_item)
39
- end
40
- end
41
- end
42
-
43
- def test_eval
44
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
45
- clusterer = KMeans.new.build(data_set, 4)
46
- item = [10,0]
47
- cluster_index = clusterer.eval(item)
48
- # Must return a valid cluster index [0-3]
49
- assert cluster_index >= 0 && cluster_index < 4
50
- # Distance to cluster centroid must be no greater than distance to any other
51
- # centroid
52
- min_distance = clusterer.distance(clusterer.centroids[cluster_index], item)
53
- clusterer.centroids.each do |centroid|
54
- assert clusterer.distance(centroid, item) >= min_distance
55
- end
56
- end
57
-
58
- def test_distance
59
- clusterer = KMeans.new
60
- # By default, distance returns the Euclidean distance to the power of 2 (squared Euclidean distance)
61
- assert_equal 2385, clusterer.distance(
62
- [1, 10, "Chicago", 2],
63
- [10, 10, "London", 50])
64
- # Test new distance definition
65
- manhattan_distance = lambda do |a, b|
66
- dist = 0.0
67
- a.each_index do |index|
68
- if a[index].is_a?(Numeric) && b[index].is_a?(Numeric)
69
- dist = dist + (a[index]-b[index]).abs
70
- end
71
- end
72
- dist
73
- end
74
- clusterer.set_parameters({:distance_function => manhattan_distance})
75
- assert_equal 57, clusterer.distance(
76
- [1, 10, "Chicago", 2],
77
- [10, 10, "London", 50])
78
- end
79
-
80
- def test_max_iterations
81
- data_set = DataSet.new(:data_items => @@data, :data_labels => ["X", "Y"])
82
- clusterer = KMeans.new.
83
- set_parameters({:max_iterations=>1}).
84
- build(data_set, 4)
85
- assert_equal 1, clusterer.iterations
86
- end
87
-
88
- private
89
- def draw_map(clusterer)
90
- map = Array.new(11) {Array.new(11, 0)}
91
- clusterer.clusters.each_index do |i|
92
- clusterer.clusters[i].data_items.each do |point|
93
- map[point.first][point.last]=(i+1)
94
- end
95
- end
96
- map.each { |row| puts row.inspect}
97
- end
98
-
99
- end
100
-
@@ -1,53 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/median_linkage'
12
-
13
- class Ai4r::Clusterers::MedianLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::MedianLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::MedianLinkage.send(:public,
40
- *Ai4r::Clusterers::MedianLinkage.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::MedianLinkage.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_equal 92.25, clusterer.linkage_distance(0,1,2)
49
- assert_equal 15.25, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- end
53
-
@@ -1,122 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/single_linkage'
12
-
13
- class Ai4r::Clusterers::SingleLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix
15
- end
16
-
17
- class SingleLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- SingleLinkage.send(:public, *SingleLinkage.protected_instance_methods)
40
- end
41
-
42
- def test_build
43
- clusterer = Ai4r::Clusterers::SingleLinkage.new
44
- clusterer.build(DataSet.new(:data_items => @@data), 4)
45
- #draw_map(clusterer)
46
- assert_equal 4, clusterer.clusters.length
47
- end
48
-
49
- def test_eval
50
- clusterer = Ai4r::Clusterers::SingleLinkage.new
51
- clusterer.build(DataSet.new(:data_items => @@data), 4)
52
- assert_equal 2, clusterer.eval([0,8])
53
- assert_equal 0, clusterer.eval([8,0])
54
- end
55
-
56
- def test_create_distance_matrix
57
- clusterer = Ai4r::Clusterers::SingleLinkage.new
58
- clusterer.create_distance_matrix(DataSet.new(:data_items => @@data))
59
- assert clusterer.distance_matrix
60
- clusterer.distance_matrix.each_with_index do |row, row_index|
61
- assert_equal row_index+1, row.length
62
- end
63
- assert_equal @@expected_distance_matrix, clusterer.distance_matrix
64
- end
65
-
66
- def test_read_distance_matrix
67
- clusterer = Ai4r::Clusterers::SingleLinkage.new
68
- clusterer.distance_matrix = @@expected_distance_matrix
69
- assert_equal 9.0, clusterer.read_distance_matrix(3, 2)
70
- assert_equal 9.0, clusterer.read_distance_matrix(2, 3)
71
- assert_equal 0, clusterer.read_distance_matrix(5, 5)
72
- end
73
-
74
- def test_linkage_distance
75
- clusterer = Ai4r::Clusterers::SingleLinkage.new
76
- clusterer.distance_matrix = @@expected_distance_matrix
77
- assert_equal 89, clusterer.linkage_distance(0,1,2)
78
- assert_equal 1, clusterer.linkage_distance(4,2,5)
79
- end
80
-
81
- def test_get_closest_clusters
82
- clusterer = Ai4r::Clusterers::SingleLinkage.new
83
- clusterer.distance_matrix = @@expected_distance_matrix
84
- assert_equal [1,0], clusterer.get_closest_clusters([[0,1], [3,4]])
85
- assert_equal [2,1], clusterer.get_closest_clusters([[3,4], [0,1], [5,6]])
86
- end
87
-
88
- def test_create_initial_index_clusters
89
- clusterer = Ai4r::Clusterers::SingleLinkage.new
90
- clusterer.data_set = DataSet.new :data_items => @@data
91
- index_clusters = clusterer.create_initial_index_clusters
92
- assert_equal @@data.length, index_clusters.length
93
- assert_equal 0, index_clusters.first.first
94
- assert_equal @@data.length-1, index_clusters.last.first
95
- end
96
-
97
- def test_merge_clusters
98
- clusterer = Ai4r::Clusterers::SingleLinkage.new
99
- clusters = clusterer.merge_clusters(1,2, [[1,2],[3,4],[5,6]])
100
- assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
101
- clusters = clusterer.merge_clusters(2,1, [[1,2],[3,4],[5,6]])
102
- assert_equal [[1,2], [3,4,5,6]], clusters.collect {|x| x.sort}
103
- end
104
-
105
- def test_distance_between_item_and_cluster
106
- clusterer = Ai4r::Clusterers::SingleLinkage.new
107
- assert_equal 8.0, clusterer.distance_between_item_and_cluster([1,2],
108
- DataSet.new(:data_items => [[3,4],[5,6]]))
109
- end
110
-
111
- private
112
- def draw_map(clusterer)
113
- map = Array.new(11) {Array.new(11, 0)}
114
- clusterer.clusters.each_index do |i|
115
- clusterer.clusters[i].data_items.each do |point|
116
- map[point.first][point.last]=(i+1)
117
- end
118
- end
119
- map.each { |row| puts row.inspect}
120
- end
121
-
122
- end
@@ -1,53 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/ward_linkage'
12
-
13
- class Ai4r::Clusterers::WardLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::WardLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::WardLinkage.send(:public,
40
- *Ai4r::Clusterers::WardLinkage.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::WardLinkage.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_in_delta 123.4166, clusterer.linkage_distance(0,1,2), 0.0001
49
- assert_equal 27.75, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- end
53
-
@@ -1,53 +0,0 @@
1
- # Author:: Sergio Fierens (implementation)
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/clusterers/weighted_average_linkage'
12
-
13
- class Ai4r::Clusterers::WeightedAverageLinkage
14
- attr_accessor :data_set, :number_of_clusters, :clusters, :distance_matrix, :index_clusters
15
- end
16
-
17
- class Ai4r::Clusterers::WeightedAverageLinkageTest < Test::Unit::TestCase
18
-
19
- include Ai4r::Clusterers
20
- include Ai4r::Data
21
-
22
- @@data = [ [10, 3], [3, 10], [2, 8], [2, 5], [3, 8], [10, 3],
23
- [1, 3], [8, 1], [2, 9], [2, 5], [3, 3], [9, 4]]
24
-
25
- @@expected_distance_matrix = [
26
- [98.0],
27
- [89.0, 5.0],
28
- [68.0, 26.0, 9.0],
29
- [74.0, 4.0, 1.0, 10.0],
30
- [0.0, 98.0, 89.0, 68.0, 74.0],
31
- [81.0, 53.0, 26.0, 5.0, 29.0, 81.0],
32
- [8.0, 106.0, 85.0, 52.0, 74.0, 8.0, 53.0],
33
- [100.0, 2.0, 1.0, 16.0, 2.0, 100.0, 37.0, 100.0],
34
- [68.0, 26.0, 9.0, 0.0, 10.0, 68.0, 5.0, 52.0, 16.0],
35
- [49.0, 49.0, 26.0, 5.0, 25.0, 49.0, 4.0, 29.0, 37.0, 5.0],
36
- [2.0, 72.0, 65.0, 50.0, 52.0, 2.0, 65.0, 10.0, 74.0, 50.0, 37.0]]
37
-
38
- def setup
39
- Ai4r::Clusterers::WeightedAverageLinkage.send(:public,
40
- *Ai4r::Clusterers::WeightedAverageLinkage.protected_instance_methods)
41
- end
42
-
43
- def test_linkage_distance
44
- clusterer = Ai4r::Clusterers::WeightedAverageLinkage.new
45
- clusterer.data_set = DataSet.new :data_items => @@data
46
- clusterer.index_clusters = clusterer.create_initial_index_clusters
47
- clusterer.distance_matrix = @@expected_distance_matrix
48
- assert_equal 93.5, clusterer.linkage_distance(0,1,2)
49
- assert_equal 37.5, clusterer.linkage_distance(4,2,5)
50
- end
51
-
52
- end
53
-
@@ -1,96 +0,0 @@
1
- # Author:: Sergio Fierens
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/data/data_set'
12
-
13
- module Ai4r
14
- module Data
15
- class DataSetTest < Test::Unit::TestCase
16
-
17
- def test_load_csv_with_labels
18
- set = DataSet.new.load_csv_with_labels("#{File.dirname(__FILE__)}/data_set.csv")
19
- assert_equal 120, set.data_items.length
20
- assert_equal ["zone", "rooms", "size", "price"], set.data_labels
21
- end
22
-
23
- def test_build_domains
24
- domains = [ Set.new(["New York", "Chicago"]),
25
- Set.new(["M", "F"]),
26
- [5, 85],
27
- Set.new(["Y", "N"]) ]
28
- data = [ [ "New York", "M", 23, "Y"],
29
- [ "Chicago", "M", 85, "Y"],
30
- [ "New York", "F", 32, "Y"],
31
- [ "New York", "M", 5, "N"],
32
- [ "Chicago", "M", 15, "N"],
33
- [ "Chicago", "F", 45, "Y"] ]
34
- labels = ["city", "gender", "age", "result"]
35
- set = DataSet.new({:data_items => data, :data_labels => labels})
36
- assert_equal domains, set.build_domains
37
- assert_equal domains[0], set.build_domain("city")
38
- assert_equal domains[1], set.build_domain(1)
39
- assert_equal domains[2], set.build_domain("age")
40
- assert_equal domains[3], set.build_domain("result")
41
- end
42
-
43
- def test_set_data_labels
44
- labels = ["A", "B"]
45
- set = DataSet.new.set_data_labels(labels)
46
- assert_equal labels, set.data_labels
47
- set = DataSet.new(:data_labels => labels)
48
- assert_equal labels, set.data_labels
49
- set = DataSet.new(:data_items => [[ 1, 2, 3]])
50
- assert_raise(ArgumentError) { set.set_data_labels(labels) }
51
- end
52
-
53
- def test_set_data_items
54
- items = [ [ "New York", "M", "Y"],
55
- [ "Chicago", "M", "Y"],
56
- [ "New York", "F", "Y"],
57
- [ "New York", "M", "N"],
58
- [ "Chicago", "M", "N"],
59
- [ "Chicago", "F", "Y"] ]
60
- set = DataSet.new.set_data_items(items)
61
- assert_equal items, set.data_items
62
- assert_equal 3, set.data_labels.length
63
- items << items.first[0..-2]
64
- assert_raise(ArgumentError) { set.set_data_items(items) }
65
- assert_raise(ArgumentError) { set.set_data_items(nil) }
66
- assert_raise(ArgumentError) { set.set_data_items([1]) }
67
- end
68
-
69
- def test_get_mean_or_mode
70
- items = [ [ "New York", 25, "Y"],
71
- [ "New York", 55, "Y"],
72
- [ "Chicago", 23, "Y"],
73
- [ "Boston", 23, "N"],
74
- [ "Chicago", 12, "N"],
75
- [ "Chicago", 87, "Y"] ]
76
- set = DataSet.new.set_data_items(items)
77
- assert_equal ["Chicago", 37.5, "Y"], set.get_mean_or_mode
78
- end
79
-
80
- def test_index
81
- items = [ [ "New York", 25, "Y"],
82
- [ "New York", 55, "Y"],
83
- [ "Chicago", 23, "Y"],
84
- [ "Boston", 23, "N"],
85
- [ "Chicago", 12, "N"],
86
- [ "Chicago", 87, "Y"] ]
87
- set = DataSet.new.set_data_items(items)
88
- assert_equal set.data_labels, set[0].data_labels
89
- assert_equal [[ "New York", 25, "Y"]], set[0].data_items
90
- assert_equal [[ "Chicago", 23, "Y"],[ "Boston", 23, "N"]], set[2..3].data_items
91
- assert_equal items[1..-1], set[1..-1].data_items
92
- end
93
-
94
- end
95
- end
96
- end
@@ -1,81 +0,0 @@
1
- # Author:: Sergio Fierens
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/data/proximity'
12
-
13
- module Ai4r
14
- module Data
15
- class ProximityTest < Test::Unit::TestCase
16
-
17
- @@delta = 0.0001
18
- @@data1 = [rand*10, rand*10, rand*-10]
19
- @@data2 = [rand*10, rand*-10, rand*10]
20
-
21
- def test_squared_euclidean_distance
22
- assert_equal 0, Proximity.squared_euclidean_distance(@@data1, @@data1)
23
- assert_equal Proximity.squared_euclidean_distance(@@data1, @@data2),
24
- Proximity.squared_euclidean_distance(@@data2, @@data1)
25
- assert 0 <= Proximity.squared_euclidean_distance(@@data1, @@data1)
26
- assert_equal 2, Proximity.squared_euclidean_distance([1,1], [2,2])
27
- assert_equal 9, Proximity.squared_euclidean_distance([3], [0])
28
- end
29
-
30
- def test_euclidean_distance
31
- assert_equal 0, Proximity.euclidean_distance(@@data1, @@data1)
32
- assert_equal Proximity.euclidean_distance(@@data1, @@data2),
33
- Proximity.euclidean_distance(@@data2, @@data1)
34
- assert 0 <= Proximity.euclidean_distance(@@data1, @@data1)
35
- assert_equal Math.sqrt(2), Proximity.euclidean_distance([1,1], [2,2])
36
- assert_equal 3, Proximity.euclidean_distance([3], [0])
37
- end
38
-
39
- def test_manhattan_distance
40
- assert_equal 0, Proximity.manhattan_distance(@@data1, @@data1)
41
- assert_equal Proximity.manhattan_distance(@@data1, @@data2),
42
- Proximity.manhattan_distance(@@data2, @@data1)
43
- assert 0 <= Proximity.manhattan_distance(@@data1, @@data1)
44
- assert_equal 2, Proximity.manhattan_distance([1,1], [2,2])
45
- assert_equal 9, Proximity.manhattan_distance([1,10], [2,2])
46
- assert_equal 3, Proximity.manhattan_distance([3], [0])
47
- end
48
-
49
- def test_sup_distance
50
- assert_equal 0, Proximity.sup_distance(@@data1, @@data1)
51
- assert_equal Proximity.sup_distance(@@data1, @@data2),
52
- Proximity.sup_distance(@@data2, @@data1)
53
- assert 0 <= Proximity.sup_distance(@@data1, @@data1)
54
- assert_equal 1, Proximity.sup_distance([1,1], [2,2])
55
- assert_equal 8, Proximity.sup_distance([1,10], [2,2])
56
- assert_equal 3, Proximity.sup_distance([3], [0])
57
- end
58
-
59
- def test_hamming_distance
60
- assert_equal 0, Proximity.hamming_distance(@@data1, @@data1)
61
- assert_equal Proximity.hamming_distance(@@data1, @@data2),
62
- Proximity.hamming_distance(@@data2, @@data1)
63
- assert 0 <= Proximity.hamming_distance(@@data1, @@data1)
64
- assert_equal 1, Proximity.hamming_distance([1,1], [0,1])
65
- assert_equal 2, Proximity.hamming_distance([1,10], [2,2])
66
- assert_equal 1, Proximity.hamming_distance([3], [0])
67
- end
68
-
69
- def test_simple_matching_distance
70
- assert_equal 0, Proximity.simple_matching_distance(@@data1, @@data1)
71
- assert_equal Proximity.simple_matching_distance(@@data1, @@data2),
72
- Proximity.simple_matching_distance(@@data2, @@data1)
73
- assert 0 <= Proximity.simple_matching_distance(@@data1, @@data1)
74
- assert_equal 1, Proximity.simple_matching_distance([1,2], [0,1])
75
- assert_equal 1.0/0, Proximity.simple_matching_distance([1,10], [2,2])
76
- assert_equal 1.0/0, Proximity.simple_matching_distance([3], [0])
77
- end
78
-
79
- end
80
- end
81
- end
@@ -1,65 +0,0 @@
1
- # Author:: Sergio Fierens
2
- # License:: MPL 1.1
3
- # Project:: ai4r
4
- # Url:: http://www.ai4r.org/
5
- #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
- # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
-
10
- require 'test/unit'
11
- require 'ai4r/data/statistics'
12
-
13
- module Ai4r
14
- module Data
15
- class StatisticsTest < Test::Unit::TestCase
16
-
17
- DELTA = 0.00001
18
-
19
- def setup
20
- @data_set = DataSet.new.
21
- parse_csv "#{File.dirname(__FILE__)}/statistics_data_set.csv"
22
- end
23
-
24
- def test_mean
25
- assert_equal 2, Statistics.mean(@data_set, 1)
26
- assert_equal 2.502, Statistics.mean(@data_set, 0)
27
- end
28
-
29
- def test_variance
30
- assert_equal 0, Statistics.variance(@data_set, 1)
31
- assert_in_delta 4.47302, Statistics.variance(@data_set, 0), DELTA
32
- end
33
-
34
- def test_standard_deviation
35
- assert_equal 0, Statistics.standard_deviation(@data_set, 1)
36
- assert_in_delta 2.11495, Statistics.standard_deviation(@data_set, 0), DELTA
37
- end
38
-
39
- def test_mode
40
- items = [ [ "New York", 25, "Y"],
41
- [ "New York", 55, "Y"],
42
- [ "Chicago", 23, "Y"],
43
- [ "Boston", 23, "N"],
44
- [ "Chicago", 12, "N"],
45
- [ "Chicago", 87, "Y"] ]
46
- set = DataSet.new.set_data_items(items)
47
- assert_equal "Chicago", Statistics.mode(set,0)
48
- assert_equal 23, Statistics.mode(set,1)
49
- assert_equal "Y", Statistics.mode(set,2)
50
- end
51
-
52
- def test_min
53
- assert_equal 2, Statistics.min(@data_set, 1)
54
- assert_equal 1, Statistics.min(@data_set, 0)
55
- end
56
-
57
- def test_max
58
- assert_equal 2, Statistics.max(@data_set, 1)
59
- assert_equal 6, Statistics.max(@data_set, 0)
60
- assert_equal 3.7, Statistics.max(@data_set, 2)
61
- end
62
-
63
- end
64
- end
65
- end