ai4r 1.13 → 2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (129)
  1. checksums.yaml +7 -0
  2. data/README.md +174 -0
  3. data/examples/classifiers/hyperpipes_data.csv +14 -0
  4. data/examples/classifiers/hyperpipes_example.rb +22 -0
  5. data/examples/classifiers/ib1_example.rb +12 -0
  6. data/examples/classifiers/id3_example.rb +15 -10
  7. data/examples/classifiers/id3_graphviz_example.rb +17 -0
  8. data/examples/classifiers/logistic_regression_example.rb +11 -0
  9. data/examples/classifiers/naive_bayes_attributes_example.rb +13 -0
  10. data/examples/classifiers/naive_bayes_example.rb +12 -13
  11. data/examples/classifiers/one_r_example.rb +27 -0
  12. data/examples/classifiers/parameter_tutorial.rb +29 -0
  13. data/examples/classifiers/prism_nominal_example.rb +15 -0
  14. data/examples/classifiers/prism_numeric_example.rb +21 -0
  15. data/examples/classifiers/simple_linear_regression_example.rb +14 -11
  16. data/examples/classifiers/zero_and_one_r_example.rb +34 -0
  17. data/examples/classifiers/zero_one_r_data.csv +8 -0
  18. data/examples/clusterers/clusterer_example.rb +40 -34
  19. data/examples/clusterers/dbscan_example.rb +17 -0
  20. data/examples/clusterers/dendrogram_example.rb +17 -0
  21. data/examples/clusterers/hierarchical_dendrogram_example.rb +20 -0
  22. data/examples/clusterers/kmeans_custom_example.rb +26 -0
  23. data/examples/genetic_algorithm/bitstring_example.rb +41 -0
  24. data/examples/genetic_algorithm/genetic_algorithm_example.rb +26 -18
  25. data/examples/genetic_algorithm/kmeans_seed_tuning.rb +45 -0
  26. data/examples/neural_network/backpropagation_example.rb +48 -48
  27. data/examples/neural_network/hopfield_example.rb +45 -0
  28. data/examples/neural_network/patterns_with_base_noise.rb +39 -39
  29. data/examples/neural_network/patterns_with_noise.rb +41 -39
  30. data/examples/neural_network/train_epochs_callback.rb +25 -0
  31. data/examples/neural_network/training_patterns.rb +39 -39
  32. data/examples/neural_network/transformer_text_classification.rb +78 -0
  33. data/examples/neural_network/xor_example.rb +23 -22
  34. data/examples/reinforcement/q_learning_example.rb +10 -0
  35. data/examples/som/som_data.rb +155 -152
  36. data/examples/som/som_multi_node_example.rb +12 -13
  37. data/examples/som/som_single_example.rb +12 -15
  38. data/examples/transformer/decode_classifier_example.rb +68 -0
  39. data/examples/transformer/deterministic_example.rb +10 -0
  40. data/examples/transformer/seq2seq_example.rb +16 -0
  41. data/lib/ai4r/classifiers/classifier.rb +24 -16
  42. data/lib/ai4r/classifiers/gradient_boosting.rb +64 -0
  43. data/lib/ai4r/classifiers/hyperpipes.rb +119 -43
  44. data/lib/ai4r/classifiers/ib1.rb +122 -32
  45. data/lib/ai4r/classifiers/id3.rb +524 -145
  46. data/lib/ai4r/classifiers/logistic_regression.rb +96 -0
  47. data/lib/ai4r/classifiers/multilayer_perceptron.rb +75 -59
  48. data/lib/ai4r/classifiers/naive_bayes.rb +95 -34
  49. data/lib/ai4r/classifiers/one_r.rb +112 -44
  50. data/lib/ai4r/classifiers/prism.rb +167 -76
  51. data/lib/ai4r/classifiers/random_forest.rb +72 -0
  52. data/lib/ai4r/classifiers/simple_linear_regression.rb +83 -58
  53. data/lib/ai4r/classifiers/support_vector_machine.rb +91 -0
  54. data/lib/ai4r/classifiers/votes.rb +57 -0
  55. data/lib/ai4r/classifiers/zero_r.rb +71 -30
  56. data/lib/ai4r/clusterers/average_linkage.rb +46 -27
  57. data/lib/ai4r/clusterers/bisecting_k_means.rb +50 -44
  58. data/lib/ai4r/clusterers/centroid_linkage.rb +52 -36
  59. data/lib/ai4r/clusterers/cluster_tree.rb +50 -0
  60. data/lib/ai4r/clusterers/clusterer.rb +29 -14
  61. data/lib/ai4r/clusterers/complete_linkage.rb +42 -31
  62. data/lib/ai4r/clusterers/dbscan.rb +134 -0
  63. data/lib/ai4r/clusterers/diana.rb +75 -49
  64. data/lib/ai4r/clusterers/k_means.rb +270 -135
  65. data/lib/ai4r/clusterers/median_linkage.rb +49 -33
  66. data/lib/ai4r/clusterers/single_linkage.rb +196 -88
  67. data/lib/ai4r/clusterers/ward_linkage.rb +51 -35
  68. data/lib/ai4r/clusterers/ward_linkage_hierarchical.rb +25 -10
  69. data/lib/ai4r/clusterers/weighted_average_linkage.rb +48 -32
  70. data/lib/ai4r/data/data_set.rb +223 -103
  71. data/lib/ai4r/data/parameterizable.rb +31 -25
  72. data/lib/ai4r/data/proximity.rb +62 -62
  73. data/lib/ai4r/data/statistics.rb +46 -35
  74. data/lib/ai4r/experiment/classifier_evaluator.rb +84 -32
  75. data/lib/ai4r/experiment/split.rb +39 -0
  76. data/lib/ai4r/genetic_algorithm/chromosome_base.rb +43 -0
  77. data/lib/ai4r/genetic_algorithm/genetic_algorithm.rb +92 -170
  78. data/lib/ai4r/genetic_algorithm/tsp_chromosome.rb +83 -0
  79. data/lib/ai4r/hmm/hidden_markov_model.rb +134 -0
  80. data/lib/ai4r/neural_network/activation_functions.rb +37 -0
  81. data/lib/ai4r/neural_network/backpropagation.rb +399 -134
  82. data/lib/ai4r/neural_network/hopfield.rb +175 -58
  83. data/lib/ai4r/neural_network/transformer.rb +194 -0
  84. data/lib/ai4r/neural_network/weight_initializations.rb +40 -0
  85. data/lib/ai4r/reinforcement/policy_iteration.rb +66 -0
  86. data/lib/ai4r/reinforcement/q_learning.rb +51 -0
  87. data/lib/ai4r/search/a_star.rb +76 -0
  88. data/lib/ai4r/search/bfs.rb +50 -0
  89. data/lib/ai4r/search/dfs.rb +50 -0
  90. data/lib/ai4r/search/mcts.rb +118 -0
  91. data/lib/ai4r/search.rb +12 -0
  92. data/lib/ai4r/som/distance_metrics.rb +29 -0
  93. data/lib/ai4r/som/layer.rb +28 -17
  94. data/lib/ai4r/som/node.rb +61 -32
  95. data/lib/ai4r/som/som.rb +158 -41
  96. data/lib/ai4r/som/two_phase_layer.rb +21 -25
  97. data/lib/ai4r/version.rb +3 -0
  98. data/lib/ai4r.rb +57 -28
  99. metadata +79 -109
  100. data/README.rdoc +0 -39
  101. data/test/classifiers/hyperpipes_test.rb +0 -84
  102. data/test/classifiers/ib1_test.rb +0 -78
  103. data/test/classifiers/id3_test.rb +0 -220
  104. data/test/classifiers/multilayer_perceptron_test.rb +0 -79
  105. data/test/classifiers/naive_bayes_test.rb +0 -43
  106. data/test/classifiers/one_r_test.rb +0 -62
  107. data/test/classifiers/prism_test.rb +0 -85
  108. data/test/classifiers/simple_linear_regression_test.rb +0 -37
  109. data/test/classifiers/zero_r_test.rb +0 -50
  110. data/test/clusterers/average_linkage_test.rb +0 -51
  111. data/test/clusterers/bisecting_k_means_test.rb +0 -66
  112. data/test/clusterers/centroid_linkage_test.rb +0 -53
  113. data/test/clusterers/complete_linkage_test.rb +0 -57
  114. data/test/clusterers/diana_test.rb +0 -69
  115. data/test/clusterers/k_means_test.rb +0 -167
  116. data/test/clusterers/median_linkage_test.rb +0 -53
  117. data/test/clusterers/single_linkage_test.rb +0 -122
  118. data/test/clusterers/ward_linkage_hierarchical_test.rb +0 -81
  119. data/test/clusterers/ward_linkage_test.rb +0 -53
  120. data/test/clusterers/weighted_average_linkage_test.rb +0 -53
  121. data/test/data/data_set_test.rb +0 -104
  122. data/test/data/proximity_test.rb +0 -87
  123. data/test/data/statistics_test.rb +0 -65
  124. data/test/experiment/classifier_evaluator_test.rb +0 -76
  125. data/test/genetic_algorithm/chromosome_test.rb +0 -57
  126. data/test/genetic_algorithm/genetic_algorithm_test.rb +0 -81
  127. data/test/neural_network/backpropagation_test.rb +0 -82
  128. data/test/neural_network/hopfield_test.rb +0 -72
  129. data/test/som/som_test.rb +0 -97
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA256:
3
+ metadata.gz: 8d01465e193f7bda084a9cd76bc10e1dd4422b7c384eb0fe924d7a5f705123db
4
+ data.tar.gz: 64ffbe98250389997fbdc9fa1a93c4b79154c1b9d3cb4a5a6577c086ee392214
5
+ SHA512:
6
+ metadata.gz: 2bf0626ab8227c93fbc8a784717806ec3c3b72a4e6d2f375f232be8ad0be9e5f1ee70c6dfc2be02cd2c6dd40311cb5ac1d840f3acc92300cc208c8ccfc839425
7
+ data.tar.gz: 5140c67b5213fa3ed1a9017c1d4dd09c05cba0786d27bc8b97c32e7d72d8638347f80ae22536ecacd4efb10742a88c2e7e263878baf73edcb064749564a1d8a0
data/README.md ADDED
@@ -0,0 +1,174 @@
1
+ # AI4R — Artificial Intelligence for Ruby
2
+
3
+ 🎓 **Welcome to AI4R**
4
+
5
+ Current version: 2.0
6
+
7
+ Your Lightweight Lab for AI and Machine Learning in Ruby.
8
+
9
+ AI4R isn’t just another machine learning library. It’s a learning playground. No black boxes and no bulky dependencies—just clean, readable Ruby implementations of core AI algorithms so you can explore, modify and really understand how they work.
10
+
11
+ ## Installation
12
+
13
+ AI4R is distributed as a gem and requires Ruby 3.2 or later.
14
+
15
+ Install the gem using RubyGems:
16
+
17
+ ```bash
18
+ gem install ai4r
19
+ ```
20
+
21
+ Add the library to your code:
22
+
23
+ ```ruby
24
+ require 'ai4r'
25
+ ```
26
+
27
+ ## Where to start?
28
+ - [Beginner Track](docs/learning_path_1_beginner.md) – Build core intuition for AI—step by step, in Ruby.
29
+ - [Intermediate Track](docs/learning_path_2_intermediate.md) – From "I can run a model" to "I can tune, extend, and build smart stuff that actually works."
30
+ - [Advanced Track](docs/learning_path_3_advanced.md) – Time to stop following recipes and start writing your own.
31
+
32
+
33
+ ## 🧭 What’s Inside?
34
+
35
+ A quick map to AI4R’s built‑in toolkits, grouped by type. Each folder comes with examples and benchmark runners so you can dive right in.
36
+
37
+ ### 🤖 Transformers – *Play with the Building Blocks of Modern LLMs*
38
+
39
+ > *“Meet your future coworker / overlord.”*
40
+
41
+ This is not a full GPT—but it is the core logic, stripped down and readable.
42
+ AI4R ships with a bite-sized, dependency-free Transformer implementation that supports:
43
+
44
+ - **Encoder-only** mode (like BERT)
45
+ - **Decoder-only** mode (like GPT)
46
+ - **Seq2Seq** mode (like T5)
47
+
48
+ 📂 Code: `lib/ai4r/neural_network/transformer.rb`
49
+ Docs: [Transformer guide](docs/transformer.md)
50
+
51
+ 💡 **Try this**:
52
+ Load up the transformer and walk through a simple forward pass.
53
+ Everything from attention weights to layer normalization is short enough to read and understand in one go.
54
+
55
+ ### 🧠 Classifiers – Make Predictions
56
+ "What’s the most likely outcome?"
57
+
58
+ You’ll find in [lib/ai4r/classifiers/](lib/ai4r/classifiers/):
59
+
60
+ - `ZeroR`, `OneR` – the simplest baselines
61
+ - `LogisticRegression`, `SimpleLinearRegression`
62
+ - `SupportVectorMachine`
63
+ - `RandomForest`, `GradientBoosting`
64
+ - `MultilayerPerceptron`
65
+
66
+ Docs: [logistic_regression.md](docs/logistic_regression.md), [random_forest.md](docs/random_forest.md)
67
+
68
+ Try this: run `bench/classifier/compare_all.rb` to benchmark classifiers on real datasets.
69
+
70
+ ### 🔍 Clusterers – Find Hidden Patterns
71
+ "What belongs together?"
72
+
73
+ Included in [lib/ai4r/clusterers/](lib/ai4r/clusterers/):
74
+
75
+ - `KMeans`
76
+ - `DBSCAN`
77
+ - `Hierarchical` clustering variants
78
+
79
+ Docs: [kmeans.md](docs/kmeans.md), [dbscan.md](docs/dbscan.md)
80
+
81
+ Try this: cluster the Iris dataset using both KMeans and DBSCAN.
82
+
83
+ ### 🧬 Neural Networks – Learn From Data
84
+ "What if we build a brain?"
85
+
86
+ - Backpropagation – classic feedforward network
87
+ - Hopfield – associative memory model
88
+ - Transformer – a tiny GPT‑style block (encoder, decoder, seq2seq)
89
+
90
+ Code: `lib/ai4r/neural_network/`
91
+
92
+ Try this: open `transformer.rb` and trace each step—it’s short enough to grok in one sitting.
93
+
94
+ ### 🔎 Search Algorithms – Explore Possibility Spaces
95
+ "What’s the best path?"
96
+
97
+ You’ll find in [lib/ai4r/search/](lib/ai4r/search/):
98
+
99
+ - `BreadthFirst`, `DepthFirst`, `IterativeDeepening`
100
+ - `A*`
101
+ - `MonteCarloTreeSearch`
102
+
103
+ Docs: [search_algorithms.md](docs/search_algorithms.md)
104
+
105
+ Try this: run A* and DFS on a maze and time the difference.
106
+
107
+ ### 🧪 Genetic Algorithms – Evolve a Solution
108
+ "Let’s mutate our way to a better answer."
109
+
110
+ - Generic GA framework
111
+ - A Traveling Salesman Problem (TSP) chromosome
112
+
113
+ Code: `lib/ai4r/genetic_algorithm/`
114
+
115
+ Try this: tweak the mutation rate in the TSP example.
116
+
117
+ ### 🧭 Reinforcement Learning – Learn by Doing
118
+ "Reward me, and I’ll improve."
119
+
120
+ - Q‑Learning
121
+ - Policy Iteration
122
+
123
+ Code: `lib/ai4r/reinforcement/`
124
+
125
+ Docs: `docs/reinforcement_learning.md`
126
+
127
+ Try this: run a grid‑world training loop and watch the agent build its own policy.
128
+
129
+ ### 🕵️ Hidden Markov Models – Guess What’s Hidden
130
+ "You can’t see the states—but you can infer them."
131
+
132
+ Code: `lib/ai4r/hmm/hidden_markov_model.rb`
133
+
134
+ Docs: `docs/hmm.md`
135
+
136
+ Try this: model a weather prediction problem with hidden states and visible activities.
137
+
138
+ ### 🧠 Self‑Organizing Maps – Compress Dimensions
139
+ "Can we project complex data onto a simpler map?"
140
+
141
+ - Kohonen‑style SOM
142
+
143
+ Code: `lib/ai4r/som/`
144
+
145
+ Try this: reduce high‑dimensional vectors into a 2D neuron grid and color it based on class.
146
+
147
+ ## 🧪 Benchmarks: Experiment & Compare
148
+
149
+ Each algorithm family has a benchmark runner:
150
+
151
+ - `bench/classifier/`
152
+ - `bench/clusterer/`
153
+ - `bench/search/`
154
+
155
+ Shared tools in `bench/common/` make it easy to run head‑to‑head comparisons, track runtime, accuracy and more, and output clean reports.
156
+
157
+ Docs: `docs/benches_overview.md`
158
+
159
+ Try this: run `bench/search/astar_vs_dfs.rb` and explain why A* usually wins.
160
+
161
+
162
+ ```bash
163
+ git clone https://github.com/SergioFierens/ai4r
164
+ cd ai4r
165
+ bundle install
166
+ ruby bench/classifier/compare_all.rb
167
+ ```
168
+
169
+
170
+
171
+ ## 💬 Feedback?
172
+
173
+ This library is maintained for the joy of it (and perhaps a misplaced sense of duty to Ruby). You can do whatever you want with it—it’s unlicensed. If you build something cool or just find it useful, drop a note in the [project's comments](https://github.com/SergioFierens/ai4r/discussions).
174
+
@@ -0,0 +1,14 @@
1
+ city,age,gender,marketing_target
2
+ New York,25,M,Y
3
+ New York,23,M,Y
4
+ New York,18,M,Y
5
+ Chicago,43,M,Y
6
+ New York,34,F,N
7
+ Chicago,33,F,Y
8
+ New York,31,F,N
9
+ Chicago,55,M,N
10
+ New York,58,F,N
11
+ New York,59,M,N
12
+ Chicago,71,M,N
13
+ New York,60,F,N
14
+ Chicago,85,F,Y
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/hyperpipes'
4
+ require_relative '../../lib/ai4r/data/data_set'
5
+
6
+ # Use fully qualified class names instead of including modules.
7
+
8
+ # Load the training data
9
+ file = "#{File.dirname(__FILE__)}/hyperpipes_data.csv"
10
+ data = Ai4r::Data::DataSet.new.parse_csv_with_labels(file)
11
+
12
+ # Build the classifier using custom parameters
13
+ classifier = Ai4r::Classifiers::Hyperpipes.new.set_parameters(tie_break: :random).build(data)
14
+
15
+ # Inspect the generated pipes
16
+ pipes_summary = classifier.pipes
17
+ puts 'Pipes summary:'
18
+ pp pipes_summary
19
+
20
+ # Classify new instances
21
+ puts "Prediction for ['Chicago', 85, 'F']: #{classifier.eval(['Chicago', 85, 'F'])}"
22
+ puts "Prediction for ['New York', 25, 'M']: #{classifier.eval(['New York', 25, 'M'])}"
@@ -0,0 +1,12 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/ib1'
4
+ require_relative '../../lib/ai4r/data/data_set'
5
+
6
+ file = "#{File.dirname(__FILE__)}/hyperpipes_data.csv"
7
+ data = Ai4r::Data::DataSet.new.parse_csv_with_labels(file)
8
+
9
+ classifier = Ai4r::Classifiers::IB1.new.build(data)
10
+
11
+ sample = ['Chicago', 55, 'M']
12
+ puts "Prediction for #{sample.inspect}: #{classifier.eval(sample)}"
@@ -1,13 +1,15 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
6
  # Url:: http://www.ai4r.org/
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
- require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
12
+ require_relative '../../lib/ai4r/classifiers/id3'
11
13
 
12
14
  # Load data from data_set.csv
13
15
  data_filename = "#{File.dirname(__FILE__)}/id3_data.csv"
@@ -17,13 +19,16 @@ data_set = Ai4r::Data::DataSet.new.load_csv_with_labels data_filename
17
19
  id3 = Ai4r::Classifiers::ID3.new.build(data_set)
18
20
 
19
21
  # Show rules
20
- puts "Discovered rules are:"
22
+ puts 'Discovered rules are:'
21
23
  puts id3.get_rules
22
- puts
24
+ puts
23
25
 
24
26
  # Try to predict some values
25
- puts "Prediction samples:"
26
- puts "['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => " + id3.eval(['Moron Sur (GBA)','4','[86 m2 - 100 m2]'])
27
- puts "['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => " + id3.eval(['Moron Sur (GBA)','3','[101 m2 - 125 m2]'])
28
- puts "['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => " + id3.eval(['Recoleta (CABA)','3','[86 m2 - 100 m2]',])
29
- puts "['Tigre (GBA)','3','[71 m2 - 85 m2]'] => " + id3.eval(['Tigre (GBA)','3','[71 m2 - 85 m2]',])
27
+ puts 'Prediction samples:'
28
+ puts "['Moron Sur (GBA)','4','[86 m2 - 100 m2]'] => #{id3.eval(['Moron Sur (GBA)', '4',
29
+ '[86 m2 - 100 m2]'])}"
30
+ puts "['Moron Sur (GBA)','3','[101 m2 - 125 m2]'] => #{id3.eval(['Moron Sur (GBA)', '3',
31
+ '[101 m2 - 125 m2]'])}"
32
+ puts "['Recoleta (CABA)','3','[86 m2 - 100 m2]'] => #{id3.eval(['Recoleta (CABA)', '3',
33
+ '[86 m2 - 100 m2]'])}"
34
+ puts "['Tigre (GBA)','3','[71 m2 - 85 m2]'] => #{id3.eval(['Tigre (GBA)', '3', '[71 m2 - 85 m2]'])}"
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/id3'
4
+
5
+ # Load the training data
6
+ file = "#{File.dirname(__FILE__)}/id3_data.csv"
7
+ data_set = Ai4r::Data::DataSet.new.load_csv_with_labels(file)
8
+
9
+ # Build the tree
10
+ id3 = Ai4r::Classifiers::ID3.new.build(data_set)
11
+
12
+ # Export DOT representation
13
+ File.open('id3_tree.dot', 'w') { |f| f.puts id3.to_graphviz }
14
+ puts 'Decision tree saved to id3_tree.dot'
15
+
16
+ # You can also inspect the tree as nested hashes
17
+ p id3.to_h
@@ -0,0 +1,11 @@
1
+ require 'ai4r/classifiers/logistic_regression'
2
+ require 'ai4r/data/data_set'
3
+
4
+ items = [[0, 0, 0], [0, 1, 1], [1, 0, 1], [1, 1, 1]]
5
+ labels = %w[x1 x2 class]
6
+ set = Ai4r::Data::DataSet.new(data_items: items, data_labels: labels)
7
+
8
+ reg = Ai4r::Classifiers::LogisticRegression.new
9
+ reg.set_parameters(learning_rate: 0.5, iterations: 2000).build(set)
10
+
11
+ puts reg.eval([1, 0])
@@ -0,0 +1,13 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/naive_bayes'
4
+ require_relative '../../lib/ai4r/data/data_set'
5
+
6
+ file = "#{File.dirname(__FILE__)}/naive_bayes_data.csv"
7
+ set = Ai4r::Data::DataSet.new.load_csv_with_labels(file)
8
+
9
+ bayes = Ai4r::Classifiers::NaiveBayes.new.set_parameters(m: 3).build(set)
10
+
11
+ puts bayes.class_prob.inspect
12
+ puts bayes.pcc.inspect
13
+ puts bayes.pcp.inspect
@@ -1,16 +1,15 @@
1
- require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/naive_bayes'
2
- require File.dirname(__FILE__) + '/../../lib/ai4r/data/data_set'
3
- require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/id3'
4
- require 'benchmark'
1
+ # frozen_string_literal: true
5
2
 
6
- include Ai4r::Classifiers
7
- include Ai4r::Data
3
+ require_relative '../../lib/ai4r/classifiers/naive_bayes'
4
+ require_relative '../../lib/ai4r/data/data_set'
5
+ require_relative '../../lib/ai4r/classifiers/id3'
6
+ require 'benchmark'
8
7
 
9
- data_set = DataSet.new
10
- data_set.load_csv_with_labels File.dirname(__FILE__) + "/naive_bayes_data.csv"
8
+ data_set = Ai4r::Data::DataSet.new
9
+ data_set.load_csv_with_labels "#{File.dirname(__FILE__)}/naive_bayes_data.csv"
11
10
 
12
- b = NaiveBayes.new.
13
- set_parameters({:m=>3}).
14
- build data_set
15
- p b.eval(["Red", "SUV", "Domestic"])
16
- p b.get_probability_map(["Red", "SUV", "Domestic"])
11
+ b = Ai4r::Classifiers::NaiveBayes.new
12
+ .set_parameters({ m: 3 })
13
+ .build data_set
14
+ p b.eval(%w[Red SUV Domestic])
15
+ p b.get_probability_map(%w[Red SUV Domestic])
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Author:: Example contributor
4
+ # License:: MPL 1.1
5
+ # Project:: ai4r
6
+ #
7
+ # Simple example showing how to use OneR with numeric attributes.
8
+
9
+ require_relative '../../lib/ai4r/classifiers/one_r'
10
+ require_relative '../../lib/ai4r/data/data_set'
11
+
12
+ items = [
13
+ ['New York', 20, 'M', 'Y'],
14
+ ['Chicago', 25, 'M', 'Y'],
15
+ ['New York', 28, 'M', 'Y'],
16
+ ['New York', 35, 'F', 'N'],
17
+ ['Chicago', 40, 'F', 'Y'],
18
+ ['New York', 45, 'F', 'N'],
19
+ ['Chicago', 55, 'M', 'N']
20
+ ]
21
+ labels = %w[city age gender marketing_target]
22
+
23
+ ds = Ai4r::Data::DataSet.new(data_items: items, data_labels: labels)
24
+
25
+ classifier = Ai4r::Classifiers::OneR.new.build(ds)
26
+ puts classifier.get_rules
27
+ puts classifier.eval(['Chicago', 55, 'M'])
@@ -0,0 +1,29 @@
1
+ # frozen_string_literal: true
2
+
3
+ # This small tutorial shows how changing parameters of ZeroR and OneR
4
+ # affects the generated rules. Run it with `ruby parameter_tutorial.rb`.
5
+
6
+ require_relative '../../lib/ai4r/classifiers/zero_r'
7
+ require_relative '../../lib/ai4r/classifiers/one_r'
8
+ require_relative '../../lib/ai4r/data/data_set'
9
+
10
+ # Load the demonstration data set
11
+ file = "#{File.dirname(__FILE__)}/zero_one_r_data.csv"
12
+ set = Ai4r::Data::DataSet.new.load_csv_with_labels file
13
+
14
+ puts '== ZeroR with default parameters =='
15
+ zero_default = Ai4r::Classifiers::ZeroR.new.build(set)
16
+ puts zero_default.get_rules
17
+
18
+ puts "\n== ZeroR with :tie_break => :random =="
19
+ zero_rand = Ai4r::Classifiers::ZeroR.new.set_parameters(tie_break: :random).build(set)
20
+ puts zero_rand.get_rules
21
+
22
+ puts "\n== OneR default behaviour =="
23
+ one_default = Ai4r::Classifiers::OneR.new.build(set)
24
+ puts one_default.get_rules
25
+
26
+ puts "\n== OneR forcing first attribute and :last tie break =="
27
+ one_custom = Ai4r::Classifiers::OneR.new.set_parameters(selected_attribute: 0,
28
+ tie_break: :last).build(set)
29
+ puts one_custom.get_rules
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/prism'
4
+ require_relative '../../lib/ai4r/data/data_set'
5
+
6
+ data_file = "#{File.dirname(__FILE__)}/zero_one_r_data.csv"
7
+ data = Ai4r::Data::DataSet.new.load_csv_with_labels(data_file)
8
+
9
+ classifier = Ai4r::Classifiers::Prism.new.build(data)
10
+
11
+ puts 'Discovered rules:'
12
+ puts classifier.get_rules
13
+ puts
14
+ sample = data.data_items.first[0...-1]
15
+ puts "Prediction for #{sample.inspect}: #{classifier.eval(sample)}"
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/prism'
4
+ require_relative '../../lib/ai4r/data/data_set'
5
+
6
+ items = [
7
+ [20, 70, 'N'],
8
+ [25, 80, 'N'],
9
+ [30, 60, 'Y'],
10
+ [35, 65, 'Y']
11
+ ]
12
+ labels = %w[temperature humidity play]
13
+
14
+ data = Ai4r::Data::DataSet.new(data_items: items, data_labels: labels)
15
+
16
+ classifier = Ai4r::Classifiers::Prism.new.build(data)
17
+
18
+ puts 'Rules:'
19
+ puts classifier.get_rules
20
+ puts
21
+ puts "Prediction for [30, 70]: #{classifier.eval([30, 70])}"
@@ -1,15 +1,18 @@
1
- require File.dirname(__FILE__) + '/../../lib/ai4r/classifiers/simple_linear_regression'
2
- require File.dirname(__FILE__) + '/../../lib/ai4r/data/data_set'
3
- require 'benchmark'
1
+ # frozen_string_literal: true
4
2
 
5
- include Ai4r::Classifiers
6
- include Ai4r::Data
3
+ require_relative '../../lib/ai4r/classifiers/simple_linear_regression'
4
+ require_relative '../../lib/ai4r/data/data_set'
7
5
 
8
- data_set = DataSet.new
9
- data_set.parse_csv_with_labels File.dirname(__FILE__) + "/simple_linear_regression_example.csv"
6
+ # Load training data
7
+ file = "#{File.dirname(__FILE__)}/simple_linear_regression_example.csv"
8
+ data_set = Ai4r::Data::DataSet.new.parse_csv_with_labels file
10
9
 
11
- r = SimpleLinearRegression.new.build data_set
12
- p r.eval([-1,95,109.1,188.8,68.9,55.5,3062,141,3.78,3.15,9.5,114,5400,19,25])
10
+ # Build the regression model and inspect its coefficients
11
+ r = Ai4r::Classifiers::SimpleLinearRegression.new.build data_set
12
+ puts "Selected attribute: #{r.attribute}"
13
+ puts "Slope: #{r.slope}, Intercept: #{r.intercept}"
13
14
 
14
- # => 11662.949367088606
15
- #Actual price 22625
15
+ # Predict a new sample
16
+ predicted = r.eval([-1, 95, 109.1, 188.8, 68.9, 55.5, 3062, 141, 3.78, 3.15, 9.5, 114, 5400, 19,
17
+ 25])
18
+ puts "Predicted value: #{predicted}"
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../../lib/ai4r/classifiers/zero_r'
4
+ require_relative '../../lib/ai4r/classifiers/one_r'
5
+ require_relative '../../lib/ai4r/data/data_set'
6
+
7
+ # Load tutorial data
8
+ data_file = "#{File.dirname(__FILE__)}/zero_one_r_data.csv"
9
+ data = Ai4r::Data::DataSet.new.load_csv_with_labels data_file
10
+
11
+ puts "Data labels: #{data.data_labels.inspect}"
12
+ puts
13
+
14
+ # Build a default ZeroR classifier
15
+ zero_default = Ai4r::Classifiers::ZeroR.new.build(data)
16
+ puts "ZeroR default prediction: #{zero_default.eval(data.data_items.first)}"
17
+ puts "Generated rule: #{zero_default.get_rules}"
18
+
19
+ # Build ZeroR with custom tie strategy
20
+ zero_random = Ai4r::Classifiers::ZeroR.new.set_parameters(tie_break: :random).build(data)
21
+ puts "ZeroR random tie strategy prediction: #{zero_random.eval(data.data_items.first)}"
22
+
23
+ puts
24
+
25
+ # Build a default OneR classifier
26
+ one_default = Ai4r::Classifiers::OneR.new.build(data)
27
+ puts "OneR chose attribute index #{one_default.rule[:attr_index]}"
28
+ puts "OneR rules:\n#{one_default.get_rules}"
29
+
30
+ # Build OneR selecting the first attribute and using :last tie break
31
+ one_custom = Ai4r::Classifiers::OneR.new.set_parameters(selected_attribute: 0,
32
+ tie_break: :last).build(data)
33
+ puts "OneR forced attribute: #{one_custom.rule[:attr_index]}"
34
+ puts "Custom rules:\n#{one_custom.get_rules}"
@@ -0,0 +1,8 @@
1
+ city,age_range,gender,marketing_target
2
+ New York,[30-50),F,N
3
+ New York,<30,M,Y
4
+ Chicago,<30,M,Y
5
+ New York,<30,M,Y
6
+ Chicago,[30-50),F,Y
7
+ New York,[30-50),F,N
8
+ Chicago,[50-80],M,N
@@ -1,56 +1,62 @@
1
+ # frozen_string_literal: true
2
+
1
3
  # Author:: Sergio Fierens (implementation)
2
4
  # License:: MPL 1.1
3
5
  # Project:: ai4r
4
6
  # Url:: http://www.ai4r.org/
5
7
  #
6
- # You can redistribute it and/or modify it under the terms of
7
- # the Mozilla Public License version 1.1 as published by the
8
+ # You can redistribute it and/or modify it under the terms of
9
+ # the Mozilla Public License version 1.1 as published by the
8
10
  # Mozilla Foundation at http://www.mozilla.org/MPL/MPL-1.1.txt
9
11
 
10
12
  # In this example we group results from a post-training survey into 4 groups.
11
- # The Diana algorithm is used, but you can try other algorithms by changing
13
+ # The Diana algorithm is used, but you can try other algorithms by changing
12
14
  # the word "Diana" to "KMeans", "AverageLinkage", or any other cluster implementation.
13
15
  # The cluster API is the same, so you can play around and observe different results.
14
16
 
15
- require 'rubygems'
16
17
  require 'ai4r'
17
- include Ai4r::Data
18
- include Ai4r::Clusterers
19
18
 
20
19
  # 5 Questions on a post training survey
21
- questions = [ "The material covered was appropriate for someone with my level of knowledge of the subject.",
22
- "The material was presented in a clear and logical fashion",
23
- "There was sufficient time in the session to cover the material that was presented",
24
- "The instructor was respectful of students",
25
- "The instructor provided good examples"]
20
+ questions = ['The material covered was appropriate for someone with my level of ' \
21
+ 'knowledge of the subject.',
22
+ 'The material was presented in a clear and logical fashion',
23
+ 'There was sufficient time in the session to cover the material that was presented',
24
+ 'The instructor was respectful of students',
25
+ 'The instructor provided good examples']
26
26
 
27
27
  # Answers to each question go from 1 (bad) to 5 (excellent)
28
- # The answers array has an element per survey complemented.
28
+ # The answers array has an element per survey completed.
29
29
  # Each survey completed is in turn an array with the answer of each question.
30
- answers = [ [ 1, 2, 3, 2, 2], # Answers of person 1
31
- [ 5, 5, 3, 2, 2], # Answers of person 2
32
- [ 1, 2, 3, 2, 2], # Answers of person 3
33
- [ 1, 2, 2, 2, 2], # ...
34
- [ 1, 2, 5, 5, 2],
35
- [ 3, 3, 3, 3, 3],
36
- [ 1, 2, 3, 2, 2],
37
- [ 3, 2, 3, 5, 5],
38
- [ 3, 3, 3, 5, 2],
39
- [ 4, 4, 3, 1, 1],
40
- [ 5, 5, 5, 5, 5],
41
- [ 4, 2, 4, 2, 1],
42
- [ 4, 4, 5, 5, 5],
43
- [ 4, 4, 3, 2, 2],
44
- [ 2, 2, 3, 2, 3],
45
- [ 3, 3, 3, 1, 1]] # Answers of person 16
46
-
47
- data_set = DataSet.new(:data_items => answers, :data_labels => questions)
30
+ answers = [[1, 2, 3, 2, 2], # Answers of person 1
31
+ [5, 5, 3, 2, 2], # Answers of person 2
32
+ [1, 2, 3, 2, 2], # Answers of person 3
33
+ [1, 2, 2, 2, 2], # ...
34
+ [1, 2, 5, 5, 2],
35
+ [3, 3, 3, 3, 3],
36
+ [1, 2, 3, 2, 2],
37
+ [3, 2, 3, 5, 5],
38
+ [3, 3, 3, 5, 2],
39
+ [4, 4, 3, 1, 1],
40
+ [5, 5, 5, 5, 5],
41
+ [4, 2, 4, 2, 1],
42
+ [4, 4, 5, 5, 5],
43
+ [4, 4, 3, 2, 2],
44
+ [2, 2, 3, 2, 3],
45
+ [3, 3, 3, 1, 1]] # Answers of person 16
46
+
47
+ data_set = Ai4r::Data::DataSet.new(data_items: answers, data_labels: questions)
48
48
 
49
49
  # Let's group answers in 4 groups
50
- clusterer = Diana.new.build(data_set, 4)
50
+ clusterer = Ai4r::Clusterers::Diana.new.build(data_set, 4)
51
51
 
52
- clusterer.clusters.each_with_index do |cluster, index|
53
- puts "Group #{index+1}"
54
- p cluster.data_items
52
+ clusterer.clusters.each_with_index do |cluster, index|
53
+ puts "Group #{index + 1}"
54
+ p cluster.data_items
55
55
  end
56
56
 
57
+ # Check if this algorithm supports evaluating new data items
58
+ if clusterer.supports_eval?
59
+ puts "First survey belongs to group #{clusterer.eval(answers.first)}"
60
+ else
61
+ puts 'This algorithm does not support eval on unseen data.'
62
+ end
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Example showcasing DBSCAN clustering with custom parameters.
4
+ require 'ai4r'
5
+
6
+ points = [
7
+ [1, 1], [1, 2], [1, 3], [2, 1], [2, 2], [2, 3],
8
+ [8, 8], [8, 9], [8, 10], [9, 8], [9, 9], [9, 10],
9
+ [5, 5], [1, 9], [10, 0]
10
+ ]
11
+ set = Ai4r::Data::DataSet.new(data_items: points)
12
+
13
+ clusterer = Ai4r::Clusterers::DBSCAN.new
14
+ clusterer.set_parameters(epsilon: 10, min_points: 2).build(set)
15
+
16
+ pp clusterer.labels
17
+ pp clusterer.clusters.map(&:data_items)
@@ -0,0 +1,17 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'ai4r'
4
+ require 'dendrograms'
5
+
6
+ points = [[0, 0], [0, 1], [1, 0], [1, 1]]
7
+ data = Ai4r::Data::DataSet.new(data_items: points)
8
+
9
+ clusterer = Ai4r::Clusterers::WardLinkage.new.build(data, 1)
10
+
11
+ # Convert stored tree to a simple array of point sets
12
+ steps = clusterer.cluster_tree.map do |clusters|
13
+ clusters.map(&:data_items)
14
+ end
15
+
16
+ Dendrograms::Dendrogram.new(steps).draw('dendrogram.png')
17
+ puts 'Dendrogram saved to dendrogram.png'