ml_ai 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rubocop.yml +8 -0
- data/CHANGELOG.md +5 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/LICENSE.txt +21 -0
- data/README.md +437 -0
- data/Rakefile +12 -0
- data/benchmarks/car_price_prediction_benchmark.py +38 -0
- data/benchmarks/employee_salary_prediction_benchmark.py +38 -0
- data/benchmarks/energy_consumption_prediction_benchmark.py +37 -0
- data/benchmarks/evaluation_metrics.py +32 -0
- data/benchmarks/house_price_prediction_benchmark.py +38 -0
- data/benchmarks/multiple_linear_regression.py +78 -0
- data/benchmarks/simple_linear_regression_benchmark.py +55 -0
- data/data/advertising_revenue.csv +6 -0
- data/data/car_prices.csv +6 -0
- data/data/employee_salaries.csv +6 -0
- data/data/energy_consumption.csv +11 -0
- data/data/house_prices.csv +6 -0
- data/data/multiple_linear_regression_data.csv +6 -0
- data/data/simple_linear_regression_data.csv +6 -0
- data/examples/car_price_prediction.rb +30 -0
- data/examples/employee_salary_prediction.rb +34 -0
- data/examples/energy_consumption_prediction.rb +39 -0
- data/examples/house_price_prediction.rb +30 -0
- data/lib/ml_ai/dataset.rb +25 -0
- data/lib/ml_ai/multiple_linear_regression.rb +113 -0
- data/lib/ml_ai/simple_linear_regression.rb +87 -0
- data/lib/ml_ai/version.rb +5 -0
- data/lib/ml_ai.rb +11 -0
- data/sig/ml_ai.rbs +4 -0
- metadata +77 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 72d305d5bfb2a41043e31de593461c0b4ab206eaab090c6ff4168e57066206e0
|
4
|
+
data.tar.gz: b0b0e2ef85d517eee9e9d12d58cc329a59ec78d820e0db384f1f37fa6ffcc18a
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: b142c7c71b3524c5fc97bb8be4af03098f7ab516e35c7bb1b1712a60fbde757725192b658d2b1cf91d3d8275458f84177fbf46a2f6523bc1ff506dcf7bf6aab4
|
7
|
+
data.tar.gz: b237928b1a3ffcecc610597c341ea7494ac88b9b266343a07064bbc0e1c0a077e8718b3b9ea8a4cd66ae6d8219dc2aeb21836047d455d2d9f72a856a21fe55a6
|
data/.rubocop.yml
ADDED
data/CHANGELOG.md
ADDED
data/CODE_OF_CONDUCT.md
ADDED
@@ -0,0 +1,132 @@
|
|
1
|
+
# Contributor Covenant Code of Conduct
|
2
|
+
|
3
|
+
## Our Pledge
|
4
|
+
|
5
|
+
We as members, contributors, and leaders pledge to make participation in our
|
6
|
+
community a harassment-free experience for everyone, regardless of age, body
|
7
|
+
size, visible or invisible disability, ethnicity, sex characteristics, gender
|
8
|
+
identity and expression, level of experience, education, socio-economic status,
|
9
|
+
nationality, personal appearance, race, caste, color, religion, or sexual
|
10
|
+
identity and orientation.
|
11
|
+
|
12
|
+
We pledge to act and interact in ways that contribute to an open, welcoming,
|
13
|
+
diverse, inclusive, and healthy community.
|
14
|
+
|
15
|
+
## Our Standards
|
16
|
+
|
17
|
+
Examples of behavior that contributes to a positive environment for our
|
18
|
+
community include:
|
19
|
+
|
20
|
+
* Demonstrating empathy and kindness toward other people
|
21
|
+
* Being respectful of differing opinions, viewpoints, and experiences
|
22
|
+
* Giving and gracefully accepting constructive feedback
|
23
|
+
* Accepting responsibility and apologizing to those affected by our mistakes,
|
24
|
+
and learning from the experience
|
25
|
+
* Focusing on what is best not just for us as individuals, but for the overall
|
26
|
+
community
|
27
|
+
|
28
|
+
Examples of unacceptable behavior include:
|
29
|
+
|
30
|
+
* The use of sexualized language or imagery, and sexual attention or advances of
|
31
|
+
any kind
|
32
|
+
* Trolling, insulting or derogatory comments, and personal or political attacks
|
33
|
+
* Public or private harassment
|
34
|
+
* Publishing others' private information, such as a physical or email address,
|
35
|
+
without their explicit permission
|
36
|
+
* Other conduct which could reasonably be considered inappropriate in a
|
37
|
+
professional setting
|
38
|
+
|
39
|
+
## Enforcement Responsibilities
|
40
|
+
|
41
|
+
Community leaders are responsible for clarifying and enforcing our standards of
|
42
|
+
acceptable behavior and will take appropriate and fair corrective action in
|
43
|
+
response to any behavior that they deem inappropriate, threatening, offensive,
|
44
|
+
or harmful.
|
45
|
+
|
46
|
+
Community leaders have the right and responsibility to remove, edit, or reject
|
47
|
+
comments, commits, code, wiki edits, issues, and other contributions that are
|
48
|
+
not aligned to this Code of Conduct, and will communicate reasons for moderation
|
49
|
+
decisions when appropriate.
|
50
|
+
|
51
|
+
## Scope
|
52
|
+
|
53
|
+
This Code of Conduct applies within all community spaces, and also applies when
|
54
|
+
an individual is officially representing the community in public spaces.
|
55
|
+
Examples of representing our community include using an official email address,
|
56
|
+
posting via an official social media account, or acting as an appointed
|
57
|
+
representative at an online or offline event.
|
58
|
+
|
59
|
+
## Enforcement
|
60
|
+
|
61
|
+
Instances of abusive, harassing, or otherwise unacceptable behavior may be
|
62
|
+
reported to the community leaders responsible for enforcement at
|
63
|
+
[INSERT CONTACT METHOD].
|
64
|
+
All complaints will be reviewed and investigated promptly and fairly.
|
65
|
+
|
66
|
+
All community leaders are obligated to respect the privacy and security of the
|
67
|
+
reporter of any incident.
|
68
|
+
|
69
|
+
## Enforcement Guidelines
|
70
|
+
|
71
|
+
Community leaders will follow these Community Impact Guidelines in determining
|
72
|
+
the consequences for any action they deem in violation of this Code of Conduct:
|
73
|
+
|
74
|
+
### 1. Correction
|
75
|
+
|
76
|
+
**Community Impact**: Use of inappropriate language or other behavior deemed
|
77
|
+
unprofessional or unwelcome in the community.
|
78
|
+
|
79
|
+
**Consequence**: A private, written warning from community leaders, providing
|
80
|
+
clarity around the nature of the violation and an explanation of why the
|
81
|
+
behavior was inappropriate. A public apology may be requested.
|
82
|
+
|
83
|
+
### 2. Warning
|
84
|
+
|
85
|
+
**Community Impact**: A violation through a single incident or series of
|
86
|
+
actions.
|
87
|
+
|
88
|
+
**Consequence**: A warning with consequences for continued behavior. No
|
89
|
+
interaction with the people involved, including unsolicited interaction with
|
90
|
+
those enforcing the Code of Conduct, for a specified period of time. This
|
91
|
+
includes avoiding interactions in community spaces as well as external channels
|
92
|
+
like social media. Violating these terms may lead to a temporary or permanent
|
93
|
+
ban.
|
94
|
+
|
95
|
+
### 3. Temporary Ban
|
96
|
+
|
97
|
+
**Community Impact**: A serious violation of community standards, including
|
98
|
+
sustained inappropriate behavior.
|
99
|
+
|
100
|
+
**Consequence**: A temporary ban from any sort of interaction or public
|
101
|
+
communication with the community for a specified period of time. No public or
|
102
|
+
private interaction with the people involved, including unsolicited interaction
|
103
|
+
with those enforcing the Code of Conduct, is allowed during this period.
|
104
|
+
Violating these terms may lead to a permanent ban.
|
105
|
+
|
106
|
+
### 4. Permanent Ban
|
107
|
+
|
108
|
+
**Community Impact**: Demonstrating a pattern of violation of community
|
109
|
+
standards, including sustained inappropriate behavior, harassment of an
|
110
|
+
individual, or aggression toward or disparagement of classes of individuals.
|
111
|
+
|
112
|
+
**Consequence**: A permanent ban from any sort of public interaction within the
|
113
|
+
community.
|
114
|
+
|
115
|
+
## Attribution
|
116
|
+
|
117
|
+
This Code of Conduct is adapted from the [Contributor Covenant][homepage],
|
118
|
+
version 2.1, available at
|
119
|
+
[https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1].
|
120
|
+
|
121
|
+
Community Impact Guidelines were inspired by
|
122
|
+
[Mozilla's code of conduct enforcement ladder][Mozilla CoC].
|
123
|
+
|
124
|
+
For answers to common questions about this code of conduct, see the FAQ at
|
125
|
+
[https://www.contributor-covenant.org/faq][FAQ]. Translations are available at
|
126
|
+
[https://www.contributor-covenant.org/translations][translations].
|
127
|
+
|
128
|
+
[homepage]: https://www.contributor-covenant.org
|
129
|
+
[v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html
|
130
|
+
[Mozilla CoC]: https://github.com/mozilla/diversity
|
131
|
+
[FAQ]: https://www.contributor-covenant.org/faq
|
132
|
+
[translations]: https://www.contributor-covenant.org/translations
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 David William Silva
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,437 @@
|
|
1
|
+
# MLAI
|
2
|
+
|
3
|
+
Algorithms for machine learning and artificial intelligence in [Ruby](https://www.ruby-lang.org/en/).
|
4
|
+
|
5
|
+
To check this Ruby implementation, all features are implemented and benchmarked against [scikit-learn](https://scikit-learn.org/), a well-known library in [Python](https://www.python.org/) for machine learning.
|
6
|
+
|
7
|
+
## Why?
|
8
|
+
|
9
|
+
[Python](https://www.python.org/) is the go-to language for machine learning. But even if you don't plan to use it in production, if you are working with Ruby, you deserve to have ML/AI algorithms available in [Ruby](https://www.ruby-lang.org/en/). :)
|
10
|
+
|
11
|
+
I also know that many libraries are available in Ruby for machine learning and related areas. Here is a [curated list](https://github.com/arbox/machine-learning-with-ruby).
|
12
|
+
|
13
|
+
Then why? I could name several reasons, but it will all come down to my interest in the computational challenges associated with machine learning and AI. There is no better way to identify, isolate, and inspect bottlenecks and opportunities for improvement and even the proposal of new ideas than to implement core functionalities from scratch. This will be true for most things in software: if you really want to have a deeper, more precise, and more comprehensive view of some resource, implement that resource yourself.
|
14
|
+
|
15
|
+
Plus, it is a lot of fun!
|
16
|
+
|
17
|
+
## Observations on Benchmarking
|
18
|
+
|
19
|
+
Using the `scikit-learn` library in Python as a reference (and also to keep this Ruby implementation in check) allowed me to arrive at some interesting observations.
|
20
|
+
|
21
|
+
### Intermediary Results
|
22
|
+
|
23
|
+
Throughout the development process, a series of cases were encountered where intermediary computations—such as matrix operations and inversions—differed between the two implementations, sometimes significantly. However, these differences often had no impact on the final results, such as predictions, mean squared error (MSE), or R-squared values. The underlying reason for this lies in how numerical computations are handled in each language.
|
24
|
+
|
25
|
+
### Language-Specific Matrix Computations and Precision Matters
|
26
|
+
|
27
|
+
First, the way matrices are computed and inverted can vary between Ruby and Python, especially considering the libraries and methods used. Differences in how these operations are optimized and executed can lead to slight variations in intermediate results. Additionally, floating-point arithmetic is handled differently in each language, influenced by factors such as precision, rounding methods, and how numbers are represented internally. Ruby and Python might employ distinct strategies to manage these computations, leading to the observed differences.
|
28
|
+
|
29
|
+
### Theory vs Practice
|
30
|
+
|
31
|
+
These differences highlight the importance of understanding that while numerical algorithms are deterministic in theory, their practical implementations can introduce variability due to the intricacies of the programming languages and their respective libraries. Despite these variations, the ability of both implementations to yield virtually identical final results speaks to the robustness of the underlying mathematical principles. This also emphasizes the value of cross-referencing implementations in different environments to ensure the reliability and accuracy of the models developed. By the way, this is one more reason for writing a new ML/AI library in Ruby. :)
|
32
|
+
|
33
|
+
## Installation
|
34
|
+
|
35
|
+
Run:
|
36
|
+
|
37
|
+
$ gem install ml_ai
|
38
|
+
|
39
|
+
If you use Bundler, add the following to your Gemfile:
|
40
|
+
|
41
|
+
gem 'ml_ai'
|
42
|
+
|
43
|
+
## Usage
|
44
|
+
|
45
|
+
### Simple Linear Regression
|
46
|
+
|
47
|
+
The `SimpleLinearRegression` class in the `MLAI` gem allows you to fit a linear model with a single feature, make predictions, and evaluate the model using metrics like Mean Squared Error (MSE) and R-squared. Additionally, it now supports loading data directly from a CSV file using the `Dataset` class.
|
48
|
+
|
49
|
+
#### Using Raw Arrays
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
require 'ml_ai'
|
53
|
+
|
54
|
+
# Initialize the model
|
55
|
+
model = MLAI::SimpleLinearRegression.new
|
56
|
+
|
57
|
+
# Define the dataset with a single feature
|
58
|
+
x_values = [1, 2, 3, 4, 5]
|
59
|
+
y_values = [3, 5, 7, 9, 11]
|
60
|
+
|
61
|
+
# Fit the model to the data
|
62
|
+
model.fit(x_values: x_values, y_values: y_values)
|
63
|
+
|
64
|
+
# Make predictions on the original data
|
65
|
+
predictions = model.predict(x_values)
|
66
|
+
puts "Predictions: #{predictions}"
|
67
|
+
# Output: Predictions: [3.0, 5.0, 7.0, 9.0, 11.0]
|
68
|
+
|
69
|
+
# Make predictions on new data
|
70
|
+
new_data = [6, 7]
|
71
|
+
new_predictions = model.predict(new_data)
|
72
|
+
puts "New Predictions: #{new_predictions}"
|
73
|
+
# Output: New Predictions: [13.0, 15.0]
|
74
|
+
|
75
|
+
# Calculate evaluation metrics
|
76
|
+
mse = model.mean_squared_error(y_values, predictions)
|
77
|
+
r2 = model.r_squared(y_values, predictions)
|
78
|
+
|
79
|
+
puts "Mean Squared Error: #{mse}"
|
80
|
+
# Output: Mean Squared Error: 0.0
|
81
|
+
|
82
|
+
puts "R-squared: #{r2}"
|
83
|
+
# Output: R-squared: 1.0
|
84
|
+
```
|
85
|
+
|
86
|
+
#### Using a CSV Dataset
|
87
|
+
|
88
|
+
```ruby
|
89
|
+
require 'ml_ai'
|
90
|
+
|
91
|
+
# Initialize the model
|
92
|
+
model = MLAI::SimpleLinearRegression.new
|
93
|
+
|
94
|
+
# Create a Dataset from a CSV file
|
95
|
+
dataset = MLAI::Dataset.new('data/simple_linear_regression_data.csv')
|
96
|
+
|
97
|
+
# Fit the model using the Dataset
|
98
|
+
model.fit(dataset: dataset, feature_column: 'Feature', target_column: 'Target')
|
99
|
+
|
100
|
+
# Make predictions on new feature values
|
101
|
+
new_features = [6, 7]
|
102
|
+
new_predictions = model.predict(new_features)
|
103
|
+
puts "New Predictions: #{new_predictions}"
|
104
|
+
# Output: New Predictions: [13.0, 15.0]
|
105
|
+
|
106
|
+
# Evaluate the model using the original dataset
|
107
|
+
original_features = dataset.data.map { |row| row[0] } # Assuming 'Feature' is the first column
|
108
|
+
original_targets = dataset.data.map { |row| row[1] } # Assuming 'Target' is the second column
|
109
|
+
original_predictions = model.predict(original_features)
|
110
|
+
|
111
|
+
mse = model.mean_squared_error(original_targets, original_predictions)
|
112
|
+
r2 = model.r_squared(original_targets, original_predictions)
|
113
|
+
|
114
|
+
puts "Mean Squared Error: #{mse}"
|
115
|
+
# Output: Mean Squared Error: 0.0
|
116
|
+
|
117
|
+
puts "R-squared: #{r2}"
|
118
|
+
# Output: R-squared: 1.0
|
119
|
+
```
|
120
|
+
|
121
|
+
#### Benchmark
|
122
|
+
|
123
|
+
To check the Ruby implementation, run the Python benchmark using the same data:
|
124
|
+
|
125
|
+
```
|
126
|
+
$ python3 benchmarks/simple_linear_regression_benchmark.py
|
127
|
+
```
|
128
|
+
|
129
|
+
### Evaluation Metrics
|
130
|
+
|
131
|
+
Fit a linear model, make predictions, and evaluate the model using common metrics like Mean Squared Error (MSE) and R-squared.
|
132
|
+
|
133
|
+
```ruby
|
134
|
+
require 'ml_ai'
|
135
|
+
|
136
|
+
# Initialize the model
|
137
|
+
model = MLAI::SimpleLinearRegression.new
|
138
|
+
|
139
|
+
# Define the dataset
|
140
|
+
x_values = [1, 2, 3, 4, 5]
|
141
|
+
y_values = [2, 4, 5, 4, 5]
|
142
|
+
|
143
|
+
# Fit the model to the data
|
144
|
+
model.fit(x_values: x_values, y_values: y_values)
|
145
|
+
|
146
|
+
# Make predictions
|
147
|
+
predictions = model.predict(x_values)
|
148
|
+
|
149
|
+
# Calculate evaluation metrics
|
150
|
+
mse = model.mean_squared_error(y_values, predictions)
|
151
|
+
r2 = model.r_squared(y_values, predictions)
|
152
|
+
|
153
|
+
# Output the results
|
154
|
+
puts "Predictions: #{predictions}"
|
155
|
+
puts "Mean Squared Error: #{mse}"
|
156
|
+
puts "R-squared: #{r2}"
|
157
|
+
|
158
|
+
# Example output:
|
159
|
+
# Predictions: [2.8, 3.4, 4.0, 4.6, 5.2]
|
160
|
+
# Mean Squared Error: 0.48
|
161
|
+
# R-squared: 0.6
|
162
|
+
```
|
163
|
+
|
164
|
+
#### Benchmark
|
165
|
+
|
166
|
+
To check the Ruby implementation, run the Python benchmark using the same data:
|
167
|
+
|
168
|
+
```
|
169
|
+
$ python3 benchmarks/evaluation_metrics.py
|
170
|
+
```
|
171
|
+
|
172
|
+
#### Example
|
173
|
+
|
174
|
+
Here is an example closer to the real world. Imagine you're working in real estate and want to predict the price of a house based on its size. You've gathered data from various houses, including their sizes in square feet and their prices in thousands of dollars. You want to use this data to predict the price of new houses based on their size.
|
175
|
+
|
176
|
+
You can run this example here:
|
177
|
+
|
178
|
+
```
|
179
|
+
ruby examples/house_price_prediction.rb
|
180
|
+
```
|
181
|
+
|
182
|
+
and check the results with the following benchmark in Python:
|
183
|
+
|
184
|
+
```
|
185
|
+
python3 benchmarks/house_price_prediction_benchmark.py
|
186
|
+
```
|
187
|
+
|
188
|
+
### Multiple Linear Regression
|
189
|
+
|
190
|
+
The `MultipleLinearRegression` class in the `MLAI` gem allows you to fit a linear model with multiple features, make predictions, and evaluate the model using metrics like Mean Squared Error (MSE) and R-squared. The class supports loading data directly from a CSV file using the `Dataset` class, as well as passing raw arrays directly.
|
191
|
+
|
192
|
+
#### Using Raw Arrays
|
193
|
+
|
194
|
+
```ruby
|
195
|
+
require 'ml_ai'
|
196
|
+
|
197
|
+
# Initialize the model
|
198
|
+
model = MLAI::MultipleLinearRegression.new
|
199
|
+
|
200
|
+
# Define the dataset with multiple features
|
201
|
+
x_values = [
|
202
|
+
[1, 2],
|
203
|
+
[2, 3],
|
204
|
+
[3, 4],
|
205
|
+
[4, 5],
|
206
|
+
[5, 6]
|
207
|
+
]
|
208
|
+
y_values = [5, 7, 9, 11, 13]
|
209
|
+
|
210
|
+
# Fit the model to the data
|
211
|
+
model.fit(x_values: x_values, y_values: y_values)
|
212
|
+
|
213
|
+
# Make predictions on the original data
|
214
|
+
predictions = model.predict(x_values)
|
215
|
+
puts "Predictions: #{predictions}"
|
216
|
+
# Output: Predictions: [5.0, 7.0, 9.0, 11.0, 13.0]
|
217
|
+
|
218
|
+
# Make predictions on new data
|
219
|
+
new_data = [
|
220
|
+
[6, 7],
|
221
|
+
[7, 8]
|
222
|
+
]
|
223
|
+
new_predictions = model.predict(new_data)
|
224
|
+
puts "New Predictions: #{new_predictions}"
|
225
|
+
# Output: New Predictions: [15.0, 17.0]
|
226
|
+
|
227
|
+
# Calculate evaluation metrics
|
228
|
+
mse = model.mean_squared_error(y_values, predictions)
|
229
|
+
r2 = model.r_squared(y_values, predictions)
|
230
|
+
|
231
|
+
puts "Mean Squared Error: #{mse}"
|
232
|
+
# Output: Mean Squared Error: 0.0
|
233
|
+
|
234
|
+
puts "R-squared: #{r2}"
|
235
|
+
# Output: R-squared: 1.0
|
236
|
+
```
|
237
|
+
|
238
|
+
#### Using a CSV Dataset
|
239
|
+
|
240
|
+
```ruby
|
241
|
+
require 'ml_ai'
|
242
|
+
|
243
|
+
# Initialize the model
|
244
|
+
model = MLAI::MultipleLinearRegression.new
|
245
|
+
|
246
|
+
# Create a Dataset from a CSV file
|
247
|
+
dataset = MLAI::Dataset.new('data/multiple_linear_regression_data.csv')
|
248
|
+
|
249
|
+
# Fit the model using the Dataset
|
250
|
+
model.fit(dataset: dataset, feature_columns: ['Feature1', 'Feature2'], target_column: 'Target')
|
251
|
+
|
252
|
+
# Make predictions on new feature values
|
253
|
+
new_features = [
|
254
|
+
[6, 7],
|
255
|
+
[7, 8]
|
256
|
+
]
|
257
|
+
new_predictions = model.predict(new_features)
|
258
|
+
puts "New Predictions: #{new_predictions}"
|
259
|
+
# Output: New Predictions: [15.0, 17.0]
|
260
|
+
|
261
|
+
# Evaluate the model using the original dataset
|
262
|
+
original_features = dataset.data.map { |row| row[0..1] } # Assuming 'Feature1' and 'Feature2' are the first two columns
|
263
|
+
original_targets = dataset.data.map { |row| row[2] } # Assuming 'Target' is the third column
|
264
|
+
original_predictions = model.predict(original_features)
|
265
|
+
|
266
|
+
mse = model.mean_squared_error(original_targets, original_predictions)
|
267
|
+
r2 = model.r_squared(original_targets, original_predictions)
|
268
|
+
|
269
|
+
puts "Mean Squared Error: #{mse}"
|
270
|
+
# Output: Mean Squared Error: 0.0
|
271
|
+
|
272
|
+
puts "R-squared: #{r2}"
|
273
|
+
# Output: R-squared: 1.0
|
274
|
+
```
|
275
|
+
|
276
|
+
#### Benchmark
|
277
|
+
|
278
|
+
To check the Ruby implementation, run the Python benchmark using the same data:
|
279
|
+
|
280
|
+
```
|
281
|
+
$ python3 benchmarks/multiple_linear_regression.py
|
282
|
+
```
|
283
|
+
|
284
|
+
#### Example
|
285
|
+
|
286
|
+
Here is an example closer to the real world. Imagine you work at a car dealership, and you want to predict the price of used cars based on various features such as the car's age, mileage, and horsepower. You have collected data from previous sales and want to use this data to predict the price of new cars based on these features.
|
287
|
+
|
288
|
+
You can run this example here:
|
289
|
+
|
290
|
+
```
|
291
|
+
ruby examples/car_price_prediction.rb
|
292
|
+
```
|
293
|
+
|
294
|
+
and check the results with the following benchmark in Python:
|
295
|
+
|
296
|
+
```
|
297
|
+
python3 benchmarks/car_price_prediction_benchmark.py
|
298
|
+
```
|
299
|
+
|
300
|
+
### Regularization
|
301
|
+
|
302
|
+
Regularization helps prevent overfitting by adding a penalty to large coefficients, making the model more generalizable. In this example, we'll predict advertising revenue based on the amount spent on TV, radio, and newspaper advertisements. Regularization is applied to prevent overfitting and ensure the model generalizes well to new data.
|
303
|
+
|
304
|
+
```ruby
|
305
|
+
# frozen_string_literal: true
|
306
|
+
|
307
|
+
# Example: Predicting Advertising Revenue with Regularization
|
308
|
+
|
309
|
+
require_relative '../lib/ml_ai'
|
310
|
+
|
311
|
+
# Initialize the model with a regularization parameter
|
312
|
+
model = MLAI::MultipleLinearRegression.new(regularization: 0.00083)
|
313
|
+
|
314
|
+
# Create a Dataset from a CSV file
|
315
|
+
dataset = MLAI::Dataset.new('data/advertising_revenue.csv')
|
316
|
+
|
317
|
+
# Fit the model using the Dataset
|
318
|
+
model.fit(dataset: dataset, feature_columns: ['TV', 'Radio', 'Newspaper'], target_column: 'Revenue')
|
319
|
+
|
320
|
+
# Print coefficients and intercept
|
321
|
+
puts "Coefficients: #{model.coefficients.map { |coef| coef.round(2) }}"
|
322
|
+
puts "Intercept: #{model.intercept.round(2)}"
|
323
|
+
|
324
|
+
# Predict the revenue based on new advertising spends
|
325
|
+
new_ad_spend = [[230, 37, 69]] # TV: $230, Radio: $37, Newspaper: $69
|
326
|
+
predicted_revenue = model.predict(new_ad_spend).first.round(2)
|
327
|
+
puts "Predicted Advertising Revenue: $#{predicted_revenue}"
|
328
|
+
|
329
|
+
# Evaluate the model using the original dataset
|
330
|
+
original_features = dataset.data.map { |row| row[0..2] } # Extracting 'TV', 'Radio', 'Newspaper'
|
331
|
+
original_revenue = dataset.data.map { |row| row[3] } # Extracting 'Revenue'
|
332
|
+
predictions = model.predict(original_features).map { |p| p.round(2) }
|
333
|
+
|
334
|
+
mse = model.mean_squared_error(original_revenue, predictions).round(2)
|
335
|
+
r2 = model.r_squared(original_revenue, predictions).round(2)
|
336
|
+
|
337
|
+
puts "Mean Squared Error: #{mse}"
|
338
|
+
puts "R-squared: #{r2}"
|
339
|
+
```
|
340
|
+
|
341
|
+
#### Example
|
342
|
+
|
343
|
+
Here is an example closer to the real world. Imagine you're working for a company that wants to predict the salary of employees based on several factors, including their years of experience, level of education, and number of relevant skills. You have collected data from existing employees and want to use this data to predict salaries for new hires.
|
344
|
+
|
345
|
+
You can run this example here:
|
346
|
+
|
347
|
+
```
|
348
|
+
ruby examples/employee_salary_prediction.rb
|
349
|
+
```
|
350
|
+
|
351
|
+
and check the results with the following benchmark in Python:
|
352
|
+
|
353
|
+
```
|
354
|
+
python3 benchmarks/employee_salary_prediction_benchmark.py
|
355
|
+
```
|
356
|
+
|
357
|
+
### Cross-Validation
|
358
|
+
|
359
|
+
Cross-validation is a powerful technique to evaluate the performance of your model by splitting your dataset into multiple folds. The model is trained on a subset of the data and tested on the remaining data, and this process is repeated multiple times. The final evaluation metric is the average of all the individual metrics across the folds.
|
360
|
+
|
361
|
+
#### Example Usage with Raw Arrays
|
362
|
+
|
363
|
+
```ruby
|
364
|
+
# Require the necessary files
|
365
|
+
require 'ml_ai'
|
366
|
+
|
367
|
+
# Initialize the model with regularization
|
368
|
+
model = MLAI::MultipleLinearRegression.new(regularization: 0.01)
|
369
|
+
|
370
|
+
# Define the input data
|
371
|
+
x_values = [
|
372
|
+
[1, 2],
|
373
|
+
[2, 3],
|
374
|
+
[3, 4],
|
375
|
+
[4, 5],
|
376
|
+
[5, 6]
|
377
|
+
]
|
378
|
+
y_values = [5, 7, 9, 11, 13]
|
379
|
+
|
380
|
+
# Perform 3-fold cross-validation
|
381
|
+
average_mse = model.cross_validate(x_values: x_values, y_values: y_values, k: 3)
|
382
|
+
puts "Average Mean Squared Error across 3 folds: #{average_mse.round(4)}"
|
383
|
+
```
|
384
|
+
|
385
|
+
#### Example Using a CSV Dataset
|
386
|
+
|
387
|
+
```ruby
|
388
|
+
# Require the necessary files
|
389
|
+
require 'ml_ai'
|
390
|
+
|
391
|
+
# Create a Dataset from a CSV file
|
392
|
+
dataset = MLAI::Dataset.new('path_to_your_dataset.csv')
|
393
|
+
|
394
|
+
# Initialize the model with regularization
|
395
|
+
model = MLAI::MultipleLinearRegression.new(regularization: 0.01)
|
396
|
+
|
397
|
+
# Perform 3-fold cross-validation
|
398
|
+
average_mse = model.cross_validate(dataset: dataset, feature_columns: ["Feature1", "Feature2"], target_column: "Target", k: 3)
|
399
|
+
puts "Average Mean Squared Error across 3 folds: #{average_mse.round(4)}"
|
400
|
+
```
|
401
|
+
|
402
|
+
#### Example
|
403
|
+
|
404
|
+
Here is an example closer to the real world. Imagine that we want to predict the energy consumption of a building based on its size, the number of occupants, and the number of computers it houses. We'll use the `MultipleLinearRegression` class with cross-validation.
|
405
|
+
|
406
|
+
You can run this example here:
|
407
|
+
|
408
|
+
```
|
409
|
+
ruby examples/energy_consumption_prediction.rb
|
410
|
+
```
|
411
|
+
|
412
|
+
and check the results with the following benchmark in Python:
|
413
|
+
|
414
|
+
```
|
415
|
+
python3 benchmarks/energy_consumption_prediction_benchmark.py
|
416
|
+
```
|
417
|
+
|
418
|
+
### Dataset
|
419
|
+
|
420
|
+
|
421
|
+
## Development
|
422
|
+
|
423
|
+
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake test` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
424
|
+
|
425
|
+
To install this gem onto your local machine, run `bundle exec rake install`. To release a new version, update the version number in `version.rb`, and then run `bundle exec rake release`, which will create a git tag for the version, push git commits and the created tag, and push the `.gem` file to [rubygems.org](https://rubygems.org).
|
426
|
+
|
427
|
+
## Contributing
|
428
|
+
|
429
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/davidwilliam/MLAI. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [code of conduct](https://github.com/davidwilliam/MLAI/blob/main/CODE_OF_CONDUCT.md).
|
430
|
+
|
431
|
+
## License
|
432
|
+
|
433
|
+
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
434
|
+
|
435
|
+
## Code of Conduct
|
436
|
+
|
437
|
+
Everyone interacting in the MLAI project's codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/davidwilliam/MLAI/blob/main/CODE_OF_CONDUCT.md).
|
data/Rakefile
ADDED
data/benchmarks/car_price_prediction_benchmark.py
ADDED
@@ -0,0 +1,38 @@
|
|
1
|
+
import numpy as np
|
2
|
+
import pandas as pd
|
3
|
+
from sklearn.linear_model import LinearRegression
|
4
|
+
from sklearn.metrics import mean_squared_error, r2_score
|
5
|
+
|
6
|
+
# Load data from CSV file
|
7
|
+
csv_file_path = "data/car_prices.csv"
|
8
|
+
data = pd.read_csv(csv_file_path)
|
9
|
+
|
10
|
+
# Extract feature and target columns
|
11
|
+
x = data[['Age', 'Mileage', 'Horsepower']].values
|
12
|
+
y = data['Price'].values
|
13
|
+
|
14
|
+
# Initialize and fit the model
|
15
|
+
model = LinearRegression()
|
16
|
+
model.fit(x, y)
|
17
|
+
|
18
|
+
# Make predictions on the same data
|
19
|
+
predictions = model.predict(x)
|
20
|
+
predictions = np.round(predictions, 2) # Limit to two decimal places
|
21
|
+
|
22
|
+
# Calculate evaluation metrics
|
23
|
+
mse = mean_squared_error(y, predictions)
|
24
|
+
r2 = r2_score(y, predictions)
|
25
|
+
|
26
|
+
# Print the results
|
27
|
+
print(f"Coefficients: {np.round(model.coef_, 2)}")
|
28
|
+
print(f"Intercept: {round(model.intercept_, 2)}")
|
29
|
+
print(f"Predictions: {predictions}")
|
30
|
+
print(f"MSE: {round(mse, 2)}")
|
31
|
+
print(f"R-squared: {round(r2, 2)}")
|
32
|
+
|
33
|
+
# Predict on new data
|
34
|
+
new_data = np.array([[4, 55000, 140]])
|
35
|
+
new_prediction = model.predict(new_data)
|
36
|
+
new_prediction = np.round(new_prediction, 2) # Limit to two decimal places
|
37
|
+
|
38
|
+
print(f"Predicted Price for the car: ${new_prediction[0]}")
|