aprendizaje_maquina 0.1.2 → 0.1.3a
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +91 -13
- data/aprendizaje_maquina.gemspec +0 -1
- data/examples/clasificacion_logistica_example.rb +16 -0
- data/examples/clustering_data.csv +177 -0
- data/examples/clustering_example.rb +9 -0
- data/examples/data_lg.csv +145 -0
- data/examples/regresion_lineal_example.rb +11 -0
- data/examples/train.csv +700 -0
- data/lib/aprendizaje_maquina.rb +3 -5
- data/lib/aprendizaje_maquina/cargar.rb +42 -40
- data/lib/aprendizaje_maquina/clasificacion_logistica.rb +104 -0
- data/lib/aprendizaje_maquina/clustering.rb +96 -0
- data/lib/aprendizaje_maquina/matrixx.rb +3 -3
- data/lib/aprendizaje_maquina/regresion_lineal.rb +4 -6
- data/lib/aprendizaje_maquina/version.rb +1 -1
- metadata +14 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 14f57cb6236d901eaef534c5ec123f3d002eae54
|
4
|
+
data.tar.gz: 5ace7512b5596622c67a940225d0d59209e21d2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c76b07f5bc6f93056138f77cace472344090a6476ecc9f5ec977caa5327f6cd1b7bf99e1bf1841051baba9f6a9e18b5f5239c146bbd59f0bca760ef14e6baf7a
|
7
|
+
data.tar.gz: d84bd39aa0916ae0b5125819bc2a0d64ad922a4a59d5430d8ed6a46e03a1b04cb1b9173a4654d95964eb4fef77baf557326778341a0030466166373f3e27ab4d
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
## linear regression model
|
22
22
|
|
23
23
|
first
|
24
24
|
|
@@ -27,20 +27,31 @@ first
|
|
27
27
|
load data from a CSV file
|
28
28
|
|
29
29
|
load = AprendizajeMaquina::Cargar.new("file.csv")
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
|
31
|
+
# specify the column that you want to store on a vector
|
32
|
+
y = load.to_vector(3)
|
33
|
+
|
34
|
+
# if you don't specify the column or range of columns
|
35
|
+
# this puts all the data of the csv file in a matrix
|
36
|
+
matrix = load.to_matrix
|
37
|
+
|
38
|
+
# create a matrix with the data in the column 0 of the csv file
|
39
|
+
x = load.to_matrix(0) # you can specify range like this load.to_matrix(0..4)
|
40
|
+
|
35
41
|
x_with_ones = x.add_ones # this adds a column of ones to the matrix
|
36
42
|
|
43
|
+
to normalize data
|
44
|
+
|
45
|
+
x.normalize
|
46
|
+
|
37
47
|
create an instance of the class RegresionLineal
|
38
48
|
|
39
49
|
regresion_lineal = AprendizajeMaquina::RegresionLineal.new(x_matrix,y_vector)
|
40
|
-
regresion_lineal.
|
50
|
+
regresion_lineal.find_ecuation # (or use the alias :train) returns a Vector
|
51
|
+
|
41
52
|
m = Matrix[[1,95]]
|
42
|
-
|
43
|
-
|
53
|
+
puts regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
|
54
|
+
# => Vector[193.45225618631895]
|
44
55
|
|
45
56
|
linear regression with arrays
|
46
57
|
|
@@ -48,14 +59,81 @@ linear regresion with arrays
|
|
48
59
|
y = [168,196,170,175,162,169,190,186,176,170,176,179]
|
49
60
|
|
50
61
|
regresion_simple = AprendizajeMaquina::RegresionLineal.new(x,y)
|
51
|
-
regresion_simple.
|
62
|
+
regresion_simple.train
|
52
63
|
p regresion_simple.ecuacion
|
53
|
-
p regresion_simple.
|
64
|
+
p regresion_simple.predict(95)
|
65
|
+
|
66
|
+
## Logistic Classification
|
67
|
+
|
68
|
+
data = AprendizajeMaquina::Cargar.new("data.csv")
|
69
|
+
|
70
|
+
x = data.to_matrix(0..1).add_ones
|
71
|
+
y = data.to_vector(2)
|
72
|
+
initial_theta = Vector[0,0,0]
|
73
|
+
|
74
|
+
cl = AprendizajeMaquina::ClasificacionLogistica.new(x,y,initial_theta)
|
75
|
+
|
76
|
+
training
|
77
|
+
|
78
|
+
the method ClasificacionLogistica#train receives 3 inputs: the first is the number of iterations, the second is the alpha value (step size), and the last one is the type of training method ('SGD' for Stochastic Gradient Descent, 'Grad' for Batch Gradient Descent and 'NewM' for Newton's method)
|
79
|
+
|
80
|
+
example 1:
|
81
|
+
cl.train(12,0.01,'SGD')
|
82
|
+
example 2:
|
83
|
+
cl.train(10,'NewM') # Newton's method doesn't use alpha
|
84
|
+
example 3:
|
85
|
+
cl.train(400,0.001,'Grad')
|
86
|
+
|
87
|
+
predictions
|
88
|
+
|
89
|
+
if cl.predict(Matrix[[1,24,0]]) == 1
|
90
|
+
p "CANSADO"
|
91
|
+
else
|
92
|
+
p "DESCANSADO"
|
93
|
+
end
|
94
|
+
|
95
|
+
make predictions for multiclass (one-vs-all)
|
96
|
+
|
97
|
+
initial_theta_for_each_class = [
|
98
|
+
Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
|
99
|
+
Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
|
100
|
+
Vector[-7.220460,0.256681,1.141166]
|
101
|
+
]
|
102
|
+
|
103
|
+
predicted_val = []
|
104
|
+
|
105
|
+
initial_theta_for_each_class.each do |e|
|
106
|
+
multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
|
107
|
+
predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
|
108
|
+
end
|
109
|
+
|
110
|
+
if predicted_val[0] == 1
|
111
|
+
puts "Vino Tinto"
|
112
|
+
elsif predicted_val[1] == 1
|
113
|
+
puts "Vino Rosado"
|
114
|
+
elsif predicted_val[2] == 1
|
115
|
+
puts "Vino Blanco"
|
116
|
+
else
|
117
|
+
puts predicted_val
|
118
|
+
end
|
119
|
+
|
120
|
+
## Clustering
|
121
|
+
|
122
|
+
load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
|
123
|
+
dataset = load_data.to_matrix
|
124
|
+
|
125
|
+
# initialize with 2 cluster centroids
|
126
|
+
clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
|
54
127
|
|
128
|
+
# fit the model with 20 iterations
|
129
|
+
clustering.fit(20)
|
55
130
|
|
56
|
-
|
131
|
+
# watch the values in their respective cluster
|
132
|
+
p clustering.cluster(0)
|
133
|
+
p clustering.cluster(1)
|
57
134
|
|
58
|
-
|
135
|
+
# Predict the closest cluster
|
136
|
+
p clustering.predict(Vector[63,190])
|
59
137
|
|
60
138
|
## License
|
61
139
|
|
data/aprendizaje_maquina.gemspec
CHANGED
@@ -12,7 +12,6 @@ Gem::Specification.new do |spec|
|
|
12
12
|
|
13
13
|
spec.summary = "Machine learning gem / Una gema para el aprendizaje de maquinas."
|
14
14
|
spec.description = "This is a gem to help ruby developers to write machine learning algorithms easier and faster / Esta es una gema para ayudar a los desarrolladores de ruby a escribir algoritmos de aprendizaje automático más fácil y rápido."
|
15
|
-
spec.homepage = ""
|
16
15
|
spec.license = "MIT"
|
17
16
|
|
18
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'aprendizaje_maquina'
|
2
|
+
|
3
|
+
data = AprendizajeMaquina::Cargar.new("data_lg.csv")
|
4
|
+
x = data.to_matrix(0..1).add_ones
|
5
|
+
y = data.to_vector(2)
|
6
|
+
theta = Vector[0,0,0]
|
7
|
+
rl = AprendizajeMaquina::ClasificacionLogistica.new(x,y,theta)
|
8
|
+
|
9
|
+
rl.train(12,0.01,'SGD')
|
10
|
+
prediction = rl.predict(Matrix[[1,9,22]])
|
11
|
+
|
12
|
+
if prediction == 1
|
13
|
+
puts "cansado"
|
14
|
+
else
|
15
|
+
puts "descansado"
|
16
|
+
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
63, 190
|
2
|
+
65, 145
|
3
|
+
69, 160
|
4
|
+
66, 130
|
5
|
+
63, 120
|
6
|
+
65, 125
|
7
|
+
68, 124
|
8
|
+
64, 135
|
9
|
+
64, 98
|
10
|
+
66, 150
|
11
|
+
67, 160
|
12
|
+
67, 140
|
13
|
+
71, 170
|
14
|
+
60, 109
|
15
|
+
68, 170
|
16
|
+
63, 165
|
17
|
+
67, 147
|
18
|
+
63, 110
|
19
|
+
68, 160
|
20
|
+
60, 125
|
21
|
+
66, 142
|
22
|
+
65, 154
|
23
|
+
62, 119
|
24
|
+
65, 135
|
25
|
+
69, 195
|
26
|
+
63, 110
|
27
|
+
63, 140
|
28
|
+
64, 219
|
29
|
+
66, 135
|
30
|
+
66, 140
|
31
|
+
68, 135
|
32
|
+
68, 99
|
33
|
+
64, 100
|
34
|
+
62, 135
|
35
|
+
74, 200
|
36
|
+
63, 150
|
37
|
+
69, 163
|
38
|
+
71, 116
|
39
|
+
68, 145
|
40
|
+
71, 140
|
41
|
+
68, 125
|
42
|
+
66, 130
|
43
|
+
72, 157
|
44
|
+
68, 200
|
45
|
+
61, 110
|
46
|
+
64, 165
|
47
|
+
70, 115
|
48
|
+
67, 153
|
49
|
+
65, 135
|
50
|
+
62, 116
|
51
|
+
63, 122
|
52
|
+
66, 130
|
53
|
+
73, 180
|
54
|
+
66, 110
|
55
|
+
69, 190
|
56
|
+
68, 160
|
57
|
+
62, 115
|
58
|
+
66, 130
|
59
|
+
64, 110
|
60
|
+
70, 150
|
61
|
+
72, 185
|
62
|
+
70, 175
|
63
|
+
66, 200
|
64
|
+
68, 180
|
65
|
+
62, 135
|
66
|
+
63, 120
|
67
|
+
63, 134
|
68
|
+
72, 185
|
69
|
+
72, 160
|
70
|
+
68, 160
|
71
|
+
65, 143
|
72
|
+
68, 180
|
73
|
+
65, 130
|
74
|
+
65, 200
|
75
|
+
65, 195
|
76
|
+
66, 160
|
77
|
+
70, 195
|
78
|
+
63, 120
|
79
|
+
69, 120
|
80
|
+
64, 110
|
81
|
+
67, 140
|
82
|
+
66, 104
|
83
|
+
63, 125
|
84
|
+
71, 140
|
85
|
+
68, 190
|
86
|
+
65, 125
|
87
|
+
66, 125
|
88
|
+
64, 125
|
89
|
+
72, 245
|
90
|
+
74, 235
|
91
|
+
62, 130
|
92
|
+
68, 135
|
93
|
+
62, 120
|
94
|
+
63, 155
|
95
|
+
65, 130
|
96
|
+
64, 130
|
97
|
+
74, 200
|
98
|
+
70, 145
|
99
|
+
68, 140
|
100
|
+
69, 135
|
101
|
+
61, 124
|
102
|
+
66, 125
|
103
|
+
76, 216
|
104
|
+
66, 120
|
105
|
+
62, 103
|
106
|
+
65, 162
|
107
|
+
69, 165
|
108
|
+
73, 164
|
109
|
+
70, 153
|
110
|
+
75, 172
|
111
|
+
58, 103
|
112
|
+
63, 135
|
113
|
+
68, 160
|
114
|
+
69, 141
|
115
|
+
67, 145
|
116
|
+
68, 148
|
117
|
+
67, 180
|
118
|
+
70, 150
|
119
|
+
70, 160
|
120
|
+
60, 170
|
121
|
+
70, 210
|
122
|
+
66, 175
|
123
|
+
64, 130
|
124
|
+
72, 172
|
125
|
+
64, 108
|
126
|
+
72, 190
|
127
|
+
66, 142
|
128
|
+
69, 235
|
129
|
+
72, 145
|
130
|
+
66, 143
|
131
|
+
70, 141
|
132
|
+
64, 110
|
133
|
+
70, 196
|
134
|
+
65, 118
|
135
|
+
72, 200
|
136
|
+
61, 145
|
137
|
+
63, 110
|
138
|
+
65, 115
|
139
|
+
70, 170
|
140
|
+
66, 160
|
141
|
+
62, 123
|
142
|
+
65, 135
|
143
|
+
68, 140
|
144
|
+
65, 160
|
145
|
+
60, 150
|
146
|
+
72, 168
|
147
|
+
61, 100
|
148
|
+
62, 100
|
149
|
+
66, 135
|
150
|
+
67, 155
|
151
|
+
62, 135
|
152
|
+
64, 200
|
153
|
+
66, 140
|
154
|
+
62, 185
|
155
|
+
65, 125
|
156
|
+
72, 165
|
157
|
+
64, 95
|
158
|
+
72, 155
|
159
|
+
65, 110
|
160
|
+
67, 165
|
161
|
+
65, 132
|
162
|
+
66, 125
|
163
|
+
64, 120
|
164
|
+
63, 130
|
165
|
+
66, 130
|
166
|
+
63, 115
|
167
|
+
59, 160
|
168
|
+
64, 150
|
169
|
+
64, 130
|
170
|
+
61, 120
|
171
|
+
66, 150
|
172
|
+
66, 250
|
173
|
+
71, 150
|
174
|
+
70, 180
|
175
|
+
69, 210
|
176
|
+
61, 105
|
177
|
+
64, 140
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'aprendizaje_maquina'
|
2
|
+
|
3
|
+
load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
|
4
|
+
dataset = load_data.to_matrix
|
5
|
+
clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
|
6
|
+
clustering.fit(20)
|
7
|
+
p clustering.cluster(0)
|
8
|
+
p clustering.cluster(1)
|
9
|
+
p clustering.predict(Vector[63,190])
|
@@ -0,0 +1,145 @@
|
|
1
|
+
0,24,1
|
2
|
+
0.1,23.5,1
|
3
|
+
0.2,23.4,1
|
4
|
+
0.3,23.3,1
|
5
|
+
0.4,23.2,1
|
6
|
+
0.5,23.1,1
|
7
|
+
1,23,1
|
8
|
+
1.1,23.5,1
|
9
|
+
1.2,23.4,1
|
10
|
+
1.3,23.3,1
|
11
|
+
1.4,23.2,1
|
12
|
+
1.5,23.1,1
|
13
|
+
2,22,1
|
14
|
+
2.1,22.5,1
|
15
|
+
2.2,22.4,1
|
16
|
+
2.3,22.3,1
|
17
|
+
2.4,22.2,1
|
18
|
+
2.5,22.1,1
|
19
|
+
3,21,1
|
20
|
+
3.1,21.5,1
|
21
|
+
3.2,21.4,1
|
22
|
+
3.3,21.3,1
|
23
|
+
3.4,21.2,1
|
24
|
+
3.5,21.1,1
|
25
|
+
4,20,1
|
26
|
+
4.1,20.5,1
|
27
|
+
4.2,20.4,1
|
28
|
+
4.3,20.3,1
|
29
|
+
4.4,20.2,1
|
30
|
+
4.5,20.1,1
|
31
|
+
5,19,1
|
32
|
+
5.1,19.5,1
|
33
|
+
5.2,19.4,1
|
34
|
+
5.3,19.3,1
|
35
|
+
5.4,19.2,1
|
36
|
+
5.5,19.1,1
|
37
|
+
6,18,1
|
38
|
+
6.1,18.5,1
|
39
|
+
6.2,18.4,1
|
40
|
+
6.3,18.3,1
|
41
|
+
6.4,18.2,1
|
42
|
+
6.5,18.1,1
|
43
|
+
7,17,1
|
44
|
+
7.1,17.5,1
|
45
|
+
7.2,17.4,1
|
46
|
+
7.3,17.3,1
|
47
|
+
7.4,17.2,1
|
48
|
+
7.5,17.1,1
|
49
|
+
8,16,0
|
50
|
+
8.1,16.5,0
|
51
|
+
8.2,16.4,0
|
52
|
+
8.3,16.3,0
|
53
|
+
8.4,16.2,0
|
54
|
+
8.5,16.1,0
|
55
|
+
9,15,0
|
56
|
+
9.1,15.5,0
|
57
|
+
9.2,15.4,0
|
58
|
+
9.3,15.3,0
|
59
|
+
9.4,15.2,0
|
60
|
+
9.5,15.1,0
|
61
|
+
10,14,0
|
62
|
+
10.1,14.5,0
|
63
|
+
10.2,14.4,0
|
64
|
+
10.3,14.3,0
|
65
|
+
10.4,14.2,0
|
66
|
+
10.5,14.1,0
|
67
|
+
11,13,0
|
68
|
+
11.1,13.5,0
|
69
|
+
11.2,13.4,0
|
70
|
+
11.3,13.3,0
|
71
|
+
11.4,13.2,0
|
72
|
+
11.5,13.1,0
|
73
|
+
12,12,0
|
74
|
+
12.1,12.5,0
|
75
|
+
12.2,12.4,0
|
76
|
+
12.3,12.3,0
|
77
|
+
12.4,12.2,0
|
78
|
+
12.5,12.1,0
|
79
|
+
13,11,0
|
80
|
+
13.1,11.5,0
|
81
|
+
13.2,11.4,0
|
82
|
+
13.3,11.3,0
|
83
|
+
13.4,11.2,0
|
84
|
+
13.5,11.1,0
|
85
|
+
14,10,0
|
86
|
+
14.1,10.5,0
|
87
|
+
14.2,10.4,0
|
88
|
+
14.3,10.3,0
|
89
|
+
14.4,10.2,0
|
90
|
+
14.5,10.1,0
|
91
|
+
15,9,0
|
92
|
+
15.1,9.5,0
|
93
|
+
15.2,9.4,0
|
94
|
+
15.3,9.3,0
|
95
|
+
15.4,9.2,0
|
96
|
+
15.5,9.1,0
|
97
|
+
16,8,0
|
98
|
+
16.1,8.5,0
|
99
|
+
16.2,8.4,0
|
100
|
+
16.3,8.3,0
|
101
|
+
16.4,8.2,0
|
102
|
+
16.5,8.1,0
|
103
|
+
17,7,0
|
104
|
+
17.1,7.5,0
|
105
|
+
17.2,7.4,0
|
106
|
+
17.3,7.3,0
|
107
|
+
17.4,7.2,0
|
108
|
+
17.5,7.1,0
|
109
|
+
18,6,0
|
110
|
+
18.1,6.5,0
|
111
|
+
18.2,6.4,0
|
112
|
+
18.3,6.3,0
|
113
|
+
18.4,6.2,0
|
114
|
+
18.5,6.1,0
|
115
|
+
19,5,0
|
116
|
+
19.1,5.5,0
|
117
|
+
19.2,5.4,0
|
118
|
+
19.3,5.3,0
|
119
|
+
19.4,5.2,0
|
120
|
+
19.5,5.1,0
|
121
|
+
20,4,0
|
122
|
+
20.1,4.5,0
|
123
|
+
20.2,4.4,0
|
124
|
+
20.3,4.3,0
|
125
|
+
20.4,4.2,0
|
126
|
+
20.5,4.1,0
|
127
|
+
21,3,0
|
128
|
+
21.1,3.5,0
|
129
|
+
21.2,3.4,0
|
130
|
+
21.3,3.3,0
|
131
|
+
21.4,3.2,0
|
132
|
+
21.5,3.1,0
|
133
|
+
22,2,0
|
134
|
+
22.1,2.5,0
|
135
|
+
22.2,2.4,0
|
136
|
+
22.3,2.3,0
|
137
|
+
22.4,2.2,0
|
138
|
+
22.5,2.1,0
|
139
|
+
23,1,0
|
140
|
+
23.1,1.5,0
|
141
|
+
23.2,1.4,0
|
142
|
+
23.3,1.3,0
|
143
|
+
23.4,1.2,0
|
144
|
+
23.5,1.1,0
|
145
|
+
24,0,0
|