aprendizaje_maquina 0.1.3 → 0.1.4.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +50 -28
- data/aprendizaje_maquina.gemspec +0 -1
- data/examples/clasificacion_logistica_example.rb +16 -0
- data/examples/clustering_data.csv +177 -0
- data/examples/clustering_example.rb +9 -0
- data/examples/data_lg.csv +145 -0
- data/examples/regresion_lineal_example.rb +11 -0
- data/examples/train.csv +700 -0
- data/lib/aprendizaje_maquina.rb +1 -0
- data/lib/aprendizaje_maquina/cargar.rb +42 -43
- data/lib/aprendizaje_maquina/clasificacion_logistica.rb +103 -105
- data/lib/aprendizaje_maquina/clustering.rb +96 -0
- data/lib/aprendizaje_maquina/matrixx.rb +3 -3
- data/lib/aprendizaje_maquina/regresion_lineal.rb +4 -6
- data/lib/aprendizaje_maquina/version.rb +1 -1
- metadata +13 -6
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 03e7fac70b0b1de20e1496d918b96fbf5c716be6
|
4
|
+
data.tar.gz: 38669d3a47c811f6af8794223c6c5395c3b1e35d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 74c9a63aa42c3844846ddd5db0dc6300ec6022c071448efd7ddc092e5f857afbbffd47cb859fa6d3a3d495cc68ecfa97b7560cea820c87c8200a410b95ca09a9
|
7
|
+
data.tar.gz: 948708cb197af7a4fdbeba2c423ced683df600ee15ebf744dfbd4e8723877432a7cd1d170a817cdb3887fed53378fbd29f4339a71fede89c6e4c8b4a94c2e241
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
## linear regression model
|
22
22
|
|
23
23
|
first
|
24
24
|
|
@@ -27,11 +27,17 @@ first
|
|
27
27
|
load data from a CSV file
|
28
28
|
|
29
29
|
load = AprendizajeMaquina::Cargar.new("file.csv")
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
30
|
+
|
31
|
+
# specify the column that you want to store on a vector
|
32
|
+
y = load.to_vector(3)
|
33
|
+
|
34
|
+
# if you don't specify the column or range of columns
|
35
|
+
# this puts all the data of the csv file in a matrix
|
36
|
+
matrix = load.to_matrix
|
37
|
+
|
38
|
+
# create a matrix with the data in the column 0 of the csv file
|
39
|
+
x = load.to_matrix(0) # you can specify range like this load.to_matrix(0..4)
|
40
|
+
|
35
41
|
x_with_ones = x.add_ones # this adds a column of ones to the matrix
|
36
42
|
|
37
43
|
to normalize data
|
@@ -41,10 +47,11 @@ to normalize data
|
|
41
47
|
create an instance of the class RegresionLineal
|
42
48
|
|
43
49
|
regresion_lineal = AprendizajeMaquina::RegresionLineal.new(x_matrix,y_vector)
|
44
|
-
regresion_lineal.find_ecuation
|
50
|
+
regresion_lineal.find_ecuation # (or use the alias :train) returns a Vector
|
51
|
+
|
45
52
|
m = Matrix[[1,95]]
|
46
|
-
|
47
|
-
|
53
|
+
puts regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
|
54
|
+
# => Vector[193.45225618631895]
|
48
55
|
|
49
56
|
linear regression with arrays
|
50
57
|
|
@@ -56,7 +63,7 @@ linear regresion with arrays
|
|
56
63
|
p regresion_simple.ecuacion
|
57
64
|
p regresion_simple.predict(95)
|
58
65
|
|
59
|
-
Logistic Classification
|
66
|
+
## Logistic Classification
|
60
67
|
|
61
68
|
data = AprendizajeMaquina::Cargar.new("data.csv")
|
62
69
|
|
@@ -68,50 +75,65 @@ Logistic Classification
|
|
68
75
|
|
69
76
|
training
|
70
77
|
|
71
|
-
|
78
|
+
the method ClasificacionLogistica#train receives 3 inputs: the first is the number of iterations, the second is the alpha value (step size), and the last one is the type of training method ('SGD' for Stochastic Gradient Descent, 'Grad' for Batch Gradient Descent and 'NewM' for Newton's method)
|
72
79
|
|
73
80
|
example 1:
|
74
|
-
|
81
|
+
cl.train(12,0.01,'SGD')
|
75
82
|
example 2:
|
76
|
-
|
83
|
+
cl.train(10,'NewM') # Newton's method doesn't use alpha
|
77
84
|
example 3:
|
78
|
-
|
85
|
+
cl.train(400,0.001,'Grad')
|
79
86
|
|
80
87
|
predictions
|
81
88
|
|
82
89
|
if cl.predict(Matrix[[1,24,0]]) == 1
|
83
|
-
|
90
|
+
p "CANSADO"
|
84
91
|
else
|
85
|
-
|
92
|
+
p "DESCANSADO"
|
86
93
|
end
|
87
94
|
|
88
95
|
make predictions for multiclass(one vs all)
|
89
96
|
|
90
|
-
|
91
|
-
|
92
|
-
|
97
|
+
initial_theta_for_each_class = [
|
98
|
+
Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
|
99
|
+
Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
|
100
|
+
Vector[-7.220460,0.256681,1.141166]
|
101
|
+
]
|
93
102
|
|
94
103
|
predicted_val = []
|
95
104
|
|
96
|
-
|
97
|
-
|
98
|
-
|
105
|
+
initial_theta_for_each_class.each do |e|
|
106
|
+
multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
|
107
|
+
predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
|
99
108
|
end
|
100
109
|
|
101
110
|
if predicted_val[0] == 1
|
102
|
-
|
111
|
+
puts "Vino Tinto"
|
103
112
|
elsif predicted_val[1] == 1
|
104
|
-
|
113
|
+
puts "Vino Rosado"
|
105
114
|
elsif predicted_val[2] == 1
|
106
|
-
|
115
|
+
puts "Vino Blanco"
|
107
116
|
else
|
108
|
-
|
117
|
+
puts predicted_val
|
109
118
|
end
|
110
119
|
|
120
|
+
## Clustering
|
121
|
+
|
122
|
+
load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
|
123
|
+
dataset = load_data.to_matrix
|
124
|
+
|
125
|
+
# initialize with 2 cluster centroids
|
126
|
+
clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
|
127
|
+
|
128
|
+
# fit the model with 20 iterations
|
129
|
+
clustering.fit(20)
|
111
130
|
|
112
|
-
|
131
|
+
# watch the values in their respective cluster
|
132
|
+
p clustering.cluster(0)
|
133
|
+
p clustering.cluster(1)
|
113
134
|
|
114
|
-
|
135
|
+
# Predict the closest cluster
|
136
|
+
p clustering.predict(Vector[63,190])
|
115
137
|
|
116
138
|
## License
|
117
139
|
|
data/aprendizaje_maquina.gemspec
CHANGED
@@ -12,7 +12,6 @@ Gem::Specification.new do |spec|
|
|
12
12
|
|
13
13
|
spec.summary = "Machine learning gem / Una gema para el aprendizaje de maquinas."
|
14
14
|
spec.description = "This is a gem to help ruby developers to write machine learning algorithms easier and faster / Esta es una gema para ayudar a los desarrolladores de ruby a escribir algoritmos de aprendizaje automático más fácil y rápido."
|
15
|
-
spec.homepage = ""
|
16
15
|
spec.license = "MIT"
|
17
16
|
|
18
17
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
@@ -0,0 +1,16 @@
|
|
1
|
+
require 'aprendizaje_maquina'
|
2
|
+
|
3
|
+
data = AprendizajeMaquina::Cargar.new("data_lg.csv")
|
4
|
+
x = data.to_matrix(0..1).add_ones
|
5
|
+
y = data.to_vector(2)
|
6
|
+
theta = Vector[0,0,0]
|
7
|
+
rl = AprendizajeMaquina::ClasificacionLogistica.new(x,y,theta)
|
8
|
+
|
9
|
+
rl.train(12,0.01,'SGD')
|
10
|
+
prediction = rl.predict(Matrix[[1,9,22]])
|
11
|
+
|
12
|
+
if prediction == 1
|
13
|
+
puts "cansado"
|
14
|
+
else
|
15
|
+
puts "descansado"
|
16
|
+
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
63, 190
|
2
|
+
65, 145
|
3
|
+
69, 160
|
4
|
+
66, 130
|
5
|
+
63, 120
|
6
|
+
65, 125
|
7
|
+
68, 124
|
8
|
+
64, 135
|
9
|
+
64, 98
|
10
|
+
66, 150
|
11
|
+
67, 160
|
12
|
+
67, 140
|
13
|
+
71, 170
|
14
|
+
60, 109
|
15
|
+
68, 170
|
16
|
+
63, 165
|
17
|
+
67, 147
|
18
|
+
63, 110
|
19
|
+
68, 160
|
20
|
+
60, 125
|
21
|
+
66, 142
|
22
|
+
65, 154
|
23
|
+
62, 119
|
24
|
+
65, 135
|
25
|
+
69, 195
|
26
|
+
63, 110
|
27
|
+
63, 140
|
28
|
+
64, 219
|
29
|
+
66, 135
|
30
|
+
66, 140
|
31
|
+
68, 135
|
32
|
+
68, 99
|
33
|
+
64, 100
|
34
|
+
62, 135
|
35
|
+
74, 200
|
36
|
+
63, 150
|
37
|
+
69, 163
|
38
|
+
71, 116
|
39
|
+
68, 145
|
40
|
+
71, 140
|
41
|
+
68, 125
|
42
|
+
66, 130
|
43
|
+
72, 157
|
44
|
+
68, 200
|
45
|
+
61, 110
|
46
|
+
64, 165
|
47
|
+
70, 115
|
48
|
+
67, 153
|
49
|
+
65, 135
|
50
|
+
62, 116
|
51
|
+
63, 122
|
52
|
+
66, 130
|
53
|
+
73, 180
|
54
|
+
66, 110
|
55
|
+
69, 190
|
56
|
+
68, 160
|
57
|
+
62, 115
|
58
|
+
66, 130
|
59
|
+
64, 110
|
60
|
+
70, 150
|
61
|
+
72, 185
|
62
|
+
70, 175
|
63
|
+
66, 200
|
64
|
+
68, 180
|
65
|
+
62, 135
|
66
|
+
63, 120
|
67
|
+
63, 134
|
68
|
+
72, 185
|
69
|
+
72, 160
|
70
|
+
68, 160
|
71
|
+
65, 143
|
72
|
+
68, 180
|
73
|
+
65, 130
|
74
|
+
65, 200
|
75
|
+
65, 195
|
76
|
+
66, 160
|
77
|
+
70, 195
|
78
|
+
63, 120
|
79
|
+
69, 120
|
80
|
+
64, 110
|
81
|
+
67, 140
|
82
|
+
66, 104
|
83
|
+
63, 125
|
84
|
+
71, 140
|
85
|
+
68, 190
|
86
|
+
65, 125
|
87
|
+
66, 125
|
88
|
+
64, 125
|
89
|
+
72, 245
|
90
|
+
74, 235
|
91
|
+
62, 130
|
92
|
+
68, 135
|
93
|
+
62, 120
|
94
|
+
63, 155
|
95
|
+
65, 130
|
96
|
+
64, 130
|
97
|
+
74, 200
|
98
|
+
70, 145
|
99
|
+
68, 140
|
100
|
+
69, 135
|
101
|
+
61, 124
|
102
|
+
66, 125
|
103
|
+
76, 216
|
104
|
+
66, 120
|
105
|
+
62, 103
|
106
|
+
65, 162
|
107
|
+
69, 165
|
108
|
+
73, 164
|
109
|
+
70, 153
|
110
|
+
75, 172
|
111
|
+
58, 103
|
112
|
+
63, 135
|
113
|
+
68, 160
|
114
|
+
69, 141
|
115
|
+
67, 145
|
116
|
+
68, 148
|
117
|
+
67, 180
|
118
|
+
70, 150
|
119
|
+
70, 160
|
120
|
+
60, 170
|
121
|
+
70, 210
|
122
|
+
66, 175
|
123
|
+
64, 130
|
124
|
+
72, 172
|
125
|
+
64, 108
|
126
|
+
72, 190
|
127
|
+
66, 142
|
128
|
+
69, 235
|
129
|
+
72, 145
|
130
|
+
66, 143
|
131
|
+
70, 141
|
132
|
+
64, 110
|
133
|
+
70, 196
|
134
|
+
65, 118
|
135
|
+
72, 200
|
136
|
+
61, 145
|
137
|
+
63, 110
|
138
|
+
65, 115
|
139
|
+
70, 170
|
140
|
+
66, 160
|
141
|
+
62, 123
|
142
|
+
65, 135
|
143
|
+
68, 140
|
144
|
+
65, 160
|
145
|
+
60, 150
|
146
|
+
72, 168
|
147
|
+
61, 100
|
148
|
+
62, 100
|
149
|
+
66, 135
|
150
|
+
67, 155
|
151
|
+
62, 135
|
152
|
+
64, 200
|
153
|
+
66, 140
|
154
|
+
62, 185
|
155
|
+
65, 125
|
156
|
+
72, 165
|
157
|
+
64, 95
|
158
|
+
72, 155
|
159
|
+
65, 110
|
160
|
+
67, 165
|
161
|
+
65, 132
|
162
|
+
66, 125
|
163
|
+
64, 120
|
164
|
+
63, 130
|
165
|
+
66, 130
|
166
|
+
63, 115
|
167
|
+
59, 160
|
168
|
+
64, 150
|
169
|
+
64, 130
|
170
|
+
61, 120
|
171
|
+
66, 150
|
172
|
+
66, 250
|
173
|
+
71, 150
|
174
|
+
70, 180
|
175
|
+
69, 210
|
176
|
+
61, 105
|
177
|
+
64, 140
|
@@ -0,0 +1,9 @@
|
|
1
|
+
require 'aprendizaje_maquina'
|
2
|
+
|
3
|
+
load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
|
4
|
+
dataset = load_data.to_matrix
|
5
|
+
clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
|
6
|
+
clustering.fit(20)
|
7
|
+
p clustering.cluster(0)
|
8
|
+
p clustering.cluster(1)
|
9
|
+
p clustering.predict(Vector[63,190])
|
@@ -0,0 +1,145 @@
|
|
1
|
+
0,24,1
|
2
|
+
0.1,23.5,1
|
3
|
+
0.2,23.4,1
|
4
|
+
0.3,23.3,1
|
5
|
+
0.4,23.2,1
|
6
|
+
0.5,23.1,1
|
7
|
+
1,23,1
|
8
|
+
1.1,23.5,1
|
9
|
+
1.2,23.4,1
|
10
|
+
1.3,23.3,1
|
11
|
+
1.4,23.2,1
|
12
|
+
1.5,23.1,1
|
13
|
+
2,22,1
|
14
|
+
2.1,22.5,1
|
15
|
+
2.2,22.4,1
|
16
|
+
2.3,22.3,1
|
17
|
+
2.4,22.2,1
|
18
|
+
2.5,22.1,1
|
19
|
+
3,21,1
|
20
|
+
3.1,21.5,1
|
21
|
+
3.2,21.4,1
|
22
|
+
3.3,21.3,1
|
23
|
+
3.4,21.2,1
|
24
|
+
3.5,21.1,1
|
25
|
+
4,20,1
|
26
|
+
4.1,20.5,1
|
27
|
+
4.2,20.4,1
|
28
|
+
4.3,20.3,1
|
29
|
+
4.4,20.2,1
|
30
|
+
4.5,20.1,1
|
31
|
+
5,19,1
|
32
|
+
5.1,19.5,1
|
33
|
+
5.2,19.4,1
|
34
|
+
5.3,19.3,1
|
35
|
+
5.4,19.2,1
|
36
|
+
5.5,19.1,1
|
37
|
+
6,18,1
|
38
|
+
6.1,18.5,1
|
39
|
+
6.2,18.4,1
|
40
|
+
6.3,18.3,1
|
41
|
+
6.4,18.2,1
|
42
|
+
6.5,18.1,1
|
43
|
+
7,17,1
|
44
|
+
7.1,17.5,1
|
45
|
+
7.2,17.4,1
|
46
|
+
7.3,17.3,1
|
47
|
+
7.4,17.2,1
|
48
|
+
7.5,17.1,1
|
49
|
+
8,16,0
|
50
|
+
8.1,16.5,0
|
51
|
+
8.2,16.4,0
|
52
|
+
8.3,16.3,0
|
53
|
+
8.4,16.2,0
|
54
|
+
8.5,16.1,0
|
55
|
+
9,15,0
|
56
|
+
9.1,15.5,0
|
57
|
+
9.2,15.4,0
|
58
|
+
9.3,15.3,0
|
59
|
+
9.4,15.2,0
|
60
|
+
9.5,15.1,0
|
61
|
+
10,14,0
|
62
|
+
10.1,14.5,0
|
63
|
+
10.2,14.4,0
|
64
|
+
10.3,14.3,0
|
65
|
+
10.4,14.2,0
|
66
|
+
10.5,14.1,0
|
67
|
+
11,13,0
|
68
|
+
11.1,13.5,0
|
69
|
+
11.2,13.4,0
|
70
|
+
11.3,13.3,0
|
71
|
+
11.4,13.2,0
|
72
|
+
11.5,13.1,0
|
73
|
+
12,12,0
|
74
|
+
12.1,12.5,0
|
75
|
+
12.2,12.4,0
|
76
|
+
12.3,12.3,0
|
77
|
+
12.4,12.2,0
|
78
|
+
12.5,12.1,0
|
79
|
+
13,11,0
|
80
|
+
13.1,11.5,0
|
81
|
+
13.2,11.4,0
|
82
|
+
13.3,11.3,0
|
83
|
+
13.4,11.2,0
|
84
|
+
13.5,11.1,0
|
85
|
+
14,10,0
|
86
|
+
14.1,10.5,0
|
87
|
+
14.2,10.4,0
|
88
|
+
14.3,10.3,0
|
89
|
+
14.4,10.2,0
|
90
|
+
14.5,10.1,0
|
91
|
+
15,9,0
|
92
|
+
15.1,9.5,0
|
93
|
+
15.2,9.4,0
|
94
|
+
15.3,9.3,0
|
95
|
+
15.4,9.2,0
|
96
|
+
15.5,9.1,0
|
97
|
+
16,8,0
|
98
|
+
16.1,8.5,0
|
99
|
+
16.2,8.4,0
|
100
|
+
16.3,8.3,0
|
101
|
+
16.4,8.2,0
|
102
|
+
16.5,8.1,0
|
103
|
+
17,7,0
|
104
|
+
17.1,7.5,0
|
105
|
+
17.2,7.4,0
|
106
|
+
17.3,7.3,0
|
107
|
+
17.4,7.2,0
|
108
|
+
17.5,7.1,0
|
109
|
+
18,6,0
|
110
|
+
18.1,6.5,0
|
111
|
+
18.2,6.4,0
|
112
|
+
18.3,6.3,0
|
113
|
+
18.4,6.2,0
|
114
|
+
18.5,6.1,0
|
115
|
+
19,5,0
|
116
|
+
19.1,5.5,0
|
117
|
+
19.2,5.4,0
|
118
|
+
19.3,5.3,0
|
119
|
+
19.4,5.2,0
|
120
|
+
19.5,5.1,0
|
121
|
+
20,4,0
|
122
|
+
20.1,4.5,0
|
123
|
+
20.2,4.4,0
|
124
|
+
20.3,4.3,0
|
125
|
+
20.4,4.2,0
|
126
|
+
20.5,4.1,0
|
127
|
+
21,3,0
|
128
|
+
21.1,3.5,0
|
129
|
+
21.2,3.4,0
|
130
|
+
21.3,3.3,0
|
131
|
+
21.4,3.2,0
|
132
|
+
21.5,3.1,0
|
133
|
+
22,2,0
|
134
|
+
22.1,2.5,0
|
135
|
+
22.2,2.4,0
|
136
|
+
22.3,2.3,0
|
137
|
+
22.4,2.2,0
|
138
|
+
22.5,2.1,0
|
139
|
+
23,1,0
|
140
|
+
23.1,1.5,0
|
141
|
+
23.2,1.4,0
|
142
|
+
23.3,1.3,0
|
143
|
+
23.4,1.2,0
|
144
|
+
23.5,1.1,0
|
145
|
+
24,0,0
|