aprendizaje_maquina 0.1.3a → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +28 -50
- data/aprendizaje_maquina.gemspec +1 -0
- data/lib/aprendizaje_maquina.rb +0 -1
- data/lib/aprendizaje_maquina/cargar.rb +43 -42
- data/lib/aprendizaje_maquina/clasificacion_logistica.rb +105 -103
- data/lib/aprendizaje_maquina/matrixx.rb +3 -3
- data/lib/aprendizaje_maquina/regresion_lineal.rb +6 -4
- data/lib/aprendizaje_maquina/version.rb +1 -1
- metadata +6 -13
- data/examples/clasificacion_logistica_example.rb +0 -16
- data/examples/clustering_data.csv +0 -177
- data/examples/clustering_example.rb +0 -9
- data/examples/data_lg.csv +0 -145
- data/examples/regresion_lineal_example.rb +0 -11
- data/examples/train.csv +0 -700
- data/lib/aprendizaje_maquina/clustering.rb +0 -96
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 83991bcb2253d1f7a974363bb8bfd060436d6943
|
4
|
+
data.tar.gz: ae8327ba4923164cff4839189294ac57732fd0a6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 9b764a65f5b62d7a43de1ac1f1ac9eec9ead35d5046171ff16d988c7747d2d348f12eba26ff342845ea95a8d2dd7252cd1444b5186a0abe55a66433721e9601b
|
7
|
+
data.tar.gz: ef4ba5e1cd33969fe0bcdb21e76de1d7e0dee9f02b13303b65b9180a9a6dbd473fa94d4bf79a10ea88e621f2eb99c88a93119f3fa9efb0187d7e6e0650f7b560
|
data/README.md
CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
|
|
18
18
|
|
19
19
|
## Usage
|
20
20
|
|
21
|
-
|
21
|
+
To make predictions with the linear regression model
|
22
22
|
|
23
23
|
first
|
24
24
|
|
@@ -27,17 +27,11 @@ first
|
|
27
27
|
load data from a CSV file
|
28
28
|
|
29
29
|
load = AprendizajeMaquina::Cargar.new("file.csv")
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
# this put all the data of the csv file in a matrix
|
36
|
-
matrix = load.to_matrix
|
37
|
-
|
38
|
-
# create a matrix with the data in the column 0 of the csv file
|
39
|
-
x = load.to_matrix(0) # you can specify range like this load.to_matrix(0..4)
|
40
|
-
|
30
|
+
y = load.to_vector(3) # specify the column that you want to store on a vector
|
31
|
+
matrix = load.to_matrix # this puts all the data of the csv file in a matrix
|
32
|
+
# if you don't specify the column or range of columns
|
33
|
+
x = load.to_matrix(0) # create a matrix with the data in the column 0 of the csv file
|
34
|
+
# you can specify range like this load.to_matrix(0..4)
|
41
35
|
x_with_ones = x.add_ones # this adds a column of ones to the matrix
|
42
36
|
|
43
37
|
to normalize data
|
@@ -47,11 +41,10 @@ to normalize data
|
|
47
41
|
create an instance of the class RegresionLineal
|
48
42
|
|
49
43
|
regresion_lineal = AprendizajeMaquina::RegresionLineal.new(x_matrix,y_vector)
|
50
|
-
regresion_lineal.find_ecuation
|
51
|
-
|
44
|
+
regresion_lineal.find_ecuation # (or use the alias :train) find the theta values => Vector[114.50684133915638, 0.8310043668122375]
|
52
45
|
m = Matrix[[1,95]]
|
53
|
-
|
54
|
-
|
46
|
+
p regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
|
47
|
+
# => Vector[193.45225618631895]
|
55
48
|
|
56
49
|
linear regresion with arrays
|
57
50
|
|
@@ -63,7 +56,7 @@ linear regresion with arrays
|
|
63
56
|
p regresion_simple.ecuacion
|
64
57
|
p regresion_simple.predict(95)
|
65
58
|
|
66
|
-
|
59
|
+
Logistic Classification
|
67
60
|
|
68
61
|
data = AprendizajeMaquina::Cargar.new("data.csv")
|
69
62
|
|
@@ -75,65 +68,50 @@ linear regresion with arrays
|
|
75
68
|
|
76
69
|
training
|
77
70
|
|
78
|
-
the method ClasificacionLogistica#train receives 3 inputs, the first is the numbers of iterations, the second is the alpha value(step size), last one is type of training method ('SGD' for Stochastic Gradient Descents, 'Grad' for Batch Gradiendt Descent and 'Newm' for Newton's method)
|
71
|
+
the method ClasificacionLogistica#train receives 3 inputs: the first is the number of iterations, the second is the alpha value (step size), and the last one is the type of training method ('SGD' for Stochastic Gradient Descent, 'Grad' for Batch Gradient Descent and 'Newm' for Newton's method)
|
79
72
|
|
80
73
|
example 1:
|
81
|
-
|
74
|
+
cl.train(12,0.01,'SGD')
|
82
75
|
example 2:
|
83
|
-
|
76
|
+
cl.train(10,'Newm') # Newton's method doesn't use alpha
|
84
77
|
example 3:
|
85
|
-
|
78
|
+
cl.train(400,0.001,'Grad')
|
86
79
|
|
87
80
|
predictions
|
88
81
|
|
89
82
|
if cl.predict(Matrix[[1,24,0]]) == 1
|
90
|
-
|
83
|
+
p "CANSADO"
|
91
84
|
else
|
92
|
-
|
85
|
+
p "DESCANSADO"
|
93
86
|
end
|
94
87
|
|
95
88
|
make predictions for multiclass(one vs all)
|
96
89
|
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
Vector[-7.220460,0.256681,1.141166]
|
101
|
-
]
|
90
|
+
clases = [Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
|
91
|
+
Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
|
92
|
+
Vector[-7.220460,0.256681,1.141166]]
|
102
93
|
|
103
94
|
predicted_val = []
|
104
95
|
|
105
|
-
|
106
|
-
|
107
|
-
|
96
|
+
clases.each do |e|
|
97
|
+
multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
|
98
|
+
predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
|
108
99
|
end
|
109
100
|
|
110
101
|
if predicted_val[0] == 1
|
111
|
-
|
102
|
+
puts "Vino Tinto"
|
112
103
|
elsif predicted_val[1] == 1
|
113
|
-
|
104
|
+
puts "Vino Rosado"
|
114
105
|
elsif predicted_val[2] == 1
|
115
|
-
|
106
|
+
puts "Vino Blanco"
|
116
107
|
else
|
117
|
-
|
108
|
+
puts predicted_val
|
118
109
|
end
|
119
110
|
|
120
|
-
## Clustering
|
121
|
-
|
122
|
-
load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
|
123
|
-
dataset = load_data.to_matrix
|
124
|
-
|
125
|
-
# initialize with 2 cluster centroids
|
126
|
-
clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
|
127
|
-
|
128
|
-
# fit the model with 20 iterations
|
129
|
-
clustering.fit(20)
|
130
111
|
|
131
|
-
|
132
|
-
p clustering.cluster(0)
|
133
|
-
p clustering.cluster(1)
|
112
|
+
## Contributing
|
134
113
|
|
135
|
-
|
136
|
-
p clustering.predict(Vector[63,190])
|
114
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/TheNoskOneVzla/aprendizaje_maquina.
|
137
115
|
|
138
116
|
## License
|
139
117
|
|
data/aprendizaje_maquina.gemspec
CHANGED
@@ -12,6 +12,7 @@ Gem::Specification.new do |spec|
|
|
12
12
|
|
13
13
|
spec.summary = "Machine learning gem / Una gema para el aprendizaje de maquinas."
|
14
14
|
spec.description = "This is a gem to help ruby developers to write machine learning algorithms easier and faster / Esta es una gema para ayudar a los desarrolladores de ruby a escribir algoritmos de aprendizaje automático más fácil y rápido."
|
15
|
+
spec.homepage = ""
|
15
16
|
spec.license = "MIT"
|
16
17
|
|
17
18
|
# Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
|
data/lib/aprendizaje_maquina.rb
CHANGED
@@ -3,7 +3,6 @@ require "aprendizaje_maquina/cargar"
|
|
3
3
|
require "aprendizaje_maquina/regresion_lineal"
|
4
4
|
require "aprendizaje_maquina/matrixx"
|
5
5
|
require "aprendizaje_maquina/clasificacion_logistica"
|
6
|
-
require "aprendizaje_maquina/clustering"
|
7
6
|
|
8
7
|
module AprendizajeMaquina
|
9
8
|
#class RedNeuronal
|
@@ -1,48 +1,49 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'matrix'
|
3
|
-
|
4
3
|
module AprendizajeMaquina
|
5
|
-
class Cargar
|
6
|
-
def initialize(path_file)
|
7
|
-
@path_file = path_file
|
8
|
-
@csv_data = CSV.read(@path_file)
|
9
|
-
@largo_colum = @csv_data[0].length
|
10
|
-
end
|
11
4
|
|
12
|
-
|
13
|
-
if columnas == nil
|
14
|
-
array = @csv_data.map{ |e| e.map{ |o| o.include?(".") ? o.to_f : o.to_i } }
|
15
|
-
matrix = Matrix.rows(array,copy=true)
|
16
|
-
matrix
|
17
|
-
elsif columnas.is_a?(Range)
|
18
|
-
if columnas.last >= @largo_colum
|
19
|
-
raise ArgumentError, "Number of columns don't exist"
|
20
|
-
else
|
21
|
-
array = @csv_data.map{ |e| e[columnas].map{ |i| i.include?(".") ? i.to_f : i.to_i} }
|
22
|
-
matrix = Matrix.rows(array,copy=true)
|
23
|
-
matrix
|
24
|
-
end
|
25
|
-
elsif columnas.is_a?(Integer)
|
26
|
-
if columnas >= @largo_colum
|
27
|
-
raise ArgumentError, "Number of columns don't exist"
|
28
|
-
else
|
29
|
-
array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
|
30
|
-
matrix = Matrix[array].transpose
|
31
|
-
matrix
|
32
|
-
end
|
33
|
-
else
|
34
|
-
raise ArgumentError, "Must be nil, range or integer"
|
35
|
-
end
|
36
|
-
end
|
5
|
+
class Cargar
|
37
6
|
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
7
|
+
def initialize(path_file)
|
8
|
+
@path_file = path_file
|
9
|
+
@csv_data = CSV.read(@path_file)
|
10
|
+
@largo_colum = @csv_data[0].length
|
11
|
+
end
|
12
|
+
|
13
|
+
def to_matrix(columnas = nil)
|
14
|
+
if columnas == nil
|
15
|
+
array = @csv_data.map{|e| e.map{|o| o.include?(".") ? o.to_f : o.to_i } }
|
16
|
+
matrix = Matrix.rows(array,copy=true)
|
17
|
+
matrix
|
18
|
+
elsif columnas.is_a?(Range)
|
19
|
+
if columnas.last >= @largo_colum
|
20
|
+
raise ArgumentError, "Number of columns don't exist"
|
21
|
+
else
|
22
|
+
array = @csv_data.map{|e| e[columnas].map{|i| i.include?(".") ? i.to_f : i.to_i} }
|
23
|
+
matrix = Matrix.rows(array,copy=true)
|
24
|
+
matrix
|
25
|
+
end
|
26
|
+
elsif columnas.is_a?(Integer)
|
27
|
+
if columnas >= @largo_colum
|
28
|
+
raise ArgumentError, "Number of columns don't exist"
|
29
|
+
else
|
30
|
+
array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
|
31
|
+
matrix = Matrix[array].transpose
|
32
|
+
matrix
|
33
|
+
end
|
34
|
+
else
|
35
|
+
raise ArgumentError, "Must be nil, range or integer"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
def to_vector(columna)
|
40
|
+
if columna >= @largo_colum
|
41
|
+
raise ArgumentError, "Column don't exist"
|
42
|
+
else
|
43
|
+
array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
|
44
|
+
vector = Vector.elements(array,copy=true)
|
45
|
+
vector
|
46
|
+
end
|
47
|
+
end
|
48
|
+
end
|
48
49
|
end
|
@@ -1,104 +1,106 @@
|
|
1
|
-
module AprendizajeMaquina
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
1
|
+
module AprendizajeMaquina
|
2
|
+
|
3
|
+
class ClasificacionLogistica
|
4
|
+
|
5
|
+
def initialize(x,y,theta)
|
6
|
+
@x = x
|
7
|
+
@y = y
|
8
|
+
@theta = theta
|
9
|
+
if y.is_a? Matrix
|
10
|
+
@m = y.row_count
|
11
|
+
elsif y.is_a? Vector
|
12
|
+
@m = y.size
|
13
|
+
else
|
14
|
+
@m = y.length
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
def train(iterations,alpha = nil,type_of_train)
|
19
|
+
case type_of_train
|
20
|
+
when 'Grad' then
|
21
|
+
# gradiente de descenso
|
22
|
+
@cost_history = []
|
23
|
+
for i in 0..iterations
|
24
|
+
x = @x*@theta
|
25
|
+
hx = x.map {|e| sigmoid(e) }
|
26
|
+
@theta = @theta - alpha / @m * @x.transpose * (hx - @y)
|
27
|
+
costo = 0
|
28
|
+
cost.to_a.map{|e| costo = e }
|
29
|
+
@cost_history << ["iteracion: #{i}",costo]
|
30
|
+
end
|
31
|
+
@cost_history
|
32
|
+
p "theta values => #{@theta} | cost => #{costo}"
|
33
|
+
when 'Newm' then
|
34
|
+
# metodo de newton
|
35
|
+
@cost_history = []
|
36
|
+
for i in 0..iterations
|
37
|
+
x = @x*@theta
|
38
|
+
hx = x.map {|e| sigmoid(e) }
|
39
|
+
uno_menos_hx = hx.map{|e| (1-e) }
|
40
|
+
escalar = []
|
41
|
+
for u in 0...hx.size
|
42
|
+
escalar << hx[u] * uno_menos_hx[u]
|
43
|
+
end
|
44
|
+
gradiente = (1.0/@m) * @x.transpose * (hx - @y)
|
45
|
+
hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
|
46
|
+
inversa = (1.0/hessian.det)*(hessian.adjugate)
|
47
|
+
@theta = @theta - inversa * gradiente
|
48
|
+
costo = 0
|
49
|
+
cost.to_a.map{|e| costo = e }
|
50
|
+
@cost_history << ["iteracion: #{i}",costo]
|
51
|
+
end
|
52
|
+
@cost_history
|
53
|
+
p "theta values => #{@theta} | cost => #{costo}"
|
54
|
+
when 'SGD' then
|
55
|
+
# Stochastic Gradient Descent
|
56
|
+
@cost_history = []
|
57
|
+
for i in 0..iterations
|
58
|
+
for i in 0..i
|
59
|
+
x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
|
60
|
+
hx = x.map {|e| sigmoid(e) }
|
61
|
+
@theta = @theta - alpha / @m * @x.transpose * (hx - @y)
|
62
|
+
costo = 0
|
63
|
+
cost.to_a.map{|e| costo = e }
|
64
|
+
@cost_history << ["iteracion: #{i}",costo]
|
65
|
+
end
|
66
|
+
end
|
67
|
+
@cost_history
|
68
|
+
p "theta values => #{@theta} | cost => #{costo}"
|
69
|
+
end
|
70
|
+
end
|
71
|
+
|
72
|
+
def predict(x)
|
73
|
+
hipo = x * @theta
|
74
|
+
var = 0
|
75
|
+
hipo.map {|x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
|
76
|
+
if sigmoid(var) >= 0.5
|
77
|
+
1
|
78
|
+
else
|
79
|
+
0
|
80
|
+
end
|
81
|
+
end
|
82
|
+
|
83
|
+
private
|
84
|
+
|
85
|
+
def sumatoria(array)
|
86
|
+
array.inject(0) { |elem1, elem2| elem1 + elem2 }
|
87
|
+
end
|
88
|
+
|
89
|
+
def sigmoid(x)
|
90
|
+
1.0 / (1.0 + Math.exp(-x))
|
91
|
+
end
|
92
|
+
|
93
|
+
def cost
|
94
|
+
x = @x*@theta
|
95
|
+
hx = x.map {|e| sigmoid(e) }
|
96
|
+
log_hx = hx.map{|e| Math.log(e) }
|
97
|
+
log_uno_menos_hx = hx.map{|e| Math.log(1-e) }
|
98
|
+
costo = -1.0/@m * ( Matrix[@y.to_a] * log_hx + ( Matrix[@y.to_a].map{|i| 1 - i } ) * log_uno_menos_hx )
|
99
|
+
costo
|
100
|
+
end
|
101
|
+
|
102
|
+
def matrix(columns)
|
103
|
+
Matrix.rows(columns, false)
|
104
|
+
end
|
105
|
+
end
|
104
106
|
end
|