aprendizaje_maquina 0.1.3a → 0.1.3

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 14f57cb6236d901eaef534c5ec123f3d002eae54
4
- data.tar.gz: 5ace7512b5596622c67a940225d0d59209e21d2e
3
+ metadata.gz: 83991bcb2253d1f7a974363bb8bfd060436d6943
4
+ data.tar.gz: ae8327ba4923164cff4839189294ac57732fd0a6
5
5
  SHA512:
6
- metadata.gz: c76b07f5bc6f93056138f77cace472344090a6476ecc9f5ec977caa5327f6cd1b7bf99e1bf1841051baba9f6a9e18b5f5239c146bbd59f0bca760ef14e6baf7a
7
- data.tar.gz: d84bd39aa0916ae0b5125819bc2a0d64ad922a4a59d5430d8ed6a46e03a1b04cb1b9173a4654d95964eb4fef77baf557326778341a0030466166373f3e27ab4d
6
+ metadata.gz: 9b764a65f5b62d7a43de1ac1f1ac9eec9ead35d5046171ff16d988c7747d2d348f12eba26ff342845ea95a8d2dd7252cd1444b5186a0abe55a66433721e9601b
7
+ data.tar.gz: ef4ba5e1cd33969fe0bcdb21e76de1d7e0dee9f02b13303b65b9180a9a6dbd473fa94d4bf79a10ea88e621f2eb99c88a93119f3fa9efb0187d7e6e0650f7b560
data/README.md CHANGED
@@ -18,7 +18,7 @@ Or install it yourself as:
18
18
 
19
19
  ## Usage
20
20
 
21
- ## linear regression model
21
+ to make predictions with the linear regression model
22
22
 
23
23
  first
24
24
 
@@ -27,17 +27,11 @@ first
27
27
  load data from a CSV file
28
28
 
29
29
  load = AprendizajeMaquina::Cargar.new("file.csv")
30
-
31
- # specify the column that you want to store on a vector
32
- y = load.to_vector(3)
33
-
34
- # if you don't specify the column or range of columns
35
- # this put all the data of the csv file in a matrix
36
- matrix = load.to_matrix
37
-
38
- # create a matrix with the data in the column 0 of the csv file
39
- x = load.to_matrix(0) # you can specify range like this load.to_matrix(0..4)
40
-
30
+ y = load.to_vector(3) # specify the column that you want to store on a vector
31
+ matrix = load.to_matrix # this puts all the data of the csv file in a matrix
32
+ # if you don't specify the column or range of columns
33
+ x = load.to_matrix(0) # create a matrix with the data in the column 0 of the csv file
34
+ # you can specify range like this load.to_matrix(0..4)
41
35
  x_with_ones = x.add_ones # this add a column of ones to the matrix
42
36
 
43
37
  to normalize data
@@ -47,11 +41,10 @@ to normalize data
47
41
  create an instance of the class RegresionLineal
48
42
 
49
43
  regresion_lineal = AprendizajeMaquina::RegresionLineal.new(x_matrix,y_vector)
50
- regresion_lineal.find_ecuation # (or use the alias :train) return a Vector
51
-
44
+ regresion_lineal.find_ecuation # (or use the alias :train) find the theta values => Vector[114.50684133915638, 0.8310043668122375]
52
45
  m = Matrix[[1,95]]
53
- puts regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
54
- # => Vector[193.45225618631895]
46
+ p regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
47
+ # => Vector[193.45225618631895]
55
48
 
56
49
  linear regression with arrays
57
50
 
@@ -63,7 +56,7 @@ linear regresion with arrays
63
56
  p regresion_simple.ecuacion
64
57
  p regresion_simple.predict(95)
65
58
 
66
- ## Logistic Classification
59
+ Logistic Classification
67
60
 
68
61
  data = AprendizajeMaquina::Cargar.new("data.csv")
69
62
 
@@ -75,65 +68,50 @@ linear regresion with arrays
75
68
 
76
69
  training
77
70
 
78
- the method ClasificacionLogistica#train receives 3 inputs, the first is the numbers of iterations, the second is the alpha value(step size), last one is type of training method ('SGD' for Stochastic Gradient Descents, 'Grad' for Batch Gradiendt Descent and 'Newm' for Newton's method)
71
+ the method ClasificacionLogistica#train receives 3 inputs: the first is the number of iterations, the second is the alpha value (step size), and the last one is the type of training method ('SGD' for Stochastic Gradient Descent, 'Grad' for Batch Gradient Descent and 'Newm' for Newton's method)
79
72
 
80
73
  example 1:
81
- cl.train(12,0.01,'SGD')
74
+ cl.train(12,0.01,'SGD')
82
75
  example 2:
83
- cl.train(10,'NewM') # Newton's method dont use alpha
76
+ cl.train(10,'Newm') # Newton's method doesn't use alpha
84
77
  example 3:
85
- cl.train(400,0.001,'Grad')
78
+ cl.train(400,0.001,'Grad')
86
79
 
87
80
  predictions
88
81
 
89
82
  if cl.predict(Matrix[[1,24,0]]) == 1
90
- p "CANSADO"
83
+ p "CANSADO"
91
84
  else
92
- p "DESCANSADO"
85
+ p "DESCANSADO"
93
86
  end
94
87
 
95
88
  make predictions for multiclass(one vs all)
96
89
 
97
- initial_theta_for_each_class = [
98
- Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
99
- Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
100
- Vector[-7.220460,0.256681,1.141166]
101
- ]
90
+ clases = [Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
91
+ Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
92
+ Vector[-7.220460,0.256681,1.141166]]
102
93
 
103
94
  predicted_val = []
104
95
 
105
- initial_theta_for_each_class.each do |e|
106
- multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
107
- predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
96
+ clases.each do |e|
97
+ multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
98
+ predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
108
99
  end
109
100
 
110
101
  if predicted_val[0] == 1
111
- puts "Vino Tinto"
102
+ puts "Vino Tinto"
112
103
  elsif predicted_val[1] == 1
113
- puts "Vino Rosado"
104
+ puts "Vino Rosado"
114
105
  elsif predicted_val[2] == 1
115
- puts "Vino Blanco"
106
+ puts "Vino Blanco"
116
107
  else
117
- puts predicted_val
108
+ puts predicted_val
118
109
  end
119
110
 
120
- ## Clustering
121
-
122
- load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
123
- dataset = load_data.to_matrix
124
-
125
- # initialize with 2 cluster centroids
126
- clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
127
-
128
- # fit the model with 20 iterations
129
- clustering.fit(20)
130
111
 
131
- # watch the values in their respective cluster
132
- p clustering.cluster(0)
133
- p clustering.cluster(1)
112
+ ## Contributing
134
113
 
135
- # Predict the closest cluster
136
- p clustering.predict(Vector[63,190])
114
+ Bug reports and pull requests are welcome on GitHub at https://github.com/TheNoskOneVzla/aprendizaje_maquina.
137
115
 
138
116
  ## License
139
117
 
@@ -12,6 +12,7 @@ Gem::Specification.new do |spec|
12
12
 
13
13
  spec.summary = "Machine learning gem / Una gema para el aprendizaje de maquinas."
14
14
  spec.description = "This is a gem to help ruby developers to write machine learning algorithms easier and faster / Esta es una gema para ayudar a los desarrolladores de ruby a escribir algoritmos de aprendizaje automático más fácil y rápido."
15
+ spec.homepage = ""
15
16
  spec.license = "MIT"
16
17
 
17
18
  # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'
@@ -3,7 +3,6 @@ require "aprendizaje_maquina/cargar"
3
3
  require "aprendizaje_maquina/regresion_lineal"
4
4
  require "aprendizaje_maquina/matrixx"
5
5
  require "aprendizaje_maquina/clasificacion_logistica"
6
- require "aprendizaje_maquina/clustering"
7
6
 
8
7
  module AprendizajeMaquina
9
8
  #class RedNeuronal
@@ -1,48 +1,49 @@
1
1
  require 'csv'
2
2
  require 'matrix'
3
-
4
3
  module AprendizajeMaquina
5
- class Cargar
6
- def initialize(path_file)
7
- @path_file = path_file
8
- @csv_data = CSV.read(@path_file)
9
- @largo_colum = @csv_data[0].length
10
- end
11
4
 
12
- def to_matrix(columnas = nil)
13
- if columnas == nil
14
- array = @csv_data.map{ |e| e.map{ |o| o.include?(".") ? o.to_f : o.to_i } }
15
- matrix = Matrix.rows(array,copy=true)
16
- matrix
17
- elsif columnas.is_a?(Range)
18
- if columnas.last >= @largo_colum
19
- raise ArgumentError, "Number of columns don't exist"
20
- else
21
- array = @csv_data.map{ |e| e[columnas].map{ |i| i.include?(".") ? i.to_f : i.to_i} }
22
- matrix = Matrix.rows(array,copy=true)
23
- matrix
24
- end
25
- elsif columnas.is_a?(Integer)
26
- if columnas >= @largo_colum
27
- raise ArgumentError, "Number of columns don't exist"
28
- else
29
- array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
30
- matrix = Matrix[array].transpose
31
- matrix
32
- end
33
- else
34
- raise ArgumentError, "Must be nil, range or integer"
35
- end
36
- end
5
+ class Cargar
37
6
 
38
- def to_vector(columna)
39
- if columna >= @largo_colum
40
- raise ArgumentError, "Column don't exist"
41
- else
42
- array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
43
- vector = Vector.elements(array,copy = true)
44
- vector
45
- end
46
- end
47
- end
7
+ def initialize(path_file)
8
+ @path_file = path_file
9
+ @csv_data = CSV.read(@path_file)
10
+ @largo_colum = @csv_data[0].length
11
+ end
12
+
13
+ def to_matrix(columnas = nil)
14
+ if columnas == nil
15
+ array = @csv_data.map{|e| e.map{|o| o.include?(".") ? o.to_f : o.to_i } }
16
+ matrix = Matrix.rows(array,copy=true)
17
+ matrix
18
+ elsif columnas.is_a?(Range)
19
+ if columnas.last >= @largo_colum
20
+ raise ArgumentError, "Number of columns don't exist"
21
+ else
22
+ array = @csv_data.map{|e| e[columnas].map{|i| i.include?(".") ? i.to_f : i.to_i} }
23
+ matrix = Matrix.rows(array,copy=true)
24
+ matrix
25
+ end
26
+ elsif columnas.is_a?(Integer)
27
+ if columnas >= @largo_colum
28
+ raise ArgumentError, "Number of columns don't exist"
29
+ else
30
+ array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
31
+ matrix = Matrix[array].transpose
32
+ matrix
33
+ end
34
+ else
35
+ raise ArgumentError, "Must be nil, range or integer"
36
+ end
37
+ end
38
+
39
+ def to_vector(columna)
40
+ if columna >= @largo_colum
41
+ raise ArgumentError, "Column don't exist"
42
+ else
43
+ array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
44
+ vector = Vector.elements(array,copy=true)
45
+ vector
46
+ end
47
+ end
48
+ end
48
49
  end
@@ -1,104 +1,106 @@
1
- module AprendizajeMaquina
2
- class ClasificacionLogistica
3
- def initialize(x,y,theta)
4
- @x = x
5
- @y = y
6
- @theta = theta
7
- if y.is_a? Matrix
8
- @m = y.row_count
9
- elsif y.is_a? Vector
10
- @m = y.size
11
- else
12
- @m = y.length
13
- end
14
- end
15
-
16
- def train(iterations,alpha = nil,type_of_train)
17
- case type_of_train
18
- when 'Grad' then
19
- # gradiente de descenso
20
- @cost_history = []
21
- for i in 0..iterations
22
- x = @x * @theta
23
- hx = x.map { |e| sigmoid(e) }
24
- @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
25
- costo = 0
26
- cost.to_a.map{ |e| costo = e }
27
- @cost_history << ["iteracion: #{i}",costo]
28
- end
29
- @cost_history
30
- "theta values => #{@theta} | cost => #{costo}"
31
- when 'Newm' then
32
- # metodo de newton
33
- @cost_history = []
34
- for i in 0..iterations
35
- x = @x * @theta
36
- hx = x.map { |e| sigmoid(e) }
37
- uno_menos_hx = hx.map{ |e| (1-e) }
38
- escalar = []
39
- for u in 0...hx.size
40
- escalar << hx[u] * uno_menos_hx[u]
41
- end
42
- gradiente = (1.0/@m) * @x.transpose * (hx - @y)
43
- hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
44
- inversa = (1.0/hessian.det) * (hessian.adjugate)
45
- @theta = @theta - inversa * gradiente
46
- costo = 0
47
- cost.to_a.map{ |e| costo = e }
48
- @cost_history << ["iteracion: #{i}",costo]
49
- end
50
- @cost_history
51
- "theta values => #{@theta} | cost => #{costo}"
52
- when 'SGD' then
53
- # Stochastic Gradient Descent
54
- @cost_history = []
55
- for i in 0..iterations
56
- for i in 0..i
57
- x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
58
- hx = x.map {|e| sigmoid(e) }
59
- @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
60
- costo = 0
61
- cost.to_a.map{|e| costo = e }
62
- @cost_history << ["iteracion: #{i}",costo]
63
- end
64
- end
65
- @cost_history
66
- "theta values => #{@theta} | cost => #{costo}"
67
- end
68
- end
69
-
70
- def predict(x)
71
- hipo = x * @theta
72
- var = 0
73
- hipo.map { |x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
74
- if sigmoid(var) >= 0.5
75
- 1
76
- else
77
- 0
78
- end
79
- end
80
-
81
- private
82
-
83
- def sumatoria(array)
84
- array.inject(0) { |elem1, elem2| elem1 + elem2 }
85
- end
86
-
87
- def sigmoid(x)
88
- 1.0 / (1.0 + Math.exp(-x))
89
- end
90
-
91
- def cost
92
- x = @x*@theta
93
- hx = x.map { |e| sigmoid(e) }
94
- log_hx = hx.map{ |e| Math.log(e) }
95
- log_uno_menos_hx = hx.map{ |e| Math.log(1 - e) }
96
- costo = -1.0/@m * (Matrix[@y.to_a] * log_hx + (Matrix[@y.to_a].map{ |i| 1 - i }) * log_uno_menos_hx)
97
- costo
98
- end
99
-
100
- def matrix(columns)
101
- Matrix.rows(columns, false)
102
- end
103
- end
1
+ module AprendizajeMaquina
2
+
3
+ class ClasificacionLogistica
4
+
5
+ def initialize(x,y,theta)
6
+ @x = x
7
+ @y = y
8
+ @theta = theta
9
+ if y.is_a? Matrix
10
+ @m = y.row_count
11
+ elsif y.is_a? Vector
12
+ @m = y.size
13
+ else
14
+ @m = y.length
15
+ end
16
+ end
17
+
18
+ def train(iterations,alpha = nil,type_of_train)
19
+ case type_of_train
20
+ when 'Grad' then
21
+ # gradiente de descenso
22
+ @cost_history = []
23
+ for i in 0..iterations
24
+ x = @x*@theta
25
+ hx = x.map {|e| sigmoid(e) }
26
+ @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
27
+ costo = 0
28
+ cost.to_a.map{|e| costo = e }
29
+ @cost_history << ["iteracion: #{i}",costo]
30
+ end
31
+ @cost_history
32
+ p "theta values => #{@theta} | cost => #{costo}"
33
+ when 'Newm' then
34
+ # metodo de newton
35
+ @cost_history = []
36
+ for i in 0..iterations
37
+ x = @x*@theta
38
+ hx = x.map {|e| sigmoid(e) }
39
+ uno_menos_hx = hx.map{|e| (1-e) }
40
+ escalar = []
41
+ for u in 0...hx.size
42
+ escalar << hx[u] * uno_menos_hx[u]
43
+ end
44
+ gradiente = (1.0/@m) * @x.transpose * (hx - @y)
45
+ hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
46
+ inversa = (1.0/hessian.det)*(hessian.adjugate)
47
+ @theta = @theta - inversa * gradiente
48
+ costo = 0
49
+ cost.to_a.map{|e| costo = e }
50
+ @cost_history << ["iteracion: #{i}",costo]
51
+ end
52
+ @cost_history
53
+ p "theta values => #{@theta} | cost => #{costo}"
54
+ when 'SGD' then
55
+ # Stochastic Gradient Descent
56
+ @cost_history = []
57
+ for i in 0..iterations
58
+ for i in 0..i
59
+ x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
60
+ hx = x.map {|e| sigmoid(e) }
61
+ @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
62
+ costo = 0
63
+ cost.to_a.map{|e| costo = e }
64
+ @cost_history << ["iteracion: #{i}",costo]
65
+ end
66
+ end
67
+ @cost_history
68
+ p "theta values => #{@theta} | cost => #{costo}"
69
+ end
70
+ end
71
+
72
+ def predict(x)
73
+ hipo = x * @theta
74
+ var = 0
75
+ hipo.map {|x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
76
+ if sigmoid(var) >= 0.5
77
+ 1
78
+ else
79
+ 0
80
+ end
81
+ end
82
+
83
+ private
84
+
85
+ def sumatoria(array)
86
+ array.inject(0) { |elem1, elem2| elem1 + elem2 }
87
+ end
88
+
89
+ def sigmoid(x)
90
+ 1.0 / (1.0 + Math.exp(-x))
91
+ end
92
+
93
+ def cost
94
+ x = @x*@theta
95
+ hx = x.map {|e| sigmoid(e) }
96
+ log_hx = hx.map{|e| Math.log(e) }
97
+ log_uno_menos_hx = hx.map{|e| Math.log(1-e) }
98
+ costo = -1.0/@m * ( Matrix[@y.to_a] * log_hx + ( Matrix[@y.to_a].map{|i| 1 - i } ) * log_uno_menos_hx )
99
+ costo
100
+ end
101
+
102
+ def matrix(columns)
103
+ Matrix.rows(columns, false)
104
+ end
105
+ end
104
106
  end