RubyGems - aprendizaje_maquina - Versions diffs - 0.1.3a → 0.1.3 - Mend

aprendizaje_maquina 0.1.3a → 0.1.3

Files changed (17) hide show

checksums.yaml +4 -4
data/README.md +28 -50
data/aprendizaje_maquina.gemspec +1 -0
data/lib/aprendizaje_maquina.rb +0 -1
data/lib/aprendizaje_maquina/cargar.rb +43 -42
data/lib/aprendizaje_maquina/clasificacion_logistica.rb +105 -103
data/lib/aprendizaje_maquina/matrixx.rb +3 -3
data/lib/aprendizaje_maquina/regresion_lineal.rb +6 -4
data/lib/aprendizaje_maquina/version.rb +1 -1
metadata +6 -13
data/examples/clasificacion_logistica_example.rb +0 -16
data/examples/clustering_data.csv +0 -177
data/examples/clustering_example.rb +0 -9
data/examples/data_lg.csv +0 -145
data/examples/regresion_lineal_example.rb +0 -11
data/examples/train.csv +0 -700
data/lib/aprendizaje_maquina/clustering.rb +0 -96

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 14f57cb6236d901eaef534c5ec123f3d002eae54
-  data.tar.gz: 5ace7512b5596622c67a940225d0d59209e21d2e
+  metadata.gz: 83991bcb2253d1f7a974363bb8bfd060436d6943
+  data.tar.gz: ae8327ba4923164cff4839189294ac57732fd0a6
 SHA512:
-  metadata.gz: c76b07f5bc6f93056138f77cace472344090a6476ecc9f5ec977caa5327f6cd1b7bf99e1bf1841051baba9f6a9e18b5f5239c146bbd59f0bca760ef14e6baf7a
-  data.tar.gz: d84bd39aa0916ae0b5125819bc2a0d64ad922a4a59d5430d8ed6a46e03a1b04cb1b9173a4654d95964eb4fef77baf557326778341a0030466166373f3e27ab4d
+  metadata.gz: 9b764a65f5b62d7a43de1ac1f1ac9eec9ead35d5046171ff16d988c7747d2d348f12eba26ff342845ea95a8d2dd7252cd1444b5186a0abe55a66433721e9601b
+  data.tar.gz: ef4ba5e1cd33969fe0bcdb21e76de1d7e0dee9f02b13303b65b9180a9a6dbd473fa94d4bf79a10ea88e621f2eb99c88a93119f3fa9efb0187d7e6e0650f7b560

data/README.md CHANGED Viewed

@@ -18,7 +18,7 @@ Or install it yourself as:
 ## Usage
-## linear regression model
+To make predictions with the linear regression model
 first
@@ -27,17 +27,11 @@ first
 load data from a CSV file
 	load = AprendizajeMaquina::Cargar.new("file.csv")
-	# specify the column that you want to store on a vector
-	y = load.to_vector(3)
-	# if you don't specify the column or range of columns
-	# this put all the data of the csv file in a matrix
-	matrix = load.to_matrix
-	# create a matrix with the data in the column 0 of the csv file
-	x = load.to_matrix(0)    # you can specify range like this load.to_matrix(0..4)
+	y = load.to_vector(3)    # specify the column that you want to store in a vector
+    matrix = load.to_matrix  # puts all the data of the CSV file in a matrix
+                             # when you don't specify a column or range of columns
+	x = load.to_matrix(0)    # creates a matrix with the data in column 0 of the CSV file
+	                         # you can also specify a range, like this: load.to_matrix(0..4)
 	x_with_ones = x.add_ones # this add a column of ones to the matrix
 to normalize data
@@ -47,11 +41,10 @@ to normalize data
 create an instance of the class RegresionLineal
 	regresion_lineal = AprendizajeMaquina::RegresionLineal.new(x_matrix,y_vector)
-	regresion_lineal.find_ecuation           # (or use the alias :train) return a Vector
+	regresion_lineal.find_ecuation         # (or use the alias :train) finds the theta values => Vector[114.50684133915638, 0.8310043668122375]
 	m = Matrix[[1,95]]
-	puts regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
-					                                 # => Vector[193.45225618631895]
+	p regresion_lineal.make_prediction(m) # (or use the alias :predict)  to make predictions
+					      # => Vector[193.45225618631895]
 linear regresion with arrays
@@ -63,7 +56,7 @@ linear regresion with arrays
 	p regresion_simple.ecuacion
 	p regresion_simple.predict(95)
-## Logistic Classification
+Logistic Classification
 	data = AprendizajeMaquina::Cargar.new("data.csv")
@@ -75,65 +68,50 @@ linear regresion with arrays
 training
-the method ClasificacionLogistica#train receives 3 inputs, the first is the numbers of iterations, the second is the alpha value(step size), last one is type of training method ('SGD' for Stochastic Gradient Descents, 'Grad' for Batch Gradiendt Descent and 'Newm' for Newton's method)
+	the method ClasificacionLogistica#train receives 3 inputs: the first is the number of iterations, the second is the alpha value (step size), and the last one is the type of training method ('SGD' for Stochastic Gradient Descent, 'Grad' for Batch Gradient Descent and 'Newm' for Newton's method)
 	example 1:
-	cl.train(12,0.01,'SGD')
+		cl.train(12,0.01,'SGD')
 	example 2:
-	cl.train(10,'NewM') # Newton's method dont use alpha
+		cl.train(10,'Newm') # Newton's method doesn't use alpha
 	example 3:
-	cl.train(400,0.001,'Grad')
+		cl.train(400,0.001,'Grad')
 predictions
 	if cl.predict(Matrix[[1,24,0]]) == 1
-	  p "CANSADO"
+		p "CANSADO"
 	else
-	  p "DESCANSADO"
+		p "DESCANSADO"
 	end
 make predictions for multiclass(one vs all)
-	initial_theta_for_each_class = [
-	  Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
-		Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
-	  Vector[-7.220460,0.256681,1.141166]
-	]
+	clases = [Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
+			  Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
+			  Vector[-7.220460,0.256681,1.141166]]
 	predicted_val = []
-	initial_theta_for_each_class.each do |e|
-	  multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
-	  predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
+	clases.each do |e|
+		multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
+		predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
 	end
 	if predicted_val[0] == 1
-	  puts "Vino Tinto"
+		puts "Vino Tinto"
 	elsif predicted_val[1] == 1
-	  puts "Vino Rosado"
+		puts "Vino Rosado"
 	elsif predicted_val[2] == 1
-	  puts "Vino Blanco"
+		puts "Vino Blanco"
 	else
-	  puts predicted_val
+		puts predicted_val
 	end
-## Clustering
-	load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
-	dataset = load_data.to_matrix
-	# initialize with 2 cluster centroids
-	clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
-	# fit the model with 20 iterations
-	clustering.fit(20)
-	# watch the values in their respective cluster
-	p clustering.cluster(0)
-	p clustering.cluster(1)
+## Contributing
-	# Predict the closest cluster
-	p clustering.predict(Vector[63,190])
+Bug reports and pull requests are welcome on GitHub at https://github.com/TheNoskOneVzla/aprendizaje_maquina.
 ## License

data/aprendizaje_maquina.gemspec CHANGED Viewed

@@ -12,6 +12,7 @@ Gem::Specification.new do |spec|
   spec.summary       = "Machine learning gem / Una gema para el aprendizaje de maquinas."
   spec.description   = "This is a gem to help ruby developers to write machine learning algorithms easier and faster / Esta es una gema para ayudar a los desarrolladores de ruby a escribir algoritmos de aprendizaje automático más fácil y rápido."
+  spec.homepage      = ""
   spec.license       = "MIT"
   # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'

data/lib/aprendizaje_maquina.rb CHANGED Viewed

@@ -3,7 +3,6 @@ require "aprendizaje_maquina/cargar"
 require "aprendizaje_maquina/regresion_lineal"
 require "aprendizaje_maquina/matrixx"
 require "aprendizaje_maquina/clasificacion_logistica"
-require "aprendizaje_maquina/clustering"
 module AprendizajeMaquina
 	#class RedNeuronal

data/lib/aprendizaje_maquina/cargar.rb CHANGED Viewed

@@ -1,48 +1,49 @@
 require 'csv'
 require 'matrix'
 module AprendizajeMaquina
-  class Cargar
-    def initialize(path_file)
-	    @path_file = path_file
-	    @csv_data = CSV.read(@path_file)
-	    @largo_colum = @csv_data[0].length
-    end
-    def to_matrix(columnas = nil)
-  	  if columnas == nil
-  	    array = @csv_data.map{ |e| e.map{ |o| o.include?(".") ? o.to_f : o.to_i } }
-  		  matrix = Matrix.rows(array,copy=true)
-  		  matrix
-  	  elsif columnas.is_a?(Range)
-  	    if columnas.last >= @largo_colum
-  		    raise ArgumentError, "Number of columns don't exist"
-  		  else
-  		    array = @csv_data.map{ |e| e[columnas].map{ |i| i.include?(".") ? i.to_f : i.to_i} }
-  		    matrix = Matrix.rows(array,copy=true)
-  		    matrix
-  		  end
-  	  elsif columnas.is_a?(Integer)
-  		  if columnas >= @largo_colum
-  		    raise ArgumentError, "Number of columns don't exist"
-  		  else
-  		    array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
-  		    matrix = Matrix[array].transpose
-  		    matrix
-  		  end
-  	  else
-  		  raise ArgumentError, "Must be nil, range or integer"
-  	  end
-	  end
+	class Cargar
-    def to_vector(columna)
-	    if columna >= @largo_colum
-		    raise ArgumentError, "Column don't exist"
-	    else
-		    array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
-		    vector = Vector.elements(array,copy = true)
-		    vector
-	    end
-	  end
-  end
+		def initialize(path_file)
+			@path_file = path_file
+			@csv_data = CSV.read(@path_file)
+			@largo_colum = @csv_data[0].length
+		end
+		def to_matrix(columnas = nil)
+			if columnas == nil
+				array = @csv_data.map{|e| e.map{|o| o.include?(".") ? o.to_f : o.to_i } }
+				matrix = Matrix.rows(array,copy=true)
+				matrix
+			elsif columnas.is_a?(Range)
+				if columnas.last >= @largo_colum
+					raise ArgumentError, "Number of columns don't exist"
+				else
+					array = @csv_data.map{|e| e[columnas].map{|i| i.include?(".") ? i.to_f : i.to_i} }
+					matrix = Matrix.rows(array,copy=true)
+					matrix
+				end
+			elsif columnas.is_a?(Integer)
+				if columnas >= @largo_colum
+					raise ArgumentError, "Number of columns don't exist"
+				else
+					array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
+					matrix = Matrix[array].transpose
+					matrix
+				end
+			else
+				raise ArgumentError, "Must be nil, range or integer"
+			end
+		end
+		def to_vector(columna)
+			if columna >= @largo_colum
+				raise ArgumentError, "Column don't exist"
+			else
+				array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
+				vector = Vector.elements(array,copy=true)
+				vector
+			end
+		end
+	end
 end

data/lib/aprendizaje_maquina/clasificacion_logistica.rb CHANGED Viewed

@@ -1,104 +1,106 @@
-module AprendizajeMaquina
-  class ClasificacionLogistica
-    def initialize(x,y,theta)
-      @x = x
-      @y = y
-      @theta = theta
-      if y.is_a? Matrix
-        @m = y.row_count
-      elsif y.is_a? Vector
-        @m = y.size
-      else
-        @m = y.length
-      end
-    end
-    def train(iterations,alpha = nil,type_of_train)
-      case type_of_train
-      when 'Grad' then
-        # gradiente de descenso
-        @cost_history = []
-        for i in 0..iterations
-          x = @x * @theta
-          hx = x.map { |e| sigmoid(e) }
-          @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
-          costo = 0
-          cost.to_a.map{ |e| costo = e }
-          @cost_history << ["iteracion: #{i}",costo]
-        end
-        @cost_history
-        "theta values => #{@theta} | cost => #{costo}"
-      when 'Newm' then
-        # metodo de newton
-        @cost_history = []
-        for i in 0..iterations
-          x = @x * @theta
-          hx = x.map { |e| sigmoid(e) }
-          uno_menos_hx = hx.map{ |e| (1-e) }
-          escalar = []
-          for u in 0...hx.size
-            escalar << hx[u] * uno_menos_hx[u]
-          end
-          gradiente = (1.0/@m) * @x.transpose * (hx - @y)
-          hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
-          inversa = (1.0/hessian.det) * (hessian.adjugate)
-          @theta = @theta - inversa * gradiente
-          costo = 0
-          cost.to_a.map{ |e| costo = e }
-          @cost_history << ["iteracion: #{i}",costo]
-        end
-        @cost_history
-        "theta values => #{@theta} | cost => #{costo}"
-      when 'SGD' then
-        # Stochastic Gradient Descent
-        @cost_history = []
-        for i in 0..iterations
-          for i in 0..i
-            x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
-            hx = x.map {|e| sigmoid(e) }
-            @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
-            costo = 0
-            cost.to_a.map{|e| costo = e }
-            @cost_history << ["iteracion: #{i}",costo]
-          end
-        end
-        @cost_history
-        "theta values => #{@theta} | cost => #{costo}"
-      end
-    end
-    def predict(x)
-      hipo = x * @theta
-      var = 0
-      hipo.map { |x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
-      if sigmoid(var) >= 0.5
-        1
-      else
-        0
-      end
-    end
-    private
-    def sumatoria(array)
-      array.inject(0) { |elem1, elem2| elem1 + elem2 }
-    end
-    def sigmoid(x)
-      1.0 / (1.0 + Math.exp(-x))
-    end
-    def cost
-      x = @x*@theta
-      hx = x.map { |e| sigmoid(e) }
-      log_hx = hx.map{ |e| Math.log(e) }
-      log_uno_menos_hx = hx.map{ |e| Math.log(1 - e) }
-      costo = -1.0/@m * (Matrix[@y.to_a] * log_hx + (Matrix[@y.to_a].map{ |i| 1 - i }) * log_uno_menos_hx)
-      costo
-    end
-    def matrix(columns)
-      Matrix.rows(columns, false)
-    end
-  end
+module AprendizajeMaquina
+	class ClasificacionLogistica
+	    def initialize(x,y,theta)
+	      @x = x
+	      @y = y
+	      @theta = theta
+	      if y.is_a? Matrix
+	        @m = y.row_count
+	      elsif y.is_a? Vector
+	         @m = y.size
+	      else
+	        @m = y.length
+	      end
+	    end
+	    def train(iterations,alpha = nil,type_of_train)
+			case type_of_train
+				when 'Grad' then
+					# gradiente de descenso
+					@cost_history = []
+					for i in 0..iterations
+						x = @x*@theta
+						hx = x.map {|e| sigmoid(e) }
+						@theta = @theta - alpha / @m * @x.transpose * (hx - @y)
+						costo = 0
+						cost.to_a.map{|e| costo = e }
+						@cost_history << ["iteracion: #{i}",costo]
+					end
+					@cost_history
+					p "theta values => #{@theta} | cost => #{costo}"
+				when 'Newm' then
+					# metodo de newton
+					@cost_history = []
+					for i in 0..iterations
+						x = @x*@theta
+						hx = x.map {|e| sigmoid(e) }
+						uno_menos_hx = hx.map{|e| (1-e) }
+						escalar = []
+						for u in 0...hx.size
+							escalar << hx[u] * uno_menos_hx[u]
+						end
+						gradiente = (1.0/@m) * @x.transpose * (hx - @y)
+						hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
+						inversa = (1.0/hessian.det)*(hessian.adjugate)
+						@theta = @theta - inversa * gradiente
+						costo = 0
+						cost.to_a.map{|e| costo = e }
+						@cost_history << ["iteracion: #{i}",costo]
+					end
+					@cost_history
+					p "theta values => #{@theta} | cost => #{costo}"
+				when 'SGD' then
+					# Stochastic Gradient Descent
+					@cost_history = []
+					for i in 0..iterations
+						for i in 0..i
+							x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
+							hx = x.map {|e| sigmoid(e) }
+							@theta = @theta - alpha / @m * @x.transpose * (hx - @y)
+							costo = 0
+							cost.to_a.map{|e| costo = e }
+							@cost_history << ["iteracion: #{i}",costo]
+						end
+					end
+					@cost_history
+					p "theta values => #{@theta} | cost => #{costo}"
+			end
+	    end
+	    def predict(x)
+	        hipo = x * @theta
+	        var = 0
+	        hipo.map {|x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
+			if sigmoid(var) >= 0.5
+	        	1
+	        else
+	        	0
+	        end
+	    end
+	    private
+		def sumatoria(array)
+			array.inject(0) { |elem1, elem2| elem1 + elem2 }
+		end
+		def sigmoid(x)
+		    1.0 / (1.0 + Math.exp(-x))
+		end
+		def cost
+			x = @x*@theta
+			hx = x.map {|e| sigmoid(e) }
+			log_hx = hx.map{|e| Math.log(e) }
+			log_uno_menos_hx = hx.map{|e| Math.log(1-e) }
+			costo = -1.0/@m * ( Matrix[@y.to_a] * log_hx + ( Matrix[@y.to_a].map{|i| 1 - i } ) * log_uno_menos_hx )
+			costo
+		end
+		def matrix(columns)
+	  		Matrix.rows(columns, false)
+		end
+	end
 end