RubyGems - aprendizaje_maquina - Versions diffs - 0.1.3 → 0.1.4.beta1 - Mend

aprendizaje_maquina 0.1.3 → 0.1.4.beta1

Files changed (17)

checksums.yaml +4 -4
data/README.md +50 -28
data/aprendizaje_maquina.gemspec +0 -1
data/examples/clasificacion_logistica_example.rb +16 -0
data/examples/clustering_data.csv +177 -0
data/examples/clustering_example.rb +9 -0
data/examples/data_lg.csv +145 -0
data/examples/regresion_lineal_example.rb +11 -0
data/examples/train.csv +700 -0
data/lib/aprendizaje_maquina.rb +1 -0
data/lib/aprendizaje_maquina/cargar.rb +42 -43
data/lib/aprendizaje_maquina/clasificacion_logistica.rb +103 -105
data/lib/aprendizaje_maquina/clustering.rb +96 -0
data/lib/aprendizaje_maquina/matrixx.rb +3 -3
data/lib/aprendizaje_maquina/regresion_lineal.rb +4 -6
data/lib/aprendizaje_maquina/version.rb +1 -1
metadata +13 -6

data/lib/aprendizaje_maquina.rb CHANGED Viewed

@@ -3,6 +3,7 @@ require "aprendizaje_maquina/cargar"
 require "aprendizaje_maquina/regresion_lineal"
 require "aprendizaje_maquina/matrixx"
 require "aprendizaje_maquina/clasificacion_logistica"
+require "aprendizaje_maquina/clustering"
 module AprendizajeMaquina
 	#class RedNeuronal

data/lib/aprendizaje_maquina/cargar.rb CHANGED Viewed

@@ -1,49 +1,48 @@
 require 'csv'
 require 'matrix'
 module AprendizajeMaquina
+  class Cargar
+    def initialize(path_file)
+	    @path_file = path_file
+	    @csv_data = CSV.read(@path_file)
+	    @largo_colum = @csv_data[0].length
+    end
-	class Cargar
+    def to_matrix(columnas = nil)
+  	  if columnas == nil
+  	    array = @csv_data.map{ |e| e.map{ |o| o.include?(".") ? o.to_f : o.to_i } }
+  		  matrix = Matrix.rows(array,copy=true)
+  		  matrix
+  	  elsif columnas.is_a?(Range)
+  	    if columnas.last >= @largo_colum
+  		    raise ArgumentError, "Number of columns don't exist"
+  		  else
+  		    array = @csv_data.map{ |e| e[columnas].map{ |i| i.include?(".") ? i.to_f : i.to_i} }
+  		    matrix = Matrix.rows(array,copy=true)
+  		    matrix
+  		  end
+  	  elsif columnas.is_a?(Integer)
+  		  if columnas >= @largo_colum
+  		    raise ArgumentError, "Number of columns don't exist"
+  		  else
+  		    array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
+  		    matrix = Matrix[array].transpose
+  		    matrix
+  		  end
+  	  else
+  		  raise ArgumentError, "Must be nil, range or integer"
+  	  end
+	  end
-		def initialize(path_file)
-			@path_file = path_file
-			@csv_data = CSV.read(@path_file)
-			@largo_colum = @csv_data[0].length
-		end
-		def to_matrix(columnas = nil)
-			if columnas == nil
-				array = @csv_data.map{|e| e.map{|o| o.include?(".") ? o.to_f : o.to_i } }
-				matrix = Matrix.rows(array,copy=true)
-				matrix
-			elsif columnas.is_a?(Range)
-				if columnas.last >= @largo_colum
-					raise ArgumentError, "Number of columns don't exist"
-				else
-					array = @csv_data.map{|e| e[columnas].map{|i| i.include?(".") ? i.to_f : i.to_i} }
-					matrix = Matrix.rows(array,copy=true)
-					matrix
-				end
-			elsif columnas.is_a?(Integer)
-				if columnas >= @largo_colum
-					raise ArgumentError, "Number of columns don't exist"
-				else
-					array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
-					matrix = Matrix[array].transpose
-					matrix
-				end
-			else
-				raise ArgumentError, "Must be nil, range or integer"
-			end
-		end
-		def to_vector(columna)
-			if columna >= @largo_colum
-				raise ArgumentError, "Column don't exist"
-			else
-				array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
-				vector = Vector.elements(array,copy=true)
-				vector
-			end
-		end
-	end
+    def to_vector(columna)
+	    if columna >= @largo_colum
+		    raise ArgumentError, "Column don't exist"
+	    else
+		    array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
+		    vector = Vector.elements(array,copy = true)
+		    vector
+	    end
+	  end
+  end
 end

data/lib/aprendizaje_maquina/clasificacion_logistica.rb CHANGED Viewed

@@ -1,106 +1,104 @@
-module AprendizajeMaquina
-	class ClasificacionLogistica
-	    def initialize(x,y,theta)
-	      @x = x
-	      @y = y
-	      @theta = theta
-	      if y.is_a? Matrix
-	        @m = y.row_count
-	      elsif y.is_a? Vector
-	         @m = y.size
-	      else
-	        @m = y.length
-	      end
-	    end
-	    def train(iterations,alpha = nil,type_of_train)
-			case type_of_train
-				when 'Grad' then
-					# gradiente de descenso
-					@cost_history = []
-					for i in 0..iterations
-						x = @x*@theta
-						hx = x.map {|e| sigmoid(e) }
-						@theta = @theta - alpha / @m * @x.transpose * (hx - @y)
-						costo = 0
-						cost.to_a.map{|e| costo = e }
-						@cost_history << ["iteracion: #{i}",costo]
-					end
-					@cost_history
-					p "theta values => #{@theta} | cost => #{costo}"
-				when 'Newm' then
-					# metodo de newton
-					@cost_history = []
-					for i in 0..iterations
-						x = @x*@theta
-						hx = x.map {|e| sigmoid(e) }
-						uno_menos_hx = hx.map{|e| (1-e) }
-						escalar = []
-						for u in 0...hx.size
-							escalar << hx[u] * uno_menos_hx[u]
-						end
-						gradiente = (1.0/@m) * @x.transpose * (hx - @y)
-						hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
-						inversa = (1.0/hessian.det)*(hessian.adjugate)
-						@theta = @theta - inversa * gradiente
-						costo = 0
-						cost.to_a.map{|e| costo = e }
-						@cost_history << ["iteracion: #{i}",costo]
-					end
-					@cost_history
-					p "theta values => #{@theta} | cost => #{costo}"
-				when 'SGD' then
-					# Stochastic Gradient Descent
-					@cost_history = []
-					for i in 0..iterations
-						for i in 0..i
-							x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
-							hx = x.map {|e| sigmoid(e) }
-							@theta = @theta - alpha / @m * @x.transpose * (hx - @y)
-							costo = 0
-							cost.to_a.map{|e| costo = e }
-							@cost_history << ["iteracion: #{i}",costo]
-						end
-					end
-					@cost_history
-					p "theta values => #{@theta} | cost => #{costo}"
-			end
-	    end
-	    def predict(x)
-	        hipo = x * @theta
-	        var = 0
-	        hipo.map {|x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
-			if sigmoid(var) >= 0.5
-	        	1
-	        else
-	        	0
-	        end
-	    end
-	    private
-		def sumatoria(array)
-			array.inject(0) { |elem1, elem2| elem1 + elem2 }
-		end
-		def sigmoid(x)
-		    1.0 / (1.0 + Math.exp(-x))
-		end
-		def cost
-			x = @x*@theta
-			hx = x.map {|e| sigmoid(e) }
-			log_hx = hx.map{|e| Math.log(e) }
-			log_uno_menos_hx = hx.map{|e| Math.log(1-e) }
-			costo = -1.0/@m * ( Matrix[@y.to_a] * log_hx + ( Matrix[@y.to_a].map{|i| 1 - i } ) * log_uno_menos_hx )
-			costo
-		end
-		def matrix(columns)
-	  		Matrix.rows(columns, false)
-		end
-	end
+module AprendizajeMaquina
+  class ClasificacionLogistica
+    def initialize(x,y,theta)
+      @x = x
+      @y = y
+      @theta = theta
+      if y.is_a? Matrix
+        @m = y.row_count
+      elsif y.is_a? Vector
+        @m = y.size
+      else
+        @m = y.length
+      end
+    end
+    def train(iterations,alpha = nil,type_of_train)
+      case type_of_train
+      when 'Grad' then
+        # gradiente de descenso
+        @cost_history = []
+        for i in 0..iterations
+          x = @x * @theta
+          hx = x.map { |e| sigmoid(e) }
+          @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
+          costo = 0
+          cost.to_a.map{ |e| costo = e }
+          @cost_history << ["iteracion: #{i}",costo]
+        end
+        @cost_history
+        "theta values => #{@theta} | cost => #{costo}"
+      when 'Newm' then
+        # metodo de newton
+        @cost_history = []
+        for i in 0..iterations
+          x = @x * @theta
+          hx = x.map { |e| sigmoid(e) }
+          uno_menos_hx = hx.map{ |e| (1-e) }
+          escalar = []
+          for u in 0...hx.size
+            escalar << hx[u] * uno_menos_hx[u]
+          end
+          gradiente = (1.0/@m) * @x.transpose * (hx - @y)
+          hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
+          inversa = (1.0/hessian.det) * (hessian.adjugate)
+          @theta = @theta - inversa * gradiente
+          costo = 0
+          cost.to_a.map{ |e| costo = e }
+          @cost_history << ["iteracion: #{i}",costo]
+        end
+        @cost_history
+        "theta values => #{@theta} | cost => #{costo}"
+      when 'SGD' then
+        # Stochastic Gradient Descent
+        @cost_history = []
+        for i in 0..iterations
+          for i in 0..i
+            x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
+            hx = x.map {|e| sigmoid(e) }
+            @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
+            costo = 0
+            cost.to_a.map{|e| costo = e }
+            @cost_history << ["iteracion: #{i}",costo]
+          end
+        end
+        @cost_history
+        "theta values => #{@theta} | cost => #{costo}"
+      end
+    end
+    def predict(x)
+      hipo = x * @theta
+      var = 0
+      hipo.map { |x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
+      if sigmoid(var) >= 0.5
+        1
+      else
+        0
+      end
+    end
+    private
+    def sumatoria(array)
+      array.inject(0) { |elem1, elem2| elem1 + elem2 }
+    end
+    def sigmoid(x)
+      1.0 / (1.0 + Math.exp(-x))
+    end
+    def cost
+      x = @x*@theta
+      hx = x.map { |e| sigmoid(e) }
+      log_hx = hx.map{ |e| Math.log(e) }
+      log_uno_menos_hx = hx.map{ |e| Math.log(1 - e) }
+      costo = -1.0/@m * (Matrix[@y.to_a] * log_hx + (Matrix[@y.to_a].map{ |i| 1 - i }) * log_uno_menos_hx)
+      costo
+    end
+    def matrix(columns)
+      Matrix.rows(columns, false)
+    end
+  end
 end

data/lib/aprendizaje_maquina/clustering.rb ADDED Viewed

@@ -0,0 +1,96 @@
+module AprendizajeMaquina
+	class KmeansClustering
+		def initialize(num_of_cluster_centroids,dataset_matrix)
+			@num_of_cluster_centroids = num_of_cluster_centroids
+			@dataset_matrix = dataset_matrix
+			@num_columns = @dataset_matrix.column_count
+			@num_rows = @dataset_matrix.row_count
+			@cluster_centroids = init_cluster_centroids
+		end
+		def fit(iterations)
+			clustering(iterations)
+		end
+		def cluster(num)
+			get("@cluster_#{num}")
+		end
+		def predict(vector)
+			array = []
+			@cluster_centroids.each do |cluster|
+			  array << (vector-cluster).r
+			end
+			cluster = array.index(array.min)
+			cluster
+		end
+		private
+		def array_to_vector(array)
+			vector = Vector.elements(array, copy = true)
+			vector
+		end
+		def media(array)
+			if array.empty?
+		    	array#raise ArgumentError.new("array is empty")
+		    else
+		    	1.0/array.length * array.inject { |mem, var| mem + var }
+		  	end
+		end
+		def init_cluster_centroids
+			cluster_centroids = Array.new(@num_of_cluster_centroids) {
+				min_max_rand = []
+				for i in 0...@num_columns
+					min_max_rand << rand(@dataset_matrix.column(i).min..@dataset_matrix.column(i).max)
+				end
+				array_to_vector(min_max_rand)
+			}
+			cluster_centroids
+		end
+		def set(instance_variable_name,instance_variable_value)
+			instance_variable_set(instance_variable_name,instance_variable_value)
+		end
+		def get(instance_variable_name)
+			instance_variable_get(instance_variable_name)
+		end
+		def clustering(iterations)
+			iterations.times do
+				array2 = []
+				for i in 0...@num_rows
+					array = []
+					@cluster_centroids.each do |cluster|
+						array << (@dataset_matrix.row(i)-cluster).r
+					end
+					array2 << array
+				end
+				hash = {}
+				for i in 0...@num_rows
+					hash[@dataset_matrix.row(i)] = array2[i].index(array2[i].min)
+				end
+				@cluster_centroids.each_index do |index|
+					set("@cluster_#{index}", Array.new)
+				end
+				@cluster_centroids.each_index do |index|
+					hash.each do |key,value|
+						if value == index
+							get("@cluster_#{index}") << key
+						end
+					end
+				end
+				@cluster_centroids.each_index do |index|
+					@cluster_centroids[index] = media(get("@cluster_#{index}"))
+				end
+			end
+		end
+	end
+end

data/lib/aprendizaje_maquina/matrixx.rb CHANGED Viewed

@@ -1,7 +1,7 @@
 class Matrix
 	def add_ones
-		matrix = self.to_a.map{|i| i.insert(0,1)}
-		matrix = Matrix.rows(matrix,copy=true)
+		matrix = self.to_a.map{ |i| i.insert(0,1) }
+		matrix = Matrix.rows(matrix,copy = true)
 		matrix
 	end
@@ -10,7 +10,7 @@ class Matrix
 		self.column_count.times do |i|
 			array << self.column(i).normalize
 		end
-		matrix_normal = Matrix.rows(array,copy=true).transpose
+		matrix_normal = Matrix.rows(array,copy = true).transpose
 		matrix_normal
 	end
 end

data/lib/aprendizaje_maquina/regresion_lineal.rb CHANGED Viewed

@@ -1,8 +1,6 @@
 module AprendizajeMaquina
-	class RegresionLineal
-		attr_reader :m,:b, :ecuacion, :theta
+  class RegresionLineal
+	  attr_reader :m,:b, :ecuacion, :theta
 		def initialize(x,y)
 			@x = x
@@ -44,7 +42,7 @@ module AprendizajeMaquina
 					raise ArgumentError, "Must be a number or matrix 1xN"
 				end
 			else
-				return "There is not a equation to make predictions (first, run encontrar_ecuacion method)"
+				return "There is not a equation to make predictions (first, run find_ecuation method)"
 			end
 		end
@@ -67,7 +65,7 @@ module AprendizajeMaquina
 			iter = @n - 1
 			xy = []
 			for i in 0..iter
-				xy << array_1[i]*array_2[i]
+				xy << array_1[i] * array_2[i]
 			end
 			xy
 		end