RubyGems - aprendizaje_maquina - Versions diffs - 0.1.3 → 0.1.4.beta1 - Mend

aprendizaje_maquina 0.1.3 → 0.1.4.beta1

Files changed (17) hide show

checksums.yaml +4 -4
data/README.md +50 -28
data/aprendizaje_maquina.gemspec +0 -1
data/examples/clasificacion_logistica_example.rb +16 -0
data/examples/clustering_data.csv +177 -0
data/examples/clustering_example.rb +9 -0
data/examples/data_lg.csv +145 -0
data/examples/regresion_lineal_example.rb +11 -0
data/examples/train.csv +700 -0
data/lib/aprendizaje_maquina.rb +1 -0
data/lib/aprendizaje_maquina/cargar.rb +42 -43
data/lib/aprendizaje_maquina/clasificacion_logistica.rb +103 -105
data/lib/aprendizaje_maquina/clustering.rb +96 -0
data/lib/aprendizaje_maquina/matrixx.rb +3 -3
data/lib/aprendizaje_maquina/regresion_lineal.rb +4 -6
data/lib/aprendizaje_maquina/version.rb +1 -1
metadata +13 -6

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 83991bcb2253d1f7a974363bb8bfd060436d6943
-  data.tar.gz: ae8327ba4923164cff4839189294ac57732fd0a6
+  metadata.gz: 03e7fac70b0b1de20e1496d918b96fbf5c716be6
+  data.tar.gz: 38669d3a47c811f6af8794223c6c5395c3b1e35d
 SHA512:
-  metadata.gz: 9b764a65f5b62d7a43de1ac1f1ac9eec9ead35d5046171ff16d988c7747d2d348f12eba26ff342845ea95a8d2dd7252cd1444b5186a0abe55a66433721e9601b
-  data.tar.gz: ef4ba5e1cd33969fe0bcdb21e76de1d7e0dee9f02b13303b65b9180a9a6dbd473fa94d4bf79a10ea88e621f2eb99c88a93119f3fa9efb0187d7e6e0650f7b560
+  metadata.gz: 74c9a63aa42c3844846ddd5db0dc6300ec6022c071448efd7ddc092e5f857afbbffd47cb859fa6d3a3d495cc68ecfa97b7560cea820c87c8200a410b95ca09a9
+  data.tar.gz: 948708cb197af7a4fdbeba2c423ced683df600ee15ebf744dfbd4e8723877432a7cd1d170a817cdb3887fed53378fbd29f4339a71fede89c6e4c8b4a94c2e241

data/README.md CHANGED Viewed

@@ -18,7 +18,7 @@ Or install it yourself as:
 ## Usage
-for make predictions with the linear regression model
+## linear regression model
 first
@@ -27,11 +27,17 @@ first
 load data from a CSV file
 	load = AprendizajeMaquina::Cargar.new("file.csv")
-	y = load.to_vector(3)    # specify the column that you want to store on a vector
-    matrix = load.to_matrix  # this put all the data of the csv file in a matrix
-                             # if you don't specify the column or range of columns
-	x = load.to_matrix(0)    # create a matrix with the data in the column 0 of the csv file
-	                         # you can specify range like this load.to_matrix(0..4)
+	# specify the column that you want to store on a vector
+	y = load.to_vector(3)
+	# if you don't specify the column or range of columns
+	# this puts all the data of the csv file in a matrix
+	matrix = load.to_matrix
+	# create a matrix with the data in the column 0 of the csv file
+	x = load.to_matrix(0)    # you can also specify a range, like this: load.to_matrix(0..4)
 	x_with_ones = x.add_ones # this adds a column of ones to the matrix
 to normalize data
@@ -41,10 +47,11 @@ to normalize data
 create an instance of the class RegresionLineal
 	regresion_lineal = AprendizajeMaquina::RegresionLineal.new(x_matrix,y_vector)
-	regresion_lineal.find_ecuation         # (or use the alias :train) find the theta values => Vector[114.50684133915638, 0.8310043668122375]
+	regresion_lineal.find_ecuation           # (or use the alias :train) returns a Vector
 	m = Matrix[[1,95]]
-	p regresion_lineal.make_prediction(m) # (or use the alias :predict)  to make predictions
-					      # => Vector[193.45225618631895]
+	puts regresion_lineal.make_prediction(m) # (or use the alias :predict) to make predictions
+					                                 # => Vector[193.45225618631895]
 linear regresion with arrays
@@ -56,7 +63,7 @@ linear regresion with arrays
 	p regresion_simple.ecuacion
 	p regresion_simple.predict(95)
-Logistic Classification
+## Logistic Classification
 	data = AprendizajeMaquina::Cargar.new("data.csv")
@@ -68,50 +75,65 @@ Logistic Classification
 training
-	the method ClasificacionLogistica#train receives 3 inputs, the first is the numbers of iterations, the second is the alpha value(step size), last one is type of training method ('SGD' for Stochastic Gradient Descents, 'Grad' for Batch Gradiendt Descent and 'Newm' for Newton's method)
+the method ClasificacionLogistica#train receives 3 inputs: the first is the number of iterations, the second is the alpha value (step size), and the last one is the type of training method ('SGD' for Stochastic Gradient Descent, 'Grad' for Batch Gradient Descent and 'Newm' for Newton's method)
 	example 1:
-		cl.train(12,0.01,'SGD')
+	cl.train(12,0.01,'SGD')
 	example 2:
-		cl.train(10,'NewM') # Newton's method dont use alpha
+	cl.train(10,'NewM') # Newton's method doesn't use alpha
 	example 3:
-		cl.train(400,0.001,'Grad')
+	cl.train(400,0.001,'Grad')
 predictions
 	if cl.predict(Matrix[[1,24,0]]) == 1
-		p "CANSADO"
+	  p "CANSADO"
 	else
-		p "DESCANSADO"
+	  p "DESCANSADO"
 	end
 make predictions for multiclass(one vs all)
-	clases = [Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
-			  Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
-			  Vector[-7.220460,0.256681,1.141166]]
+	initial_theta_for_each_class = [
+	  Vector[-38.98494868465186, 3.133704064187691,-1.0058753929521247],
+		Vector[40.93814883472139,-3.2195737672278586, -0.8080682715294277],
+	  Vector[-7.220460,0.256681,1.141166]
+	]
 	predicted_val = []
-	clases.each do |e|
-		multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
-		predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
+	initial_theta_for_each_class.each do |e|
+	  multiclass = AprendizajeMaquina::ClasificacionLogistica.new(x,y,e)
+	  predicted_val << multiclass.predict(Matrix[[1,13.5,1.83]])
 	end
 	if predicted_val[0] == 1
-		puts "Vino Tinto"
+	  puts "Vino Tinto"
 	elsif predicted_val[1] == 1
-		puts "Vino Rosado"
+	  puts "Vino Rosado"
 	elsif predicted_val[2] == 1
-		puts "Vino Blanco"
+	  puts "Vino Blanco"
 	else
-		puts predicted_val
+	  puts predicted_val
 	end
+## Clustering
+	load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
+	dataset = load_data.to_matrix
+	# initialize with 2 cluster centroids
+	clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
+	# fit the model with 20 iterations
+	clustering.fit(20)
-## Contributing
+	# watch the values in their respective cluster
+	p clustering.cluster(0)
+	p clustering.cluster(1)
-Bug reports and pull requests are welcome on GitHub at https://github.com/TheNoskOneVzla/aprendizaje_maquina.
+	# Predict the closest cluster
+	p clustering.predict(Vector[63,190])
 ## License

data/aprendizaje_maquina.gemspec CHANGED Viewed

@@ -12,7 +12,6 @@ Gem::Specification.new do |spec|
   spec.summary       = "Machine learning gem / Una gema para el aprendizaje de maquinas."
   spec.description   = "This is a gem to help ruby developers to write machine learning algorithms easier and faster / Esta es una gema para ayudar a los desarrolladores de ruby a escribir algoritmos de aprendizaje automático más fácil y rápido."
-  spec.homepage      = ""
   spec.license       = "MIT"
   # Prevent pushing this gem to RubyGems.org. To allow pushes either set the 'allowed_push_host'

data/examples/clasificacion_logistica_example.rb ADDED Viewed

@@ -0,0 +1,16 @@
+require 'aprendizaje_maquina'
+data = AprendizajeMaquina::Cargar.new("data_lg.csv")
+x = data.to_matrix(0..1).add_ones
+y = data.to_vector(2)
+theta = Vector[0,0,0]
+rl = AprendizajeMaquina::ClasificacionLogistica.new(x,y,theta)
+rl.train(12,0.01,'SGD')
+prediction = rl.predict(Matrix[[1,9,22]])
+if prediction == 1
+	puts "cansado"
+else
+	puts "descansado"
+end

data/examples/clustering_data.csv ADDED Viewed

@@ -0,0 +1,177 @@
+63, 190
+65, 145
+69, 160
+66, 130
+63, 120
+65, 125
+68, 124
+64, 135
+64, 98
+66, 150
+67, 160
+67, 140
+71, 170
+60, 109
+68, 170
+63, 165
+67, 147
+63, 110
+68, 160
+60, 125
+66, 142
+65, 154
+62, 119
+65, 135
+69, 195
+63, 110
+63, 140
+64, 219
+66, 135
+66, 140
+68, 135
+68, 99
+64, 100
+62, 135
+74, 200
+63, 150
+69, 163
+71, 116
+68, 145
+71, 140
+68, 125
+66, 130
+72, 157
+68, 200
+61, 110
+64, 165
+70, 115
+67, 153
+65, 135
+62, 116
+63, 122
+66, 130
+73, 180
+66, 110
+69, 190
+68, 160
+62, 115
+66, 130
+64, 110
+70, 150
+72, 185
+70, 175
+66, 200
+68, 180
+62, 135
+63, 120
+63, 134
+72, 185
+72, 160
+68, 160
+65, 143
+68, 180
+65, 130
+65, 200
+65, 195
+66, 160
+70, 195
+63, 120
+69, 120
+64, 110
+67, 140
+66, 104
+63, 125
+71, 140
+68, 190
+65, 125
+66, 125
+64, 125
+72, 245
+74, 235
+62, 130
+68, 135
+62, 120
+63, 155
+65, 130
+64, 130
+74, 200
+70, 145
+68, 140
+69, 135
+61, 124
+66, 125
+76, 216
+66, 120
+62, 103
+65, 162
+69, 165
+73, 164
+70, 153
+75, 172
+58, 103
+63, 135
+68, 160
+69, 141
+67, 145
+68, 148
+67, 180
+70, 150
+70, 160
+60, 170
+70, 210
+66, 175
+64, 130
+72, 172
+64, 108
+72, 190
+66, 142
+69, 235
+72, 145
+66, 143
+70, 141
+64, 110
+70, 196
+65, 118
+72, 200
+61, 145
+63, 110
+65, 115
+70, 170
+66, 160
+62, 123
+65, 135
+68, 140
+65, 160
+60, 150
+72, 168
+61, 100
+62, 100
+66, 135
+67, 155
+62, 135
+64, 200
+66, 140
+62, 185
+65, 125
+72, 165
+64, 95
+72, 155
+65, 110
+67, 165
+65, 132
+66, 125
+64, 120
+63, 130
+66, 130
+63, 115
+59, 160
+64, 150
+64, 130
+61, 120
+66, 150
+66, 250
+71, 150
+70, 180
+69, 210
+61, 105
+64, 140

data/examples/clustering_example.rb ADDED Viewed

@@ -0,0 +1,9 @@
+require 'aprendizaje_maquina'
+load_data = AprendizajeMaquina::Cargar.new('clustering_data.csv')
+dataset = load_data.to_matrix
+clustering = AprendizajeMaquina::KmeansClustering.new(2,dataset)
+clustering.fit(20)
+p clustering.cluster(0)
+p clustering.cluster(1)
+p clustering.predict(Vector[63,190])

data/examples/data_lg.csv ADDED Viewed

@@ -0,0 +1,145 @@
+0,24,1
+0.1,23.5,1
+0.2,23.4,1
+0.3,23.3,1
+0.4,23.2,1
+0.5,23.1,1
+1,23,1
+1.1,23.5,1
+1.2,23.4,1
+1.3,23.3,1
+1.4,23.2,1
+1.5,23.1,1
+2,22,1
+2.1,22.5,1
+2.2,22.4,1
+2.3,22.3,1
+2.4,22.2,1
+2.5,22.1,1
+3,21,1
+3.1,21.5,1
+3.2,21.4,1
+3.3,21.3,1
+3.4,21.2,1
+3.5,21.1,1
+4,20,1
+4.1,20.5,1
+4.2,20.4,1
+4.3,20.3,1
+4.4,20.2,1
+4.5,20.1,1
+5,19,1
+5.1,19.5,1
+5.2,19.4,1
+5.3,19.3,1
+5.4,19.2,1
+5.5,19.1,1
+6,18,1
+6.1,18.5,1
+6.2,18.4,1
+6.3,18.3,1
+6.4,18.2,1
+6.5,18.1,1
+7,17,1
+7.1,17.5,1
+7.2,17.4,1
+7.3,17.3,1
+7.4,17.2,1
+7.5,17.1,1
+8,16,0
+8.1,16.5,0
+8.2,16.4,0
+8.3,16.3,0
+8.4,16.2,0
+8.5,16.1,0
+9,15,0
+9.1,15.5,0
+9.2,15.4,0
+9.3,15.3,0
+9.4,15.2,0
+9.5,15.1,0
+10,14,0
+10.1,14.5,0
+10.2,14.4,0
+10.3,14.3,0
+10.4,14.2,0
+10.5,14.1,0
+11,13,0
+11.1,13.5,0
+11.2,13.4,0
+11.3,13.3,0
+11.4,13.2,0
+11.5,13.1,0
+12,12,0
+12.1,12.5,0
+12.2,12.4,0
+12.3,12.3,0
+12.4,12.2,0
+12.5,12.1,0
+13,11,0
+13.1,11.5,0
+13.2,11.4,0
+13.3,11.3,0
+13.4,11.2,0
+13.5,11.1,0
+14,10,0
+14.1,10.5,0
+14.2,10.4,0
+14.3,10.3,0
+14.4,10.2,0
+14.5,10.1,0
+15,9,0
+15.1,9.5,0
+15.2,9.4,0
+15.3,9.3,0
+15.4,9.2,0
+15.5,9.1,0
+16,8,0
+16.1,8.5,0
+16.2,8.4,0
+16.3,8.3,0
+16.4,8.2,0
+16.5,8.1,0
+17,7,0
+17.1,7.5,0
+17.2,7.4,0
+17.3,7.3,0
+17.4,7.2,0
+17.5,7.1,0
+18,6,0
+18.1,6.5,0
+18.2,6.4,0
+18.3,6.3,0
+18.4,6.2,0
+18.5,6.1,0
+19,5,0
+19.1,5.5,0
+19.2,5.4,0
+19.3,5.3,0
+19.4,5.2,0
+19.5,5.1,0
+20,4,0
+20.1,4.5,0
+20.2,4.4,0
+20.3,4.3,0
+20.4,4.2,0
+20.5,4.1,0
+21,3,0
+21.1,3.5,0
+21.2,3.4,0
+21.3,3.3,0
+21.4,3.2,0
+21.5,3.1,0
+22,2,0
+22.1,2.5,0
+22.2,2.4,0
+22.3,2.3,0
+22.4,2.2,0
+22.5,2.1,0
+23,1,0
+23.1,1.5,0
+23.2,1.4,0
+23.3,1.3,0
+23.4,1.2,0
+23.5,1.1,0
+24,0,0