aprendizaje_maquina 0.1.3 → 0.1.4.beta1

@@ -3,6 +3,7 @@ require "aprendizaje_maquina/cargar"
  require "aprendizaje_maquina/regresion_lineal"
  require "aprendizaje_maquina/matrixx"
  require "aprendizaje_maquina/clasificacion_logistica"
+ require "aprendizaje_maquina/clustering"

  module AprendizajeMaquina
  #class RedNeuronal
@@ -1,49 +1,48 @@
  require 'csv'
  require 'matrix'
+
  module AprendizajeMaquina
+ class Cargar
+ def initialize(path_file)
+ @path_file = path_file
+ @csv_data = CSV.read(@path_file)
+ @largo_colum = @csv_data[0].length
+ end

- class Cargar
+ def to_matrix(columnas = nil)
+ if columnas == nil
+ array = @csv_data.map{ |e| e.map{ |o| o.include?(".") ? o.to_f : o.to_i } }
+ matrix = Matrix.rows(array,copy=true)
+ matrix
+ elsif columnas.is_a?(Range)
+ if columnas.last >= @largo_colum
+ raise ArgumentError, "Number of columns don't exist"
+ else
+ array = @csv_data.map{ |e| e[columnas].map{ |i| i.include?(".") ? i.to_f : i.to_i} }
+ matrix = Matrix.rows(array,copy=true)
+ matrix
+ end
+ elsif columnas.is_a?(Integer)
+ if columnas >= @largo_colum
+ raise ArgumentError, "Number of columns don't exist"
+ else
+ array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
+ matrix = Matrix[array].transpose
+ matrix
+ end
+ else
+ raise ArgumentError, "Must be nil, range or integer"
+ end
+ end

- def initialize(path_file)
- @path_file = path_file
- @csv_data = CSV.read(@path_file)
- @largo_colum = @csv_data[0].length
- end
-
- def to_matrix(columnas = nil)
- if columnas == nil
- array = @csv_data.map{|e| e.map{|o| o.include?(".") ? o.to_f : o.to_i } }
- matrix = Matrix.rows(array,copy=true)
- matrix
- elsif columnas.is_a?(Range)
- if columnas.last >= @largo_colum
- raise ArgumentError, "Number of columns don't exist"
- else
- array = @csv_data.map{|e| e[columnas].map{|i| i.include?(".") ? i.to_f : i.to_i} }
- matrix = Matrix.rows(array,copy=true)
- matrix
- end
- elsif columnas.is_a?(Integer)
- if columnas >= @largo_colum
- raise ArgumentError, "Number of columns don't exist"
- else
- array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
- matrix = Matrix[array].transpose
- matrix
- end
- else
- raise ArgumentError, "Must be nil, range or integer"
- end
- end
-
- def to_vector(columna)
- if columna >= @largo_colum
- raise ArgumentError, "Column don't exist"
- else
- array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
- vector = Vector.elements(array,copy=true)
- vector
- end
- end
- end
+ def to_vector(columna)
+ if columna >= @largo_colum
+ raise ArgumentError, "Column don't exist"
+ else
+ array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
+ vector = Vector.elements(array,copy = true)
+ vector
+ end
+ end
+ end
  end
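
For orientation, a minimal usage sketch of the reworked Cargar class; the CSV path and the column indices below are illustrative assumptions, not part of the gem. Passing nil to to_matrix loads every column, and out-of-range columns raise ArgumentError.

    require 'aprendizaje_maquina'

    datos = AprendizajeMaquina::Cargar.new('datos.csv')  # hypothetical CSV file
    x = datos.to_matrix(0..1)  # columns 0..1 as a Matrix (cells cast to Float or Integer)
    y = datos.to_vector(2)     # column 2 as a Vector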
@@ -1,106 +1,104 @@
- module AprendizajeMaquina
-
- class ClasificacionLogistica
-
- def initialize(x,y,theta)
- @x = x
- @y = y
- @theta = theta
- if y.is_a? Matrix
- @m = y.row_count
- elsif y.is_a? Vector
- @m = y.size
- else
- @m = y.length
- end
- end
-
- def train(iterations,alpha = nil,type_of_train)
- case type_of_train
- when 'Grad' then
- # gradiente de descenso
- @cost_history = []
- for i in 0..iterations
- x = @x*@theta
- hx = x.map {|e| sigmoid(e) }
- @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
- costo = 0
- cost.to_a.map{|e| costo = e }
- @cost_history << ["iteracion: #{i}",costo]
- end
- @cost_history
- p "theta values => #{@theta} | cost => #{costo}"
- when 'Newm' then
- # metodo de newton
- @cost_history = []
- for i in 0..iterations
- x = @x*@theta
- hx = x.map {|e| sigmoid(e) }
- uno_menos_hx = hx.map{|e| (1-e) }
- escalar = []
- for u in 0...hx.size
- escalar << hx[u] * uno_menos_hx[u]
- end
- gradiente = (1.0/@m) * @x.transpose * (hx - @y)
- hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
- inversa = (1.0/hessian.det)*(hessian.adjugate)
- @theta = @theta - inversa * gradiente
- costo = 0
- cost.to_a.map{|e| costo = e }
- @cost_history << ["iteracion: #{i}",costo]
- end
- @cost_history
- p "theta values => #{@theta} | cost => #{costo}"
- when 'SGD' then
- # Stochastic Gradient Descent
- @cost_history = []
- for i in 0..iterations
- for i in 0..i
- x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
- hx = x.map {|e| sigmoid(e) }
- @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
- costo = 0
- cost.to_a.map{|e| costo = e }
- @cost_history << ["iteracion: #{i}",costo]
- end
- end
- @cost_history
- p "theta values => #{@theta} | cost => #{costo}"
- end
- end
-
- def predict(x)
- hipo = x * @theta
- var = 0
- hipo.map {|x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
- if sigmoid(var) >= 0.5
- 1
- else
- 0
- end
- end
-
- private
-
- def sumatoria(array)
- array.inject(0) { |elem1, elem2| elem1 + elem2 }
- end
-
- def sigmoid(x)
- 1.0 / (1.0 + Math.exp(-x))
- end
-
- def cost
- x = @x*@theta
- hx = x.map {|e| sigmoid(e) }
- log_hx = hx.map{|e| Math.log(e) }
- log_uno_menos_hx = hx.map{|e| Math.log(1-e) }
- costo = -1.0/@m * ( Matrix[@y.to_a] * log_hx + ( Matrix[@y.to_a].map{|i| 1 - i } ) * log_uno_menos_hx )
- costo
- end
-
- def matrix(columns)
- Matrix.rows(columns, false)
- end
- end
+ module AprendizajeMaquina
+ class ClasificacionLogistica
+ def initialize(x,y,theta)
+ @x = x
+ @y = y
+ @theta = theta
+ if y.is_a? Matrix
+ @m = y.row_count
+ elsif y.is_a? Vector
+ @m = y.size
+ else
+ @m = y.length
+ end
+ end
+
+ def train(iterations,alpha = nil,type_of_train)
+ case type_of_train
+ when 'Grad' then
+ # gradiente de descenso
+ @cost_history = []
+ for i in 0..iterations
+ x = @x * @theta
+ hx = x.map { |e| sigmoid(e) }
+ @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
+ costo = 0
+ cost.to_a.map{ |e| costo = e }
+ @cost_history << ["iteracion: #{i}",costo]
+ end
+ @cost_history
+ "theta values => #{@theta} | cost => #{costo}"
+ when 'Newm' then
+ # metodo de newton
+ @cost_history = []
+ for i in 0..iterations
+ x = @x * @theta
+ hx = x.map { |e| sigmoid(e) }
+ uno_menos_hx = hx.map{ |e| (1-e) }
+ escalar = []
+ for u in 0...hx.size
+ escalar << hx[u] * uno_menos_hx[u]
+ end
+ gradiente = (1.0/@m) * @x.transpose * (hx - @y)
+ hessian = (1.0/@m) * @x.transpose * sumatoria(escalar) * @x
+ inversa = (1.0/hessian.det) * (hessian.adjugate)
+ @theta = @theta - inversa * gradiente
+ costo = 0
+ cost.to_a.map{ |e| costo = e }
+ @cost_history << ["iteracion: #{i}",costo]
+ end
+ @cost_history
+ "theta values => #{@theta} | cost => #{costo}"
+ when 'SGD' then
+ # Stochastic Gradient Descent
+ @cost_history = []
+ for i in 0..iterations
+ for i in 0..i
+ x = matrix(@x.to_a.map{|e| e.shuffle })*@theta
+ hx = x.map {|e| sigmoid(e) }
+ @theta = @theta - alpha / @m * @x.transpose * (hx - @y)
+ costo = 0
+ cost.to_a.map{|e| costo = e }
+ @cost_history << ["iteracion: #{i}",costo]
+ end
+ end
+ @cost_history
+ "theta values => #{@theta} | cost => #{costo}"
+ end
+ end
+
+ def predict(x)
+ hipo = x * @theta
+ var = 0
+ hipo.map { |x| var = x.is_a?(Integer) ? x.to_i : x.to_f }
+ if sigmoid(var) >= 0.5
+ 1
+ else
+ 0
+ end
+ end
+
+ private
+
+ def sumatoria(array)
+ array.inject(0) { |elem1, elem2| elem1 + elem2 }
+ end
+
+ def sigmoid(x)
+ 1.0 / (1.0 + Math.exp(-x))
+ end
+
+ def cost
+ x = @x*@theta
+ hx = x.map { |e| sigmoid(e) }
+ log_hx = hx.map{ |e| Math.log(e) }
+ log_uno_menos_hx = hx.map{ |e| Math.log(1 - e) }
+ costo = -1.0/@m * (Matrix[@y.to_a] * log_hx + (Matrix[@y.to_a].map{ |i| 1 - i }) * log_uno_menos_hx)
+ costo
+ end
+
+ def matrix(columns)
+ Matrix.rows(columns, false)
+ end
+ end
  end
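
The rewrite above is mostly whitespace cleanup, plus train now returns its summary string instead of printing it with p. A minimal training sketch; the feature matrix, labels, initial theta, learning rate and iteration count below are made-up values:

    require 'matrix'
    require 'aprendizaje_maquina'

    x = Matrix[[1, 2.0], [1, 3.5], [1, 5.0]]  # features with a leading bias column
    y = Vector[0, 0, 1]
    theta = Vector[0.0, 0.0]

    clf = AprendizajeMaquina::ClasificacionLogistica.new(x, y, theta)
    clf.train(100, 0.1, 'Grad')    # also accepts 'Newm' or 'SGD'
    clf.predict(Matrix[[1, 4.0]])  # => 1 or 0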
@@ -0,0 +1,96 @@
+ module AprendizajeMaquina
+ class KmeansClustering
+ def initialize(num_of_cluster_centroids,dataset_matrix)
+ @num_of_cluster_centroids = num_of_cluster_centroids
+ @dataset_matrix = dataset_matrix
+ @num_columns = @dataset_matrix.column_count
+ @num_rows = @dataset_matrix.row_count
+ @cluster_centroids = init_cluster_centroids
+ end
+
+ def fit(iterations)
+ clustering(iterations)
+ end
+
+ def cluster(num)
+ get("@cluster_#{num}")
+ end
+
+ def predict(vector)
+ array = []
+ @cluster_centroids.each do |cluster|
+ array << (vector-cluster).r
+ end
+ cluster = array.index(array.min)
+ cluster
+ end
+
+ private
+
+ def array_to_vector(array)
+ vector = Vector.elements(array, copy = true)
+ vector
+ end
+
+ def media(array)
+ if array.empty?
+ array#raise ArgumentError.new("array is empty")
+ else
+ 1.0/array.length * array.inject { |mem, var| mem + var }
+ end
+ end
+
+ def init_cluster_centroids
+ cluster_centroids = Array.new(@num_of_cluster_centroids) {
+ min_max_rand = []
+ for i in 0...@num_columns
+ min_max_rand << rand(@dataset_matrix.column(i).min..@dataset_matrix.column(i).max)
+ end
+ array_to_vector(min_max_rand)
+ }
+ cluster_centroids
+ end
+
+ def set(instance_variable_name,instance_variable_value)
+ instance_variable_set(instance_variable_name,instance_variable_value)
+ end
+
+ def get(instance_variable_name)
+ instance_variable_get(instance_variable_name)
+ end
+
+ def clustering(iterations)
+ iterations.times do
+ array2 = []
+ for i in 0...@num_rows
+ array = []
+ @cluster_centroids.each do |cluster|
+ array << (@dataset_matrix.row(i)-cluster).r
+ end
+ array2 << array
+ end
+
+ hash = {}
+ for i in 0...@num_rows
+ hash[@dataset_matrix.row(i)] = array2[i].index(array2[i].min)
+ end
+
+ @cluster_centroids.each_index do |index|
+ set("@cluster_#{index}", Array.new)
+ end
+
+ @cluster_centroids.each_index do |index|
+ hash.each do |key,value|
+ if value == index
+ get("@cluster_#{index}") << key
+ end
+ end
+ end
+
+ @cluster_centroids.each_index do |index|
+ @cluster_centroids[index] = media(get("@cluster_#{index}"))
+ end
+ end
+ end
+ end
+ end
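
This new file adds a k-means implementation: centroids are initialized at random within each column's range, every row is assigned to its nearest centroid by Euclidean distance ((row - centroid).r), and centroids are then recomputed as the mean of their assigned rows. A minimal sketch with made-up data:

    require 'matrix'
    require 'aprendizaje_maquina'

    data = Matrix[[1.0, 1.1], [0.9, 1.0], [8.0, 8.2], [7.9, 8.1]]
    kmeans = AprendizajeMaquina::KmeansClustering.new(2, data)
    kmeans.fit(10)                    # run 10 clustering iterations
    kmeans.cluster(0)                 # rows currently assigned to centroid 0
    kmeans.predict(Vector[7.8, 8.0])  # index of the nearest centroid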
@@ -1,7 +1,7 @@
  class Matrix
  def add_ones
- matrix = self.to_a.map{|i| i.insert(0,1)}
- matrix = Matrix.rows(matrix,copy=true)
+ matrix = self.to_a.map{ |i| i.insert(0,1) }
+ matrix = Matrix.rows(matrix,copy = true)
  matrix
  end

@@ -10,7 +10,7 @@ class Matrix
  self.column_count.times do |i|
  array << self.column(i).normalize
  end
- matrix_normal = Matrix.rows(array,copy=true).transpose
+ matrix_normal = Matrix.rows(array,copy = true).transpose
  matrix_normal
  end
  end
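
These core extensions to Matrix change only in spacing. add_ones prepends a column of ones (the intercept/bias column the regression classes expect), and the second hunk rebuilds the matrix from unit-normalized columns via Vector#normalize. For example (illustrative values):

    require 'matrix'
    require 'aprendizaje_maquina'

    m = Matrix[[2.0, 4.0], [1.0, 3.0]]
    m.add_ones  # => Matrix[[1, 2.0, 4.0], [1, 1.0, 3.0]]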
@@ -1,8 +1,6 @@
  module AprendizajeMaquina
-
- class RegresionLineal
-
- attr_reader :m,:b, :ecuacion, :theta
+ class RegresionLineal
+ attr_reader :m,:b, :ecuacion, :theta

  def initialize(x,y)
  @x = x
@@ -44,7 +42,7 @@ module AprendizajeMaquina
  raise ArgumentError, "Must be a number or matrix 1xN"
  end
  else
- return "There is not a equation to make predictions (first, run encontrar_ecuacion method)"
+ return "There is not a equation to make predictions (first, run find_ecuation method)"
  end
  end

@@ -67,7 +65,7 @@ module AprendizajeMaquina
  iter = @n - 1
  xy = []
  for i in 0..iter
- xy << array_1[i]*array_2[i]
+ xy << array_1[i] * array_2[i]
  end
  xy
  end
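
Only fragments of RegresionLineal appear in this diff, so the sketch below is speculative: initialize(x,y) and the attr_readers are visible above, the fit-method name find_ecuation is inferred from the updated message, and the data values and the array types of x and y are assumptions.

    require 'aprendizaje_maquina'

    x = [1, 2, 3, 4]            # assumed array input
    y = [2.1, 3.9, 6.2, 8.1]
    modelo = AprendizajeMaquina::RegresionLineal.new(x, y)
    modelo.find_ecuation        # inferred method name (see the message above)
    modelo.ecuacion             # fitted equation via attr_reader
    modelo.m                    # assumed slope
    modelo.b                    # assumed intercept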