aprendizaje_maquina 0.1.3 → 0.1.4.beta1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +50 -28
- data/aprendizaje_maquina.gemspec +0 -1
- data/examples/clasificacion_logistica_example.rb +16 -0
- data/examples/clustering_data.csv +177 -0
- data/examples/clustering_example.rb +9 -0
- data/examples/data_lg.csv +145 -0
- data/examples/regresion_lineal_example.rb +11 -0
- data/examples/train.csv +700 -0
- data/lib/aprendizaje_maquina.rb +1 -0
- data/lib/aprendizaje_maquina/cargar.rb +42 -43
- data/lib/aprendizaje_maquina/clasificacion_logistica.rb +103 -105
- data/lib/aprendizaje_maquina/clustering.rb +96 -0
- data/lib/aprendizaje_maquina/matrixx.rb +3 -3
- data/lib/aprendizaje_maquina/regresion_lineal.rb +4 -6
- data/lib/aprendizaje_maquina/version.rb +1 -1
- metadata +13 -6
data/lib/aprendizaje_maquina.rb
CHANGED
@@ -3,6 +3,7 @@ require "aprendizaje_maquina/cargar"
|
|
3
3
|
require "aprendizaje_maquina/regresion_lineal"
|
4
4
|
require "aprendizaje_maquina/matrixx"
|
5
5
|
require "aprendizaje_maquina/clasificacion_logistica"
|
6
|
+
require "aprendizaje_maquina/clustering"
|
6
7
|
|
7
8
|
module AprendizajeMaquina
|
8
9
|
#class RedNeuronal
|
@@ -1,49 +1,48 @@
|
|
1
1
|
require 'csv'
|
2
2
|
require 'matrix'
|
3
|
+
|
3
4
|
module AprendizajeMaquina
  # Reads a CSV file of numeric values and exposes its contents as Matrix and
  # Vector objects from Ruby's matrix library.
  #
  # Cell conversion rule: a cell whose text contains "." is converted with
  # to_f, every other cell with to_i.
  class Cargar
    # @param path_file [String] path of the CSV file to read
    # @raise [ArgumentError] if the file contains no rows
    def initialize(path_file)
      @path_file = path_file
      @csv_data = CSV.read(@path_file)
      raise ArgumentError, "CSV file has no rows" if @csv_data.empty?

      # The first row decides how many columns the file is considered to have.
      @largo_colum = @csv_data[0].length
    end

    # Builds a Matrix from the CSV data.
    #
    # @param columnas [nil, Range, Integer] nil selects every column, a Range
    #   selects a slice of columns (inclusive, exclusive and endless ranges are
    #   accepted), an Integer selects one column and yields an N x 1 matrix
    # @return [Matrix]
    # @raise [ArgumentError] if the selection reaches past the last column, if
    #   columnas has an unsupported type, or if a selected cell is empty
    def to_matrix(columnas = nil)
      case columnas
      when nil
        Matrix.rows(@csv_data.map { |row| row.map { |cell| to_number(cell) } })
      when Range
        # Highest index actually addressed by the range; nil for endless ranges.
        last_index = columnas.end
        last_index -= 1 if last_index && columnas.exclude_end?
        if last_index && last_index >= @largo_colum
          raise ArgumentError, "Number of columns don't exist"
        end

        Matrix.rows(@csv_data.map { |row| row[columnas].map { |cell| to_number(cell) } })
      when Integer
        raise ArgumentError, "Number of columns don't exist" if columnas >= @largo_colum

        Matrix.column_vector(column_values(columnas))
      else
        raise ArgumentError, "Must be nil, range or integer"
      end
    end

    # Builds a Vector from a single CSV column.
    #
    # @param columna [Integer] zero-based column index
    # @return [Vector]
    # @raise [ArgumentError] if the column does not exist or holds an empty cell
    def to_vector(columna)
      raise ArgumentError, "Column don't exist" if columna >= @largo_colum

      Vector.elements(column_values(columna))
    end

    private

    # Converts every cell of one column to a number.
    def column_values(index)
      @csv_data.map { |row| to_number(row[index]) }
    end

    # Converts one CSV cell to Float (text contains ".") or Integer (otherwise).
    # CSV.read yields nil for empty cells; that is reported explicitly instead
    # of failing later with NoMethodError.
    def to_number(cell)
      raise ArgumentError, "CSV contains an empty cell" if cell.nil?

      cell.include?(".") ? cell.to_f : cell.to_i
    end
  end
end
|
@@ -1,106 +1,104 @@
|
|
1
|
-
module AprendizajeMaquina
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
end
|
105
|
-
end
|
1
|
+
module AprendizajeMaquina
  # Binary logistic-regression classifier trained by batch gradient descent,
  # Newton's method or stochastic gradient descent.
  #
  # The design matrix x is multiplied by theta as-is; no column of ones is
  # added here, so the caller supplies one if an intercept is wanted.
  class ClasificacionLogistica
    TRAIN_TYPES = %w[Grad Newm SGD].freeze

    # @param x [Matrix] design matrix, one row per training example
    # @param y [Vector, Matrix, Array] labels (0 or 1), one per example;
    #   an Array is converted to a Vector so it can take part in vector maths
    # @param theta [Vector, Matrix] initial parameters (Vector, or a column
    #   Matrix matching a column-Matrix y)
    def initialize(x, y, theta)
      @x = x
      @y = y.is_a?(Array) ? Vector.elements(y) : y
      @theta = theta
      @m = case @y
           when Matrix then @y.row_count
           when Vector then @y.size
           else @y.length
           end
    end

    # Fits theta to the training data.
    #
    # @param iterations [Integer] number of update steps (epochs for 'SGD');
    #   exactly this many are performed
    # @param alpha [Numeric, nil] learning rate; required for 'Grad' and
    #   'SGD', ignored by 'Newm' (so `train(10, 'Newm')` is valid)
    # @param type_of_train [String] 'Grad', 'Newm' or 'SGD'
    # @return [String] summary with the final theta and cost
    # @raise [ArgumentError] on an unknown type_of_train or a missing alpha
    def train(iterations, alpha = nil, type_of_train)
      unless TRAIN_TYPES.include?(type_of_train)
        raise ArgumentError, "type_of_train must be 'Grad', 'Newm' or 'SGD'"
      end
      if alpha.nil? && type_of_train != 'Newm'
        raise ArgumentError, "alpha (learning rate) is required for '#{type_of_train}'"
      end

      @cost_history = []
      iterations.times do |i|
        case type_of_train
        when 'Grad' then gradient_step(alpha)
        when 'Newm' then newton_step
        when 'SGD'  then stochastic_epoch(alpha)
        end
        @cost_history << ["iteracion: #{i}", cost]
      end
      "theta values => #{@theta} | cost => #{cost}"
    end

    # Classifies one example.
    #
    # @param x [Matrix] expected to be a single-row matrix; if the product
    #   x * theta has several elements only the last one is classified
    # @return [Integer] 1 when the predicted probability is >= 0.5, else 0
    def predict(x)
      score = (x * @theta).to_a.flatten.last
      sigmoid(score.to_f) >= 0.5 ? 1 : 0
    end

    private

    def sigmoid(x)
      1.0 / (1.0 + Math.exp(-x))
    end

    # Predicted probabilities for every training example.
    def hypothesis
      (@x * @theta).map { |z| sigmoid(z) }
    end

    # One batch gradient-descent update. to_f prevents integer division from
    # turning an Integer alpha into a zero step.
    def gradient_step(alpha)
      @theta = @theta - (alpha.to_f / @m) * @x.transpose * (hypothesis - @y)
    end

    # One Newton update: theta -= H^-1 * gradient, with
    # H = (1/m) * X^T * S * X and S = diag(h_i * (1 - h_i)).
    def newton_step
      hx = hypothesis
      weights = hx.to_a.flatten.map { |h| h * (1 - h) }
      # S * X computed row by row, avoiding an m x m diagonal matrix.
      weighted_x = Matrix.rows(
        @x.to_a.each_with_index.map { |row, j| row.map { |v| v * weights[j] } }
      )
      gradient = (1.0 / @m) * @x.transpose * (hx - @y)
      hessian = (1.0 / @m) * @x.transpose * weighted_x
      @theta = @theta - hessian.inverse * gradient
    end

    # One pass over the training examples in random order, updating theta
    # after each example with that example's own gradient.
    def stochastic_epoch(alpha)
      labels = @y.to_a.flatten
      (0...@m).to_a.shuffle.each do |j|
        row = @x.row(j)
        score = (Matrix.rows([row.to_a]) * @theta).to_a.flatten.first
        error = sigmoid(score) - labels[j]
        step = row.map { |v| alpha * error * v }
        # Subtract in the same shape theta is stored in.
        @theta = @theta - (@theta.is_a?(Matrix) ? Matrix.column_vector(step.to_a) : step)
      end
    end

    # Mean cross-entropy cost of the current theta over the training set.
    def cost
      labels = @y.to_a.flatten
      probabilities = hypothesis.to_a.flatten
      total = labels.zip(probabilities).inject(0.0) do |sum, (y, h)|
        sum + y * Math.log(h) + (1 - y) * Math.log(1 - h)
      end
      -1.0 / @m * total
    end
  end
end
|
@@ -0,0 +1,96 @@
|
|
1
|
+
module AprendizajeMaquina
  # K-means clustering over the rows of a Matrix, using Euclidean distance.
  class KmeansClustering
    # @param num_of_cluster_centroids [Integer] number of clusters (k)
    # @param dataset_matrix [Matrix] one data point per row
    def initialize(num_of_cluster_centroids, dataset_matrix)
      @num_of_cluster_centroids = num_of_cluster_centroids
      @dataset_matrix = dataset_matrix
      @num_columns = @dataset_matrix.column_count
      @num_rows = @dataset_matrix.row_count
      # Members of each cluster after the latest iteration; empty until fit runs.
      @clusters = []
      @cluster_centroids = init_cluster_centroids
    end

    # Runs the assign/update cycle the given number of times.
    #
    # @param iterations [Integer]
    # @return [Integer] the iterations argument (result of Integer#times)
    def fit(iterations)
      clustering(iterations)
    end

    # Members of one cluster as of the last iteration of fit.
    #
    # @param num [Integer, String] zero-based cluster index
    # @return [Array<Vector>, nil] nil before fit has run or when the index
    #   does not name a cluster
    def cluster(num)
      index = Integer(num)
      index < 0 ? nil : @clusters[index]
    end

    # Index of the centroid closest to the given point; the lowest index wins
    # on ties.
    #
    # @param vector [Vector]
    # @return [Integer]
    def predict(vector)
      distances = @cluster_centroids.map { |centroid| (vector - centroid).r }
      distances.index(distances.min)
    end

    private

    # Component-wise mean of a non-empty list of vectors.
    def media(vectors)
      (1.0 / vectors.length) * vectors.inject(:+)
    end

    # Draws each initial centroid coordinate at random between the minimum and
    # maximum of the corresponding dataset column.
    def init_cluster_centroids
      Array.new(@num_of_cluster_centroids) do
        coordinates = (0...@num_columns).map do |j|
          column = @dataset_matrix.column(j)
          rand(column.min..column.max)
        end
        Vector.elements(coordinates)
      end
    end

    def clustering(iterations)
      rows = @dataset_matrix.row_vectors
      iterations.times do
        # Assignment step: every row, duplicates included, joins the cluster
        # of its nearest centroid.
        groups = Array.new(@num_of_cluster_centroids) { [] }
        rows.each { |row| groups[predict(row)] << row }
        @clusters = groups

        # Update step: a cluster that attracted no points keeps its previous
        # centroid, since there is nothing to average.
        @cluster_centroids = groups.each_with_index.map do |members, k|
          members.empty? ? @cluster_centroids[k] : media(members)
        end
      end
    end
  end
end
|
@@ -1,7 +1,7 @@
|
|
1
1
|
class Matrix
|
2
2
|
# Returns a new Matrix equal to the receiver with a column of ones prepended
# as the first column. The receiver is left unchanged.
#
# @return [Matrix]
def add_ones
  Matrix.rows(to_a.map { |row| [1] + row })
end
|
7
7
|
|
@@ -10,7 +10,7 @@ class Matrix
|
|
10
10
|
self.column_count.times do |i|
|
11
11
|
array << self.column(i).normalize
|
12
12
|
end
|
13
|
-
matrix_normal = Matrix.rows(array,copy=true).transpose
|
13
|
+
matrix_normal = Matrix.rows(array,copy = true).transpose
|
14
14
|
matrix_normal
|
15
15
|
end
|
16
16
|
end
|
@@ -1,8 +1,6 @@
|
|
1
1
|
module AprendizajeMaquina
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
attr_reader :m,:b, :ecuacion, :theta
|
2
|
+
class RegresionLineal
|
3
|
+
attr_reader :m,:b, :ecuacion, :theta
|
6
4
|
|
7
5
|
def initialize(x,y)
|
8
6
|
@x = x
|
@@ -44,7 +42,7 @@ module AprendizajeMaquina
|
|
44
42
|
raise ArgumentError, "Must be a number or matrix 1xN"
|
45
43
|
end
|
46
44
|
else
|
47
|
-
return "There is not a equation to make predictions (first, run
|
45
|
+
return "There is not a equation to make predictions (first, run find_ecuation method)"
|
48
46
|
end
|
49
47
|
end
|
50
48
|
|
@@ -67,7 +65,7 @@ module AprendizajeMaquina
|
|
67
65
|
iter = @n - 1
|
68
66
|
xy = []
|
69
67
|
for i in 0..iter
|
70
|
-
xy << array_1[i]*array_2[i]
|
68
|
+
xy << array_1[i] * array_2[i]
|
71
69
|
end
|
72
70
|
xy
|
73
71
|
end
|