aprendizaje_maquina 0.1.2 → 0.1.3a

Sign up to get free protection for your applications and to get access to all the features.
@@ -2,14 +2,12 @@ require "aprendizaje_maquina/version"
2
2
  require "aprendizaje_maquina/cargar"
3
3
  require "aprendizaje_maquina/regresion_lineal"
4
4
  require "aprendizaje_maquina/matrixx"
5
+ require "aprendizaje_maquina/clasificacion_logistica"
6
+ require "aprendizaje_maquina/clustering"
5
7
 
6
8
  module AprendizajeMaquina
7
- #class RegresionLogistica
8
- # Coming soon...
9
- #end
10
-
11
9
  #class RedNeuronal
12
- # Coming soon...
10
+ # Coming soon...
13
11
  #end
14
12
 
15
13
  #class ArbolDecision
@@ -1,46 +1,48 @@
1
1
  require 'csv'
2
2
  require 'matrix'
3
+
3
4
  module AprendizajeMaquina
5
+ class Cargar
6
+ def initialize(path_file)
7
+ @path_file = path_file
8
+ @csv_data = CSV.read(@path_file)
9
+ @largo_colum = @csv_data[0].length
10
+ end
4
11
 
5
- class Cargar
12
+ def to_matrix(columnas = nil)
13
+ if columnas == nil
14
+ array = @csv_data.map{ |e| e.map{ |o| o.include?(".") ? o.to_f : o.to_i } }
15
+ matrix = Matrix.rows(array,copy=true)
16
+ matrix
17
+ elsif columnas.is_a?(Range)
18
+ if columnas.last >= @largo_colum
19
+ raise ArgumentError, "Number of columns don't exist"
20
+ else
21
+ array = @csv_data.map{ |e| e[columnas].map{ |i| i.include?(".") ? i.to_f : i.to_i} }
22
+ matrix = Matrix.rows(array,copy=true)
23
+ matrix
24
+ end
25
+ elsif columnas.is_a?(Integer)
26
+ if columnas >= @largo_colum
27
+ raise ArgumentError, "Number of columns don't exist"
28
+ else
29
+ array = @csv_data.map { |e| e[columnas].include?(".") ? e[columnas].to_f : e[columnas].to_i }
30
+ matrix = Matrix[array].transpose
31
+ matrix
32
+ end
33
+ else
34
+ raise ArgumentError, "Must be nil, range or integer"
35
+ end
36
+ end
6
37
 
7
- def initialize(path_file)
8
- @path_file = path_file
9
- @csv_data = CSV.read(@path_file)
10
- #@csv_data = CSV.read(File.join(Dir.pwd,@path_file))
11
- @largo_colum = @csv_data[0].length
12
- end
13
-
14
- def to_matrix(columnas = nil)
15
- if columnas == nil
16
- array = @csv_data.map{|e| e.map{|o| o.to_i } }
17
- matrix = Matrix.rows(array,copy=true)
18
- matrix
19
- elsif columnas.is_a?(Range)
20
- if columnas.last >= @largo_colum
21
- raise ArgumentError, "Number of columns don't exist"
22
- else
23
- array = @csv_data.map{|e| e[columnas].map{|i| i.to_i} }
24
- matrix = Matrix.rows(array,copy=true)
25
- matrix
26
- end
27
- elsif columnas.is_a?(Integer)
28
- array = @csv_data.map { |e| e[columnas].to_i }
29
- matrix = Matrix[array].transpose
30
- matrix
31
- else
32
- raise ArgumentError, "Must be nil, range or integer"
33
- end
34
- end
35
-
36
- def to_vector(columna)
37
- if columna >= @largo_colum
38
- raise ArgumentError, "Column don't exist"
39
- else
40
- array = @csv_data.map { |e| e[columna].to_i }
41
- vector = Vector.elements(array,copy=true)
42
- vector
43
- end
44
- end
45
- end
38
+ def to_vector(columna)
39
+ if columna >= @largo_colum
40
+ raise ArgumentError, "Column don't exist"
41
+ else
42
+ array = @csv_data.map { |e| e[columna].include?(".") ? e[columna].to_f : e[columna].to_i }
43
+ vector = Vector.elements(array,copy = true)
44
+ vector
45
+ end
46
+ end
47
+ end
46
48
  end
@@ -0,0 +1,104 @@
1
module AprendizajeMaquina
  # Binary logistic-regression classifier built on Ruby's standard `matrix`
  # library.
  #
  # x is a Matrix with one example per row; y holds one 0/1 label per example
  # and theta the initial parameters (one per column of x). y and theta may be
  # given as a Vector, a single-column/single-row Matrix or an Array; they are
  # stored internally as Vectors.
  class ClasificacionLogistica
    # Probabilities are kept this far away from 0 and 1 before taking
    # logarithms, so the cost never becomes NaN or infinite.
    EPSILON = 1e-15

    # @return [Vector] current parameter values
    attr_reader :theta
    # @return [Array<Array>] ["iteracion: N", cost] pairs of the last #train call
    attr_reader :cost_history

    # @param x [Matrix] training examples, one per row
    # @param y [Vector, Matrix, Array] 0/1 label of every example
    # @param theta [Vector, Matrix, Array] initial parameters
    def initialize(x, y, theta)
      @x = x
      @y = as_vector(y)
      @theta = as_vector(theta)
      @m = @y.size
      @cost_history = []
    end

    # Fits theta to the training data.
    #
    # @param iterations [Integer] number of update rounds (for 'SGD', passes
    #   over the whole data set)
    # @param alpha [Numeric, nil] learning rate; required for 'Grad' and 'SGD',
    #   ignored by 'Newm'
    # @param type_of_train [String] 'Grad' (batch gradient descent),
    #   'Newm' (Newton's method) or 'SGD' (stochastic gradient descent)
    # @return [String] summary with the final theta and cost
    # @raise [ArgumentError] for an unknown type_of_train or a missing alpha
    # @raise [ExceptionForMatrix::ErrNotRegular] in 'Newm' when the Hessian is
    #   singular
    def train(iterations, alpha = nil, type_of_train)
      case type_of_train
      when 'Grad'
        rate = learning_rate(alpha, type_of_train)
        iterate(iterations) { @theta -= gradient * rate }
      when 'Newm'
        iterate(iterations) { @theta -= hessian.inverse * gradient }
      when 'SGD'
        rate = learning_rate(alpha, type_of_train)
        iterate(iterations) { stochastic_pass(rate) }
      else
        raise ArgumentError, "type_of_train must be 'Grad', 'Newm' or 'SGD'"
      end

      costo = @cost_history.empty? ? cost : @cost_history.last[1]
      "theta values => #{@theta} | cost => #{costo}"
    end

    # Classifies one or more examples with the current theta.
    #
    # @param x [Matrix, Vector] a single example (Vector or one-row Matrix) or
    #   a Matrix with one example per row
    # @return [Integer, Array<Integer>] 1 when the predicted probability is at
    #   least 0.5 and 0 otherwise; a single label for a single example, an
    #   Array with one label per row when several rows are given
    def predict(x)
      rows = x.is_a?(Vector) ? [x] : x.row_vectors
      labels = rows.map { |row| sigmoid(row.inner_product(@theta)) >= 0.5 ? 1 : 0 }
      labels.size == 1 ? labels.first : labels
    end

    private

    # Normalises labels/parameters to a Vector.
    def as_vector(values)
      case values
      when Vector then values
      when Matrix then values.column_count == 1 ? values.column(0) : values.row(0)
      else Vector.elements(values.to_a)
      end
    end

    # Validates the learning rate and converts it to Float, so that an Integer
    # alpha is never truncated by integer division.
    def learning_rate(alpha, type_of_train)
      unless alpha.is_a?(Numeric)
        raise ArgumentError, "alpha (learning rate) is required for '#{type_of_train}'"
      end

      alpha.to_f
    end

    # Runs the given update exactly `iterations` times, recording the cost
    # after each one.
    def iterate(iterations)
      @cost_history = []
      iterations.times do |i|
        yield
        @cost_history << ["iteracion: #{i}", cost]
      end
    end

    # Predicted probability of every training example.
    def hypothesis
      (@x * @theta).map { |z| sigmoid(z) }
    end

    # Average gradient of the cost: X' (h - y) / m.
    def gradient
      (@x.transpose * (hypothesis - @y)) / @m.to_f
    end

    # Hessian of the cost: X' diag(h (1 - h)) X / m. Each row of X is scaled
    # by its own weight instead of materialising an m x m diagonal matrix.
    def hessian
      weights = hypothesis.map { |h| h * (1 - h) }.to_a
      weighted_rows = @x.to_a.each_with_index.map do |row, i|
        row.map { |value| value * weights[i] }
      end
      (@x.transpose * Matrix.rows(weighted_rows)) / @m.to_f
    end

    # One pass of stochastic gradient descent: visits the examples in random
    # order and updates theta after each one.
    def stochastic_pass(rate)
      (0...@m).to_a.shuffle.each do |i|
        example = @x.row(i)
        error = sigmoid(example.inner_product(@theta)) - @y[i]
        @theta -= example * (rate * error)
      end
    end

    def sigmoid(x)
      1.0 / (1.0 + Math.exp(-x))
    end

    # Mean cross-entropy of the current theta over the training set.
    def cost
      total = hypothesis.to_a.each_with_index.inject(0.0) do |sum, (h, i)|
        prob = [[h, EPSILON].max, 1 - EPSILON].min
        sum + @y[i] * Math.log(prob) + (1 - @y[i]) * Math.log(1 - prob)
      end
      -total / @m
    end
  end
end
@@ -0,0 +1,96 @@
1
module AprendizajeMaquina
  # K-means clustering over the rows of a Matrix (from Ruby's standard
  # `matrix` library). Distances are Euclidean (Vector#r of the difference).
  class KmeansClustering
    # @param num_of_cluster_centroids [Integer] number of clusters (k)
    # @param dataset_matrix [Matrix] data set, one observation per row
    # @raise [ArgumentError] when the data set has no rows
    def initialize(num_of_cluster_centroids, dataset_matrix)
      @num_of_cluster_centroids = num_of_cluster_centroids
      @dataset_matrix = dataset_matrix
      @num_columns = @dataset_matrix.column_count
      @num_rows = @dataset_matrix.row_count
      raise ArgumentError, "dataset_matrix must have at least one row" if @num_rows.zero?

      # Members of every cluster after the last iteration; empty until #fit runs.
      @clusters = []
      @cluster_centroids = init_cluster_centroids
    end

    # Runs the assignment/update cycle the given number of times.
    #
    # @param iterations [Integer]
    # @return [Integer] the number of iterations (value of Integer#times)
    def fit(iterations)
      iterations.times { clustering_step }
    end

    # Rows assigned to a cluster by the last #fit call.
    #
    # @param num [Integer, #to_s] zero-based cluster number
    # @return [Array<Vector>, nil] member rows in data-set order; nil before
    #   #fit has run or when the cluster number does not exist
    def cluster(num)
      index = num.to_s
      return nil unless index.match?(/\A\d+\z/)

      @clusters[index.to_i]
    end

    # Finds the cluster whose centroid is closest to the given point.
    #
    # @param vector [Vector] point with one component per data-set column
    # @return [Integer] zero-based cluster number (the lowest one on ties)
    def predict(vector)
      nearest_centroid_index(vector)
    end

    private

    # Index of the centroid closest to the point; the first one wins ties.
    def nearest_centroid_index(point)
      distances = @cluster_centroids.map { |centroid| (point - centroid).r }
      distances.index(distances.min)
    end

    # Component-wise mean of a non-empty list of Vectors.
    def media(vectors)
      vectors.inject(:+) / vectors.length.to_f
    end

    # Draws every centroid uniformly inside the bounding box of the data.
    # The per-column ranges are computed once and reused for all centroids.
    def init_cluster_centroids
      ranges = (0...@num_columns).map do |j|
        column = @dataset_matrix.column(j)
        column.min..column.max
      end
      Array.new(@num_of_cluster_centroids) do
        Vector.elements(ranges.map { |range| rand(range) })
      end
    end

    # One k-means iteration: assign every row to its nearest centroid, then
    # move each centroid to the mean of its members.
    def clustering_step
      rows = @dataset_matrix.row_vectors
      @clusters = Array.new(@num_of_cluster_centroids) { [] }
      # Rows are assigned by position, so duplicated rows are all kept.
      rows.each { |row| @clusters[nearest_centroid_index(row)] << row }

      @cluster_centroids = @cluster_centroids.each_with_index.map do |centroid, index|
        members = @clusters[index]
        # A cluster that attracted no rows keeps its previous centroid.
        members.empty? ? centroid : media(members)
      end
    end
  end
end
@@ -1,7 +1,7 @@
1
1
  class Matrix
2
2
  def add_ones
3
- matrix = self.to_a.map{|i| i.insert(0,1)}
4
- matrix = Matrix.rows(matrix,copy=true)
3
+ matrix = self.to_a.map{ |i| i.insert(0,1) }
4
+ matrix = Matrix.rows(matrix,copy = true)
5
5
  matrix
6
6
  end
7
7
 
@@ -10,7 +10,7 @@ class Matrix
10
10
  self.column_count.times do |i|
11
11
  array << self.column(i).normalize
12
12
  end
13
- matrix_normal = Matrix.rows(array,copy=true).transpose
13
+ matrix_normal = Matrix.rows(array,copy = true).transpose
14
14
  matrix_normal
15
15
  end
16
16
  end
@@ -1,8 +1,6 @@
1
1
  module AprendizajeMaquina
2
-
3
- class RegresionLineal
4
-
5
- attr_reader :m,:b, :ecuacion, :theta
2
+ class RegresionLineal
3
+ attr_reader :m,:b, :ecuacion, :theta
6
4
 
7
5
  def initialize(x,y)
8
6
  @x = x
@@ -44,7 +42,7 @@ module AprendizajeMaquina
44
42
  raise ArgumentError, "Must be a number or matrix 1xN"
45
43
  end
46
44
  else
47
- return "There is not a equation to make predictions (first, run encontrar_ecuacion method)"
45
+ return "There is not a equation to make predictions (first, run find_ecuation method)"
48
46
  end
49
47
  end
50
48
 
@@ -67,7 +65,7 @@ module AprendizajeMaquina
67
65
  iter = @n - 1
68
66
  xy = []
69
67
  for i in 0..iter
70
- xy << array_1[i]*array_2[i]
68
+ xy << array_1[i] * array_2[i]
71
69
  end
72
70
  xy
73
71
  end
@@ -1,3 +1,3 @@
1
1
  module AprendizajeMaquina
2
- VERSION = "0.1.2"
2
+ VERSION = "0.1.3a"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aprendizaje_maquina
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.2
4
+ version: 0.1.3a
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erickson Morales
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2017-11-12 00:00:00.000000000 Z
11
+ date: 2018-05-11 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -72,12 +72,20 @@ files:
72
72
  - aprendizaje_maquina.gemspec
73
73
  - bin/console
74
74
  - bin/setup
75
+ - examples/clasificacion_logistica_example.rb
76
+ - examples/clustering_data.csv
77
+ - examples/clustering_example.rb
78
+ - examples/data_lg.csv
79
+ - examples/regresion_lineal_example.rb
80
+ - examples/train.csv
75
81
  - lib/aprendizaje_maquina.rb
76
82
  - lib/aprendizaje_maquina/cargar.rb
83
+ - lib/aprendizaje_maquina/clasificacion_logistica.rb
84
+ - lib/aprendizaje_maquina/clustering.rb
77
85
  - lib/aprendizaje_maquina/matrixx.rb
78
86
  - lib/aprendizaje_maquina/regresion_lineal.rb
79
87
  - lib/aprendizaje_maquina/version.rb
80
- homepage: ''
88
+ homepage:
81
89
  licenses:
82
90
  - MIT
83
91
  metadata: {}
@@ -92,12 +100,12 @@ required_ruby_version: !ruby/object:Gem::Requirement
92
100
  version: '0'
93
101
  required_rubygems_version: !ruby/object:Gem::Requirement
94
102
  requirements:
95
- - - ">="
103
+ - - ">"
96
104
  - !ruby/object:Gem::Version
97
- version: '0'
105
+ version: 1.3.1
98
106
  requirements: []
99
107
  rubyforge_project:
100
- rubygems_version: 2.5.2
108
+ rubygems_version: 2.6.14
101
109
  signing_key:
102
110
  specification_version: 4
103
111
  summary: Machine learning gem / Una gema para el aprendizaje de maquinas.