aprendizaje_maquina 0.1.3a → 0.1.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +28 -50
- data/aprendizaje_maquina.gemspec +1 -0
- data/lib/aprendizaje_maquina.rb +0 -1
- data/lib/aprendizaje_maquina/cargar.rb +43 -42
- data/lib/aprendizaje_maquina/clasificacion_logistica.rb +105 -103
- data/lib/aprendizaje_maquina/matrixx.rb +3 -3
- data/lib/aprendizaje_maquina/regresion_lineal.rb +6 -4
- data/lib/aprendizaje_maquina/version.rb +1 -1
- metadata +6 -13
- data/examples/clasificacion_logistica_example.rb +0 -16
- data/examples/clustering_data.csv +0 -177
- data/examples/clustering_example.rb +0 -9
- data/examples/data_lg.csv +0 -145
- data/examples/regresion_lineal_example.rb +0 -11
- data/examples/train.csv +0 -700
- data/lib/aprendizaje_maquina/clustering.rb +0 -96
@@ -1,96 +0,0 @@
|
|
1
|
-
module AprendizajeMaquina
  # K-means clustering over the rows of a Matrix.
  #
  # Every row of the dataset is one observation and every column one feature.
  # Centroids start at random positions drawn, per feature, from the range
  # spanned by that feature's column, and are refined by #fit.
  class KmeansClustering
    # @param num_of_cluster_centroids [Integer] number of clusters to build
    # @param dataset_matrix [Matrix] observations (rows) by features (columns)
    def initialize(num_of_cluster_centroids, dataset_matrix)
      @num_of_cluster_centroids = num_of_cluster_centroids
      @dataset_matrix = dataset_matrix
      @num_columns = @dataset_matrix.column_count
      @num_rows = @dataset_matrix.row_count
      # Members of each cluster, indexed like @cluster_centroids; filled by #fit.
      @clusters = []
      @cluster_centroids = init_cluster_centroids
    end

    # Runs the assign/update cycle +iterations+ times.
    #
    # @param iterations [Integer] number of refinement passes
    # @return [Integer] the +iterations+ argument (result of Integer#times)
    def fit(iterations)
      clustering(iterations)
    end

    # Returns the observations assigned to cluster +num+ by the last #fit pass.
    #
    # @param num [Integer] zero-based cluster index
    # @return [Array<Vector>, nil] member rows, or nil when #fit has not run
    #   (or the index does not exist)
    def cluster(num)
      @clusters[num]
    end

    # Finds the cluster whose centroid is closest (Euclidean norm) to +vector+.
    #
    # @param vector [Vector] observation with one entry per feature
    # @return [Integer, nil] index of the nearest centroid; ties resolve to the
    #   lowest index; nil when there are no centroids
    def predict(vector)
      nearest_centroid_index(vector)
    end

    private

    # Builds a Vector that owns a copy of +array+.
    def array_to_vector(array)
      Vector.elements(array, true)
    end

    # Arithmetic mean of +array+ (numbers or Vectors). An empty array is
    # returned unchanged, so callers must check for emptiness themselves.
    def media(array)
      return array if array.empty?

      array.inject(:+) / array.length.to_f
    end

    # Draws one random centroid per cluster; each coordinate is sampled from
    # the [min, max] range of the corresponding dataset column.
    def init_cluster_centroids
      # The column bounds do not change between centroids, so compute them once.
      bounds = @dataset_matrix.column_vectors.map { |column| column.to_a.minmax }

      Array.new(@num_of_cluster_centroids) do
        array_to_vector(bounds.map { |low, high| rand(low..high) })
      end
    end

    # Index of the centroid with the smallest distance to +vector+.
    def nearest_centroid_index(vector)
      @cluster_centroids.each_index.min_by do |index|
        (vector - @cluster_centroids[index]).r
      end
    end

    # Lloyd's iteration: assign every row to its nearest centroid, then move
    # each centroid to the mean of its members.
    def clustering(iterations)
      rows = @dataset_matrix.row_vectors

      iterations.times do
        members_by_cluster = Array.new(@cluster_centroids.length) { [] }

        # Rows are appended one by one (not keyed in a Hash) so that duplicate
        # observations each count towards membership and the mean.
        rows.each do |row|
          index = nearest_centroid_index(row)
          members_by_cluster[index] << row if index
        end
        @clusters = members_by_cluster

        @clusters.each_with_index do |members, index|
          # A cluster that attracted no rows keeps its previous centroid;
          # replacing it with an empty value would break later distance
          # computations.
          next if members.empty?

          @cluster_centroids[index] = media(members)
        end
      end
    end
  end
end
|