aprendizaje_maquina 0.1.4.beta1 → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +9 -0
- data/examples/decision_tree_example.rb +26 -0
- data/lib/aprendizaje_maquina.rb +1 -4
- data/lib/aprendizaje_maquina/decision_tree.rb +130 -0
- data/lib/aprendizaje_maquina/version.rb +1 -1
- metadata +6 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d7e2c5b8ffec0964f22044e3adc1891c19f242dc
|
4
|
+
data.tar.gz: c7698caf79f6506b2de12e63ba21b1943abaaa0f
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d8e516d6233105d702bf3f021d6c6da7cf90a8f42772c85d0a201ec2037c336c63953860d195a9135277379a58374c89b57e1c6d5243faa7c10dbb27bcf7a1f8
|
7
|
+
data.tar.gz: '098e44d8ffcf022fa88d78b6ec5a2667582278688d50d8fb46fb1d55cee6c94088f747e53758edd9ed6c639111aceacfd9b59f5d9d402a3aa98e48ee1f8efee2'
|
data/README.md
CHANGED
@@ -135,6 +135,15 @@ make predictions for multiclass(one vs all)
|
|
135
135
|
# Predict the closest cluster
|
136
136
|
p clustering.predict(Vector[63,190])
|
137
137
|
|
138
|
+
## Decision tree
|
139
|
+
|
140
|
+
tree = AprendizajeMaquina::DecisionTree.new(dataset)
|
141
|
+
|
142
|
+
print tree.display_tree
|
143
|
+
|
144
|
+
puts tree.predict(datatest)
|
145
|
+
|
146
|
+
|
138
147
|
## License
|
139
148
|
|
140
149
|
The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
@@ -0,0 +1,26 @@
|
|
1
|
+
require 'aprendizaje_maquina'

# Example: train a decision tree on a small in-memory table, print the
# learned tree, then classify one unlabelled row.
#
# Each training row is a list of attribute values followed by the class
# label in the last position.
training_rows = [
  ['slashdot', 'USA', 'yes', 18, 'None'],
  ['google', 'France', 'yes', 23, 'Premium'],
  ['digg', 'USA', 'yes', 24, 'Basic'],
  ['kiwitobes', 'France', 'yes', 23, 'Basic'],
  ['google', 'UK', 'no', 21, 'Premium'],
  ['(direct)', 'New Zealand', 'no', 12, 'None'],
  ['(direct)', 'UK', 'no', 21, 'Basic'],
  ['google', 'USA', 'no', 24, 'Premium'],
  ['slashdot', 'France', 'yes', 19, 'None'],
  ['digg', 'USA', 'no', 18, 'None'],
  ['google', 'UK', 'no', 18, 'None'],
  ['kiwitobes', 'UK', 'no', 19, 'None'],
  ['digg', 'New Zealand', 'yes', 12, 'Basic'],
  ['slashdot', 'UK', 'no', 21, 'None'],
  ['google', 'UK', 'yes', 18, 'Basic'],
  ['kiwitobes', 'France', 'yes', 19, 'Basic']
]

classifier = AprendizajeMaquina::DecisionTree.new(training_rows)

# Textual dump of the learned splits.
print classifier.display_tree

# Same attribute columns as the training rows, without the label.
unlabeled_row = ['(direct)', 'USA', 'yes', 5]

# Prints the class counts of the leaf the row falls into.
p classifier.predict(unlabeled_row)
|
data/lib/aprendizaje_maquina.rb
CHANGED
@@ -4,13 +4,10 @@ require "aprendizaje_maquina/regresion_lineal"
|
|
4
4
|
require "aprendizaje_maquina/matrixx"
|
5
5
|
require "aprendizaje_maquina/clasificacion_logistica"
|
6
6
|
require "aprendizaje_maquina/clustering"
|
7
|
+
require "aprendizaje_maquina/decision_tree"
|
7
8
|
|
8
9
|
module AprendizajeMaquina
|
9
10
|
#class RedNeuronal
|
10
11
|
# Coming soon...
|
11
12
|
#end
|
12
|
-
|
13
|
-
#class ArbolDecision
|
14
|
-
# Coming soon...
|
15
|
-
#end
|
16
13
|
end
|
@@ -0,0 +1,130 @@
|
|
1
|
+
module AprendizajeMaquina
  # Entropy-based binary decision tree classifier.
  #
  # The training set is an array of rows; every row is an array whose last
  # element is the class label and whose other elements are attributes.
  # Integer/Float attribute values are split with ">= value", every other
  # value is split with "== value".
  class DecisionTree
    # @param dataset [Array<Array>] training rows (label in the last column)
    def initialize(dataset)
      @dataset = dataset
    end

    # Renders the learned tree as text, one node per line, indented two
    # spaces per depth level. A decision node is printed as
    # "<label> --> <column>:<value>?" and a leaf as "<label> --> <class counts>",
    # where <label> is "root", "true" (rows matching the parent's test) or
    # "false" (rows not matching it).
    #
    # @return [String]
    def display_tree
      render_node(build_tree(@dataset), 'root', 0, String.new)
    end

    # Classifies a single observation by walking the tree from the root.
    #
    # @param observation [Array] attribute values in training-column order
    #   (no label)
    # @return [Hash] class label => number of training rows in the leaf reached
    def predict(observation)
      node = build_tree(@dataset)
      until node.is_a?(Hash)
        _gain, (column, value), (matched, unmatched) = node
        # The comparison kind is chosen from the split value, exactly as it
        # was when the split was learned, so mismatched observation types
        # fall into the "false" branch instead of raising.
        chosen = matches?(observation[column], value) ? matched : unmatched
        node = build_tree(chosen)
      end
      node
    end

    private

    # Appends +node+ and, recursively, its subtree to +out+ and returns +out+.
    def render_node(node, label, depth, out)
      prefix = "#{'  ' * depth}#{label} --> "
      if node.is_a?(Hash)
        out << "#{prefix}#{node}\n"
      else
        _gain, (column, value), (matched, unmatched) = node
        out << "#{prefix}#{column}:#{value}?\n"
        render_node(build_tree(matched), true, depth + 1, out)
        render_node(build_tree(unmatched), false, depth + 1, out)
      end
      out
    end

    # True for the value types that are split by threshold rather than equality.
    def numeric?(value)
      value.is_a?(Integer) || value.is_a?(Float)
    end

    # Single definition of the split test shared by training and prediction:
    # numeric split values use ">=", everything else uses "==". A non-numeric
    # cell never satisfies a numeric threshold.
    def matches?(cell, value)
      return cell == value unless numeric?(value)

      numeric?(cell) && cell >= value
    end

    # Splits +dataset+ into [rows passing the test, rows failing the test].
    def split_dataset(dataset, column, value)
      dataset.partition { |row| matches?(row[column], value) }
    end

    # Counts how many rows carry each class label (last column).
    #
    # @return [Hash] label => count
    def count_classes(dataset)
      dataset.each_with_object({}) do |row, counts|
        label = row[-1]
        counts[label] = counts.fetch(label, 0) + 1
      end
    end

    # Shannon entropy (base 2) of the class labels in +dataset+.
    def entropy(dataset)
      size = dataset.length
      count_classes(dataset).each_value.reduce(0.0) do |acc, count|
        prob = count.to_f / size
        acc - prob * Math.log2(prob)
      end
    end

    # Finds the single best split of +dataset+.
    #
    # @return [Array, Hash] [information_gain, [column, value], [true_rows, false_rows]]
    #   when some split has positive information gain; otherwise the class
    #   counts of +dataset+ (a leaf). An empty dataset yields an empty leaf.
    def build_tree(dataset)
      return count_classes(dataset) if dataset.empty?

      base_entropy = entropy(dataset) # loop-invariant, computed once
      total = dataset.length.to_f
      best_gain = 0.0
      best_rule = nil
      best_sets = nil

      # The last column is the label, so it is excluded from the candidates.
      (0...dataset.first.length - 1).each do |column|
        # Repeated values produce identical splits; evaluate each value once,
        # in first-occurrence order so ties resolve as before.
        dataset.map { |row| row[column] }.uniq.each do |value|
          matched, unmatched = split_dataset(dataset, column, value)
          gain = base_entropy - (matched.length / total) * entropy(matched) -
                 (unmatched.length / total) * entropy(unmatched)
          next unless gain > best_gain # keep the first split with the highest gain

          best_gain = gain
          best_rule = [column, value]
          best_sets = [matched, unmatched]
        end
      end

      best_gain > 0 ? [best_gain, best_rule, best_sets] : count_classes(dataset)
    end
  end
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: aprendizaje_maquina
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.4
|
4
|
+
version: 0.1.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Erickson Morales
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-05-
|
11
|
+
date: 2018-05-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -76,12 +76,14 @@ files:
|
|
76
76
|
- examples/clustering_data.csv
|
77
77
|
- examples/clustering_example.rb
|
78
78
|
- examples/data_lg.csv
|
79
|
+
- examples/decision_tree_example.rb
|
79
80
|
- examples/regresion_lineal_example.rb
|
80
81
|
- examples/train.csv
|
81
82
|
- lib/aprendizaje_maquina.rb
|
82
83
|
- lib/aprendizaje_maquina/cargar.rb
|
83
84
|
- lib/aprendizaje_maquina/clasificacion_logistica.rb
|
84
85
|
- lib/aprendizaje_maquina/clustering.rb
|
86
|
+
- lib/aprendizaje_maquina/decision_tree.rb
|
85
87
|
- lib/aprendizaje_maquina/matrixx.rb
|
86
88
|
- lib/aprendizaje_maquina/regresion_lineal.rb
|
87
89
|
- lib/aprendizaje_maquina/version.rb
|
@@ -100,9 +102,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
100
102
|
version: '0'
|
101
103
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
102
104
|
requirements:
|
103
|
-
- - "
|
105
|
+
- - ">="
|
104
106
|
- !ruby/object:Gem::Version
|
105
|
-
version:
|
107
|
+
version: '0'
|
106
108
|
requirements: []
|
107
109
|
rubyforge_project:
|
108
110
|
rubygems_version: 2.6.14
|