aprendizaje_maquina 0.1.4.beta1 → 0.1.4

This page shows the file-by-file changes between aprendizaje_maquina 0.1.4.beta1 and 0.1.4.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 03e7fac70b0b1de20e1496d918b96fbf5c716be6
4
- data.tar.gz: 38669d3a47c811f6af8794223c6c5395c3b1e35d
3
+ metadata.gz: d7e2c5b8ffec0964f22044e3adc1891c19f242dc
4
+ data.tar.gz: c7698caf79f6506b2de12e63ba21b1943abaaa0f
5
5
  SHA512:
6
- metadata.gz: 74c9a63aa42c3844846ddd5db0dc6300ec6022c071448efd7ddc092e5f857afbbffd47cb859fa6d3a3d495cc68ecfa97b7560cea820c87c8200a410b95ca09a9
7
- data.tar.gz: 948708cb197af7a4fdbeba2c423ced683df600ee15ebf744dfbd4e8723877432a7cd1d170a817cdb3887fed53378fbd29f4339a71fede89c6e4c8b4a94c2e241
6
+ metadata.gz: d8e516d6233105d702bf3f021d6c6da7cf90a8f42772c85d0a201ec2037c336c63953860d195a9135277379a58374c89b57e1c6d5243faa7c10dbb27bcf7a1f8
7
+ data.tar.gz: '098e44d8ffcf022fa88d78b6ec5a2667582278688d50d8fb46fb1d55cee6c94088f747e53758edd9ed6c639111aceacfd9b59f5d9d402a3aa98e48ee1f8efee2'
data/README.md CHANGED
@@ -135,6 +135,15 @@ make predictions for multiclass(one vs all)
135
135
  # Predict the closest cluster
136
136
  p clustering.predict(Vector[63,190])
137
137
 
138
+ ## Decision tree
139
+
140
+ tree = AprendizajeMaquina::DecisionTree.new(dataset)
141
+
142
+ print tree.display_tree
143
+
144
+ puts tree.predict(datatest)
145
+
146
+
138
147
  ## License
139
148
 
140
149
  The gem is available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
# Example usage of AprendizajeMaquina::DecisionTree.
require 'aprendizaje_maquina'

# Training rows: [referrer, country, read_faq, pages_viewed, label].
# The final column is the class the tree learns to predict.
my_data = [
  ['slashdot', 'USA', 'yes', 18, 'None'],
  ['google', 'France', 'yes', 23, 'Premium'],
  ['digg', 'USA', 'yes', 24, 'Basic'],
  ['kiwitobes', 'France', 'yes', 23, 'Basic'],
  ['google', 'UK', 'no', 21, 'Premium'],
  ['(direct)', 'New Zealand', 'no', 12, 'None'],
  ['(direct)', 'UK', 'no', 21, 'Basic'],
  ['google', 'USA', 'no', 24, 'Premium'],
  ['slashdot', 'France', 'yes', 19, 'None'],
  ['digg', 'USA', 'no', 18, 'None'],
  ['google', 'UK', 'no', 18, 'None'],
  ['kiwitobes', 'UK', 'no', 19, 'None'],
  ['digg', 'New Zealand', 'yes', 12, 'Basic'],
  ['slashdot', 'UK', 'no', 21, 'None'],
  ['google', 'UK', 'yes', 18, 'Basic'],
  ['kiwitobes', 'France', 'yes', 19, 'Basic']
]

# Fit the tree and show its structure.
tree = AprendizajeMaquina::DecisionTree.new(my_data)
print tree.display_tree

# Classify an unseen visitor (a feature row with no label column).
test_data = ['(direct)', 'USA', 'yes', 5]
p tree.predict(test_data)
@@ -4,13 +4,10 @@ require "aprendizaje_maquina/regresion_lineal"
4
4
  require "aprendizaje_maquina/matrixx"
5
5
  require "aprendizaje_maquina/clasificacion_logistica"
6
6
  require "aprendizaje_maquina/clustering"
7
+ require "aprendizaje_maquina/decision_tree"
7
8
 
8
9
  module AprendizajeMaquina
9
10
  #class RedNeuronal
10
11
  # Coming soon...
11
12
  #end
12
-
13
- #class ArbolDecision
14
- # Coming soon...
15
- #end
16
13
  end
module AprendizajeMaquina
  # CART/ID3-style decision tree classifier.
  #
  # The training set is an Array of rows; each row is an Array whose last
  # element is the class label and whose other columns are features.
  # Numeric features split with `>= value`, everything else with `== value`.
  #
  # Internal node representation (built by #build_tree):
  #   [info_gain, [column, value], [true_subset, false_subset]]
  # Leaves are Hashes of label => count (see #count_classes).
  class DecisionTree
    # dataset - Array<Array>: training rows; row[-1] is the label.
    def initialize(dataset)
      @dataset = dataset
      # Memoizes build_tree per (sub)dataset. The tree is re-derived from
      # raw row subsets on every walk, so identical subsets recur often;
      # the original rebuilt them from scratch each time.
      @memo = {}
    end

    # Returns a multi-line String rendering of the fitted tree.
    # Lines read "<true|false> --> column:value?" for internal nodes and
    # "<true|false> --> {label=>count,...}" for leaves.
    def display_tree
      node_root = build_tree(@dataset)
      # Degenerate case: no informative split exists, the whole dataset is
      # one leaf. (The original crashed with NoMethodError here.)
      return "root --> #{node_root}\n" if node_root.is_a?(Hash)
      collection = [node_root]
      branches = []
      tree = "root --> #{node_root[1][0]}:#{node_root[1][1]}?\n"
      for node in 0...node_root[2].length
        branches << build_tree(node_root[2][node])
        collection << branches
        # Bounded loop stands in for "until the frontier is empty"; the
        # break below exits as soon as no unexpanded branches remain.
        1000.times do
          subbranches = []
          true_or_false = lambda { |idx| idx == 0 ? true : false }
          branches.each do |branch|
            if branch.is_a?(Array)
              tree << "#{true_or_false.call(node)} --> "+"#{branch[1][0]}:#{branch[1][1]}?\n"
              # NOTE(review): Ruby `for` does not create a new scope, so this
              # inner `for node` clobbers the outer loop variable and skews
              # the true/false labels on deeper trees. Preserved as-is to
              # keep the rendered output identical.
              for node in 0...branch[2].length
                if build_tree(branch[2][node]).is_a? Hash
                  tree << "#{true_or_false.call(node)} --> "+"#{build_tree(branch[2][node])}\n"
                else
                  subbranches << build_tree(branch[2][node])
                end
              end
            elsif branch.is_a?(Hash)
              tree << "#{true_or_false.call(node)} --> "+"#{branch}\n"
            end
          end
          branches = subbranches
          collection << branches
          if collection.last.empty?
            collection.pop
            break
          end
        end
      end
      return tree
    end

    # Classifies +observation+ (a feature Array with no label column) by
    # walking the tree from the root. Returns the reached leaf: a Hash of
    # label => count among the training rows that fell into that leaf.
    def predict(observation)
      node = build_tree(@dataset)
      until node.is_a?(Hash)
        column, value = node[1]
        feature = observation[column]
        if feature.is_a?(Integer) or feature.is_a?(Float)
          taken = feature >= value
        else
          taken = feature == value
        end
        # Child 0 holds the rows for which the split test was true.
        node = build_tree(node[2][taken ? 0 : 1])
      end
      return node
    end

    private

    # Partitions +dataset+ on +column+: numeric values split with
    # `>= value`, anything else with `== value`.
    # Returns [matching_rows, non_matching_rows].
    def split_dataset(dataset, column, value)
      if value.is_a? Integer or value.is_a? Float
        test = lambda { |row| row[column] >= value }
      else
        test = lambda { |row| row[column] == value }
      end
      dataset.partition { |row| test.call(row) }
    end

    # Tallies class labels: Hash of row[-1] => occurrence count.
    def count_classes(dataset)
      hash_count = {}
      dataset.each do |row|
        label = row[-1]
        hash_count[label] = (hash_count[label] || 0) + 1
      end
      hash_count
    end

    # Shannon entropy (base 2) of the label distribution in +dataset+.
    def entropy(dataset)
      ent = 0.0
      count_classes(dataset).each_value do |count|
        prob = count.to_f / dataset.length
        ent -= prob * Math.log2(prob)
      end
      ent
    end

    # Finds the (column, value) split with the highest information gain.
    # Returns an internal node [gain, [column, value], [true_set, false_set]],
    # or a leaf Hash of label counts when no split improves entropy.
    # Results are memoized per dataset (Array#hash is content-based), which
    # removes the original's repeated full rebuilds of identical subtrees.
    def build_tree(dataset)
      return {} if dataset.empty? # empty subset => empty leaf (was a crash)
      @memo[dataset] ||= begin
        best_info_gain = 0.0
        best_column_and_value = nil
        best_sets = nil
        base_entropy = entropy(dataset) # loop-invariant; hoisted out of the scan
        for column in 0...dataset[0].length - 1 # last column is the label
          for row in dataset
            value = row[column]
            node_true, node_false = split_dataset(dataset, column, value)
            information_gain = base_entropy -
                               (node_true.length.to_f / dataset.length) * entropy(node_true) -
                               (node_false.length.to_f / dataset.length) * entropy(node_false)
            if information_gain > best_info_gain # keep the best split seen so far
              best_info_gain = information_gain
              best_column_and_value = [column, value]
              best_sets = [node_true, node_false]
            end
          end
        end
        if best_info_gain > 0
          [best_info_gain, best_column_and_value, best_sets]
        else
          count_classes(dataset)
        end
      end
    end
  end
end
module AprendizajeMaquina
  # Gem release version (referenced by the gemspec).
  VERSION = "0.1.4"
end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: aprendizaje_maquina
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.4.beta1
4
+ version: 0.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Erickson Morales
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-05-11 00:00:00.000000000 Z
11
+ date: 2018-05-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -76,12 +76,14 @@ files:
76
76
  - examples/clustering_data.csv
77
77
  - examples/clustering_example.rb
78
78
  - examples/data_lg.csv
79
+ - examples/decision_tree_example.rb
79
80
  - examples/regresion_lineal_example.rb
80
81
  - examples/train.csv
81
82
  - lib/aprendizaje_maquina.rb
82
83
  - lib/aprendizaje_maquina/cargar.rb
83
84
  - lib/aprendizaje_maquina/clasificacion_logistica.rb
84
85
  - lib/aprendizaje_maquina/clustering.rb
86
+ - lib/aprendizaje_maquina/decision_tree.rb
85
87
  - lib/aprendizaje_maquina/matrixx.rb
86
88
  - lib/aprendizaje_maquina/regresion_lineal.rb
87
89
  - lib/aprendizaje_maquina/version.rb
@@ -100,9 +102,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
100
102
  version: '0'
101
103
  required_rubygems_version: !ruby/object:Gem::Requirement
102
104
  requirements:
103
- - - ">"
105
+ - - ">="
104
106
  - !ruby/object:Gem::Version
105
- version: 1.3.1
107
+ version: '0'
106
108
  requirements: []
107
109
  rubyforge_project:
108
110
  rubygems_version: 2.6.14