fpgrowth 0.0.2 → 1

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -44,12 +44,24 @@ Or install it yourself as:
44
44
 
45
45
  ## Usage
46
46
 
47
+ ### Basic Usage
48
+
49
+ Just do it:
50
+
51
+ ```ruby
52
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
53
+ patterns = FpGrowth.mine(transactions)
54
+ ```
55
+
56
+ ### Advanced Usage
57
+
58
+
47
59
  Build a tree from transactions and mine it
48
60
 
49
61
  ```ruby
50
62
  transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
51
63
  fp_tree = FpGrowth::FpTree.build(transactions)
52
- FpGrowth::Miner.fp_growth(fp_tree)
64
+ FpGrowth::Miner.td_fp_growth(fp_tree)
53
65
 
54
66
  ```
55
67
 
@@ -61,10 +73,39 @@ The larger is the number of transactions, the smaller should be the threshold. I
61
73
  transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
62
74
  fp_tree = FpGrowth::FpTree.build(transactions, 30)
63
75
  # 30 stands for 30% of transactions. Here, 'c' would be pruned.
64
- FpGrowth::Miner.fp_growth(fp_tree)
76
+ FpGrowth::Miner.td_fp_growth(fp_tree)
65
77
 
66
78
  ```
67
79
 
80
+ If you want to avoid the worst case, you should turn the tree into a Bonzai!
81
+ ```ruby
82
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
83
+ fp_tree = FpGrowth::FpTree.build(transactions, 30)
84
+ bonzai = fp_tree.to_bonzai(20)
85
+ FpGrowth::Miner.td_fp_growth(bonzai)
86
+
87
+ ```
88
+ 20 stands for a hardness of 20%. It means that a node is cut from the tree if its support is lower than 20% of its parent's support.
89
+
90
+ There are two variants of FP-Growth.
91
+ The first one is TopDown, which is the most efficient in most cases.
92
+ For some reason, its alternative, the classical FpGrowth, might be more efficient on a very small set.
93
+ Use it this way:
94
+ ```ruby
95
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
96
+ patterns = FpGrowth.fp_growth(transactions)
97
+ ```
98
+ or
99
+ ```ruby
100
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
101
+ fp_tree = FpGrowth::FpTree.build(transactions, 30)
102
+ bonzai = fp_tree.to_bonzai(20)
103
+ FpGrowth::Miner.fp_growth(bonzai)
104
+
105
+ ```
106
+
107
+
108
+
68
109
  ### Examples
69
110
 
70
111
  You can find a few concrete examples on Open Data in the test repository.
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rake/testtask'
4
4
 
5
5
  Rake::TestTask.new do |t|
6
6
  t.libs << 'test'
7
- t.test_files = FileList['test/tc_*.rb']
7
+ t.test_files = FileList['test/tc*.rb']
8
8
  end
9
9
 
10
10
  desc "Run tests"
@@ -1,8 +1,20 @@
1
1
  require "fpgrowth/version.rb"
2
+ require 'fpgrowth/fp_tree'
3
+ require 'fpgrowth/miner'
2
4
 
3
5
  module FpGrowth
4
- # Your code goes here...
5
- end
6
+ def self.mine(transactions, threshold=1)
7
+ td_fp_growth(transactions, threshold)
8
+ end
6
9
 
7
- require 'fpgrowth/fp_tree'
8
- require 'fpgrowth/miner'
10
+ def self.fp_growth(transactions, threshold=1)
11
+ fp_tree = FpTree.build(transactions, threshold)
12
+ Miner.fp_growth(fp_tree)
13
+ end
14
+
15
+ def self.td_fp_growth(transactions, threshold=1)
16
+ fp_tree = FpTree.build(transactions, threshold)
17
+ Miner.td_fp_growth(fp_tree)
18
+ end
19
+
20
+ end
@@ -1,5 +1,7 @@
1
1
  require_relative 'fp_tree/node'
2
2
  require_relative 'fp_tree/builder'
3
+ require_relative 'fp_tree/bonzai_secateur'
4
+ require_relative 'fp_tree/header_table'
3
5
 
4
6
  require 'graphviz'
5
7
  require 'etc'
@@ -18,8 +20,16 @@ module FpGrowth
18
20
  Builder.build(transactions, threshold)
19
21
  end
20
22
 
21
- def initialize(supports={}, threshold=1)
22
- @root = Node.new()
23
+ def to_bonzai(hardness=20)
24
+ return BonzaiSecateur.new(self, hardness).execute()
25
+ end
26
+
27
+ def to_bonzai!(hardness=20)
28
+ return BonzaiSecateur.new(self, hardness).execute!()
29
+ end
30
+
31
+ def initialize(supports={}, threshold=1, root=Node.new())
32
+ @root = root
23
33
  @heads = Hash.new nil
24
34
  @supports = supports
25
35
  #initialiser les clés
@@ -69,7 +79,7 @@ module FpGrowth
69
79
  node=row
70
80
  while node != nil
71
81
  for child in node.children
72
- g.add_edges(nodonode[node], nodonode[child])
82
+ g.add_edges(nodonode[node], nodonode[child]) if nodonode[child]
73
83
  end
74
84
  node = node.lateral
75
85
  end
@@ -79,8 +89,8 @@ module FpGrowth
79
89
  for row in self.heads.values
80
90
  node=row
81
91
  while node != nil
82
-
83
- g.add_edges(nodonode[node], nodonode[node.lateral], :style => :dashed, :constraint => :false) if node.lateral
92
+ g.add_edges(nodonode[node], nodonode[node.lateral], :style => :dashed, :color=> :green, :constraint => :false) if node.lateral
93
+ g.add_edges(nodonode[node], nodonode[node.parent], :style => :dashed, :color=> :red, :constraint => :false) if node.parent
84
94
  node = node.lateral
85
95
  end
86
96
  end
@@ -109,6 +119,49 @@ module FpGrowth
109
119
  end
110
120
  end
111
121
 
122
+ def cut_branch(node)
123
+ node.children.each { |child| cut_branch(child) }
124
+ remove(node)
125
+ end
126
+
127
+ def remove_from_lateral(node, verbose=false)
128
+ if @heads[node.item].equal?(node)
129
+ if node.lateral
130
+ @heads[node.item] = node.lateral
131
+ else
132
+ @heads.delete(node.item)
133
+ end
134
+ else
135
+ puts "node #{node.to_s}" if verbose
136
+ puts "pas head" if verbose
137
+ left = @heads[node.item]
138
+ while left != nil and not left.equal? node and not left.lateral.equal? node
139
+ left = left.lateral
140
+ end
141
+ puts "left found #{left.lateral}" if verbose
142
+ left.lateral = node.lateral if left
143
+ puts "left found #{left.lateral}" if verbose
144
+ end
145
+ node.lateral=nil
146
+ end
147
+
148
+ def remove(node)
149
+ # Remove from lateral linked list
150
+ remove_from_lateral(node)
151
+
152
+ # attach childrens
153
+ node.parent.children += node.children
154
+ node.children.each { |x| x.parent = node.parent }
155
+
156
+ # Remove from parents
157
+ node.parent.children.delete(node)
158
+
159
+ # Remove from support
160
+ @supports[node.item] -= node.support if @supports[node.item]
161
+
162
+
163
+ end
164
+
112
165
  def items_count
113
166
  sum=0
114
167
  for val in supports.values
@@ -146,6 +199,93 @@ module FpGrowth
146
199
  return @heads.empty?
147
200
  end
148
201
 
202
+ def clone
203
+ clone = FpTree.new(@supports, @threshold, @root.clone_deep)
204
+ clone.link_down()
205
+ return clone
206
+ end
207
+
208
+ def link_down(cursor=@root)
209
+ children = cursor.children.clone
210
+ cursor.children=[]
211
+ children.each { |child|
212
+ append_node(cursor, child)
213
+ }
214
+ children.each { |child| link_down(child) }
215
+ end
216
+
217
+ def size(subtree=@root)
218
+ sum = 1
219
+ subtree.children.each { |child| sum+= size(child) }
220
+ return sum
221
+ end
222
+
223
+ def sum
224
+ sum = 0
225
+ @supports.each { |key, value| sum+=value}
226
+ return sum
227
+ end
228
+
229
+ def lateral_sum
230
+ sum=0
231
+ for cursor in @heads.values
232
+ while cursor != nil
233
+ sum+=cursor.support
234
+ cursor = cursor.lateral
235
+ end
236
+
237
+ end
238
+ return sum
239
+ end
240
+
241
+ def max_width
242
+ max_width=0
243
+ for cursor in @heads.values
244
+ width=0
245
+ while cursor != nil
246
+ width+=1
247
+ cursor = cursor.lateral
248
+ end
249
+ max_width = width if max_width < width
250
+ end
251
+ return max_width
252
+ end
253
+
254
+ def has_lateral_cycle?
255
+ i = 0
256
+ while i < @heads.keys.size
257
+ key = @heads.keys[i]
258
+ cursor = @heads[key]
259
+ stack = []
260
+ flag = false
261
+ j=0
262
+ while cursor != nil and not flag
263
+ flag = true if stack.include?(cursor.object_id)
264
+ stack.push(cursor.object_id)
265
+ cursor = cursor.lateral
266
+ j += 1
267
+ #puts "#{i}/#{@heads.keys.size} - #{j}"
268
+ end
269
+ return key if flag
270
+ i += 1
271
+ end
272
+ return false
273
+ end
274
+
275
+ def header_table
276
+ unless @header_table
277
+ @header_table = HeaderTable.new()
278
+ for row in @heads.keys
279
+ node = @heads[row]
280
+ while node != nil
281
+ @header_table << [node.item, node.support, node]
282
+ node = node.lateral
283
+ end
284
+ end
285
+ return @header_table
286
+ end
287
+ end
288
+
149
289
  end
150
290
  end
151
291
  end
@@ -0,0 +1,30 @@
1
+ module FpGrowth
2
+ module FpTree
3
+ class BonzaiSecateur
4
+
5
+ def initialize(fp_tree, hardness=20)
6
+ @fp_tree=fp_tree
7
+ @hardness=hardness
8
+ end
9
+
10
+ def execute(hardness=@hardness, fp_tree=@fp_tree.clone)
11
+ traverse(fp_tree)
12
+ return fp_tree
13
+ end
14
+
15
+ def execute!(hardness=@hardness)
16
+ return execute(hardness, @fp_tree)
17
+ end
18
+
19
+ def traverse(fp_tree, cursor = fp_tree.root, deepness=0)
20
+ children = cursor.children.clone
21
+ threshold = cursor.support.to_f / 100 * (@hardness + deepness)
22
+ children.each { |child|
23
+ fp_tree.cut_branch(child) if child.support < threshold
24
+ }
25
+ cursor.children.each { |child| traverse(fp_tree, child, deepness + 1) }
26
+ end
27
+
28
+ end
29
+ end
30
+ end
@@ -1,4 +1,5 @@
1
1
  require_relative 'builder/first_pass'
2
+ require_relative 'builder/header_table_builder'
2
3
  require_relative 'builder/second_pass'
3
4
 
4
5
 
@@ -1,4 +1,3 @@
1
-
2
1
  module FpGrowth
3
2
  module FpTree
4
3
  module Builder
@@ -35,16 +34,12 @@ module FpGrowth
35
34
  def pruning(transactions=@transactions, supports=@supports, threshold=@threshold)
36
35
 
37
36
  minimum = transactions.size.to_f / 100 * threshold
38
-
39
37
  for transaction in transactions
40
- for item in transaction
41
-
42
- transaction.delete(item) if supports[item] < minimum
43
- end
38
+ transaction.delete_if { |item| supports[item] < minimum }
44
39
  end
45
40
  transactions.delete([])
46
- supports.delete_if { |key, value| value < minimum }
47
41
 
42
+ supports.delete_if { |key, value| value < minimum }
48
43
  return supports
49
44
  end
50
45
 
@@ -0,0 +1,37 @@
1
+ require_relative "../header_table"
2
+
3
+ module FpGrowth
4
+ module FpTree
5
+ module Builder
6
+ class HeaderTableBuilder
7
+
8
+
9
+ def initialize(item, header_table)
10
+ @header_table = header_table
11
+ @item = item
12
+ @new_header_table = HeaderTable.new()
13
+ end
14
+
15
+ def execute()
16
+ # for each node n in header for item
17
+
18
+
19
+ for node in @header_table.nodes[@item]
20
+ # traverse tree from n to top
21
+ traverse_from_node_top_top(node.parent, node.support)
22
+ end
23
+ return @new_header_table
24
+ end
25
+
26
+ def traverse_from_node_top_top(node, support)
27
+ if node.item
28
+ # For each node m
29
+ @new_header_table << [node.item, support, node]
30
+ traverse_from_node_top_top(node.parent, support)
31
+ end
32
+ end
33
+
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,41 @@
1
+ require 'set'
2
+ module FpGrowth
3
+ module FpTree
4
+ class HeaderTable
5
+
6
+ def self.build(item, header_table)
7
+ builder = Builder::HeaderTableBuilder.new(item, header_table)
8
+ return builder.execute()
9
+ end
10
+
11
+
12
+ def initialize()
13
+ @count = Hash.new 0
14
+ @nodes = Hash.new { Set.new() }
15
+ end
16
+
17
+ attr_accessor :count, :nodes
18
+
19
+ def keys
20
+ @nodes.keys
21
+ end
22
+
23
+ # Append a Row
24
+ # @param row Array as [item, support, node]
25
+ #
26
+ def << (row)
27
+ # Add a link for m in HeaderTable
28
+ @nodes[row[0]] = @nodes[row[0]] << row[2]
29
+ # Add support m = previous + n
30
+ @count[row[0]] += row[1]
31
+ end
32
+
33
+ end
34
+ end
35
+ end
36
+
37
+ class Set
38
+ def to_s
39
+ to_a.join(', ')
40
+ end
41
+ end
@@ -41,6 +41,7 @@ module FpGrowth
41
41
  return flag
42
42
  end
43
43
 
44
+
44
45
  end
45
46
  end
46
47
  end
@@ -15,6 +15,11 @@ module FpGrowth
15
15
  return miner.pattern_set
16
16
  end
17
17
 
18
+ def self.td_fp_growth(fp_tree)
19
+ miner = Miner.new()
20
+ miner.top_down_fp_growth(fp_tree)
21
+ return miner.pattern_set
22
+ end
18
23
 
19
24
  class Miner
20
25
 
@@ -42,7 +47,7 @@ module FpGrowth
42
47
  pattern_beta << node
43
48
  end
44
49
  @pattern_set << pattern_beta
45
- #puts "Pattern extracted : #{pattern_beta.content.to_s}"
50
+ #puts "Pattern extracted : #{pattern_beta.content.to_s} - #{pattern_beta.support}"
46
51
  end
47
52
  else
48
53
  for item in fp_tree.supports.keys
@@ -56,7 +61,32 @@ module FpGrowth
56
61
  end
57
62
  end
58
63
  end
59
- end
64
+
65
+ def top_down_fp_growth(header_table, pattern_alpha=Pattern.new(), min_support=0)
66
+
67
+ if header_table.instance_of? FpTree::FpTree
68
+ header_table = header_table.header_table
69
+ end
70
+
71
+
72
+ # For each row of header_table
73
+ for row in header_table.keys
74
+ # If Support of header_table > min_support
75
+ if header_table.count[row] > min_support then
76
+ # output pattern extended with row.item
77
+ pattern_beta = Pattern.new(pattern_alpha.content + [row], header_table.count[row])
78
+ @pattern_set << pattern_beta
79
+ # puts "Pattern extracted : #{pattern_beta.content.to_s} - #{pattern_beta.support}"
80
+ # Build new Header Table
81
+ header_table_new = FpTree::HeaderTable.build(row, header_table)
82
+ # Mine extended pattern, new header table
83
+ top_down_fp_growth(header_table_new, pattern_beta)
84
+ end
85
+ end
86
+
87
+ end
88
+
89
+ end #class
60
90
 
61
91
  end
62
92
  end