fpgrowth 0.0.2 → 1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -44,12 +44,24 @@ Or install it yourself as:
44
44
 
45
45
  ## Usage
46
46
 
47
+ ### Basic Usage
48
+
49
+ Just do it :
50
+
51
+ ```ruby
52
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
53
+ patterns = FpGrowth.mine(transactions)
54
+ ```
55
+
56
+ ### Advanced Usage
57
+
58
+
47
59
  Build a tree from transactions and mine it
48
60
 
49
61
  ```ruby
50
62
  transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
51
63
  fp_tree = FpGrowth::FpTree.build(transactions)
52
- FpGrowth::Miner.fp_growth(fp_tree)
64
+ FpGrowth::Miner.td_fp_growth(fp_tree)
53
65
 
54
66
  ```
55
67
 
@@ -61,10 +73,39 @@ The larger is the number of transactions, the smaller should be the threshold. I
61
73
  transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
62
74
  fp_tree = FpGrowth::FpTree.build(transactions, 30)
63
75
  # 30 stands for 30% of transactions. Here, 'c' would be pruned.
64
- FpGrowth::Miner.fp_growth(fp_tree)
76
+ FpGrowth::Miner.td_fp_growth(fp_tree)
65
77
 
66
78
  ```
67
79
 
80
+ If you want to avoid the worst case, you should prune the tree into a Bonzai first:
81
+ ```ruby
82
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
83
+ fp_tree = FpGrowth::FpTree.build(transactions, 30)
84
+ bonzai = fp_tree.to_bonzai(20)
85
+ FpGrowth::Miner.td_fp_growth(bonzai)
86
+
87
+ ```
88
+ 20 stands for a hardness of 20%. It means that a node is cut from the tree if its support is lower than 20% of its parent's support.
89
+
90
+ There are two variants of FP-Growth.
91
+ The first one is TopDown; it is the most efficient in most cases.
92
+ Its alternative, the classical FpGrowth, might be more efficient on a very small set.
93
+ Use it this way:
94
+ ```ruby
95
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
96
+ patterns = FpGrowth.fp_growth(transactions)
97
+ ```
98
+ or
99
+ ```ruby
100
+ transactions = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
101
+ fp_tree = FpGrowth::FpTree.build(transactions, 30)
102
+ bonzai = fp_tree.to_bonzai(20)
103
+ FpGrowth::Miner.fp_growth(bonzai)
104
+
105
+ ```
106
+
107
+
108
+
68
109
  ### Examples
69
110
 
70
111
  You can find a few concrete examples on Open Data in the test repository.
data/Rakefile CHANGED
@@ -4,7 +4,7 @@ require 'rake/testtask'
4
4
 
5
5
  Rake::TestTask.new do |t|
6
6
  t.libs << 'test'
7
- t.test_files = FileList['test/tc_*.rb']
7
+ t.test_files = FileList['test/tc*.rb']
8
8
  end
9
9
 
10
10
  desc "Run tests"
@@ -1,8 +1,20 @@
1
1
  require "fpgrowth/version.rb"
2
+ require 'fpgrowth/fp_tree'
3
+ require 'fpgrowth/miner'
2
4
 
3
5
  module FpGrowth
4
- # Your code goes here...
5
- end
6
+ def self.mine(transactions, threshold=1)
7
+ td_fp_growth(transactions, threshold)
8
+ end
6
9
 
7
- require 'fpgrowth/fp_tree'
8
- require 'fpgrowth/miner'
10
+ def self.fp_growth(transactions, threshold=1)
11
+ fp_tree = FpTree.build(transactions, threshold)
12
+ Miner.fp_growth(fp_tree)
13
+ end
14
+
15
+ def self.td_fp_growth(transactions, threshold=1)
16
+ fp_tree = FpTree.build(transactions, threshold)
17
+ Miner.td_fp_growth(fp_tree)
18
+ end
19
+
20
+ end
@@ -1,5 +1,7 @@
1
1
  require_relative 'fp_tree/node'
2
2
  require_relative 'fp_tree/builder'
3
+ require_relative 'fp_tree/bonzai_secateur'
4
+ require_relative 'fp_tree/header_table'
3
5
 
4
6
  require 'graphviz'
5
7
  require 'etc'
@@ -18,8 +20,16 @@ module FpGrowth
18
20
  Builder.build(transactions, threshold)
19
21
  end
20
22
 
21
- def initialize(supports={}, threshold=1)
22
- @root = Node.new()
23
+ def to_bonzai(hardness=20)
24
+ return BonzaiSecateur.new(self, hardness).execute()
25
+ end
26
+
27
+ def to_bonzai!(hardness=20)
28
+ return BonzaiSecateur.new(self, hardness).execute!()
29
+ end
30
+
31
+ def initialize(supports={}, threshold=1, root=Node.new())
32
+ @root = root
23
33
  @heads = Hash.new nil
24
34
  @supports = supports
25
35
  #initialiser les clés
@@ -69,7 +79,7 @@ module FpGrowth
69
79
  node=row
70
80
  while node != nil
71
81
  for child in node.children
72
- g.add_edges(nodonode[node], nodonode[child])
82
+ g.add_edges(nodonode[node], nodonode[child]) if nodonode[child]
73
83
  end
74
84
  node = node.lateral
75
85
  end
@@ -79,8 +89,8 @@ module FpGrowth
79
89
  for row in self.heads.values
80
90
  node=row
81
91
  while node != nil
82
-
83
- g.add_edges(nodonode[node], nodonode[node.lateral], :style => :dashed, :constraint => :false) if node.lateral
92
+ g.add_edges(nodonode[node], nodonode[node.lateral], :style => :dashed, :color=> :green, :constraint => :false) if node.lateral
93
+ g.add_edges(nodonode[node], nodonode[node.parent], :style => :dashed, :color=> :red, :constraint => :false) if node.parent
84
94
  node = node.lateral
85
95
  end
86
96
  end
@@ -109,6 +119,49 @@ module FpGrowth
109
119
  end
110
120
  end
111
121
 
122
+ def cut_branch(node)
123
+ node.children.each { |child| cut_branch(child) }
124
+ remove(node)
125
+ end
126
+
127
# Unlinks +node+ from the lateral (same-item) linked list rooted in @heads.
#
# If +node+ is the head of its chain, the head is moved to the next node, or
# the entry is deleted from @heads when the chain becomes empty. Otherwise the
# chain is walked to find the node whose +lateral+ is +node+ and that link is
# rewired around it. In every case +node.lateral+ is reset to nil.
#
# @param node the node to unlink; must respond to +item+ and +lateral+
# @param verbose [Boolean] when true, prints debugging traces to stdout
def remove_from_lateral(node, verbose = false)
  head = @heads[node.item]
  if head.equal?(node)
    if node.lateral
      @heads[node.item] = node.lateral
    else
      @heads.delete(node.item)
    end
  else
    puts "node #{node.to_s}" if verbose
    puts "pas head" if verbose
    # Walk until we sit on the predecessor of node, or fall off the chain.
    left = head
    left = left.lateral while left && !left.lateral.equal?(node)
    # left is nil when node is not in the chain: nothing to rewire, and the
    # traces must not dereference it.
    puts "left found #{left.lateral}" if verbose && left
    left.lateral = node.lateral if left
    puts "left found #{left.lateral}" if verbose && left
  end
  node.lateral = nil
end
147
+
148
+ def remove(node)
149
+ # Remove from lateral linked list
150
+ remove_from_lateral(node)
151
+
152
+ # attach childrens
153
+ node.parent.children += node.children
154
+ node.children.each { |x| x.parent = node.parent }
155
+
156
+ # Remove from parents
157
+ node.parent.children.delete(node)
158
+
159
+ # Remove from support
160
+ @supports[node.item] -= node.support if @supports[node.item]
161
+
162
+
163
+ end
164
+
112
165
  def items_count
113
166
  sum=0
114
167
  for val in supports.values
@@ -146,6 +199,93 @@ module FpGrowth
146
199
  return @heads.empty?
147
200
  end
148
201
 
202
# Returns an independent copy of this tree.
#
# The node structure is duplicated with +@root.clone_deep+ and the lateral
# links are rebuilt on the copy by +link_down+. The supports hash is
# duplicated too, so that removing nodes from the copy (which decrements its
# supports) does not alter the supports of the original tree.
#
# @return [FpTree] the copied tree
def clone
  copy = FpTree.new(@supports.dup, @threshold, @root.clone_deep)
  copy.link_down
  copy
end
207
+
208
+ def link_down(cursor=@root)
209
+ children = cursor.children.clone
210
+ cursor.children=[]
211
+ children.each { |child|
212
+ append_node(cursor, child)
213
+ }
214
+ children.each { |child| link_down(child) }
215
+ end
216
+
217
+ def size(subtree=@root)
218
+ sum = 1
219
+ subtree.children.each { |child| sum+= size(child) }
220
+ return sum
221
+ end
222
+
223
+ def sum
224
+ sum = 0
225
+ @supports.each { |key, value| sum+=value}
226
+ return sum
227
+ end
228
+
229
+ def lateral_sum
230
+ sum=0
231
+ for cursor in @heads.values
232
+ while cursor != nil
233
+ sum+=cursor.support
234
+ cursor = cursor.lateral
235
+ end
236
+
237
+ end
238
+ return sum
239
+ end
240
+
241
+ def max_width
242
+ max_width=0
243
+ for cursor in @heads.values
244
+ width=0
245
+ while cursor != nil
246
+ width+=1
247
+ cursor = cursor.lateral
248
+ end
249
+ max_width = width if max_width < width
250
+ end
251
+ return max_width
252
+ end
253
+
254
# Checks every lateral chain stored in @heads for a cycle.
#
# Each chain is followed through +lateral+ while remembering the object ids
# already visited; meeting a visited node again means the chain loops.
#
# @return the key in @heads of the first chain containing a cycle, or
#   +false+ when no chain loops
def has_lateral_cycle?
  @heads.each do |key, cursor|
    # Hash lookup keeps the membership test constant-time per node.
    seen = {}
    while cursor
      return key if seen.key?(cursor.object_id)
      seen[cursor.object_id] = true
      cursor = cursor.lateral
    end
  end
  false
end
274
+
275
+ def header_table
276
+ unless @header_table
277
+ @header_table = HeaderTable.new()
278
+ for row in @heads.keys
279
+ node = @heads[row]
280
+ while node != nil
281
+ @header_table << [node.item, node.support, node]
282
+ node = node.lateral
283
+ end
284
+ end
285
+ return @header_table
286
+ end
287
+ end
288
+
149
289
  end
150
290
  end
151
291
  end
@@ -0,0 +1,30 @@
1
module FpGrowth
  module FpTree
    # Prunes an FP-Tree into a "bonzai".
    #
    # Starting from the root, a child branch is cut when its support is lower
    # than (hardness + depth) percent of the support of its parent, so the
    # cut gets stricter as the traversal goes deeper.
    class BonzaiSecateur

      # @param fp_tree the tree to prune; must respond to +root+, +clone+
      #   and +cut_branch+
      # @param hardness [Numeric] default pruning percentage
      def initialize(fp_tree, hardness = 20)
        @fp_tree = fp_tree
        @hardness = hardness
      end

      # Prunes a tree and returns it.
      #
      # @param hardness [Numeric] pruning percentage used for this run
      # @param fp_tree the tree to prune; defaults to a clone of the tree
      #   given at construction, leaving that one untouched
      # @return the pruned tree
      def execute(hardness = @hardness, fp_tree = @fp_tree.clone)
        traverse(fp_tree, fp_tree.root, 0, hardness)
        fp_tree
      end

      # Prunes, in place, the tree given at construction.
      #
      # @param hardness [Numeric] pruning percentage used for this run
      # @return the pruned tree
      def execute!(hardness = @hardness)
        execute(hardness, @fp_tree)
      end

      # Recursively cuts the weak branches below +cursor+.
      #
      # @param fp_tree the tree being pruned
      # @param cursor the node whose children are examined
      # @param deepness [Integer] depth of +cursor+, added to the hardness
      # @param hardness [Numeric] pruning percentage; defaults to the value
      #   given at construction
      def traverse(fp_tree, cursor = fp_tree.root, deepness = 0, hardness = @hardness)
        threshold = cursor.support.to_f / 100 * (hardness + deepness)
        # Iterate over a snapshot: cut_branch alters the children list.
        cursor.children.clone.each do |child|
          fp_tree.cut_branch(child) if child.support < threshold
        end
        cursor.children.each { |child| traverse(fp_tree, child, deepness + 1, hardness) }
      end

    end
  end
end
@@ -1,4 +1,5 @@
1
1
  require_relative 'builder/first_pass'
2
+ require_relative 'builder/header_table_builder'
2
3
  require_relative 'builder/second_pass'
3
4
 
4
5
 
@@ -1,4 +1,3 @@
1
-
2
1
  module FpGrowth
3
2
  module FpTree
4
3
  module Builder
@@ -35,16 +34,12 @@ module FpGrowth
35
34
  def pruning(transactions=@transactions, supports=@supports, threshold=@threshold)
36
35
 
37
36
  minimum = transactions.size.to_f / 100 * threshold
38
-
39
37
  for transaction in transactions
40
- for item in transaction
41
-
42
- transaction.delete(item) if supports[item] < minimum
43
- end
38
+ transaction.delete_if { |item| supports[item] < minimum }
44
39
  end
45
40
  transactions.delete([])
46
- supports.delete_if { |key, value| value < minimum }
47
41
 
42
+ supports.delete_if { |key, value| value < minimum }
48
43
  return supports
49
44
  end
50
45
 
@@ -0,0 +1,37 @@
1
+ require_relative "../header_table"
2
+
3
+ module FpGrowth
4
+ module FpTree
5
+ module Builder
6
+ class HeaderTableBuilder
7
+
8
+
9
+ def initialize(item, header_table)
10
+ @header_table = header_table
11
+ @item = item
12
+ @new_header_table = HeaderTable.new()
13
+ end
14
+
15
+ def execute()
16
+ # for each node n in header for item
17
+
18
+
19
+ for node in @header_table.nodes[@item]
20
+ # traverse tree from n to top
21
+ traverse_from_node_top_top(node.parent, node.support)
22
+ end
23
+ return @new_header_table
24
+ end
25
+
26
+ def traverse_from_node_top_top(node, support)
27
+ if node.item
28
+ # For each node m
29
+ @new_header_table << [node.item, support, node]
30
+ traverse_from_node_top_top(node.parent, support)
31
+ end
32
+ end
33
+
34
+ end
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,41 @@
1
+ require 'set'
2
+ module FpGrowth
3
+ module FpTree
4
+ class HeaderTable
5
+
6
+ def self.build(item, header_table)
7
+ builder = Builder::HeaderTableBuilder.new(item, header_table)
8
+ return builder.execute()
9
+ end
10
+
11
+
12
+ def initialize()
13
+ @count = Hash.new 0
14
+ @nodes = Hash.new { Set.new() }
15
+ end
16
+
17
+ attr_accessor :count, :nodes
18
+
19
+ def keys
20
+ @nodes.keys
21
+ end
22
+
23
+ # Append a Row
24
+ # @param row Array as [item, support, node]
25
+ #
26
+ def << (row)
27
+ # Add a link for m in HeaderTable
28
+ @nodes[row[0]] = @nodes[row[0]] << row[2]
29
+ # Add support m = previous + n
30
+ @count[row[0]] += row[1]
31
+ end
32
+
33
+ end
34
+ end
35
+ end
36
+
37
+ class Set
38
+ def to_s
39
+ to_a.join(', ')
40
+ end
41
+ end
@@ -41,6 +41,7 @@ module FpGrowth
41
41
  return flag
42
42
  end
43
43
 
44
+
44
45
  end
45
46
  end
46
47
  end
@@ -15,6 +15,11 @@ module FpGrowth
15
15
  return miner.pattern_set
16
16
  end
17
17
 
18
+ def self.td_fp_growth(fp_tree)
19
+ miner = Miner.new()
20
+ miner.top_down_fp_growth(fp_tree)
21
+ return miner.pattern_set
22
+ end
18
23
 
19
24
  class Miner
20
25
 
@@ -42,7 +47,7 @@ module FpGrowth
42
47
  pattern_beta << node
43
48
  end
44
49
  @pattern_set << pattern_beta
45
- #puts "Pattern extracted : #{pattern_beta.content.to_s}"
50
+ #puts "Pattern extracted : #{pattern_beta.content.to_s} - #{pattern_beta.support}"
46
51
  end
47
52
  else
48
53
  for item in fp_tree.supports.keys
@@ -56,7 +61,32 @@ module FpGrowth
56
61
  end
57
62
  end
58
63
  end
59
- end
64
+
65
# Mines patterns with the top-down variant of FP-Growth.
#
# For every item of the header table whose accumulated support is strictly
# greater than +min_support+, the current pattern extended with that item is
# pushed to @pattern_set, a new header table is built for the item, and the
# extended pattern is mined recursively with the same +min_support+.
#
# @param header_table a header table, or any object exposing one through
#   +header_table+ (an FP-Tree)
# @param pattern_alpha the pattern being extended; starts empty
# @param min_support [Numeric] supports must be strictly greater than this
def top_down_fp_growth(header_table, pattern_alpha = Pattern.new(), min_support = 0)
  # Accept a tree as entry point and mine its header table.
  header_table = header_table.header_table if header_table.respond_to?(:header_table)

  header_table.keys.each do |row|
    support = header_table.count[row]
    next unless support > min_support

    pattern_beta = Pattern.new(pattern_alpha.content + [row], support)
    @pattern_set << pattern_beta
    header_table_new = FpTree::HeaderTable.build(row, header_table)
    # Carry min_support down, otherwise deeper levels fall back to 0.
    top_down_fp_growth(header_table_new, pattern_beta, min_support)
  end
end
88
+
89
+ end #class
60
90
 
61
91
  end
62
92
  end