fpgrowth 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +18 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +88 -0
- data/Rakefile +11 -0
- data/fpgrowth-ruby.gemspec +23 -0
- data/lib/fpgrowth.rb +8 -0
- data/lib/fpgrowth/fp_tree.rb +153 -0
- data/lib/fpgrowth/fp_tree/builder.rb +19 -0
- data/lib/fpgrowth/fp_tree/builder/first_pass.rb +72 -0
- data/lib/fpgrowth/fp_tree/builder/second_pass.rb +70 -0
- data/lib/fpgrowth/fp_tree/node.rb +46 -0
- data/lib/fpgrowth/miner.rb +62 -0
- data/lib/fpgrowth/miner/conditional_tree_builder.rb +123 -0
- data/lib/fpgrowth/miner/pattern.rb +47 -0
- data/lib/fpgrowth/miner/pattern_base_extractor.rb +70 -0
- data/lib/fpgrowth/version.rb +3 -0
- data/test/montreal-sondage/mtlsat12.csv +1202 -0
- data/test/montreal-velos-comptage/2009.csv +366 -0
- data/test/montreal-velos-comptage/2010.csv +366 -0
- data/test/montreal-velos-comptage/2011.csv +366 -0
- data/test/montreal-velos-comptage/2012.csv +311 -0
- data/test/tc_builder.rb +65 -0
- data/test/tc_first_pass.rb +119 -0
- data/test/tc_fp_tree.rb +168 -0
- data/test/tc_miner.rb +116 -0
- data/test/tc_node.rb +101 -0
- data/test/tc_open_data_sondage_montreal.rb +49 -0
- data/test/tc_open_data_velo_montreal.rb +58 -0
- data/test/tc_pattern.rb +56 -0
- data/test/tc_second_pass.rb +148 -0
- metadata +131 -0
data/test/tc_builder.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'fpgrowth/fp_tree/builder'
|
3
|
+
|
4
|
+
# Tests FpGrowth::FpTree::Builder.build against a small hand-written
# transaction list and against a randomly generated one.
class TestBuilder < Test::Unit::TestCase
  # Prepares two fixtures:
  # * @random_transactions - @n + 1 transactions, each holding @m + 1 items
  #   drawn at random from 'a'..'e';
  # * @non_random - four fixed transactions over 'a', 'b' and 'c'.
  def setup
    alphabet = ['a', 'b', 'c', 'd', 'e']
    rng = Random.new

    @n = rng.rand(100..500)
    @random_transactions = []
    (0..@n).each do |row|
      @m = rng.rand(1..5)
      @random_transactions[row] = []
      (0..@m).each do
        # Trick so that 'e' gets pruned: only one draw out of ten may pick
        # from the whole alphabet; every other draw excludes the last item.
        pool_size = rng.rand(10) == 9 ? alphabet.size : alphabet.size - 1
        @random_transactions[row] << alphabet[rng.rand(pool_size)]
      end
    end

    @non_random = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
  end

  # Nothing to clean up.
  def teardown
  end

  # Builds a tree from each fixture and checks the shape of the
  # deterministic one: a single 'b' node under the root, whose first and
  # last children are 'a' and 'c'.
  def test_build
    tree = nil

    assert_nothing_raised do
      tree = FpGrowth::FpTree::Builder.build(@non_random, 0.5)
    end
    assert_not_nil(tree)
    assert_instance_of(FpGrowth::FpTree::FpTree, tree)

    assert_equal(nil, tree.root.item)
    assert_equal(1, tree.root.children.size)
    assert_equal('b', tree.root.children.first.item)
    assert_equal('a', tree.root.children.first.children.first.item)
    assert_equal('c', tree.root.children.first.children.last.item)

    # The random fixture is only checked for a successful build.
    assert_nothing_raised do
      tree = FpGrowth::FpTree::Builder.build(@random_transactions)
    end
    assert_not_nil(tree)
    assert_instance_of(FpGrowth::FpTree::FpTree, tree)
  end
end
|
data/test/tc_first_pass.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'fpgrowth/fp_tree/builder/first_pass'
|
3
|
+
|
4
|
+
# Tests FpGrowth::FpTree::Builder::FirstPass: scan, pruning, sort and execute.
class TestFirstPass < Test::Unit::TestCase
  # Generates a random list of transactions plus a small fixed one, then
  # scans both so every test starts with their support tables.
  def setup
    items = ['a', 'b', 'c', 'd', 'e']
    rng = Random.new

    @n = rng.rand(100..500)
    # @n + 1 transactions of one to six items each.
    @random_transactions = Array.new(@n + 1) do
      item_count = rng.rand(0..5) + 1
      Array.new(item_count) do
        # Trick so that 'e' gets pruned: only one draw out of 21 may pick
        # from the whole list; every other draw excludes the last item.
        pool_size = rng.rand(21) == 20 ? items.size : items.size - 1
        items[rng.rand(pool_size)]
      end
    end

    # Trick so that one transaction gets emptied.
    @random_transactions << ['e']

    @non_random = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]

    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    @support_random = first_pass.scan(@random_transactions)
    @support_non_random = first_pass.scan(@non_random)
  end

  # Called after every test method runs. Nothing to tear down.
  def teardown
    # Do nothing
  end

  # Checks the supports counted by scan.
  def test_scan
    assert_equal(2, @support_non_random['a'])
    assert_equal(4, @support_non_random['b'])
    assert_equal(1, @support_non_random['c'])

    assert_equal(5, @support_random.size)
    @support_random.values.each do |support|
      assert_operator(@n * 5, ">=", support)
    end
  end

  # Checks that pruning drops 'e' (and the transactions it empties) from the
  # random fixture and leaves the small fixed fixture untouched.
  def test_pruning
    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    # Copy each transaction as well: Array#clone is shallow, so the inner
    # arrays would otherwise still be shared with the fixtures.
    random_transactions = @random_transactions.map(&:dup)
    non_random = @non_random.map(&:dup)

    random_threshold = 10
    support_random_pruned = first_pass.pruning(random_transactions, @support_random.clone, random_threshold)
    support_non_random_pruned = first_pass.pruning(non_random, @support_non_random.clone, 20)

    # There must be no pruning, considering how few elements there are.
    # Assert on the pruning result: the fixture itself was only passed as a
    # copy, so checking it would always succeed.
    assert_equal(3, support_non_random_pruned.size, "Supports : #{support_non_random_pruned}")

    # Diagnostic only; assumes the threshold is a percentage of the number
    # of transactions - TODO confirm against FirstPass#pruning.
    minimum = @random_transactions.size.to_f / 100 * random_threshold

    assert_operator(5, ">", support_random_pruned.size, "En plus e : #{@support_random['e']} et minimum : #{minimum}")
    random_transactions.each do |transaction|
      assert_not_equal(0, transaction.size)
      assert(!transaction.include?('e'), "e doit avoir disparu !")
    end
    assert_operator(@random_transactions.size, ">", random_transactions.size)
  end

  # Checks that sort orders the supports from most to least frequent.
  def test_sort
    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    support_random_sorted = first_pass.sort(@support_random)
    support_non_random_sorted = first_pass.sort(@support_non_random)

    assert_equal('b', support_non_random_sorted.keys[0])
    assert_equal('a', support_non_random_sorted.keys[1])

    # Every key must have a support at least as large as the key after it.
    support_random_sorted.keys.each_cons(2) do |current, following|
      assert_operator(support_random_sorted[current], ">=", support_random_sorted[following])
    end
  end

  # Checks the whole first pass: the result is a Hash, nothing is pruned
  # from the fixed fixture, and only four items survive in the random one.
  def test_execute
    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    non_random_first_passed = first_pass.execute(@non_random)
    random_first_passed = first_pass.execute(@random_transactions, 20)

    assert_instance_of(Hash, non_random_first_passed)
    assert_instance_of(Hash, random_first_passed)

    assert_equal(3, non_random_first_passed.size)
    assert_equal(4, random_first_passed.size, "En plus : #{random_first_passed}")
  end
end
|
data/test/tc_fp_tree.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require "fpgrowth/fp_tree"
|
3
|
+
require "fpgrowth/fp_tree/node"
|
4
|
+
require "fpgrowth/fp_tree/builder/first_pass"
|
5
|
+
|
6
|
+
# Tests FpGrowth::FpTree::FpTree: construction, item ordering, child
# sorting, node insertion, single-path detection and combinations.
class TestFpTree < Test::Unit::TestCase
  # Called before every test method runs. No shared fixture is needed.
  def setup
    # Do nothing
  end

  # Called after every test method runs. Nothing to tear down.
  def teardown
    # Do nothing
  end

  # A tree can be created without arguments, from an empty support table
  # and from a populated one.
  def test_initialize
    fp_tree = nil

    # no arguments
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new() }
    assert_tree_skeleton(fp_tree)
    assert_equal({}, fp_tree.supports)

    # empty support table
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new({}) }
    assert_tree_skeleton(fp_tree)
    assert_equal({}, fp_tree.supports)

    # populated support table
    support = { 'a' => 1, 'b' => 2 }
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new(support) }
    assert_tree_skeleton(fp_tree)
    assert(fp_tree.heads.has_key?('a'), "a n'existe pas")
    assert(fp_tree.heads.has_key?('b'), "b n'existe pas !")
    assert_equal(2, fp_tree.heads.length)
    assert(fp_tree.supports.has_key?('a'), "a n'existe pas")
    assert(fp_tree.supports.has_key?('b'), "b n'existe pas !")
    assert_equal(2, fp_tree.supports.length)
  end

  # Checks FpTree#item_order_lookup (this test's name keeps its original
  # spelling).
  def test_item_oder_lookup
    # lookup on a tree built without arguments
    fp_tree = nil
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new() }
    lookup = fp_tree.item_order_lookup
    assert_equal({}, lookup)

    # lookup on a tree built from a support table
    support = { 'a' => 1, 'b' => 2 }
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new(support) }
    lookup = fp_tree.item_order_lookup
    assert_equal(0, lookup['a'])
    assert_equal(1, lookup['b'])
  end

  # Placeholder: find_lateral_leaf_for_item has no assertions yet.
  def test_find_lateral_leaf_for_item
  end

  # Sorting a list of nodes must put them in the order of the support table.
  def test_sort_children_by_support
    @non_random = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
    @supports_non_random = FpGrowth::FpTree::Builder::FirstPass.new().execute(@non_random)

    second_pass = FpGrowth::FpTree::Builder::SecondPass.new(@supports_non_random)

    nodes = [
      FpGrowth::FpTree::Node.new('a'),
      FpGrowth::FpTree::Node.new('c'),
      FpGrowth::FpTree::Node.new('b')
    ]

    assert_nothing_raised { second_pass.fp_tree.sort_children_by_support(nodes) }

    assert_equal(@supports_non_random.keys.first, nodes.first.item)
    assert_equal(@supports_non_random.keys[1], nodes[1].item)
  end

  # Exercises append_node: first insertion, lateral insertion, insertion
  # one level deeper, and the resulting order of siblings.
  def test_append_node
    parent = FpGrowth::FpTree::Node.new()
    child = FpGrowth::FpTree::Node.new('a')
    @non_random = [['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']]
    @supports_non_random = FpGrowth::FpTree::Builder::FirstPass.new().execute(@non_random)

    # First insertion
    second_pass = FpGrowth::FpTree::Builder::SecondPass.new(@supports_non_random)

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_not_equal(0, parent.children.size)
    assert_equal(child, parent.children.last)
    assert_equal(child, second_pass.fp_tree.heads['a'])
    assert_equal(parent, child.parent)

    # Lateral insertion
    child = FpGrowth::FpTree::Node.new('a')

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_not_equal(0, parent.children.size)
    assert_equal(child, parent.children[1])
    assert_equal(child, second_pass.fp_tree.heads['a'].lateral)

    # Insertion one level deeper
    parent = parent.children[0]
    child = FpGrowth::FpTree::Node.new('b')

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_not_equal(0, parent.children.size)
    assert_equal(child, parent.children[0])
    assert_equal(child, second_pass.fp_tree.heads['b'])
    assert_equal(parent, child.parent)

    # Check the order of the children
    parent = FpGrowth::FpTree::Node.new()
    child = FpGrowth::FpTree::Node.new('a')
    child2 = FpGrowth::FpTree::Node.new('b')

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child2) }

    assert_equal('b', parent.children.first.item)
    assert_equal('a', parent.children.last.item)
  end

  # A tree built from one transaction is a single path until a second
  # child is appended to its root.
  def test_single_path
    extra_child = FpGrowth::FpTree::Node.new('a')

    fptree = FpGrowth::FpTree.build([['b', 'a'] * 3])

    assert_equal(true, fptree.single_path?)

    fptree.append_node(fptree.root, extra_child)

    assert_equal(false, fptree.single_path?)
  end

  # combinations must run on a single-path tree; its result is not checked
  # yet, hence the explicit failure at the end.
  def test_combination
    transactions = [['a', 'b'], ['b'], ['b', 'c', 'a'], ['a', 'b']]
    fp_tree = FpGrowth::FpTree.build(transactions, 0)
    assert_equal(true, fp_tree.single_path?)
    power_set = nil
    assert_nothing_raised { power_set = fp_tree.combinations }

    fail("ToDo")
  end

  private

  # Assertions shared by every construction scenario: the tree has a Node
  # as root and a Hash of heads.
  def assert_tree_skeleton(fp_tree)
    assert_not_nil(fp_tree.root)
    assert_instance_of(FpGrowth::FpTree::Node, fp_tree.root)
    assert_instance_of(Hash, fp_tree.heads)
  end
end
|
data/test/tc_miner.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'fpgrowth/miner'
|
3
|
+
|
4
|
+
# Tests FpGrowth::Miner: conditional tree construction and fp_growth mining.
class TestMiner < Test::Unit::TestCase
  # Builds the conditional tree for 'a' on a small fixed data set and checks
  # its shape, then builds one on random data (only checked for running
  # through). Both trees are also passed to graphviz.
  def test_build_conditional_tree
    fp_tree = FpGrowth::FpTree.build([['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']], 0)
    conditional_tree = nil

    conditional_tree = FpGrowth::Miner.build_conditional_tree(fp_tree, 'a')

    fp_tree.graphviz()
    conditional_tree.graphviz("conditional")

    assert_equal('b', conditional_tree.root.children.first.item)
    assert_equal(2, conditional_tree.root.children.first.support)
    assert_equal([], conditional_tree.root.children.first.children)

    assert_equal(true, conditional_tree.single_path?)

    # Randomized
    fill_random_transactions(['a', 'b', 'c', 'd', 'e'])

    fp_tree = FpGrowth::FpTree.build(@random_transactions, 1)
    conditional_tree = FpGrowth::Miner.build_conditional_tree(fp_tree, fp_tree.heads.keys[-2])

    fp_tree.graphviz()
    conditional_tree.graphviz("conditional-#{fp_tree.heads.keys[-2]}")
  end

  # fp_growth must run on the small fixed data set and return at least one
  # pattern.
  def test_fp_growth
    fp_tree = FpGrowth::FpTree.build([['a', 'b'], ['b'], ['b', 'c'], ['a', 'b']], 0)
    pattern_set = nil

    assert_nothing_raised { pattern_set = FpGrowth::Miner.fp_growth(fp_tree) }
    assert_not_equal(0, pattern_set.size)
  end

  # Randomized fp_growth run that prints every pattern found.
  # NOTE(review): the name has no "test_" prefix and the body ends with
  # fail("ToDo"); presumably it is parked on purpose - confirm before
  # renaming it into a real test.
  def fp_growth_randomized
    # Randomized
    fill_random_transactions(['a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k'])

    fp_tree = FpGrowth::FpTree.build(@random_transactions, 10)
    pattern_set = nil

    assert_nothing_raised { pattern_set = FpGrowth::Miner.fp_growth(fp_tree) }
    assert_not_equal(0, pattern_set.size)

    pattern_set.each do |pattern|
      puts "<#{pattern.content}:#{pattern.support}>"
    end

    fail("ToDo")
  end

  private

  # Fills @random_transactions with @n + 1 random transactions of @m + 1
  # items each, drawn from +items+, then appends the transaction ['e'].
  def fill_random_transactions(items)
    rng = Random.new
    @n = rng.rand(100..500)
    @random_transactions = []
    (0..@n).each do |row|
      @m = rng.rand(0..5)
      @random_transactions[row] = []
      (0..@m).each do
        # Trick so that the last item gets pruned: only one draw out of 21
        # may pick from the whole list; every other draw excludes it.
        pool_size = rng.rand(21) == 20 ? items.size : items.size - 1
        @random_transactions[row] << items[rng.rand(pool_size)]
      end
    end

    # Trick so that one transaction gets emptied.
    @random_transactions << ['e']
  end
end
|