fpgrowth 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +18 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +22 -0
- data/README.md +88 -0
- data/Rakefile +11 -0
- data/fpgrowth-ruby.gemspec +23 -0
- data/lib/fpgrowth.rb +8 -0
- data/lib/fpgrowth/fp_tree.rb +153 -0
- data/lib/fpgrowth/fp_tree/builder.rb +19 -0
- data/lib/fpgrowth/fp_tree/builder/first_pass.rb +72 -0
- data/lib/fpgrowth/fp_tree/builder/second_pass.rb +70 -0
- data/lib/fpgrowth/fp_tree/node.rb +46 -0
- data/lib/fpgrowth/miner.rb +62 -0
- data/lib/fpgrowth/miner/conditional_tree_builder.rb +123 -0
- data/lib/fpgrowth/miner/pattern.rb +47 -0
- data/lib/fpgrowth/miner/pattern_base_extractor.rb +70 -0
- data/lib/fpgrowth/version.rb +3 -0
- data/test/montreal-sondage/mtlsat12.csv +1202 -0
- data/test/montreal-velos-comptage/2009.csv +366 -0
- data/test/montreal-velos-comptage/2010.csv +366 -0
- data/test/montreal-velos-comptage/2011.csv +366 -0
- data/test/montreal-velos-comptage/2012.csv +311 -0
- data/test/tc_builder.rb +65 -0
- data/test/tc_first_pass.rb +119 -0
- data/test/tc_fp_tree.rb +168 -0
- data/test/tc_miner.rb +116 -0
- data/test/tc_node.rb +101 -0
- data/test/tc_open_data_sondage_montreal.rb +49 -0
- data/test/tc_open_data_velo_montreal.rb +58 -0
- data/test/tc_pattern.rb +56 -0
- data/test/tc_second_pass.rb +148 -0
- metadata +131 -0
data/test/tc_builder.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'fpgrowth/fp_tree/builder'
|
3
|
+
|
4
|
+
# Exercises FpGrowth::FpTree::Builder.build on a small hand-written data set
# and on a randomly generated one.
class TestBuilder < Test::Unit::TestCase
  # Builds the fixtures shared by the tests:
  # * @random_transactions - @n + 1 transactions of 2 to 6 items drawn from
  #   'a'..'e', with 'e' deliberately rare;
  # * @non_random          - four fixed transactions over 'a', 'b' and 'c';
  # * @seed                - seed of the generator, reported on failure so a
  #   failing random data set can be rebuilt with Random.new(@seed).
  def setup
    items = %w[a b c d e]

    rng = Random.new
    @seed = rng.seed

    @n = rng.rand(100..500)
    @random_transactions = Array.new(@n + 1) do
      extra_items = rng.rand(1..5)
      Array.new(extra_items + 1) do
        # Trick so that 'e' gets pruned: only one draw in ten may pick from
        # the whole list, the others are restricted to 'a'..'d'.
        pool_size = rng.rand(10) == 9 ? items.size : items.size - 1
        items[rng.rand(pool_size)]
      end
    end

    @non_random = [%w[a b], %w[b], %w[b c], %w[a b]]
  end

  # Nothing to release: the fixtures are plain in-memory arrays.
  def teardown
  end

  # Builds a tree from the fixed data set and checks its shape, then checks
  # that the random data set can be built without raising.
  def test_build
    tree = nil
    # NOTE(review): 0.5 is the threshold argument; its unit is defined by
    # Builder.build, which is not visible from this file.
    assert_nothing_raised { tree = FpGrowth::FpTree::Builder.build(@non_random, 0.5) }
    assert_not_nil(tree)
    assert_instance_of(FpGrowth::FpTree::FpTree, tree)

    # Expected shape: an item-less root with a single child 'b', whose
    # first child is 'a' and last child is 'c'.
    assert_nil(tree.root.item)
    assert_equal(1, tree.root.children.size)
    assert_equal('b', tree.root.children.first.item)
    assert_equal('a', tree.root.children.first.children.first.item)
    assert_equal('c', tree.root.children.first.children.last.item)

    assert_nothing_raised("random seed: #{@seed}") do
      tree = FpGrowth::FpTree::Builder.build(@random_transactions)
    end
    assert_not_nil(tree)
    assert_instance_of(FpGrowth::FpTree::FpTree, tree)
  end
end
|
data/test/tc_first_pass.rb
ADDED
@@ -0,0 +1,119 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'fpgrowth/fp_tree/builder/first_pass'
|
3
|
+
|
4
|
+
# Exercises the individual steps of FpGrowth::FpTree::Builder::FirstPass
# (scan, pruning, sort) and its execute entry point.
class TestFirstPass < Test::Unit::TestCase
  # Generates a random list of transactions plus a small fixed one, and
  # scans both:
  # * @random_transactions - @n + 1 transactions of 1 to 6 items drawn from
  #   'a'..'e' with 'e' deliberately rare, followed by a lone ['e'];
  # * @non_random          - four fixed transactions over 'a', 'b' and 'c';
  # * @support_random / @support_non_random - results of FirstPass#scan;
  # * @seed                - seed of the generator, reported on failure so a
  #   failing random data set can be rebuilt with Random.new(@seed).
  def setup
    items = %w[a b c d e]

    rng = Random.new
    @seed = rng.seed

    @n = rng.rand(100..500)
    @random_transactions = Array.new(@n + 1) do
      extra_items = rng.rand(0..5)
      Array.new(extra_items + 1) do
        # Trick so that 'e' gets pruned: only one draw in twenty-one may pick
        # from the whole list, the others are restricted to 'a'..'d'.
        pool_size = rng.rand(21) == 20 ? items.size : items.size - 1
        items[rng.rand(pool_size)]
      end
    end

    # Trick so that one transaction ends up emptied once 'e' is pruned.
    @random_transactions << ['e']

    @non_random = [%w[a b], %w[b], %w[b c], %w[a b]]

    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    @support_random = first_pass.scan(@random_transactions)
    @support_non_random = first_pass.scan(@non_random)
  end

  # Called after every test method runs. Nothing to release here.
  def teardown
    # Do nothing
  end

  # scan must count the occurrences of each item.
  def test_scan
    assert_equal(2, @support_non_random['a'])
    assert_equal(4, @support_non_random['b'])
    assert_equal(1, @support_non_random['c'])

    assert_equal(5, @support_random.size, "random seed: #{@seed}")
    # Loose upper bound on any single item's count.
    @support_random.values.each do |support|
      assert_operator(@n * 5, ">=", support, "random seed: #{@seed}")
    end
  end

  # pruning must drop the rare item 'e' from the random data and leave the
  # small fixed data set untouched.
  def test_pruning
    first_pass = FpGrowth::FpTree::Builder::FirstPass.new
    random_threshold = 10

    # Shallow copies: the outer arrays are distinct but the transactions
    # themselves are shared with the fixtures.
    random_transactions = @random_transactions.clone
    non_random = @non_random.clone

    @support_random_pruned = first_pass.pruning(random_transactions, @support_random.clone, random_threshold)
    @support_non_random_pruned = first_pass.pruning(non_random, @support_non_random.clone, 20)

    # There must be no pruning, considering how few elements there are.
    # The check is made on the pruned result; checking the scan result would
    # pass whatever pruning did.
    assert_equal(3, @support_non_random_pruned.size, "Supports : #{@support_non_random_pruned}")

    # Only used in the failure message. NOTE(review): assumes the threshold
    # is a percentage of the transaction count - confirm against
    # FirstPass#pruning.
    minimum = @random_transactions.size.to_f / 100 * random_threshold

    assert_operator(5, ">", @support_random_pruned.size, "En plus e : #{@support_random['e']} et minimum : #{minimum}")
    random_transactions.each do |transaction|
      assert_not_equal(0, transaction.size)
      assert(!transaction.include?('e'), "e doit avoir disparu !")
    end
    # At least the lone ['e'] transaction must have been removed.
    assert_operator(@random_transactions.size, ">", random_transactions.size)
  end

  # sort must order the supports from most to least frequent.
  def test_sort
    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    @support_random_sorted = first_pass.sort(@support_random)
    @support_non_random_sorted = first_pass.sort(@support_non_random)

    assert_equal('b', @support_non_random_sorted.keys[0])
    assert_equal('a', @support_non_random_sorted.keys[1])

    # Every support must be at least as large as the one that follows it.
    @support_random_sorted.values.each_cons(2) do |current, following|
      assert_operator(current, ">=", following)
    end
  end

  # execute must return a Hash of supports, keeping all three items of the
  # fixed data set and four of the five random ones.
  def test_execute
    first_pass = FpGrowth::FpTree::Builder::FirstPass.new

    non_random_first_passed = first_pass.execute(@non_random)
    random_first_passed = first_pass.execute(@random_transactions, 20)

    assert_instance_of(Hash, non_random_first_passed)
    assert_instance_of(Hash, random_first_passed)

    assert_equal(3, non_random_first_passed.size)
    assert_equal(4, random_first_passed.size, "En plus : #{random_first_passed}")
  end
end
|
data/test/tc_fp_tree.rb
ADDED
@@ -0,0 +1,168 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require "fpgrowth/fp_tree"
|
3
|
+
require "fpgrowth/fp_tree/node"
|
4
|
+
require "fpgrowth/fp_tree/builder/first_pass"
|
5
|
+
|
6
|
+
# Exercises FpGrowth::FpTree::FpTree: construction, item ordering, child
# sorting, node insertion and single-path detection.
class TestFpTree < Test::Unit::TestCase
  # Called before every test method runs. No shared fixture is needed.
  def setup
    # Do nothing
  end

  # Called after every test method runs. Nothing to release.
  def teardown
    # Do nothing
  end

  # A tree can be created with no argument, with an empty supports hash, or
  # with a populated one; heads and supports must mirror what was given.
  def test_initialize
    fp_tree = nil

    # no arguments
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new }
    assert_tree_skeleton(fp_tree)
    assert_equal({}, fp_tree.supports)

    # empty supports
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new({}) }
    assert_tree_skeleton(fp_tree)
    assert_equal({}, fp_tree.supports)

    # populated supports
    support = { 'a' => 1, 'b' => 2 }
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new(support) }
    assert_tree_skeleton(fp_tree)
    assert(fp_tree.heads.has_key?('a'), "a n'existe pas")
    assert(fp_tree.heads.has_key?('b'), "b n'existe pas !")
    assert_equal(2, fp_tree.heads.length)
    assert(fp_tree.supports.has_key?('a'), "a n'existe pas")
    assert(fp_tree.supports.has_key?('b'), "b n'existe pas !")
    assert_equal(2, fp_tree.supports.length)
  end

  # item_order_lookup must map each item to its position in the supports.
  # (The method name keeps its historical spelling, "oder".)
  def test_item_oder_lookup
    fp_tree = nil

    # lookup on a tree without supports
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new }
    lookup = fp_tree.item_order_lookup
    assert_equal({}, lookup)

    # lookup on a tree with supports
    support = { 'a' => 1, 'b' => 2 }
    assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new(support) }
    lookup = fp_tree.item_order_lookup
    assert_equal(0, lookup['a'])
    assert_equal(1, lookup['b'])
  end

  # TODO: not written yet; the empty body makes this test pass vacuously.
  def test_find_lateral_leaf_for_item
  end

  # sort_children_by_support must reorder the given node list in place so
  # that it follows the order of the supports.
  def test_sort_children_by_support
    transactions = [%w[a b], %w[b], %w[b c], %w[a b]]
    supports = FpGrowth::FpTree::Builder::FirstPass.new.execute(transactions)

    second_pass = FpGrowth::FpTree::Builder::SecondPass.new(supports)

    list = %w[a c b].map { |item| FpGrowth::FpTree::Node.new(item) }

    assert_nothing_raised { second_pass.fp_tree.sort_children_by_support(list) }

    assert_equal(supports.keys.first, list.first.item)
    assert_equal(supports.keys[1], list[1].item)
  end

  # append_node must link parent and child, register the node under its
  # item's head (or on that head's lateral link) and keep children ordered.
  def test_append_node
    parent = FpGrowth::FpTree::Node.new
    child = FpGrowth::FpTree::Node.new('a')
    transactions = [%w[a b], %w[b], %w[b c], %w[a b]]
    supports = FpGrowth::FpTree::Builder::FirstPass.new.execute(transactions)

    # First insertion: the node becomes the head for 'a'.
    second_pass = FpGrowth::FpTree::Builder::SecondPass.new(supports)

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_not_equal(0, parent.children.size)
    assert_equal(child, parent.children.last)
    assert_equal(child, second_pass.fp_tree.heads['a'])
    assert_equal(parent, child.parent)

    # Lateral insertion: a second 'a' is reachable from the head's lateral.
    child = FpGrowth::FpTree::Node.new('a')

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_not_equal(0, parent.children.size)
    assert_equal(child, parent.children[1])
    assert_equal(child, second_pass.fp_tree.heads['a'].lateral)

    # Insertion in depth: 'b' goes under the first 'a' node.
    parent = parent.children[0]
    child = FpGrowth::FpTree::Node.new('b')

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_not_equal(0, parent.children.size)
    assert_equal(child, parent.children[0])
    assert_equal(child, second_pass.fp_tree.heads['b'])
    assert_equal(parent, child.parent)

    # Check the order of the children: 'b' must come before 'a' even though
    # 'a' was appended first.
    parent = FpGrowth::FpTree::Node.new
    child = FpGrowth::FpTree::Node.new('a')
    child2 = FpGrowth::FpTree::Node.new('b')

    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child) }
    assert_nothing_raised { second_pass.fp_tree.append_node(parent, child2) }

    assert_equal('b', parent.children.first.item)
    assert_equal('a', parent.children.last.item)
  end

  # A tree stops being a single path once a second branch hangs off the root.
  def test_single_path
    child = FpGrowth::FpTree::Node.new('a')

    # NOTE(review): ['b', 'a'] * 3 repeats the items, giving ONE transaction
    # of six items. If three identical transactions were meant, this should
    # read [['b', 'a']] * 3 - confirm before changing.
    fptree = FpGrowth::FpTree.build([['b', 'a'] * 3])

    assert_equal(true, fptree.single_path?)

    fptree.append_node(fptree.root, child)

    assert_equal(false, fptree.single_path?)
  end

  # combinations must be callable on a single-path tree. The content of the
  # result is not verified yet, so the test is flagged as unfinished.
  def test_combination
    fp_tree = FpGrowth::FpTree.build([%w[a b], %w[b], %w[b c a], %w[a b]], 0)
    assert_equal(true, fp_tree.single_path?)
    power_set = nil
    assert_nothing_raised { power_set = fp_tree.combinations }

    # TODO: assert on power_set. Reported as a test failure rather than as
    # an unexpected RuntimeError.
    flunk("ToDo")
  end

  private

  # Checks what every freshly created tree must expose: a root Node and a
  # Hash of heads.
  def assert_tree_skeleton(fp_tree)
    assert_not_nil(fp_tree.root)
    assert_instance_of(FpGrowth::FpTree::Node, fp_tree.root)
    assert_instance_of(Hash, fp_tree.heads)
  end
end
|
data/test/tc_miner.rb
ADDED
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'fpgrowth/miner'
|
3
|
+
|
4
|
+
# Exercises FpGrowth::Miner: conditional tree construction and the
# fp_growth mining entry point.
class TestMiner < Test::Unit::TestCase
  # Builds the conditional tree of 'a' on a fixed data set and checks its
  # shape, then builds one on random data without checking its content.
  def test_build_conditional_tree
    fp_tree = FpGrowth::FpTree.build([%w[a b], %w[b], %w[b c], %w[a b]], 0)

    conditional_tree = FpGrowth::Miner.build_conditional_tree(fp_tree, 'a')

    # NOTE(review): graphviz presumably renders the trees for inspection;
    # its output location is defined outside this file.
    fp_tree.graphviz
    conditional_tree.graphviz("conditional")

    # Expected shape: a single leaf 'b' with support 2 under the root.
    first_child = conditional_tree.root.children.first
    assert_equal('b', first_child.item)
    assert_equal(2, first_child.support)
    assert_equal([], first_child.children)

    assert_equal(true, conditional_tree.single_path?)

    # Randomized
    @random_transactions = generate_random_transactions(%w[a b c d e])

    fp_tree = FpGrowth::FpTree.build(@random_transactions, 1)
    target_item = fp_tree.heads.keys[-2]
    conditional_tree = FpGrowth::Miner.build_conditional_tree(fp_tree, target_item)

    fp_tree.graphviz
    conditional_tree.graphviz("conditional-#{target_item}")
  end

  # fp_growth must return a non-empty pattern set on the fixed data set.
  def test_fp_growth
    fp_tree = FpGrowth::FpTree.build([%w[a b], %w[b], %w[b c], %w[a b]], 0)
    pattern_set = nil

    assert_nothing_raised { pattern_set = FpGrowth::Miner.fp_growth(fp_tree) }
    assert_not_equal(0, pattern_set.size)
  end

  # Mines a random data set over eleven items and prints the patterns found.
  # The name has no test_ prefix, so the test runner does not pick this
  # method up; it is unfinished and ends on a deliberate failure.
  def fp_growth_randomized
    # Randomized
    @random_transactions = generate_random_transactions(%w[a b c d e f g h i j k])

    fp_tree = FpGrowth::FpTree.build(@random_transactions, 10)
    pattern_set = nil

    assert_nothing_raised { pattern_set = FpGrowth::Miner.fp_growth(fp_tree) }
    assert_not_equal(0, pattern_set.size)

    pattern_set.each do |pattern|
      puts "<#{pattern.content}:#{pattern.support}>"
    end

    # TODO: assert on the mined patterns.
    flunk("ToDo")
  end

  private

  # Returns @n + 1 random transactions of 1 to 6 items drawn from +items+,
  # followed by a lone ['e'] transaction. @n is drawn from 100..500.
  #
  # The last entry of +items+ is deliberately rare: only one draw in
  # twenty-one may pick from the whole list, the others exclude the last
  # entry. With the five-item list that rare item is 'e', so the trailing
  # ['e'] transaction is the trick meant to get one transaction emptied by
  # pruning. With the eleven-item list the rare item is 'k', not 'e'.
  def generate_random_transactions(items)
    rng = Random.new
    @n = rng.rand(100..500)

    transactions = Array.new(@n + 1) do
      extra_items = rng.rand(0..5)
      Array.new(extra_items + 1) do
        pool_size = rng.rand(21) == 20 ? items.size : items.size - 1
        items[rng.rand(pool_size)]
      end
    end

    transactions << ['e']
  end
end
|