fpgrowth 0.0.2 → 1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +43 -2
- data/Rakefile +1 -1
- data/lib/fpgrowth.rb +16 -4
- data/lib/fpgrowth/fp_tree.rb +145 -5
- data/lib/fpgrowth/fp_tree/bonzai_secateur.rb +30 -0
- data/lib/fpgrowth/fp_tree/builder.rb +1 -0
- data/lib/fpgrowth/fp_tree/builder/first_pass.rb +2 -7
- data/lib/fpgrowth/fp_tree/builder/header_table_builder.rb +37 -0
- data/lib/fpgrowth/fp_tree/header_table.rb +41 -0
- data/lib/fpgrowth/fp_tree/node.rb +1 -0
- data/lib/fpgrowth/miner.rb +32 -2
- data/lib/fpgrowth/miner/pattern_base_extractor.rb +51 -3
- data/lib/fpgrowth/version.rb +1 -1
- data/test/tc_conditional_tree_builder.rb +74 -3
- data/test/tc_fp_tree.rb +80 -29
- data/test/tc_miner.rb +4 -4
- data/test/tc_open_data_enel.rb +77 -10
- data/test/tc_open_data_sondage_montreal.rb +62 -19
- data/test/tc_open_data_velo_montreal.rb +43 -9
- data/test/tc_pattern_base_extractor.rb +116 -0
- metadata +7 -2
@@ -7,16 +7,33 @@ class TestOpenDataMTLSondage < Test::Unit::TestCase
|
|
7
7
|
|
8
8
|
def setup
|
9
9
|
|
10
|
+
puts "Setup Test : Open Data Sondage Montreal"
|
11
|
+
|
10
12
|
@transactions = []
|
11
13
|
CSV.foreach("test/montreal-sondage/mtlsat12.csv", {:headers => true, :header_converters => :symbol, :header_converters => :symbol, :converters => :all, :encoding => 'windows-1251:utf-8'}) do |row|
|
12
14
|
transaction = row.to_a
|
13
15
|
transaction.delete_if { |item|
|
14
|
-
item[0].to_s.include?("dm") or (item[0].to_s.include?("sd80m") and item[1].to_s == '2') or item[1] == nil
|
16
|
+
item[0].to_s.include?("dm") or (item[0].to_s.include?("sd80m") and item[1].to_s == '2') or item[1] == nil or item[1] == nil or item[1].to_s == " " or item[1].to_s.empty?
|
15
17
|
}
|
16
18
|
@transactions << transaction
|
17
|
-
|
18
19
|
end
|
19
20
|
|
21
|
+
|
22
|
+
total_item = 0
|
23
|
+
min = @transactions[0].size
|
24
|
+
max = 0
|
25
|
+
@transactions.each { |transaction|
|
26
|
+
total_item += transaction.size
|
27
|
+
min = transaction.size if transaction.size < min
|
28
|
+
max = transaction.size if transaction.size > max
|
29
|
+
}
|
30
|
+
average = total_item / @transactions.size
|
31
|
+
|
32
|
+
|
33
|
+
puts "Extracted #{@transactions.size} transactions"
|
34
|
+
puts "With a total of #{total_item} items"
|
35
|
+
puts "min:#{min} avg:#{average} max:#{max} items/sets"
|
36
|
+
|
20
37
|
end
|
21
38
|
|
22
39
|
|
@@ -25,25 +42,51 @@ class TestOpenDataMTLSondage < Test::Unit::TestCase
|
|
25
42
|
end
|
26
43
|
|
27
44
|
|
28
|
-
def
|
45
|
+
def test_fp_growth
|
46
|
+
|
47
|
+
clone = @transactions.clone
|
48
|
+
|
49
|
+
start = Time.now
|
50
|
+
|
51
|
+
fp_tree = FpGrowth::FpTree.build(clone, 1)
|
52
|
+
|
53
|
+
loop = Time.now
|
54
|
+
puts "Tree built of size #{fp_tree.size} in #{loop - start}"
|
55
|
+
|
56
|
+
fp_tree.to_bonzai! 83
|
57
|
+
|
58
|
+
loop2 = Time.now
|
59
|
+
puts "Tree bonzaied #{fp_tree.size} in :#{loop2 - loop} sec"
|
60
|
+
|
61
|
+
patterns = FpGrowth::Miner.fp_growth(fp_tree)
|
62
|
+
|
63
|
+
patterns.sort! { |a, b| a.support <=> b.support }.reverse!
|
64
|
+
finish = Time.now
|
65
|
+
|
66
|
+
puts "Tree Mined in #{finish - loop2}"
|
67
|
+
|
68
|
+
transactions2 = @transactions.clone
|
69
|
+
start_td = Time.now
|
70
|
+
fp_tree_td = FpGrowth::FpTree.build(transactions2, 1)
|
71
|
+
|
72
|
+
|
73
|
+
loop_td = Time.now
|
74
|
+
puts "Tree built of size #{fp_tree_td.size} in #{loop_td - start_td}"
|
75
|
+
|
76
|
+
fp_tree_td.to_bonzai! 83
|
77
|
+
|
78
|
+
loop2_td = Time.now
|
79
|
+
puts "Tree bonzaied to #{fp_tree_td.size} in :#{loop2_td - loop_td} sec"
|
80
|
+
|
81
|
+
patterns_td = FpGrowth::Miner.td_fp_growth(fp_tree_td)
|
82
|
+
|
83
|
+
finish_td = Time.now
|
84
|
+
|
85
|
+
puts "Tree TDMined in #{finish_td - loop2_td}"
|
29
86
|
|
30
|
-
|
31
|
-
puts "Start time : "+d.to_s
|
87
|
+
puts "Found #{patterns_td.size} rather than #{patterns.size} with a DeltaTime of #{finish_td - start_td - (finish - start)} it's a #{-(finish_td - start_td - (finish - start)) / (finish - start) * 100}% speedup"
|
32
88
|
|
33
|
-
fp_tree = FpGrowth::FpTree.build(@transactions, 1)
|
34
89
|
|
35
|
-
puts "
|
36
|
-
#patterns = FpGrowth::Miner.fp_growth(fp_tree)
|
37
|
-
#
|
38
|
-
#f=Time.now
|
39
|
-
#puts "Mining took #{(f-d)}sec"
|
40
|
-
#
|
41
|
-
#patterns.sort! { |a, b| a.support <=> b.support }.reverse!
|
42
|
-
#
|
43
|
-
#for pattern in patterns
|
44
|
-
# puts "#{pattern.content} #{pattern.support}"
|
45
|
-
#end
|
46
|
-
#
|
47
|
-
#assert_not_equal(0, patterns.size)
|
90
|
+
patterns_td.each {|x| puts "#{x.content} #{ x.support}"}
|
48
91
|
end
|
49
92
|
end
|
@@ -7,12 +7,29 @@ class TestOpenDataVelo < Test::Unit::TestCase
|
|
7
7
|
|
8
8
|
def setup
|
9
9
|
|
10
|
+
puts "Setup Test : Open Data Velo Montreal"
|
11
|
+
|
10
12
|
@transactions = []
|
11
13
|
CSV.foreach("test/montreal-velos-comptage/2012.csv", {:headers => true, :header_converters => :symbol, :header_converters => :symbol, :converters => :all, :encoding => 'windows-1251:utf-8', :col_sep => ";"}) do |row|
|
12
14
|
date = Date.parse(row[:date])
|
13
15
|
@transactions << [[:wday, date.wday], [:month, date.month], [:date, row[:date]], [:berri, row[:berri_1]], [:ctesaintecatherine, row[:ctesaintecatherine]], [:maisonneuve_1, row[:maisonneuve_1]], [:maisonneuve_2, row[:maisonneuve_2]], [:du_parc, row[:du_parc]], [:pierredupuy, row[:pierredupuy]], [:rachel, row[:rachel1]]]
|
14
16
|
end
|
15
17
|
|
18
|
+
total_item = 0
|
19
|
+
min = @transactions[0].size
|
20
|
+
max = 0
|
21
|
+
@transactions.each { |transaction|
|
22
|
+
total_item += transaction.size
|
23
|
+
min = transaction.size if transaction.size < min
|
24
|
+
max = transaction.size if transaction.size > max
|
25
|
+
}
|
26
|
+
average = total_item / @transactions.size
|
27
|
+
|
28
|
+
|
29
|
+
puts "Extracted #{@transactions.size} transactions"
|
30
|
+
puts "With a total of #{total_item} items"
|
31
|
+
puts "min:#{min} avg:#{average} max:#{max} items/sets"
|
32
|
+
|
16
33
|
|
17
34
|
end
|
18
35
|
|
@@ -23,23 +40,40 @@ class TestOpenDataVelo < Test::Unit::TestCase
|
|
23
40
|
|
24
41
|
# Fake test
|
25
42
|
def test_fp_growth
|
43
|
+
transactions1 = @transactions.clone
|
44
|
+
|
45
|
+
# Début
|
26
46
|
start = Time.now
|
27
|
-
fp_tree = FpGrowth::FpTree.build(
|
47
|
+
fp_tree = FpGrowth::FpTree.build(transactions1, 1)
|
48
|
+
|
28
49
|
loop = Time.now
|
29
|
-
puts "Tree built in #{loop - start}"
|
50
|
+
puts "Tree built of size #{fp_tree.size} in #{loop - start}"
|
30
51
|
|
31
52
|
patterns = FpGrowth::Miner.fp_growth(fp_tree)
|
32
53
|
|
54
|
+
patterns.sort! { |a, b| a.support <=> b.support }.reverse!
|
33
55
|
finish = Time.now
|
34
|
-
puts "Tree Mined in #{finish -start}"
|
35
56
|
|
36
|
-
|
57
|
+
puts "Tree Mined in #{finish - loop}"
|
58
|
+
assert_not_equal(0, patterns.size)
|
59
|
+
# Fin
|
60
|
+
|
61
|
+
transactions2 = @transactions.clone
|
62
|
+
start_td = Time.now
|
63
|
+
fp_tree_td = FpGrowth::FpTree.build(transactions2, 1)
|
64
|
+
|
65
|
+
|
66
|
+
loop_td = Time.now
|
67
|
+
puts "Tree built of size #{fp_tree_td.size} in #{loop_td - start_td}"
|
68
|
+
|
69
|
+
patterns_td = FpGrowth::Miner.td_fp_growth(fp_tree_td)
|
70
|
+
|
71
|
+
finish_td = Time.now
|
72
|
+
|
73
|
+
puts "Tree built in #{loop_td - start_td} TDMined in #{finish_td - loop_td}"
|
74
|
+
|
75
|
+
puts "Found #{patterns_td.size} rather than #{patterns.size} with a DeltaTime of #{finish_td - start_td - (finish - start)} it's a #{-(finish_td - start_td - (finish - start)) / (finish - start) * 100}% speedup"
|
37
76
|
|
38
|
-
=begin
|
39
|
-
for pattern in patterns
|
40
|
-
puts "#{pattern.content} #{pattern.support}"
|
41
|
-
end
|
42
|
-
=end
|
43
77
|
|
44
78
|
assert_not_equal(0, patterns.size)
|
45
79
|
end
|
@@ -0,0 +1,116 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require "fpgrowth/miner"
|
3
|
+
require "fpgrowth/fp_tree"
|
4
|
+
|
5
|
+
|
6
|
+
class TestPatternBaseExtractor < Test::Unit::TestCase
|
7
|
+
|
8
|
+
# Called before every test method runs. Can be used
|
9
|
+
# to set up fixture information.
|
10
|
+
def setup
|
11
|
+
items= ['a', 'b', 'c', 'd', 'e']
|
12
|
+
@supports_exemple = {'a' => 1, 'b' => 5, 'c' => 4}
|
13
|
+
@tableau_pattern = []
|
14
|
+
@tableau_pattern << FpGrowth::Miner::Pattern.new(["b"], 3 )
|
15
|
+
|
16
|
+
|
17
|
+
|
18
|
+
@transactions = [['b', 'a'], ['b'], ['b', 'c'], ['a', 'b'],['b', 'c'],['b', 'c']]
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
# Called after every test method runs. Can be used to tear
|
23
|
+
# down fixture information.
|
24
|
+
|
25
|
+
def teardown
|
26
|
+
# Do nothing
|
27
|
+
end
|
28
|
+
|
29
|
+
|
30
|
+
def test_initialize
|
31
|
+
pattern_base_extractor = nil
|
32
|
+
#initialisation avec un argument
|
33
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(1) }
|
34
|
+
assert_equal(true , pattern_base_extractor.test_tree())
|
35
|
+
assert_equal(true , pattern_base_extractor.test_conditionnal_item(1))
|
36
|
+
assert_equal(true , pattern_base_extractor.test_patterns([]) )
|
37
|
+
|
38
|
+
#initialisation avec deux arguments
|
39
|
+
fp_tree = nil
|
40
|
+
assert_nothing_raised { fp_tree = FpGrowth::FpTree::FpTree.new(@supports_exemple) }
|
41
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(fp_tree, 1) }
|
42
|
+
assert_equal(true , pattern_base_extractor.test_tree(fp_tree))
|
43
|
+
assert_equal(true , pattern_base_extractor.test_conditionnal_item(1))
|
44
|
+
assert_equal(true , pattern_base_extractor.test_patterns )
|
45
|
+
end
|
46
|
+
|
47
|
+
# Tree must be built
|
48
|
+
def test_execute
|
49
|
+
|
50
|
+
pattern_base_extractor = nil
|
51
|
+
pattern_result = nil
|
52
|
+
#One argument
|
53
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(1) }
|
54
|
+
assert_nothing_raised { pattern_result = pattern_base_extractor.execute}
|
55
|
+
assert_equal(true , pattern_base_extractor.test_patterns(pattern_result) )
|
56
|
+
|
57
|
+
#Two arguments
|
58
|
+
fp_tree = nil
|
59
|
+
fp_tree = FpGrowth::FpTree.build(@transactions, 1)
|
60
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(fp_tree, 'c') }
|
61
|
+
assert_nothing_raised { pattern_result = pattern_base_extractor.execute}
|
62
|
+
|
63
|
+
assert_equal( @tableau_pattern[1] , pattern_result[1])
|
64
|
+
assert_equal(true , pattern_base_extractor.test_patterns(pattern_result) )
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
def test_horizontal_traversal
|
69
|
+
|
70
|
+
# Test unit with execute
|
71
|
+
end
|
72
|
+
|
73
|
+
def test_horizontal_traversal_step
|
74
|
+
pattern_base_extractor = nil
|
75
|
+
fp_tree = nil
|
76
|
+
fp_tree = FpGrowth::FpTree.build(@transactions)
|
77
|
+
|
78
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(fp_tree, 'c') }
|
79
|
+
assert_nothing_raised { pattern_base_extractor.horizontal_traversal_step (fp_tree.heads['c']) }
|
80
|
+
assert_equal(true , pattern_base_extractor.test_min_support(fp_tree.heads['c'].support) )
|
81
|
+
|
82
|
+
# attente
|
83
|
+
|
84
|
+
end
|
85
|
+
|
86
|
+
def test_down_to_top_traversal
|
87
|
+
|
88
|
+
# No Test because the current branch is nil for the function down_to_top_vertical
|
89
|
+
current_branch = nil
|
90
|
+
pattern_base_extractor = nil
|
91
|
+
fp_tree = FpGrowth::FpTree.build(@transactions)
|
92
|
+
|
93
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(fp_tree, 'c') }
|
94
|
+
assert_nothing_raised { current_branch = pattern_base_extractor.down_to_top_traversal( [] , fp_tree.heads['c'].parent ) }
|
95
|
+
assert_equal( ["b"] , current_branch)
|
96
|
+
|
97
|
+
|
98
|
+
|
99
|
+
end
|
100
|
+
|
101
|
+
def test_down_to_top_traversal_step
|
102
|
+
|
103
|
+
current_branch = nil
|
104
|
+
pattern_base_extractor = nil
|
105
|
+
fp_tree = FpGrowth::FpTree.build(@transactions)
|
106
|
+
|
107
|
+
assert_nothing_raised { pattern_base_extractor = FpGrowth::Miner::PatternBaseExtractor.new(fp_tree, 'c') }
|
108
|
+
assert_nothing_raised { current_branch = pattern_base_extractor.down_to_top_traversal_step( [] , fp_tree.heads['c'].parent ) }
|
109
|
+
assert_equal( ["b"] , current_branch)
|
110
|
+
|
111
|
+
|
112
|
+
|
113
|
+
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: fpgrowth
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: '1'
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-
|
12
|
+
date: 2013-06-12 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: bundler
|
@@ -66,9 +66,12 @@ files:
|
|
66
66
|
- fpgrowth-ruby.gemspec
|
67
67
|
- lib/fpgrowth.rb
|
68
68
|
- lib/fpgrowth/fp_tree.rb
|
69
|
+
- lib/fpgrowth/fp_tree/bonzai_secateur.rb
|
69
70
|
- lib/fpgrowth/fp_tree/builder.rb
|
70
71
|
- lib/fpgrowth/fp_tree/builder/first_pass.rb
|
72
|
+
- lib/fpgrowth/fp_tree/builder/header_table_builder.rb
|
71
73
|
- lib/fpgrowth/fp_tree/builder/second_pass.rb
|
74
|
+
- lib/fpgrowth/fp_tree/header_table.rb
|
72
75
|
- lib/fpgrowth/fp_tree/node.rb
|
73
76
|
- lib/fpgrowth/miner.rb
|
74
77
|
- lib/fpgrowth/miner/conditional_tree_builder.rb
|
@@ -92,6 +95,7 @@ files:
|
|
92
95
|
- test/tc_open_data_sondage_montreal.rb
|
93
96
|
- test/tc_open_data_velo_montreal.rb
|
94
97
|
- test/tc_pattern.rb
|
98
|
+
- test/tc_pattern_base_extractor.rb
|
95
99
|
- test/tc_second_pass.rb
|
96
100
|
homepage: https://github.com/thedamfr/fpgrowth
|
97
101
|
licenses:
|
@@ -136,4 +140,5 @@ test_files:
|
|
136
140
|
- test/tc_open_data_sondage_montreal.rb
|
137
141
|
- test/tc_open_data_velo_montreal.rb
|
138
142
|
- test/tc_pattern.rb
|
143
|
+
- test/tc_pattern_base_extractor.rb
|
139
144
|
- test/tc_second_pass.rb
|