rank-aggregation 0.0.3 → 0.0.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -4
- data/VERSION +1 -1
- data/bin/rank +8 -10
- data/lib/rank-aggregation/markov.rb +5 -11
- data/lib/rank-aggregation/ranker.rb +87 -31
- data/notes/algorithm.markdown +2 -0
- data/rank-aggregation.gemspec +3 -4
- data/samples/images +249 -0
- data/scores +4 -3
- data/spec/rank-aggregation_spec.rb +25 -0
- metadata +5 -6
- data/lib/rank-aggregation/ordering.rb +0 -44
- data/lib/rank-aggregation/scorer.rb +0 -41
data/Rakefile
CHANGED
@@ -26,7 +26,6 @@ end
|
|
26
26
|
task "scores" do
|
27
27
|
$: << "lib"
|
28
28
|
require "rank-aggregation"
|
29
|
-
require "rank-aggregation/scorer"
|
30
29
|
|
31
30
|
File.open("scores", "w"){ |o|
|
32
31
|
Dir["samples/*"].sort.each{|file|
|
@@ -38,9 +37,7 @@ task "scores" do
|
|
38
37
|
|
39
38
|
items.each{|i| r.add_ranking i }
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
o.puts "#{name}: #{kendall_distance}"
|
40
|
+
o.puts "#{name}: #{r.kendall_distance}"
|
44
41
|
}
|
45
42
|
}
|
46
43
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.5
|
data/bin/rank
CHANGED
@@ -8,30 +8,28 @@ require "trollop"
|
|
8
8
|
|
9
9
|
opts = Trollop.options do
|
10
10
|
opt :score, "Output scoring metrics on STDERR"
|
11
|
-
opt :rough, "Use only the rough score for sorting"
|
11
|
+
opt :rough, "Use only the rough score for sorting, don't perform any optimisation on top of that"
|
12
12
|
opt :smoothing, "Set the smoothing parameter: It should be approximately equal to the sample size you consider large enough to matter", :type => :int
|
13
|
+
opt :debug
|
13
14
|
end
|
14
15
|
|
15
16
|
ranker = RankAggregation::Ranker.new
|
16
17
|
|
17
18
|
ranker.smoothing = opts[:smoothing] if opts[:smoothing]
|
18
|
-
|
19
|
-
collect = [] if opts[:score]
|
19
|
+
ranker.logger.level = Logger::DEBUG if opts[:debug]
|
20
20
|
|
21
21
|
ARGF.each_line{|l|
|
22
22
|
items = l.split("\t").map{|x| x.strip}.select{|x| x.length > 0}
|
23
|
-
|
24
|
-
collect << items if collect
|
25
|
-
|
26
23
|
ranker.add_ranking items
|
27
24
|
}
|
28
25
|
|
29
|
-
result =
|
26
|
+
result = (
|
27
|
+
if opts[:rough] then ranker.rough_combined_rankings
|
28
|
+
else ranker.combined_rankings end
|
29
|
+
)
|
30
30
|
|
31
31
|
puts result.join("\t")
|
32
32
|
|
33
33
|
if opts[:score]
|
34
|
-
|
35
|
-
|
36
|
-
STDERR.puts "Average kendal distance: #{RankAggregation::Scorer.average_kendall_distance result, collect}"
|
34
|
+
STDERR.puts "Average kendal distance: #{ranker.kendall_distance}"
|
37
35
|
end
|
@@ -1,16 +1,9 @@
|
|
1
1
|
module RankAggregation
|
2
2
|
class MarkovChain
|
3
|
-
def initialize(items, transitions)
|
4
|
-
@
|
3
|
+
def initialize(items, transitions, logger)
|
4
|
+
@logger = logger
|
5
|
+
@transitions = transitions
|
5
6
|
@items = items
|
6
|
-
|
7
|
-
items.each{|x|
|
8
|
-
tot = items.map{|y| transitions[x][y]}.inject(0.0){|u, v| u + v}
|
9
|
-
|
10
|
-
tx = (@transitions[x] = {})
|
11
|
-
|
12
|
-
items.each{|y| tx[y] = transitions[x][y] / tot }
|
13
|
-
}
|
14
7
|
end
|
15
8
|
|
16
9
|
def stationary_distribution
|
@@ -18,7 +11,8 @@ module RankAggregation
|
|
18
11
|
|
19
12
|
@items.each{|x| dist[x] = 1.0 / @items.size }
|
20
13
|
|
21
|
-
|
14
|
+
3.times{ |i|
|
15
|
+
@logger.debug "markov chain iteration #{i}"
|
22
16
|
new_dist = Hash.new(0.0)
|
23
17
|
|
24
18
|
dist.each{|x, p|
|
@@ -1,26 +1,37 @@
|
|
1
|
-
require "
|
1
|
+
require "set"
|
2
|
+
require "logger"
|
3
|
+
|
2
4
|
require "rank-aggregation/markov"
|
3
5
|
|
6
|
+
|
4
7
|
module RankAggregation
|
5
8
|
class Ranker
|
6
|
-
attr_accessor :
|
9
|
+
attr_accessor :less_scores, :smoothing, :items, :logger
|
7
10
|
|
8
11
|
def initialize
|
9
|
-
@
|
12
|
+
@less_scores = {}
|
10
13
|
@items = Set.new
|
11
14
|
@smoothing = 5
|
12
15
|
@vote_count = 0
|
16
|
+
@rank_count = 0
|
17
|
+
self.logger = Logger.new(STDERR)
|
18
|
+
self.logger.level = Logger::WARN
|
13
19
|
end
|
14
20
|
|
15
21
|
def add_ranking(xs)
|
22
|
+
xs = xs.uniq
|
16
23
|
return if xs.size <= 1
|
17
24
|
|
25
|
+
@rank_count += 1
|
26
|
+
|
18
27
|
reset_cached
|
19
|
-
xs.each{|x| @
|
28
|
+
xs.each{|x| @less_scores[x] ||= Hash.new(0.0); items.add x }
|
29
|
+
|
30
|
+
weight = 1.0 / (0.5 * xs.size * (xs.size - 1))
|
20
31
|
|
21
32
|
(0...xs.length).each{|i|
|
22
33
|
((i+1)...xs.length).each{|j|
|
23
|
-
@
|
34
|
+
@less_scores[xs[i]][xs[j]] += weight
|
24
35
|
}
|
25
36
|
}
|
26
37
|
|
@@ -29,49 +40,31 @@ module RankAggregation
|
|
29
40
|
|
30
41
|
def less_chances
|
31
42
|
@_less_chances ||= begin
|
43
|
+
logger.debug "calculating less_chances"
|
32
44
|
less_chances = Hash.new{|h, k| h[k] = Hash.new(0.5)}
|
33
45
|
|
34
|
-
|
46
|
+
less_scores.each{|x, vs|
|
35
47
|
vs.each{|y, c|
|
36
|
-
p = (c + 0.5 * self.smoothing) / (self.smoothing + c +
|
48
|
+
p = (c + 0.5 * self.smoothing) / (self.smoothing + c + less_scores[y][x])
|
37
49
|
less_chances[x][y] = p
|
38
50
|
less_chances[y][x] = 1 - p
|
39
51
|
}
|
40
52
|
}
|
53
|
+
logger.debug "calculating less_chances complete"
|
41
54
|
less_chances
|
42
55
|
end
|
43
56
|
end
|
44
57
|
|
45
|
-
def base_ordering
|
46
|
-
@_base_ordering ||= begin
|
47
|
-
edges = []
|
48
|
-
less_chances.each{|x, ys|
|
49
|
-
ys.each{|y, v|
|
50
|
-
edges << [x, y, v] if v > 0.5
|
51
|
-
}
|
52
|
-
}
|
53
|
-
edges.sort!{|x, y| y[2] <=> x[2]}
|
54
|
-
|
55
|
-
ordering = Ordering.new(less_chances.keys)
|
56
|
-
|
57
|
-
edges.each{|x, y, v|
|
58
|
-
ordering.determine(x, y)
|
59
|
-
break if ordering.determined?
|
60
|
-
}
|
61
|
-
ordering
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# The rough score for x is the average chance of it being > y for all y we've got a comparison with
|
66
|
-
# We use this as a tie breaking heuristic.
|
67
58
|
def rough_scores
|
68
59
|
@_rough_scores ||= begin
|
60
|
+
logger.debug "calculating rough_scores"
|
69
61
|
# This markov chain is based off MC4. The idea is as follows:
|
70
62
|
# Starting at an item we pick one of the other items at random.
|
71
63
|
# We then transition to that item with probability P(i < j).
|
72
64
|
# If we fail to transition we stay where we are.
|
73
65
|
# i.e. the probability of transitioning from i to j with i != j is 1/(n-1) P(i < j).
|
74
66
|
|
67
|
+
logger.debug "calculating transition probabilities"
|
75
68
|
transitions = {}
|
76
69
|
|
77
70
|
@items.each{|i|
|
@@ -90,7 +83,10 @@ module RankAggregation
|
|
90
83
|
end
|
91
84
|
}
|
92
85
|
|
93
|
-
|
86
|
+
logger.debug "calculating transition probabilities complete"
|
87
|
+
result = MarkovChain.new(@items, transitions, logger).stationary_distribution
|
88
|
+
logger.debug "calculating rough_scores complete"
|
89
|
+
result
|
94
90
|
end
|
95
91
|
end
|
96
92
|
|
@@ -102,12 +98,72 @@ module RankAggregation
|
|
102
98
|
|
103
99
|
def combined_rankings
|
104
100
|
@_combined_rankings ||= begin
|
105
|
-
|
101
|
+
triangle_shuffle(rough_combined_rankings)
|
106
102
|
end
|
107
103
|
end
|
108
104
|
|
105
|
+
# Score of the final aggregate ranking.
#
# Returns the kendall_distance_for value of combined_rankings. The value is
# computed on first use and kept in @_kendall_distance; like the other
# @_-prefixed instance variables it is set back to nil by reset_cached.
def kendall_distance
  cached = @_kendall_distance
  return cached if cached

  @_kendall_distance = kendall_distance_for(combined_rankings)
end
|
108
|
+
|
109
109
|
private
|
110
110
|
|
111
|
+
# Scores an ordering against the pairwise weights collected in less_scores.
#
# ranks - ordered Array of items. Every item must have an entry in
#         less_scores (add_ranking creates one for each item it sees).
#
# For every pair of positions i < j the weight less_scores[ranks[j]][ranks[i]]
# is added up, i.e. the weight of the input rankings that placed the later
# item ahead of the earlier one. The total is divided by @rank_count.
#
# Returns a Float. When no ranking has been counted yet the result is 0.0;
# the plain division would yield NaN in that case.
def kendall_distance_for(ranks)
  return 0.0 if @rank_count.zero?

  disagreement = 0.0
  ranks.each_with_index do |earlier, position|
    # Only items placed after `earlier` can be out of order with respect to it.
    ranks.drop(position + 1).each do |later|
      disagreement += less_scores[later][earlier]
    end
  end
  disagreement / @rank_count
end
|
120
|
+
|
121
|
+
# Locally improves a ranking by re-ordering windows of three adjacent items.
#
# ranks - ordered Array of items. It is duplicated first, so the caller's
#         array is never modified.
#
# Each pass visits the positions i, i+1 and i+2 for every i, scores all six
# permutations of those three items with kendall_distance_for, and rewrites
# the window when the best permutation scores strictly lower than the current
# arrangement. Passes are repeated until one completes without any rewrite.
# Lists with fewer than three items have no window and come back as an
# unchanged copy.
#
# Returns the re-ordered copy.
def triangle_shuffle(ranks)
  ranks = ranks.dup
  iterations = 0

  loop do
    iterations += 1
    shuffle_count = 0

    (0...ranks.length - 2).each do |start|
      a, b, c = ranks[start, 3]

      # The current arrangement is listed first so its score can be read back.
      candidates = [
        [a, b, c],
        [a, c, b],
        [b, a, c],
        [b, c, a],
        [c, a, b],
        [c, b, a]
      ].map { |permutation| [kendall_distance_for(permutation), permutation] }

      original_score = candidates.first.first

      # Comparing the [score, permutation] pairs directly breaks score ties by
      # comparing the items themselves. When the items are not mutually
      # comparable that comparison is nil, which would make a bare `min` raise
      # ArgumentError, so fall back to the position in the candidate list.
      winner, _position = candidates.each_with_index.min { |(left, left_pos), (right, right_pos)|
        (left <=> right) || (left_pos <=> right_pos)
      }
      best_score, best = winner

      next unless best_score < original_score

      shuffle_count += 1
      ranks[start, 3] = best
      logger.debug {
        "#{a}, #{b}, #{c} shuffled to #{best.join(", ")}. Score went from #{original_score} to #{best_score}"
      }
    end

    logger.debug "triangle shuffling iteration ##{iterations} performed #{shuffle_count} shuffles"
    break if shuffle_count.zero?
  end

  ranks
end
|
166
|
+
|
111
167
|
def reset_cached
|
112
168
|
self.instance_variables.grep(/^@_/).each{|v| instance_variable_set(v, nil)}
|
113
169
|
end
|
data/notes/algorithm.markdown
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Algorithm description
|
2
2
|
|
3
|
+
Warning: These notes are a bit out of date.
|
4
|
+
|
3
5
|
This is a description of the algorithm embodied in this library.
|
4
6
|
|
5
7
|
The input of this algorithm is a list of partial rankings of a set of items. (The set of items is not known up front; it is computed from the lists. This is a minor and not terribly important detail.)
|
data/rank-aggregation.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rank-aggregation}
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David R. MacIver"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-07-08}
|
13
13
|
s.default_executable = %q{rank}
|
14
14
|
s.email = %q{david@drmaciver.com}
|
15
15
|
s.executables = ["rank"]
|
@@ -23,13 +23,12 @@ Gem::Specification.new do |s|
|
|
23
23
|
"bin/rank",
|
24
24
|
"lib/rank-aggregation.rb",
|
25
25
|
"lib/rank-aggregation/markov.rb",
|
26
|
-
"lib/rank-aggregation/ordering.rb",
|
27
26
|
"lib/rank-aggregation/ranker.rb",
|
28
|
-
"lib/rank-aggregation/scorer.rb",
|
29
27
|
"notes/algorithm.markdown",
|
30
28
|
"rank-aggregation.gemspec",
|
31
29
|
"samples/clear-with-noise",
|
32
30
|
"samples/different-sizes-interleaved",
|
31
|
+
"samples/images",
|
33
32
|
"samples/languages",
|
34
33
|
"samples/pairs",
|
35
34
|
"samples/random",
|
data/samples/images
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
4713907317 4714793516
|
2
|
+
4714044861 4715364378
|
3
|
+
4713401785 4715058159
|
4
|
+
4713685521 4713662041
|
5
|
+
4714695978 4714036236
|
6
|
+
4714094858 4714570862
|
7
|
+
4714120928 4714183405
|
8
|
+
4714519726 4713952977
|
9
|
+
4714809167 4714181334
|
10
|
+
4713940506 4713660541
|
11
|
+
4715424340 4714519726
|
12
|
+
4713379937 0
|
13
|
+
4714441346 4713614365
|
14
|
+
4713940506 4714519726
|
15
|
+
4714866582 4714214085
|
16
|
+
4713907317 4714094858
|
17
|
+
4714707313 4714173098
|
18
|
+
4714441346 4713387133
|
19
|
+
4713981249 4714633673
|
20
|
+
4714994910 4714570862
|
21
|
+
4714157060 4714209769
|
22
|
+
4714967844 4714180352
|
23
|
+
4715190691 4714982926
|
24
|
+
4714629861 4714214085
|
25
|
+
4714793516 4714633673
|
26
|
+
4715213970 4714625095
|
27
|
+
4714286467 4715003796
|
28
|
+
4715856420 4714073463
|
29
|
+
4714695978 4714214085
|
30
|
+
4714866582 4713526865
|
31
|
+
4714625095 4713660541
|
32
|
+
4715424340 4714834229
|
33
|
+
4714930683 4714930683
|
34
|
+
4714274838 4714574641
|
35
|
+
4714589388 4714286467
|
36
|
+
4714241833 4714384952
|
37
|
+
4714820813 4714157060
|
38
|
+
4714793516 4714820813
|
39
|
+
4714157060 4713981249
|
40
|
+
4713660541 4714180352
|
41
|
+
4714990888 4714100910
|
42
|
+
4715453204 4714319362
|
43
|
+
4714664778 4713651819
|
44
|
+
4714866582 4714151904
|
45
|
+
4713693925 4713981249
|
46
|
+
4713906389 4714227818
|
47
|
+
4714393079 4714367616
|
48
|
+
4715073620 4714183405
|
49
|
+
4715814487 4713589101
|
50
|
+
4714509338 4714192076
|
51
|
+
4714291607 4714871415
|
52
|
+
4713788611 4716484338
|
53
|
+
4713965261 4714982926
|
54
|
+
4713685521 4714793516
|
55
|
+
4714928611 4714068646
|
56
|
+
4715085584 4713616757
|
57
|
+
4713379937 4714183405
|
58
|
+
4715356822 4715012282
|
59
|
+
4715100820 4716703934
|
60
|
+
4715714373 4714227078
|
61
|
+
4715763896 4714087637
|
62
|
+
4713844607 4714133515
|
63
|
+
4714661034 4713997878
|
64
|
+
4714871415 4715860020
|
65
|
+
4714797576 4714534020
|
66
|
+
4715558413 4714589388
|
67
|
+
4714096389 4715073620
|
68
|
+
4716091440 4714763663
|
69
|
+
4714121666 4714137974
|
70
|
+
4715343302 4714145722
|
71
|
+
4714629861 4713776313
|
72
|
+
4714866582 4713736745
|
73
|
+
4714512738 4715963672
|
74
|
+
4714185386 4715242266
|
75
|
+
4714967844 4715888970
|
76
|
+
4714173098 4716213882
|
77
|
+
4715049826 4713612015
|
78
|
+
4713752261 4713979531
|
79
|
+
4713365017 4716213882
|
80
|
+
4713744349 4713686267
|
81
|
+
4714810870 4714658782
|
82
|
+
4714533206 4715356822
|
83
|
+
4713314831 4714905429
|
84
|
+
4715388191 4715489479
|
85
|
+
4714692816 4715453204
|
86
|
+
4713898663 4714137996
|
87
|
+
4714522757 4714107938
|
88
|
+
4713781919 4715430345
|
89
|
+
4716515978 4715003796
|
90
|
+
4714533547 4714506478
|
91
|
+
4716526328 4715040480
|
92
|
+
4714182023 4714681315
|
93
|
+
4715940337 4714822451
|
94
|
+
4714435781 4715888970
|
95
|
+
4714423390 4714930683
|
96
|
+
4715215778 4714644144
|
97
|
+
4714533547 4715324294
|
98
|
+
4713495537 4713951401
|
99
|
+
4715215778 4714931374
|
100
|
+
4713575079 4713418207
|
101
|
+
4715602724 4714692816
|
102
|
+
4714274838 4715426962
|
103
|
+
4714179261 4716714528
|
104
|
+
4713853729 4715075632
|
105
|
+
4714525315 4713936967
|
106
|
+
4714264652 4714928611
|
107
|
+
4714207610 4713940506
|
108
|
+
4715215778 4713405627
|
109
|
+
4714522289 4713853729
|
110
|
+
4715992740 4713547921
|
111
|
+
4715233482 4715075632
|
112
|
+
4716115538 4714695978
|
113
|
+
4713980054 4715683996
|
114
|
+
4715073620 4713892543
|
115
|
+
4714625095 4713854897
|
116
|
+
4714967844 4714820813
|
117
|
+
4714067235 4713587865
|
118
|
+
4713844121 4715200759
|
119
|
+
4714430684 4714684363
|
120
|
+
4714442589 4713917102
|
121
|
+
4714001813 4714574641
|
122
|
+
4715331713 4714535413
|
123
|
+
4715153745 4715617690
|
124
|
+
4713575079 4715888970
|
125
|
+
4713879311 4714344426
|
126
|
+
4715217508 4716083048
|
127
|
+
4715741624 4713749569
|
128
|
+
4713788611 4715242266
|
129
|
+
4714449713 4714329119
|
130
|
+
4714808591 4714589388
|
131
|
+
4714959812 4714133515
|
132
|
+
4714037152 4714490458
|
133
|
+
4714941210 4714970667
|
134
|
+
4713574409 4714103021
|
135
|
+
4713997878 4714241833
|
136
|
+
4715535678 4715856420
|
137
|
+
4715324294 4713322691
|
138
|
+
4715477764 4714522289
|
139
|
+
4714236828 4714534020
|
140
|
+
4714545096 4713495101
|
141
|
+
4715322098 4714341509
|
142
|
+
4714137996 4714341509
|
143
|
+
4714990888 4714179898
|
144
|
+
4713981249 4713691107
|
145
|
+
4713947773 4713401785
|
146
|
+
4713879311 4713689031
|
147
|
+
4715992740 4715103668
|
148
|
+
4715190691 4715005229
|
149
|
+
4713685665 4713937913
|
150
|
+
4714091142 4713689031
|
151
|
+
4714695978 4715005260
|
152
|
+
4713917539 4713575079
|
153
|
+
4715049826 4713631637
|
154
|
+
4715669395 4715503820
|
155
|
+
4714493522 4713685521
|
156
|
+
4715364378 4713907317
|
157
|
+
4713844607 4714808591
|
158
|
+
4715260222 4714075745
|
159
|
+
4713612015 4713913835
|
160
|
+
4715501923 4713913835
|
161
|
+
4715627740 4714367073
|
162
|
+
4715669286 4715215778
|
163
|
+
4715069575 4715859809
|
164
|
+
4713931385 4714121666
|
165
|
+
4713980054 4715678715
|
166
|
+
4715217508 4715739086
|
167
|
+
4713566967 4714592156
|
168
|
+
4715669286 4715683996
|
169
|
+
4715069575 4713458933
|
170
|
+
4713443893 4713575079
|
171
|
+
4716120110 4713951401
|
172
|
+
4714232322 4713967155
|
173
|
+
4714866582 4714341509
|
174
|
+
4715217376 4713879311
|
175
|
+
4715605259 4714219924
|
176
|
+
4713322691 4715374569
|
177
|
+
4714149874 4714034091
|
178
|
+
4715374569 4714341509
|
179
|
+
4714797576 4714209769
|
180
|
+
4714326747 4713604321
|
181
|
+
4714479137 4714905514
|
182
|
+
4714535413 4713892543
|
183
|
+
4714493522 4715088795
|
184
|
+
4713666531 4715058159
|
185
|
+
4713611777 4713666531
|
186
|
+
4713969986 4715285982
|
187
|
+
4713570275 4714132776
|
188
|
+
4714590394 4715525984
|
189
|
+
4714150388 4714522289
|
190
|
+
4714345492 4714592156
|
191
|
+
4715775475 4714393079
|
192
|
+
4714423390 4714335269
|
193
|
+
4713570275 4714589388
|
194
|
+
4715602724 4715219978
|
195
|
+
4714120928 4715266308
|
196
|
+
4714695978 4713931385
|
197
|
+
4714990888 4714509338
|
198
|
+
4714353096 4714678696
|
199
|
+
4714096389 4714522289
|
200
|
+
4715056162 4714274838
|
201
|
+
4714107938 4716703934
|
202
|
+
4714292768 4714763663
|
203
|
+
4715741624 4714036236
|
204
|
+
4714545096 4713604321
|
205
|
+
4715091935 4713952977
|
206
|
+
4715049826 4715169543
|
207
|
+
4713931385 4713502361
|
208
|
+
4714967844 4714522757
|
209
|
+
4714936074 4716091609
|
210
|
+
4715100820 4713740667
|
211
|
+
4714075745 4714522289
|
212
|
+
4715175414 4714590394
|
213
|
+
4716213882 4713574409
|
214
|
+
4713339825 4715739086
|
215
|
+
4715775475 4716193284
|
216
|
+
4715388191 4714384952
|
217
|
+
4714274838 4713781919
|
218
|
+
4714546309 4715739086
|
219
|
+
4714207610 4714423390
|
220
|
+
4714586002 4715233482
|
221
|
+
4714227078 4714180352
|
222
|
+
4714546309 4714254254
|
223
|
+
4715477764 4714548260
|
224
|
+
4713635095 4714661034
|
225
|
+
4713339825 4714120928
|
226
|
+
4714094858 4714590394
|
227
|
+
4715364099 4714209769
|
228
|
+
4714187130 4714391365
|
229
|
+
4714614348 4713972161
|
230
|
+
4713441293 4714227818
|
231
|
+
4714941210 4715012282
|
232
|
+
4715963672 4714317837
|
233
|
+
4713363381 4714440750
|
234
|
+
4715665972 4713749569
|
235
|
+
4714329119 4714909026
|
236
|
+
4713350507 4714930683
|
237
|
+
4713285765 4713931385
|
238
|
+
4715284379 4714535413
|
239
|
+
4714187130 4714525527
|
240
|
+
4715286921 4714448373
|
241
|
+
4713368857 4713631637
|
242
|
+
4714695978 4715005260
|
243
|
+
4714599345 4713506075
|
244
|
+
4713651819 4714796667
|
245
|
+
4714187130 4714367073
|
246
|
+
4713614365 4714442589
|
247
|
+
4714091142 4714137974
|
248
|
+
4713980054 4714353096
|
249
|
+
4715550743 4714467446
|
data/scores
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
clear-with-noise: 0.213968253968254
|
2
|
-
different-sizes-interleaved: 0.
|
3
|
-
|
2
|
+
different-sizes-interleaved: 0.043343653250774
|
3
|
+
images: 0.0524193548387097
|
4
|
+
languages: 0.372378907195571
|
4
5
|
pairs: 0.333333333333333
|
5
|
-
random: 0.
|
6
|
+
random: 0.478733444496679
|
@@ -1,6 +1,9 @@
|
|
1
1
|
require "helper"
|
2
2
|
|
3
|
+
A, B, C = [:a, :b, :c]
|
4
|
+
|
3
5
|
describe RankAggregation do
|
6
|
+
|
4
7
|
it "should aggregate a single rank into itself" do
|
5
8
|
r = (1..10).to_a
|
6
9
|
RankAggregation.combine_rankings([r]).should == r
|
@@ -27,4 +30,26 @@ describe RankAggregation do
|
|
27
30
|
RankAggregation.combine_rankings(r2).should == b
|
28
31
|
RankAggregation.combine_rankings(r3).should == c
|
29
32
|
end
|
33
|
+
|
34
|
+
it "should not consider alternatives to be irrelevant" do
|
35
|
+
# This is my example from "Irrelevant alternatives aren't"
|
36
|
+
# which shows that you need to consider C to get the order of
|
37
|
+
# A and B right, as there's a 50/50 voting split between the
|
38
|
+
# two of them, but clear majorities showing that B < C < A
|
39
|
+
|
40
|
+
RankAggregation.combine_rankings([
|
41
|
+
[A, B, C],
|
42
|
+
[B, C, A],
|
43
|
+
[C, A, B],
|
44
|
+
[B, C, A]
|
45
|
+
]).should == [B, C, A]
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should follow the majority with greater weight behind it" do
|
49
|
+
RankAggregation.combine_rankings(
|
50
|
+
[[B, C]] * 6 +
|
51
|
+
[[A, B]] * 10 +
|
52
|
+
[[C, A]] * 5
|
53
|
+
).should == [A, B, C]
|
54
|
+
end
|
30
55
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rank-aggregation
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 5
|
10
|
+
version: 0.0.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- David R. MacIver
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-07-08 00:00:00 +01:00
|
19
19
|
default_executable: rank
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -47,13 +47,12 @@ files:
|
|
47
47
|
- bin/rank
|
48
48
|
- lib/rank-aggregation.rb
|
49
49
|
- lib/rank-aggregation/markov.rb
|
50
|
-
- lib/rank-aggregation/ordering.rb
|
51
50
|
- lib/rank-aggregation/ranker.rb
|
52
|
-
- lib/rank-aggregation/scorer.rb
|
53
51
|
- notes/algorithm.markdown
|
54
52
|
- rank-aggregation.gemspec
|
55
53
|
- samples/clear-with-noise
|
56
54
|
- samples/different-sizes-interleaved
|
55
|
+
- samples/images
|
57
56
|
- samples/languages
|
58
57
|
- samples/pairs
|
59
58
|
- samples/random
|
@@ -1,44 +0,0 @@
|
|
1
|
-
require "set"
|
2
|
-
|
3
|
-
module RankAggregation
|
4
|
-
class Ordering
|
5
|
-
attr_accessor :order, :items, :determined
|
6
|
-
|
7
|
-
def initialize(items)
|
8
|
-
@items = Set[*items.to_a]
|
9
|
-
@determined = Set.new
|
10
|
-
@order = Hash.new{|h, k| h[k] = { k => 0 }}
|
11
|
-
end
|
12
|
-
|
13
|
-
def determine(x, y)
|
14
|
-
return false if @order[x][y]
|
15
|
-
|
16
|
-
@order[x][y] = -1
|
17
|
-
@order[y][x] = 1
|
18
|
-
|
19
|
-
@determined << x if @order[x].size == @items.size
|
20
|
-
@determined << y if @order[y].size == @items.size
|
21
|
-
|
22
|
-
@order[x].each{|z, v|
|
23
|
-
determine(z, y) if v == 1
|
24
|
-
}
|
25
|
-
|
26
|
-
@order[y].each{|z, v|
|
27
|
-
determine(x, z) if v == -1
|
28
|
-
}
|
29
|
-
true
|
30
|
-
end
|
31
|
-
|
32
|
-
def determined?(item=nil)
|
33
|
-
if item
|
34
|
-
return self.determined.include?(item)
|
35
|
-
else
|
36
|
-
return self.determined.size == self.items.size
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def [](x, y)
|
41
|
-
@order[x][y]
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
module RankAggregation
|
2
|
-
module Scorer
|
3
|
-
def average_kendall_distance(aggregate, ranks)
|
4
|
-
a_indices = {}
|
5
|
-
|
6
|
-
aggregate.each_with_index{|x, i|
|
7
|
-
a_indices[x] = i
|
8
|
-
}
|
9
|
-
|
10
|
-
parts = ranks.select{|x| x.size > 1 }.map{|x| kendal_distance(a_indices, x) }
|
11
|
-
parts.inject(0.0){|x, y| x + y} / ranks.size
|
12
|
-
end
|
13
|
-
|
14
|
-
# TODO: Decent implementation of this
|
15
|
-
private
|
16
|
-
|
17
|
-
def kendal_distance(a_indices, b)
|
18
|
-
|
19
|
-
tot = 0.0
|
20
|
-
(0...b.length).each{|i|
|
21
|
-
((i + 1)...b.length).each{|j|
|
22
|
-
x = a_indices[b[i]]
|
23
|
-
y = a_indices[b[j]]
|
24
|
-
next unless x && y
|
25
|
-
|
26
|
-
tot += 1 if x > y
|
27
|
-
}
|
28
|
-
}
|
29
|
-
|
30
|
-
n = (a_indices.keys & b).size
|
31
|
-
|
32
|
-
p b if n <= 1
|
33
|
-
|
34
|
-
score = tot / (0.5 * n * (n - 1))
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class <<Scorer
|
39
|
-
include Scorer
|
40
|
-
end
|
41
|
-
end
|