rank-aggregation 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -4
- data/VERSION +1 -1
- data/bin/rank +8 -10
- data/lib/rank-aggregation/markov.rb +5 -11
- data/lib/rank-aggregation/ranker.rb +87 -31
- data/notes/algorithm.markdown +2 -0
- data/rank-aggregation.gemspec +3 -4
- data/samples/images +249 -0
- data/scores +4 -3
- data/spec/rank-aggregation_spec.rb +25 -0
- metadata +5 -6
- data/lib/rank-aggregation/ordering.rb +0 -44
- data/lib/rank-aggregation/scorer.rb +0 -41
data/Rakefile
CHANGED
@@ -26,7 +26,6 @@ end
|
|
26
26
|
task "scores" do
|
27
27
|
$: << "lib"
|
28
28
|
require "rank-aggregation"
|
29
|
-
require "rank-aggregation/scorer"
|
30
29
|
|
31
30
|
File.open("scores", "w"){ |o|
|
32
31
|
Dir["samples/*"].sort.each{|file|
|
@@ -38,9 +37,7 @@ task "scores" do
|
|
38
37
|
|
39
38
|
items.each{|i| r.add_ranking i }
|
40
39
|
|
41
|
-
|
42
|
-
|
43
|
-
o.puts "#{name}: #{kendall_distance}"
|
40
|
+
o.puts "#{name}: #{r.kendall_distance}"
|
44
41
|
}
|
45
42
|
}
|
46
43
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.
|
1
|
+
0.0.5
|
data/bin/rank
CHANGED
@@ -8,30 +8,28 @@ require "trollop"
|
|
8
8
|
|
9
9
|
opts = Trollop.options do
|
10
10
|
opt :score, "Output scoring metrics on STDERR"
|
11
|
-
opt :rough, "Use only the rough score for sorting"
|
11
|
+
opt :rough, "Use only the rough score for sorting, don't perform any optimisation on top of that"
|
12
12
|
opt :smoothing, "Set the smoothing parameter: It should be approximately equal to the sample size you consider large enough to matter", :type => :int
|
13
|
+
opt :debug
|
13
14
|
end
|
14
15
|
|
15
16
|
ranker = RankAggregation::Ranker.new
|
16
17
|
|
17
18
|
ranker.smoothing = opts[:smoothing] if opts[:smoothing]
|
18
|
-
|
19
|
-
collect = [] if opts[:score]
|
19
|
+
ranker.logger.level = Logger::DEBUG if opts[:debug]
|
20
20
|
|
21
21
|
ARGF.each_line{|l|
|
22
22
|
items = l.split("\t").map{|x| x.strip}.select{|x| x.length > 0}
|
23
|
-
|
24
|
-
collect << items if collect
|
25
|
-
|
26
23
|
ranker.add_ranking items
|
27
24
|
}
|
28
25
|
|
29
|
-
result =
|
26
|
+
result = (
|
27
|
+
if opts[:rough] then ranker.rough_combined_rankings
|
28
|
+
else ranker.combined_rankings end
|
29
|
+
)
|
30
30
|
|
31
31
|
puts result.join("\t")
|
32
32
|
|
33
33
|
if opts[:score]
|
34
|
-
|
35
|
-
|
36
|
-
STDERR.puts "Average kendal distance: #{RankAggregation::Scorer.average_kendall_distance result, collect}"
|
34
|
+
STDERR.puts "Average kendal distance: #{ranker.kendall_distance}"
|
37
35
|
end
|
@@ -1,16 +1,9 @@
|
|
1
1
|
module RankAggregation
|
2
2
|
class MarkovChain
|
3
|
-
def initialize(items, transitions)
|
4
|
-
@
|
3
|
+
def initialize(items, transitions, logger)
|
4
|
+
@logger = logger
|
5
|
+
@transitions = transitions
|
5
6
|
@items = items
|
6
|
-
|
7
|
-
items.each{|x|
|
8
|
-
tot = items.map{|y| transitions[x][y]}.inject(0.0){|u, v| u + v}
|
9
|
-
|
10
|
-
tx = (@transitions[x] = {})
|
11
|
-
|
12
|
-
items.each{|y| tx[y] = transitions[x][y] / tot }
|
13
|
-
}
|
14
7
|
end
|
15
8
|
|
16
9
|
def stationary_distribution
|
@@ -18,7 +11,8 @@ module RankAggregation
|
|
18
11
|
|
19
12
|
@items.each{|x| dist[x] = 1.0 / @items.size }
|
20
13
|
|
21
|
-
|
14
|
+
3.times{ |i|
|
15
|
+
@logger.debug "markov chain iteration #{i}"
|
22
16
|
new_dist = Hash.new(0.0)
|
23
17
|
|
24
18
|
dist.each{|x, p|
|
@@ -1,26 +1,37 @@
|
|
1
|
-
require "
|
1
|
+
require "set"
|
2
|
+
require "logger"
|
3
|
+
|
2
4
|
require "rank-aggregation/markov"
|
3
5
|
|
6
|
+
|
4
7
|
module RankAggregation
|
5
8
|
class Ranker
|
6
|
-
attr_accessor :
|
9
|
+
attr_accessor :less_scores, :smoothing, :items, :logger
|
7
10
|
|
8
11
|
def initialize
|
9
|
-
@
|
12
|
+
@less_scores = {}
|
10
13
|
@items = Set.new
|
11
14
|
@smoothing = 5
|
12
15
|
@vote_count = 0
|
16
|
+
@rank_count = 0
|
17
|
+
self.logger = Logger.new(STDERR)
|
18
|
+
self.logger.level = Logger::WARN
|
13
19
|
end
|
14
20
|
|
15
21
|
def add_ranking(xs)
|
22
|
+
xs = xs.uniq
|
16
23
|
return if xs.size <= 1
|
17
24
|
|
25
|
+
@rank_count += 1
|
26
|
+
|
18
27
|
reset_cached
|
19
|
-
xs.each{|x| @
|
28
|
+
xs.each{|x| @less_scores[x] ||= Hash.new(0.0); items.add x }
|
29
|
+
|
30
|
+
weight = 1.0 / (0.5 * xs.size * (xs.size - 1))
|
20
31
|
|
21
32
|
(0...xs.length).each{|i|
|
22
33
|
((i+1)...xs.length).each{|j|
|
23
|
-
@
|
34
|
+
@less_scores[xs[i]][xs[j]] += weight
|
24
35
|
}
|
25
36
|
}
|
26
37
|
|
@@ -29,49 +40,31 @@ module RankAggregation
|
|
29
40
|
|
30
41
|
def less_chances
|
31
42
|
@_less_chances ||= begin
|
43
|
+
logger.debug "calculating less_chances"
|
32
44
|
less_chances = Hash.new{|h, k| h[k] = Hash.new(0.5)}
|
33
45
|
|
34
|
-
|
46
|
+
less_scores.each{|x, vs|
|
35
47
|
vs.each{|y, c|
|
36
|
-
p = (c + 0.5 * self.smoothing) / (self.smoothing + c +
|
48
|
+
p = (c + 0.5 * self.smoothing) / (self.smoothing + c + less_scores[y][x])
|
37
49
|
less_chances[x][y] = p
|
38
50
|
less_chances[y][x] = 1 - p
|
39
51
|
}
|
40
52
|
}
|
53
|
+
logger.debug "calculating less_chances complete"
|
41
54
|
less_chances
|
42
55
|
end
|
43
56
|
end
|
44
57
|
|
45
|
-
def base_ordering
|
46
|
-
@_base_ordering ||= begin
|
47
|
-
edges = []
|
48
|
-
less_chances.each{|x, ys|
|
49
|
-
ys.each{|y, v|
|
50
|
-
edges << [x, y, v] if v > 0.5
|
51
|
-
}
|
52
|
-
}
|
53
|
-
edges.sort!{|x, y| y[2] <=> x[2]}
|
54
|
-
|
55
|
-
ordering = Ordering.new(less_chances.keys)
|
56
|
-
|
57
|
-
edges.each{|x, y, v|
|
58
|
-
ordering.determine(x, y)
|
59
|
-
break if ordering.determined?
|
60
|
-
}
|
61
|
-
ordering
|
62
|
-
end
|
63
|
-
end
|
64
|
-
|
65
|
-
# The rough score for x is the average chance of it being > y for all y we've got a comparison with
|
66
|
-
# We use this as a tie breaking heuristic.
|
67
58
|
def rough_scores
|
68
59
|
@_rough_scores ||= begin
|
60
|
+
logger.debug "calculating rough_scores"
|
69
61
|
# This markov chain is based off MC4. The idea is as follows:
|
70
62
|
# Starting at an item we pick one of the other items at random.
|
71
63
|
# We then transition to that item with probability P(i < j).
|
72
64
|
# If we fail to transition we stay where we are.
|
73
65
|
# i.e. the probability of transitioning form i to j with i != j is 1/(n-1) P(i < j).
|
74
66
|
|
67
|
+
logger.debug "calculating transition probabilities"
|
75
68
|
transitions = {}
|
76
69
|
|
77
70
|
@items.each{|i|
|
@@ -90,7 +83,10 @@ module RankAggregation
|
|
90
83
|
end
|
91
84
|
}
|
92
85
|
|
93
|
-
|
86
|
+
logger.debug "calculating transition probabilities complete"
|
87
|
+
result = MarkovChain.new(@items, transitions, logger).stationary_distribution
|
88
|
+
logger.debug "calculating rough_scores complete"
|
89
|
+
result
|
94
90
|
end
|
95
91
|
end
|
96
92
|
|
@@ -102,12 +98,72 @@ module RankAggregation
|
|
102
98
|
|
103
99
|
def combined_rankings
|
104
100
|
@_combined_rankings ||= begin
|
105
|
-
|
101
|
+
triangle_shuffle(rough_combined_rankings)
|
106
102
|
end
|
107
103
|
end
|
108
104
|
|
105
|
+
def kendall_distance
|
106
|
+
@_kendall_distance ||= kendall_distance_for(combined_rankings)
|
107
|
+
end
|
108
|
+
|
109
109
|
private
|
110
110
|
|
111
|
+
def kendall_distance_for(ranks)
|
112
|
+
tot = 0.0
|
113
|
+
(0...ranks.length).each{|i|
|
114
|
+
(i+1...ranks.length).each{|j|
|
115
|
+
tot += less_scores[ranks[j]][ranks[i]]
|
116
|
+
}
|
117
|
+
}
|
118
|
+
tot / @rank_count
|
119
|
+
end
|
120
|
+
|
121
|
+
def triangle_shuffle(ranks)
|
122
|
+
ranks = ranks.dup
|
123
|
+
i = 0
|
124
|
+
|
125
|
+
changed = true
|
126
|
+
|
127
|
+
iterations = 0
|
128
|
+
|
129
|
+
while changed
|
130
|
+
iterations += 1
|
131
|
+
|
132
|
+
shuffle_count = 0
|
133
|
+
|
134
|
+
changed = false
|
135
|
+
(0...ranks.length - 2).each do |i|
|
136
|
+
# we look at the positions i, i+1 and i+2 and form a kemeny optimal ordering of the items
|
137
|
+
# there
|
138
|
+
a, b, c = ranks[i..i+2]
|
139
|
+
|
140
|
+
shuffles = [
|
141
|
+
[a, b, c],
|
142
|
+
[a, c, b],
|
143
|
+
[b, a, c],
|
144
|
+
[b, c, a],
|
145
|
+
[c, a, b],
|
146
|
+
[c, b, a]
|
147
|
+
].map{|x| [kendall_distance_for(x), x]}
|
148
|
+
|
149
|
+
original_score = shuffles[0][0]
|
150
|
+
best_score, best = shuffles.min
|
151
|
+
|
152
|
+
if best_score < original_score
|
153
|
+
shuffle_count += 1
|
154
|
+
changed = true
|
155
|
+
ranks[i..i+2] = best
|
156
|
+
logger.debug{
|
157
|
+
"#{a}, #{b}, #{c} shuffled to #{best.join(", ")}. Score went from #{original_score} to #{best_score}"
|
158
|
+
}
|
159
|
+
end
|
160
|
+
|
161
|
+
end
|
162
|
+
logger.debug "triangle shuffling iteration ##{iterations} performed #{shuffle_count} shuffles"
|
163
|
+
end
|
164
|
+
ranks
|
165
|
+
end
|
166
|
+
|
111
167
|
def reset_cached
|
112
168
|
self.instance_variables.grep(/^@_/).each{|v| instance_variable_set(v, nil)}
|
113
169
|
end
|
data/notes/algorithm.markdown
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
# Algorithm description
|
2
2
|
|
3
|
+
Warning: These notes are a bit out of date.
|
4
|
+
|
3
5
|
This is a description of the algorithm embodied in this library.
|
4
6
|
|
5
7
|
The input of this algorithm is a list of partial rankings of a set of items (the set of items is not known up front - it's computed from the lists. This is a minor and not terribly important detail).
|
data/rank-aggregation.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{rank-aggregation}
|
8
|
-
s.version = "0.0.
|
8
|
+
s.version = "0.0.5"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["David R. MacIver"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-07-08}
|
13
13
|
s.default_executable = %q{rank}
|
14
14
|
s.email = %q{david@drmaciver.com}
|
15
15
|
s.executables = ["rank"]
|
@@ -23,13 +23,12 @@ Gem::Specification.new do |s|
|
|
23
23
|
"bin/rank",
|
24
24
|
"lib/rank-aggregation.rb",
|
25
25
|
"lib/rank-aggregation/markov.rb",
|
26
|
-
"lib/rank-aggregation/ordering.rb",
|
27
26
|
"lib/rank-aggregation/ranker.rb",
|
28
|
-
"lib/rank-aggregation/scorer.rb",
|
29
27
|
"notes/algorithm.markdown",
|
30
28
|
"rank-aggregation.gemspec",
|
31
29
|
"samples/clear-with-noise",
|
32
30
|
"samples/different-sizes-interleaved",
|
31
|
+
"samples/images",
|
33
32
|
"samples/languages",
|
34
33
|
"samples/pairs",
|
35
34
|
"samples/random",
|
data/samples/images
ADDED
@@ -0,0 +1,249 @@
|
|
1
|
+
4713907317 4714793516
|
2
|
+
4714044861 4715364378
|
3
|
+
4713401785 4715058159
|
4
|
+
4713685521 4713662041
|
5
|
+
4714695978 4714036236
|
6
|
+
4714094858 4714570862
|
7
|
+
4714120928 4714183405
|
8
|
+
4714519726 4713952977
|
9
|
+
4714809167 4714181334
|
10
|
+
4713940506 4713660541
|
11
|
+
4715424340 4714519726
|
12
|
+
4713379937 0
|
13
|
+
4714441346 4713614365
|
14
|
+
4713940506 4714519726
|
15
|
+
4714866582 4714214085
|
16
|
+
4713907317 4714094858
|
17
|
+
4714707313 4714173098
|
18
|
+
4714441346 4713387133
|
19
|
+
4713981249 4714633673
|
20
|
+
4714994910 4714570862
|
21
|
+
4714157060 4714209769
|
22
|
+
4714967844 4714180352
|
23
|
+
4715190691 4714982926
|
24
|
+
4714629861 4714214085
|
25
|
+
4714793516 4714633673
|
26
|
+
4715213970 4714625095
|
27
|
+
4714286467 4715003796
|
28
|
+
4715856420 4714073463
|
29
|
+
4714695978 4714214085
|
30
|
+
4714866582 4713526865
|
31
|
+
4714625095 4713660541
|
32
|
+
4715424340 4714834229
|
33
|
+
4714930683 4714930683
|
34
|
+
4714274838 4714574641
|
35
|
+
4714589388 4714286467
|
36
|
+
4714241833 4714384952
|
37
|
+
4714820813 4714157060
|
38
|
+
4714793516 4714820813
|
39
|
+
4714157060 4713981249
|
40
|
+
4713660541 4714180352
|
41
|
+
4714990888 4714100910
|
42
|
+
4715453204 4714319362
|
43
|
+
4714664778 4713651819
|
44
|
+
4714866582 4714151904
|
45
|
+
4713693925 4713981249
|
46
|
+
4713906389 4714227818
|
47
|
+
4714393079 4714367616
|
48
|
+
4715073620 4714183405
|
49
|
+
4715814487 4713589101
|
50
|
+
4714509338 4714192076
|
51
|
+
4714291607 4714871415
|
52
|
+
4713788611 4716484338
|
53
|
+
4713965261 4714982926
|
54
|
+
4713685521 4714793516
|
55
|
+
4714928611 4714068646
|
56
|
+
4715085584 4713616757
|
57
|
+
4713379937 4714183405
|
58
|
+
4715356822 4715012282
|
59
|
+
4715100820 4716703934
|
60
|
+
4715714373 4714227078
|
61
|
+
4715763896 4714087637
|
62
|
+
4713844607 4714133515
|
63
|
+
4714661034 4713997878
|
64
|
+
4714871415 4715860020
|
65
|
+
4714797576 4714534020
|
66
|
+
4715558413 4714589388
|
67
|
+
4714096389 4715073620
|
68
|
+
4716091440 4714763663
|
69
|
+
4714121666 4714137974
|
70
|
+
4715343302 4714145722
|
71
|
+
4714629861 4713776313
|
72
|
+
4714866582 4713736745
|
73
|
+
4714512738 4715963672
|
74
|
+
4714185386 4715242266
|
75
|
+
4714967844 4715888970
|
76
|
+
4714173098 4716213882
|
77
|
+
4715049826 4713612015
|
78
|
+
4713752261 4713979531
|
79
|
+
4713365017 4716213882
|
80
|
+
4713744349 4713686267
|
81
|
+
4714810870 4714658782
|
82
|
+
4714533206 4715356822
|
83
|
+
4713314831 4714905429
|
84
|
+
4715388191 4715489479
|
85
|
+
4714692816 4715453204
|
86
|
+
4713898663 4714137996
|
87
|
+
4714522757 4714107938
|
88
|
+
4713781919 4715430345
|
89
|
+
4716515978 4715003796
|
90
|
+
4714533547 4714506478
|
91
|
+
4716526328 4715040480
|
92
|
+
4714182023 4714681315
|
93
|
+
4715940337 4714822451
|
94
|
+
4714435781 4715888970
|
95
|
+
4714423390 4714930683
|
96
|
+
4715215778 4714644144
|
97
|
+
4714533547 4715324294
|
98
|
+
4713495537 4713951401
|
99
|
+
4715215778 4714931374
|
100
|
+
4713575079 4713418207
|
101
|
+
4715602724 4714692816
|
102
|
+
4714274838 4715426962
|
103
|
+
4714179261 4716714528
|
104
|
+
4713853729 4715075632
|
105
|
+
4714525315 4713936967
|
106
|
+
4714264652 4714928611
|
107
|
+
4714207610 4713940506
|
108
|
+
4715215778 4713405627
|
109
|
+
4714522289 4713853729
|
110
|
+
4715992740 4713547921
|
111
|
+
4715233482 4715075632
|
112
|
+
4716115538 4714695978
|
113
|
+
4713980054 4715683996
|
114
|
+
4715073620 4713892543
|
115
|
+
4714625095 4713854897
|
116
|
+
4714967844 4714820813
|
117
|
+
4714067235 4713587865
|
118
|
+
4713844121 4715200759
|
119
|
+
4714430684 4714684363
|
120
|
+
4714442589 4713917102
|
121
|
+
4714001813 4714574641
|
122
|
+
4715331713 4714535413
|
123
|
+
4715153745 4715617690
|
124
|
+
4713575079 4715888970
|
125
|
+
4713879311 4714344426
|
126
|
+
4715217508 4716083048
|
127
|
+
4715741624 4713749569
|
128
|
+
4713788611 4715242266
|
129
|
+
4714449713 4714329119
|
130
|
+
4714808591 4714589388
|
131
|
+
4714959812 4714133515
|
132
|
+
4714037152 4714490458
|
133
|
+
4714941210 4714970667
|
134
|
+
4713574409 4714103021
|
135
|
+
4713997878 4714241833
|
136
|
+
4715535678 4715856420
|
137
|
+
4715324294 4713322691
|
138
|
+
4715477764 4714522289
|
139
|
+
4714236828 4714534020
|
140
|
+
4714545096 4713495101
|
141
|
+
4715322098 4714341509
|
142
|
+
4714137996 4714341509
|
143
|
+
4714990888 4714179898
|
144
|
+
4713981249 4713691107
|
145
|
+
4713947773 4713401785
|
146
|
+
4713879311 4713689031
|
147
|
+
4715992740 4715103668
|
148
|
+
4715190691 4715005229
|
149
|
+
4713685665 4713937913
|
150
|
+
4714091142 4713689031
|
151
|
+
4714695978 4715005260
|
152
|
+
4713917539 4713575079
|
153
|
+
4715049826 4713631637
|
154
|
+
4715669395 4715503820
|
155
|
+
4714493522 4713685521
|
156
|
+
4715364378 4713907317
|
157
|
+
4713844607 4714808591
|
158
|
+
4715260222 4714075745
|
159
|
+
4713612015 4713913835
|
160
|
+
4715501923 4713913835
|
161
|
+
4715627740 4714367073
|
162
|
+
4715669286 4715215778
|
163
|
+
4715069575 4715859809
|
164
|
+
4713931385 4714121666
|
165
|
+
4713980054 4715678715
|
166
|
+
4715217508 4715739086
|
167
|
+
4713566967 4714592156
|
168
|
+
4715669286 4715683996
|
169
|
+
4715069575 4713458933
|
170
|
+
4713443893 4713575079
|
171
|
+
4716120110 4713951401
|
172
|
+
4714232322 4713967155
|
173
|
+
4714866582 4714341509
|
174
|
+
4715217376 4713879311
|
175
|
+
4715605259 4714219924
|
176
|
+
4713322691 4715374569
|
177
|
+
4714149874 4714034091
|
178
|
+
4715374569 4714341509
|
179
|
+
4714797576 4714209769
|
180
|
+
4714326747 4713604321
|
181
|
+
4714479137 4714905514
|
182
|
+
4714535413 4713892543
|
183
|
+
4714493522 4715088795
|
184
|
+
4713666531 4715058159
|
185
|
+
4713611777 4713666531
|
186
|
+
4713969986 4715285982
|
187
|
+
4713570275 4714132776
|
188
|
+
4714590394 4715525984
|
189
|
+
4714150388 4714522289
|
190
|
+
4714345492 4714592156
|
191
|
+
4715775475 4714393079
|
192
|
+
4714423390 4714335269
|
193
|
+
4713570275 4714589388
|
194
|
+
4715602724 4715219978
|
195
|
+
4714120928 4715266308
|
196
|
+
4714695978 4713931385
|
197
|
+
4714990888 4714509338
|
198
|
+
4714353096 4714678696
|
199
|
+
4714096389 4714522289
|
200
|
+
4715056162 4714274838
|
201
|
+
4714107938 4716703934
|
202
|
+
4714292768 4714763663
|
203
|
+
4715741624 4714036236
|
204
|
+
4714545096 4713604321
|
205
|
+
4715091935 4713952977
|
206
|
+
4715049826 4715169543
|
207
|
+
4713931385 4713502361
|
208
|
+
4714967844 4714522757
|
209
|
+
4714936074 4716091609
|
210
|
+
4715100820 4713740667
|
211
|
+
4714075745 4714522289
|
212
|
+
4715175414 4714590394
|
213
|
+
4716213882 4713574409
|
214
|
+
4713339825 4715739086
|
215
|
+
4715775475 4716193284
|
216
|
+
4715388191 4714384952
|
217
|
+
4714274838 4713781919
|
218
|
+
4714546309 4715739086
|
219
|
+
4714207610 4714423390
|
220
|
+
4714586002 4715233482
|
221
|
+
4714227078 4714180352
|
222
|
+
4714546309 4714254254
|
223
|
+
4715477764 4714548260
|
224
|
+
4713635095 4714661034
|
225
|
+
4713339825 4714120928
|
226
|
+
4714094858 4714590394
|
227
|
+
4715364099 4714209769
|
228
|
+
4714187130 4714391365
|
229
|
+
4714614348 4713972161
|
230
|
+
4713441293 4714227818
|
231
|
+
4714941210 4715012282
|
232
|
+
4715963672 4714317837
|
233
|
+
4713363381 4714440750
|
234
|
+
4715665972 4713749569
|
235
|
+
4714329119 4714909026
|
236
|
+
4713350507 4714930683
|
237
|
+
4713285765 4713931385
|
238
|
+
4715284379 4714535413
|
239
|
+
4714187130 4714525527
|
240
|
+
4715286921 4714448373
|
241
|
+
4713368857 4713631637
|
242
|
+
4714695978 4715005260
|
243
|
+
4714599345 4713506075
|
244
|
+
4713651819 4714796667
|
245
|
+
4714187130 4714367073
|
246
|
+
4713614365 4714442589
|
247
|
+
4714091142 4714137974
|
248
|
+
4713980054 4714353096
|
249
|
+
4715550743 4714467446
|
data/scores
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
clear-with-noise: 0.213968253968254
|
2
|
-
different-sizes-interleaved: 0.
|
3
|
-
|
2
|
+
different-sizes-interleaved: 0.043343653250774
|
3
|
+
images: 0.0524193548387097
|
4
|
+
languages: 0.372378907195571
|
4
5
|
pairs: 0.333333333333333
|
5
|
-
random: 0.
|
6
|
+
random: 0.478733444496679
|
@@ -1,6 +1,9 @@
|
|
1
1
|
require "helper"
|
2
2
|
|
3
|
+
A, B, C = [:a, :b, :c]
|
4
|
+
|
3
5
|
describe RankAggregation do
|
6
|
+
|
4
7
|
it "should aggregate a single rank into itself" do
|
5
8
|
r = (1..10).to_a
|
6
9
|
RankAggregation.combine_rankings([r]).should == r
|
@@ -27,4 +30,26 @@ describe RankAggregation do
|
|
27
30
|
RankAggregation.combine_rankings(r2).should == b
|
28
31
|
RankAggregation.combine_rankings(r3).should == c
|
29
32
|
end
|
33
|
+
|
34
|
+
it "should not consider alternatives to be irrelevant" do
|
35
|
+
# This is my example from "Irrelevant alternatives aren't"
|
36
|
+
# which shows that you need to consider C to get the order of
|
37
|
+
# A and B right, as there's a 50/50 voting split between the
|
38
|
+
# two of them, but clear majorities showing that B < C < A
|
39
|
+
|
40
|
+
RankAggregation.combine_rankings([
|
41
|
+
[A, B, C],
|
42
|
+
[B, C, A],
|
43
|
+
[C, A, B],
|
44
|
+
[B, C, A]
|
45
|
+
]).should == [B, C, A]
|
46
|
+
end
|
47
|
+
|
48
|
+
it "should follow the majority with greater weight behind it" do
|
49
|
+
RankAggregation.combine_rankings(
|
50
|
+
[[B, C]] * 6 +
|
51
|
+
[[A, B]] * 10 +
|
52
|
+
[[C, A]] * 5
|
53
|
+
).should == [A, B, C]
|
54
|
+
end
|
30
55
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: rank-aggregation
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 0.0.
|
9
|
+
- 5
|
10
|
+
version: 0.0.5
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- David R. MacIver
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-
|
18
|
+
date: 2010-07-08 00:00:00 +01:00
|
19
19
|
default_executable: rank
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -47,13 +47,12 @@ files:
|
|
47
47
|
- bin/rank
|
48
48
|
- lib/rank-aggregation.rb
|
49
49
|
- lib/rank-aggregation/markov.rb
|
50
|
-
- lib/rank-aggregation/ordering.rb
|
51
50
|
- lib/rank-aggregation/ranker.rb
|
52
|
-
- lib/rank-aggregation/scorer.rb
|
53
51
|
- notes/algorithm.markdown
|
54
52
|
- rank-aggregation.gemspec
|
55
53
|
- samples/clear-with-noise
|
56
54
|
- samples/different-sizes-interleaved
|
55
|
+
- samples/images
|
57
56
|
- samples/languages
|
58
57
|
- samples/pairs
|
59
58
|
- samples/random
|
@@ -1,44 +0,0 @@
|
|
1
|
-
require "set"
|
2
|
-
|
3
|
-
module RankAggregation
|
4
|
-
class Ordering
|
5
|
-
attr_accessor :order, :items, :determined
|
6
|
-
|
7
|
-
def initialize(items)
|
8
|
-
@items = Set[*items.to_a]
|
9
|
-
@determined = Set.new
|
10
|
-
@order = Hash.new{|h, k| h[k] = { k => 0 }}
|
11
|
-
end
|
12
|
-
|
13
|
-
def determine(x, y)
|
14
|
-
return false if @order[x][y]
|
15
|
-
|
16
|
-
@order[x][y] = -1
|
17
|
-
@order[y][x] = 1
|
18
|
-
|
19
|
-
@determined << x if @order[x].size == @items.size
|
20
|
-
@determined << y if @order[y].size == @items.size
|
21
|
-
|
22
|
-
@order[x].each{|z, v|
|
23
|
-
determine(z, y) if v == 1
|
24
|
-
}
|
25
|
-
|
26
|
-
@order[y].each{|z, v|
|
27
|
-
determine(x, z) if v == -1
|
28
|
-
}
|
29
|
-
true
|
30
|
-
end
|
31
|
-
|
32
|
-
def determined?(item=nil)
|
33
|
-
if item
|
34
|
-
return self.determined.include?(item)
|
35
|
-
else
|
36
|
-
return self.determined.size == self.items.size
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def [](x, y)
|
41
|
-
@order[x][y]
|
42
|
-
end
|
43
|
-
end
|
44
|
-
end
|
@@ -1,41 +0,0 @@
|
|
1
|
-
module RankAggregation
|
2
|
-
module Scorer
|
3
|
-
def average_kendall_distance(aggregate, ranks)
|
4
|
-
a_indices = {}
|
5
|
-
|
6
|
-
aggregate.each_with_index{|x, i|
|
7
|
-
a_indices[x] = i
|
8
|
-
}
|
9
|
-
|
10
|
-
parts = ranks.select{|x| x.size > 1 }.map{|x| kendal_distance(a_indices, x) }
|
11
|
-
parts.inject(0.0){|x, y| x + y} / ranks.size
|
12
|
-
end
|
13
|
-
|
14
|
-
# TODO: Decent implementation of this
|
15
|
-
private
|
16
|
-
|
17
|
-
def kendal_distance(a_indices, b)
|
18
|
-
|
19
|
-
tot = 0.0
|
20
|
-
(0...b.length).each{|i|
|
21
|
-
((i + 1)...b.length).each{|j|
|
22
|
-
x = a_indices[b[i]]
|
23
|
-
y = a_indices[b[j]]
|
24
|
-
next unless x && y
|
25
|
-
|
26
|
-
tot += 1 if x > y
|
27
|
-
}
|
28
|
-
}
|
29
|
-
|
30
|
-
n = (a_indices.keys & b).size
|
31
|
-
|
32
|
-
p b if n <= 1
|
33
|
-
|
34
|
-
score = tot / (0.5 * n * (n - 1))
|
35
|
-
end
|
36
|
-
end
|
37
|
-
|
38
|
-
class <<Scorer
|
39
|
-
include Scorer
|
40
|
-
end
|
41
|
-
end
|