rank-aggregation 0.0.3 → 0.0.5

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -26,7 +26,6 @@ end
26
26
  task "scores" do
27
27
  $: << "lib"
28
28
  require "rank-aggregation"
29
- require "rank-aggregation/scorer"
30
29
 
31
30
  File.open("scores", "w"){ |o|
32
31
  Dir["samples/*"].sort.each{|file|
@@ -38,9 +37,7 @@ task "scores" do
38
37
 
39
38
  items.each{|i| r.add_ranking i }
40
39
 
41
- kendall_distance = RankAggregation::Scorer.average_kendall_distance(r.combined_rankings, items)
42
-
43
- o.puts "#{name}: #{kendall_distance}"
40
+ o.puts "#{name}: #{r.kendall_distance}"
44
41
  }
45
42
  }
46
43
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.0.5
data/bin/rank CHANGED
@@ -8,30 +8,28 @@ require "trollop"
8
8
 
9
9
  opts = Trollop.options do
10
10
  opt :score, "Output scoring metrics on STDERR"
11
- opt :rough, "Use only the rough score for sorting"
11
+ opt :rough, "Use only the rough score for sorting, don't perform any optimisation on top of that"
12
12
  opt :smoothing, "Set the smoothing parameter: It should be approximately equal to the sample size you consider large enough to matter", :type => :int
13
+ opt :debug
13
14
  end
14
15
 
15
16
  ranker = RankAggregation::Ranker.new
16
17
 
17
18
  ranker.smoothing = opts[:smoothing] if opts[:smoothing]
18
-
19
- collect = [] if opts[:score]
19
+ ranker.logger.level = Logger::DEBUG if opts[:debug]
20
20
 
21
21
  ARGF.each_line{|l|
22
22
  items = l.split("\t").map{|x| x.strip}.select{|x| x.length > 0}
23
-
24
- collect << items if collect
25
-
26
23
  ranker.add_ranking items
27
24
  }
28
25
 
29
- result = if opts[:rough] then ranker.rough_combined_rankings else ranker.combined_rankings end
26
+ result = (
27
+ if opts[:rough] then ranker.rough_combined_rankings
28
+ else ranker.combined_rankings end
29
+ )
30
30
 
31
31
  puts result.join("\t")
32
32
 
33
33
  if opts[:score]
34
- require "rank-aggregation/scorer"
35
-
36
- STDERR.puts "Average kendal distance: #{RankAggregation::Scorer.average_kendall_distance result, collect}"
34
+ STDERR.puts "Average kendal distance: #{ranker.kendall_distance}"
37
35
  end
@@ -1,16 +1,9 @@
1
1
  module RankAggregation
2
2
  class MarkovChain
3
- def initialize(items, transitions)
4
- @transitions = {}
3
+ def initialize(items, transitions, logger)
4
+ @logger = logger
5
+ @transitions = transitions
5
6
  @items = items
6
-
7
- items.each{|x|
8
- tot = items.map{|y| transitions[x][y]}.inject(0.0){|u, v| u + v}
9
-
10
- tx = (@transitions[x] = {})
11
-
12
- items.each{|y| tx[y] = transitions[x][y] / tot }
13
- }
14
7
  end
15
8
 
16
9
  def stationary_distribution
@@ -18,7 +11,8 @@ module RankAggregation
18
11
 
19
12
  @items.each{|x| dist[x] = 1.0 / @items.size }
20
13
 
21
- 10.times{
14
+ 3.times{ |i|
15
+ @logger.debug "markov chain iteration #{i}"
22
16
  new_dist = Hash.new(0.0)
23
17
 
24
18
  dist.each{|x, p|
@@ -1,26 +1,37 @@
1
- require "rank-aggregation/ordering"
1
+ require "set"
2
+ require "logger"
3
+
2
4
  require "rank-aggregation/markov"
3
5
 
6
+
4
7
  module RankAggregation
5
8
  class Ranker
6
- attr_accessor :less_counts, :smoothing, :items
9
+ attr_accessor :less_scores, :smoothing, :items, :logger
7
10
 
8
11
  def initialize
9
- @less_counts = {}
12
+ @less_scores = {}
10
13
  @items = Set.new
11
14
  @smoothing = 5
12
15
  @vote_count = 0
16
+ @rank_count = 0
17
+ self.logger = Logger.new(STDERR)
18
+ self.logger.level = Logger::WARN
13
19
  end
14
20
 
15
21
  def add_ranking(xs)
22
+ xs = xs.uniq
16
23
  return if xs.size <= 1
17
24
 
25
+ @rank_count += 1
26
+
18
27
  reset_cached
19
- xs.each{|x| @less_counts[x] ||= Hash.new(0); items.add x }
28
+ xs.each{|x| @less_scores[x] ||= Hash.new(0.0); items.add x }
29
+
30
+ weight = 1.0 / (0.5 * xs.size * (xs.size - 1))
20
31
 
21
32
  (0...xs.length).each{|i|
22
33
  ((i+1)...xs.length).each{|j|
23
- @less_counts[xs[i]][xs[j]] += 1
34
+ @less_scores[xs[i]][xs[j]] += weight
24
35
  }
25
36
  }
26
37
 
@@ -29,49 +40,31 @@ module RankAggregation
29
40
 
30
41
  def less_chances
31
42
  @_less_chances ||= begin
43
+ logger.debug "calculating less_chances"
32
44
  less_chances = Hash.new{|h, k| h[k] = Hash.new(0.5)}
33
45
 
34
- less_counts.each{|x, vs|
46
+ less_scores.each{|x, vs|
35
47
  vs.each{|y, c|
36
- p = (c + 0.5 * self.smoothing) / (self.smoothing + c + less_counts[y][x])
48
+ p = (c + 0.5 * self.smoothing) / (self.smoothing + c + less_scores[y][x])
37
49
  less_chances[x][y] = p
38
50
  less_chances[y][x] = 1 - p
39
51
  }
40
52
  }
53
+ logger.debug "calculating less_chances complete"
41
54
  less_chances
42
55
  end
43
56
  end
44
57
 
45
- def base_ordering
46
- @_base_ordering ||= begin
47
- edges = []
48
- less_chances.each{|x, ys|
49
- ys.each{|y, v|
50
- edges << [x, y, v] if v > 0.5
51
- }
52
- }
53
- edges.sort!{|x, y| y[2] <=> x[2]}
54
-
55
- ordering = Ordering.new(less_chances.keys)
56
-
57
- edges.each{|x, y, v|
58
- ordering.determine(x, y)
59
- break if ordering.determined?
60
- }
61
- ordering
62
- end
63
- end
64
-
65
- # The rough score for x is the average chance of it being > y for all y we've got a comparison with
66
- # We use this as a tie breaking heuristic.
67
58
  def rough_scores
68
59
  @_rough_scores ||= begin
60
+ logger.debug "calculating rough_scores"
69
61
  # This markov chain is based off MC4. The idea is as follows:
70
62
  # Starting at an item we pick one of the other items at random.
71
63
  # We then transition to that item with probability P(i < j).
72
64
  # If we fail to transition we stay where we are.
73
65
  # i.e. the probability of transitioning from i to j with i != j is 1/(n-1) * P(i < j).
74
66
 
67
+ logger.debug "calculating transition probabilities"
75
68
  transitions = {}
76
69
 
77
70
  @items.each{|i|
@@ -90,7 +83,10 @@ module RankAggregation
90
83
  end
91
84
  }
92
85
 
93
- MarkovChain.new(@items, transitions).stationary_distribution
86
+ logger.debug "calculating transition probabilities complete"
87
+ result = MarkovChain.new(@items, transitions, logger).stationary_distribution
88
+ logger.debug "calculating rough_scores complete"
89
+ result
94
90
  end
95
91
  end
96
92
 
@@ -102,12 +98,72 @@ module RankAggregation
102
98
 
103
99
  def combined_rankings
104
100
  @_combined_rankings ||= begin
105
- @items.sort{|x, y| base_ordering[x, y] || (rough_scores[x] <=> rough_scores[y]) }
101
+ triangle_shuffle(rough_combined_rankings)
106
102
  end
107
103
  end
108
104
 
105
+ def kendall_distance
106
+ @_kendall_distance ||= kendall_distance_for(combined_rankings)
107
+ end
108
+
109
109
  private
110
110
 
111
+ def kendall_distance_for(ranks)
112
+ tot = 0.0
113
+ (0...ranks.length).each{|i|
114
+ (i+1...ranks.length).each{|j|
115
+ tot += less_scores[ranks[j]][ranks[i]]
116
+ }
117
+ }
118
+ tot / @rank_count
119
+ end
120
+
121
+ def triangle_shuffle(ranks)
122
+ ranks = ranks.dup
123
+ i = 0
124
+
125
+ changed = true
126
+
127
+ iterations = 0
128
+
129
+ while changed
130
+ iterations += 1
131
+
132
+ shuffle_count = 0
133
+
134
+ changed = false
135
+ (0...ranks.length - 2).each do |i|
136
+ # we look at the positions i, i+1 and i+2 and form a kemeny optimal ordering of the items
137
+ # there
138
+ a, b, c = ranks[i..i+2]
139
+
140
+ shuffles = [
141
+ [a, b, c],
142
+ [a, c, b],
143
+ [b, a, c],
144
+ [b, c, a],
145
+ [c, a, b],
146
+ [c, b, a]
147
+ ].map{|x| [kendall_distance_for(x), x]}
148
+
149
+ original_score = shuffles[0][0]
150
+ best_score, best = shuffles.min
151
+
152
+ if best_score < original_score
153
+ shuffle_count += 1
154
+ changed = true
155
+ ranks[i..i+2] = best
156
+ logger.debug{
157
+ "#{a}, #{b}, #{c} shuffled to #{best.join(", ")}. Score went from #{original_score} to #{best_score}"
158
+ }
159
+ end
160
+
161
+ end
162
+ logger.debug "triangle shuffling iteration ##{iterations} performed #{shuffle_count} shuffles"
163
+ end
164
+ ranks
165
+ end
166
+
111
167
  def reset_cached
112
168
  self.instance_variables.grep(/^@_/).each{|v| instance_variable_set(v, nil)}
113
169
  end
@@ -1,5 +1,7 @@
1
1
  # Algorithm description
2
2
 
3
+ Warning: These notes are a bit out of date.
4
+
3
5
  This is a description of the algorithm embodied in this library.
4
6
 
5
7
  The input of this algorithm is a list of partial rankings of a set of items. (The set of items is not known up front - it's computed from the lists. This is a minor and not terribly important detail.)
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{rank-aggregation}
8
- s.version = "0.0.3"
8
+ s.version = "0.0.5"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["David R. MacIver"]
12
- s.date = %q{2010-06-20}
12
+ s.date = %q{2010-07-08}
13
13
  s.default_executable = %q{rank}
14
14
  s.email = %q{david@drmaciver.com}
15
15
  s.executables = ["rank"]
@@ -23,13 +23,12 @@ Gem::Specification.new do |s|
23
23
  "bin/rank",
24
24
  "lib/rank-aggregation.rb",
25
25
  "lib/rank-aggregation/markov.rb",
26
- "lib/rank-aggregation/ordering.rb",
27
26
  "lib/rank-aggregation/ranker.rb",
28
- "lib/rank-aggregation/scorer.rb",
29
27
  "notes/algorithm.markdown",
30
28
  "rank-aggregation.gemspec",
31
29
  "samples/clear-with-noise",
32
30
  "samples/different-sizes-interleaved",
31
+ "samples/images",
33
32
  "samples/languages",
34
33
  "samples/pairs",
35
34
  "samples/random",
data/samples/images ADDED
@@ -0,0 +1,249 @@
1
+ 4713907317 4714793516
2
+ 4714044861 4715364378
3
+ 4713401785 4715058159
4
+ 4713685521 4713662041
5
+ 4714695978 4714036236
6
+ 4714094858 4714570862
7
+ 4714120928 4714183405
8
+ 4714519726 4713952977
9
+ 4714809167 4714181334
10
+ 4713940506 4713660541
11
+ 4715424340 4714519726
12
+ 4713379937 0
13
+ 4714441346 4713614365
14
+ 4713940506 4714519726
15
+ 4714866582 4714214085
16
+ 4713907317 4714094858
17
+ 4714707313 4714173098
18
+ 4714441346 4713387133
19
+ 4713981249 4714633673
20
+ 4714994910 4714570862
21
+ 4714157060 4714209769
22
+ 4714967844 4714180352
23
+ 4715190691 4714982926
24
+ 4714629861 4714214085
25
+ 4714793516 4714633673
26
+ 4715213970 4714625095
27
+ 4714286467 4715003796
28
+ 4715856420 4714073463
29
+ 4714695978 4714214085
30
+ 4714866582 4713526865
31
+ 4714625095 4713660541
32
+ 4715424340 4714834229
33
+ 4714930683 4714930683
34
+ 4714274838 4714574641
35
+ 4714589388 4714286467
36
+ 4714241833 4714384952
37
+ 4714820813 4714157060
38
+ 4714793516 4714820813
39
+ 4714157060 4713981249
40
+ 4713660541 4714180352
41
+ 4714990888 4714100910
42
+ 4715453204 4714319362
43
+ 4714664778 4713651819
44
+ 4714866582 4714151904
45
+ 4713693925 4713981249
46
+ 4713906389 4714227818
47
+ 4714393079 4714367616
48
+ 4715073620 4714183405
49
+ 4715814487 4713589101
50
+ 4714509338 4714192076
51
+ 4714291607 4714871415
52
+ 4713788611 4716484338
53
+ 4713965261 4714982926
54
+ 4713685521 4714793516
55
+ 4714928611 4714068646
56
+ 4715085584 4713616757
57
+ 4713379937 4714183405
58
+ 4715356822 4715012282
59
+ 4715100820 4716703934
60
+ 4715714373 4714227078
61
+ 4715763896 4714087637
62
+ 4713844607 4714133515
63
+ 4714661034 4713997878
64
+ 4714871415 4715860020
65
+ 4714797576 4714534020
66
+ 4715558413 4714589388
67
+ 4714096389 4715073620
68
+ 4716091440 4714763663
69
+ 4714121666 4714137974
70
+ 4715343302 4714145722
71
+ 4714629861 4713776313
72
+ 4714866582 4713736745
73
+ 4714512738 4715963672
74
+ 4714185386 4715242266
75
+ 4714967844 4715888970
76
+ 4714173098 4716213882
77
+ 4715049826 4713612015
78
+ 4713752261 4713979531
79
+ 4713365017 4716213882
80
+ 4713744349 4713686267
81
+ 4714810870 4714658782
82
+ 4714533206 4715356822
83
+ 4713314831 4714905429
84
+ 4715388191 4715489479
85
+ 4714692816 4715453204
86
+ 4713898663 4714137996
87
+ 4714522757 4714107938
88
+ 4713781919 4715430345
89
+ 4716515978 4715003796
90
+ 4714533547 4714506478
91
+ 4716526328 4715040480
92
+ 4714182023 4714681315
93
+ 4715940337 4714822451
94
+ 4714435781 4715888970
95
+ 4714423390 4714930683
96
+ 4715215778 4714644144
97
+ 4714533547 4715324294
98
+ 4713495537 4713951401
99
+ 4715215778 4714931374
100
+ 4713575079 4713418207
101
+ 4715602724 4714692816
102
+ 4714274838 4715426962
103
+ 4714179261 4716714528
104
+ 4713853729 4715075632
105
+ 4714525315 4713936967
106
+ 4714264652 4714928611
107
+ 4714207610 4713940506
108
+ 4715215778 4713405627
109
+ 4714522289 4713853729
110
+ 4715992740 4713547921
111
+ 4715233482 4715075632
112
+ 4716115538 4714695978
113
+ 4713980054 4715683996
114
+ 4715073620 4713892543
115
+ 4714625095 4713854897
116
+ 4714967844 4714820813
117
+ 4714067235 4713587865
118
+ 4713844121 4715200759
119
+ 4714430684 4714684363
120
+ 4714442589 4713917102
121
+ 4714001813 4714574641
122
+ 4715331713 4714535413
123
+ 4715153745 4715617690
124
+ 4713575079 4715888970
125
+ 4713879311 4714344426
126
+ 4715217508 4716083048
127
+ 4715741624 4713749569
128
+ 4713788611 4715242266
129
+ 4714449713 4714329119
130
+ 4714808591 4714589388
131
+ 4714959812 4714133515
132
+ 4714037152 4714490458
133
+ 4714941210 4714970667
134
+ 4713574409 4714103021
135
+ 4713997878 4714241833
136
+ 4715535678 4715856420
137
+ 4715324294 4713322691
138
+ 4715477764 4714522289
139
+ 4714236828 4714534020
140
+ 4714545096 4713495101
141
+ 4715322098 4714341509
142
+ 4714137996 4714341509
143
+ 4714990888 4714179898
144
+ 4713981249 4713691107
145
+ 4713947773 4713401785
146
+ 4713879311 4713689031
147
+ 4715992740 4715103668
148
+ 4715190691 4715005229
149
+ 4713685665 4713937913
150
+ 4714091142 4713689031
151
+ 4714695978 4715005260
152
+ 4713917539 4713575079
153
+ 4715049826 4713631637
154
+ 4715669395 4715503820
155
+ 4714493522 4713685521
156
+ 4715364378 4713907317
157
+ 4713844607 4714808591
158
+ 4715260222 4714075745
159
+ 4713612015 4713913835
160
+ 4715501923 4713913835
161
+ 4715627740 4714367073
162
+ 4715669286 4715215778
163
+ 4715069575 4715859809
164
+ 4713931385 4714121666
165
+ 4713980054 4715678715
166
+ 4715217508 4715739086
167
+ 4713566967 4714592156
168
+ 4715669286 4715683996
169
+ 4715069575 4713458933
170
+ 4713443893 4713575079
171
+ 4716120110 4713951401
172
+ 4714232322 4713967155
173
+ 4714866582 4714341509
174
+ 4715217376 4713879311
175
+ 4715605259 4714219924
176
+ 4713322691 4715374569
177
+ 4714149874 4714034091
178
+ 4715374569 4714341509
179
+ 4714797576 4714209769
180
+ 4714326747 4713604321
181
+ 4714479137 4714905514
182
+ 4714535413 4713892543
183
+ 4714493522 4715088795
184
+ 4713666531 4715058159
185
+ 4713611777 4713666531
186
+ 4713969986 4715285982
187
+ 4713570275 4714132776
188
+ 4714590394 4715525984
189
+ 4714150388 4714522289
190
+ 4714345492 4714592156
191
+ 4715775475 4714393079
192
+ 4714423390 4714335269
193
+ 4713570275 4714589388
194
+ 4715602724 4715219978
195
+ 4714120928 4715266308
196
+ 4714695978 4713931385
197
+ 4714990888 4714509338
198
+ 4714353096 4714678696
199
+ 4714096389 4714522289
200
+ 4715056162 4714274838
201
+ 4714107938 4716703934
202
+ 4714292768 4714763663
203
+ 4715741624 4714036236
204
+ 4714545096 4713604321
205
+ 4715091935 4713952977
206
+ 4715049826 4715169543
207
+ 4713931385 4713502361
208
+ 4714967844 4714522757
209
+ 4714936074 4716091609
210
+ 4715100820 4713740667
211
+ 4714075745 4714522289
212
+ 4715175414 4714590394
213
+ 4716213882 4713574409
214
+ 4713339825 4715739086
215
+ 4715775475 4716193284
216
+ 4715388191 4714384952
217
+ 4714274838 4713781919
218
+ 4714546309 4715739086
219
+ 4714207610 4714423390
220
+ 4714586002 4715233482
221
+ 4714227078 4714180352
222
+ 4714546309 4714254254
223
+ 4715477764 4714548260
224
+ 4713635095 4714661034
225
+ 4713339825 4714120928
226
+ 4714094858 4714590394
227
+ 4715364099 4714209769
228
+ 4714187130 4714391365
229
+ 4714614348 4713972161
230
+ 4713441293 4714227818
231
+ 4714941210 4715012282
232
+ 4715963672 4714317837
233
+ 4713363381 4714440750
234
+ 4715665972 4713749569
235
+ 4714329119 4714909026
236
+ 4713350507 4714930683
237
+ 4713285765 4713931385
238
+ 4715284379 4714535413
239
+ 4714187130 4714525527
240
+ 4715286921 4714448373
241
+ 4713368857 4713631637
242
+ 4714695978 4715005260
243
+ 4714599345 4713506075
244
+ 4713651819 4714796667
245
+ 4714187130 4714367073
246
+ 4713614365 4714442589
247
+ 4714091142 4714137974
248
+ 4713980054 4714353096
249
+ 4715550743 4714467446
data/scores CHANGED
@@ -1,5 +1,6 @@
1
1
  clear-with-noise: 0.213968253968254
2
- different-sizes-interleaved: 0.294117647058824
3
- languages: 0.350065325612717
2
+ different-sizes-interleaved: 0.043343653250774
3
+ images: 0.0524193548387097
4
+ languages: 0.372378907195571
4
5
  pairs: 0.333333333333333
5
- random: 0.479696100794631
6
+ random: 0.478733444496679
@@ -1,6 +1,9 @@
1
1
  require "helper"
2
2
 
3
+ A, B, C = [:a, :b, :c]
4
+
3
5
  describe RankAggregation do
6
+
4
7
  it "should aggregate a single rank into itself" do
5
8
  r = (1..10).to_a
6
9
  RankAggregation.combine_rankings([r]).should == r
@@ -27,4 +30,26 @@ describe RankAggregation do
27
30
  RankAggregation.combine_rankings(r2).should == b
28
31
  RankAggregation.combine_rankings(r3).should == c
29
32
  end
33
+
34
+ it "should not consider alternatives to be irrelevant" do
35
+ # This is my example from "Irrelevant alternatives aren't"
36
+ # which shows that you need to consider C to get the order of
37
+ # A and B right, as there's a 50/50 voting split between the
38
+ # two of them, but clear majorities showing that B < C < A
39
+
40
+ RankAggregation.combine_rankings([
41
+ [A, B, C],
42
+ [B, C, A],
43
+ [C, A, B],
44
+ [B, C, A]
45
+ ]).should == [B, C, A]
46
+ end
47
+
48
+ it "should follow the majority with greater weight behind it" do
49
+ RankAggregation.combine_rankings(
50
+ [[B, C]] * 6 +
51
+ [[A, B]] * 10 +
52
+ [[C, A]] * 5
53
+ ).should == [A, B, C]
54
+ end
30
55
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rank-aggregation
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 21
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 5
10
+ version: 0.0.5
11
11
  platform: ruby
12
12
  authors:
13
13
  - David R. MacIver
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-06-20 00:00:00 +01:00
18
+ date: 2010-07-08 00:00:00 +01:00
19
19
  default_executable: rank
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -47,13 +47,12 @@ files:
47
47
  - bin/rank
48
48
  - lib/rank-aggregation.rb
49
49
  - lib/rank-aggregation/markov.rb
50
- - lib/rank-aggregation/ordering.rb
51
50
  - lib/rank-aggregation/ranker.rb
52
- - lib/rank-aggregation/scorer.rb
53
51
  - notes/algorithm.markdown
54
52
  - rank-aggregation.gemspec
55
53
  - samples/clear-with-noise
56
54
  - samples/different-sizes-interleaved
55
+ - samples/images
57
56
  - samples/languages
58
57
  - samples/pairs
59
58
  - samples/random
@@ -1,44 +0,0 @@
1
- require "set"
2
-
3
- module RankAggregation
4
- class Ordering
5
- attr_accessor :order, :items, :determined
6
-
7
- def initialize(items)
8
- @items = Set[*items.to_a]
9
- @determined = Set.new
10
- @order = Hash.new{|h, k| h[k] = { k => 0 }}
11
- end
12
-
13
- def determine(x, y)
14
- return false if @order[x][y]
15
-
16
- @order[x][y] = -1
17
- @order[y][x] = 1
18
-
19
- @determined << x if @order[x].size == @items.size
20
- @determined << y if @order[y].size == @items.size
21
-
22
- @order[x].each{|z, v|
23
- determine(z, y) if v == 1
24
- }
25
-
26
- @order[y].each{|z, v|
27
- determine(x, z) if v == -1
28
- }
29
- true
30
- end
31
-
32
- def determined?(item=nil)
33
- if item
34
- return self.determined.include?(item)
35
- else
36
- return self.determined.size == self.items.size
37
- end
38
- end
39
-
40
- def [](x, y)
41
- @order[x][y]
42
- end
43
- end
44
- end
@@ -1,41 +0,0 @@
1
- module RankAggregation
2
- module Scorer
3
- def average_kendall_distance(aggregate, ranks)
4
- a_indices = {}
5
-
6
- aggregate.each_with_index{|x, i|
7
- a_indices[x] = i
8
- }
9
-
10
- parts = ranks.select{|x| x.size > 1 }.map{|x| kendal_distance(a_indices, x) }
11
- parts.inject(0.0){|x, y| x + y} / ranks.size
12
- end
13
-
14
- # TODO: Decent implementation of this
15
- private
16
-
17
- def kendal_distance(a_indices, b)
18
-
19
- tot = 0.0
20
- (0...b.length).each{|i|
21
- ((i + 1)...b.length).each{|j|
22
- x = a_indices[b[i]]
23
- y = a_indices[b[j]]
24
- next unless x && y
25
-
26
- tot += 1 if x > y
27
- }
28
- }
29
-
30
- n = (a_indices.keys & b).size
31
-
32
- p b if n <= 1
33
-
34
- score = tot / (0.5 * n * (n - 1))
35
- end
36
- end
37
-
38
- class <<Scorer
39
- include Scorer
40
- end
41
- end