evalir 0.0.1 → 0.0.2

data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
 PATH
   remote: .
   specs:
-    evalir (0.0.1)
+    evalir (0.0.2)
 
 GEM
   remote: http://rubygems.org/
data/README.md CHANGED
@@ -12,6 +12,8 @@ What can Evalir do?
  * Precision at rank k
  * Average Precision
  * Precision-Recall curve
+ * Reciprocal Rank
+ * [Mean Reciprocal Rank](http://en.wikipedia.org/wiki/Mean_reciprocal_rank)
  * [Mean Average Precision (MAP)](http://en.wikipedia.org/wiki/Information_retrieval#Mean_average_precision)
  * [F-measure](http://en.wikipedia.org/wiki/Information_retrieval#F-measure)
  * [R-Precision](http://en.wikipedia.org/wiki/Information_retrieval#R-Precision)
@@ -44,6 +46,7 @@ For example, we have the aforementioned information need and query, and a list o
     puts "F-3: #{e.f_measure(3)}"
     puts "Precision at rank 10: #{e.precision_at_rank(10)}"
     puts "Average Precision: #{e.average_precision}"
+    puts "NDCG @ 5: #{e.ndcg_at(5)}"
 
 When you have several information needs and want to compute aggregate statistics, use an EvaliratorCollection like this:
 
@@ -51,7 +54,9 @@ When you have several information needs and want to compute aggregate statistics
     queries.each do |query|
       relevant = get_relevant_docids(query)
       results = get_results(query)
-      e << Evalir.Evalirator.new(relevant, results)
+      e.add(relevant, results)
     end
+
     puts "MAP: #{e.mean_average_precision}"
-    puts "Precision-Recall Curve: #{e.precision_recall_curve}"
+    puts "Precision-Recall Curve: #{e.precision_recall_curve}"
+    puts "Avg. NDCG @ 3: #{e.average_ndcg_at(3)}"
@@ -149,6 +149,17 @@ module Evalir
       avg
     end
 
+    # The reciprocal rank, meaning
+    # 1 divided by the rank of the
+    # most highly ranked relevant
+    # result.
+    def reciprocal_rank
+      @search_hits.each_with_index do |h,i|
+        return 1.0 / (i + 1) if @relevant_docids.include? h
+      end
+      return 0.0
+    end
+
     # Discounted Cumulative Gain at
     # rank k. For a relevant search
     # result at position x, its con-
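The method above (added to the Evalirator class) returns 1/rank of the first relevant hit, and 0.0 when nothing relevant is retrieved. A small sketch using the Evalirator.new(relevant_docids, search_hits) constructor exercised by the tests in this diff:

    require 'evalir'

    # First relevant document (3) shows up at rank 3 => 1/3.
    puts Evalir::Evalirator.new([3, 9], [7, 8, 3, 9]).reciprocal_rank   # ~0.333

    # No relevant document retrieved at all => 0.0.
    puts Evalir::Evalirator.new([1, 2], [5, 6, 7]).reciprocal_rank      # 0.0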
@@ -167,5 +178,26 @@ module Evalir
       end
       dcg
     end
+
+    # Normalized Discounted Cumulative
+    # Gain at rank <em>k</em>. This is
+    # the #dcg_at normalized by the optimal
+    # dcg value at rank k.
+    def ndcg_at(k, logbase=2)
+      dcg = dcg_at(k, logbase)
+      dcg > 0 ? dcg / ideal_dcg_at(k, logbase) : 0
+    end
+
+    private
+    def ideal_dcg_at(k, logbase=2)
+      idcg = 0.0
+      n = @true_positives
+      (1..k).each do |i|
+        break unless n > 0
+        idcg += i == 1 ? 1.0 : 1.0 / Math.log(i, logbase)
+        n -= 1
+      end
+      idcg
+    end
   end
 end
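As a quick arithmetic check of ndcg_at, using the same numbers as test_ndcg_at_3 further down (relevant hits at ranks 1 and 3, with an ideal ordering that could have filled ranks 1, 2 and 3):

    dcg  = 1.0 + 1.0 / Math.log(3, 2)   # ~1.63: gains at ranks 1 and 3
    idcg = 2.0 + 1.0 / Math.log(3, 2)   # ~2.63: ideal gains at ranks 1, 2 and 3
    puts dcg / idcg                     # ~0.62, the value ndcg_at(3) returns there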
@@ -43,11 +43,11 @@ module Evalir
     # a fancy way of saying 'average average
     # precision'!
     def mean_average_precision
-      avg = 0.0
-      @evalirators.each do |e|
-        avg += (e.average_precision / @evalirators.size)
-      end
-      avg
+      @evalirators.reduce(0.0) {|avg,e| avg + (e.average_precision / @evalirators.size)}
+    end
+
+    def mean_reciprocal_rank
+      self.reduce(0.0) { |avg,e| avg + (e.reciprocal_rank / self.size)}
     end
 
     # Gets the data for the precision-recall
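mean_reciprocal_rank is simply the arithmetic mean of the per-query reciprocal ranks, which is exactly what test_mean_reciprocal_rank below asserts ((1.0 + 0.5) / 2). A minimal sketch with made-up document ids, again assuming an argument-less constructor:

    e = Evalir::EvaliratorCollection.new
    e.add([1, 2], [1, 5, 6])   # first relevant hit at rank 1 => RR = 1.0
    e.add([4], [9, 4, 8])      # first relevant hit at rank 2 => RR = 0.5

    puts e.mean_reciprocal_rank   # (1.0 + 0.5) / 2 = 0.75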
@@ -55,17 +55,25 @@ module Evalir
     # <em>to</em>], with a step size of <em>step</em>.
     # This is the average over all evalirators.
     def precision_recall_curve(from = 0, to = 100, step = 10)
+      raise "From must be in the interval [0, 100)" unless (from >= 0 and from < 100)
+      raise "To must be in the interval (from, 100]" unless (to > from and to <= 100)
+      raise "Invalid step size - (to-from) must be divisible by step." unless ((to - from) % step) == 0
       return nil if @evalirators.empty?
-
-      #n = self.size.to_f
-      x = 1
+
+      steps = ((to - from) / step) + 1
       curves = self.lazy_map { |e| e.precision_recall_curve(from, to, step) }
-      return curves.reduce do |acc, data|
-        x += 1
+      curves.reduce([0] * steps) do |acc, data|
         data.each_with_index.map do |d,i|
-          acc[i] = (acc[i] + d) / x
+          acc[i] += d / self.size
         end
       end
     end
+
+    # Gets the average Normalized Discounted
+    # Cumulative Gain over all queries.
+    def average_ndcg_at(k, logbase = 2)
+      values = self.lazy_map {|e| e.ndcg_at(k, logbase)}
+      values.reduce(0.0) { |acc, v| acc + (v / self.size) }
+    end
   end
 end
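The new guard clauses make an inconsistent range fail fast instead of silently producing a misaligned curve. A hedged sketch of the effect (the single-query setup and the argument-less constructor are illustrative assumptions; the method calls are the ones in this diff):

    e = Evalir::EvaliratorCollection.new
    e.add([1, 2], [1, 5, 2])

    e.precision_recall_curve(0, 100, 10)   # eleven averaged points at 0%, 10%, ..., 100% recall
    e.average_ndcg_at(3)                   # new aggregate: mean NDCG@3 over all queries
    e.precision_recall_curve(0, 95, 10)    # raises: (to - from) must be divisible by step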
@@ -1,3 +1,3 @@
 module Evalir
-  VERSION = "0.0.1"
+  VERSION = "0.0.2"
 end
@@ -16,9 +16,18 @@ class EvaliratorCollectionTest < Test::Unit::TestCase
     assert_equal(2, @e.count)
   end
 
+  def test_mean_reciprocal_rank
+    expected = (1.0 + 0.5) / 2
+    assert_equal(expected, @e.mean_reciprocal_rank)
+  end
+
   def test_precision_recall_curve
     expected = [1.0, 0.5, 0.5, 0.5, 0.375, 0.4, 0.417, 0.429, 0.375, 0.389, 0.4]
     actual = @e.precision_recall_curve.collect { |f| f.round(3) }
     assert_equal(expected, actual)
   end
+
+  def test_average_ndcg_at_3
+    assert_equal(0.5, @e.average_ndcg_at(3).round(2))
+  end
 end
@@ -36,6 +36,10 @@ class EvaliratorRankedTest < Test::Unit::TestCase
     assert_equal(0.4, @e.r_precision)
   end
 
+  def test_reciprocal_rank
+    assert_equal(1.0, @e.reciprocal_rank)
+  end
+
   def test_average_precision
     e1 = Evalir::Evalirator.new([1,3,4,5,6,10], [1,2,3,4,5,6,7,8,9,10])
     assert_equal(0.78, e1.average_precision.round(2))
@@ -48,5 +52,21 @@ class EvaliratorRankedTest < Test::Unit::TestCase
     expected = 1.0 + (1.0/Math.log(3,2))
     assert_equal(expected, @e.dcg_at(5))
   end
+
+  def test_ndcg_at_3
+    dcg = 1.0 + (1.0/Math.log(3,2))
+    idcg = 2.0 + (1.0/Math.log(3,2))
+    assert_equal(dcg/idcg, @e.ndcg_at(3))
+  end
+
+  def test_dcg_when_no_relevant
+    e = Evalir::Evalirator.new([1,2,3],[4,5,6])
+    assert_equal(0.0, e.dcg_at(3))
+  end
+
+  def test_ndcg_when_no_relevant
+    e = Evalir::Evalirator.new([1,2,3], [4,5,6])
+    assert_equal(0.0, e.ndcg_at(3))
+  end
 end
 
metadata CHANGED
@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: evalir
 version: !ruby/object:Gem::Version
-  version: 0.0.1
+  version: 0.0.2
 prerelease:
 platform: ruby
 authors:
@@ -13,7 +13,7 @@ date: 2011-09-30 00:00:00.000000000Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: rake
-  requirement: &70244820710980 !ruby/object:Gem::Requirement
+  requirement: &70127782700140 !ruby/object:Gem::Requirement
     none: false
     requirements:
     - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
         version: '0'
   type: :development
   prerelease: false
-  version_requirements: *70244820710980
+  version_requirements: *70127782700140
 description: Evalir is used to measure search relevance at Companybook, and offers
   a number of standard measurements, from the basic precision and recall to single
   value summaries such as NDCG and MAP.
@@ -58,7 +58,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
       version: '0'
       segments:
       - 0
-      hash: 1697956995838933814
+      hash: 3533848118518243422
 required_rubygems_version: !ruby/object:Gem::Requirement
   none: false
   requirements:
@@ -67,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
      version: '0'
      segments:
      - 0
-     hash: 1697956995838933814
+     hash: 3533848118518243422
 requirements: []
 rubyforge_project:
 rubygems_version: 1.8.10