evalir 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- evalir (0.0.1)
4
+ evalir (0.0.2)
5
5
 
6
6
  GEM
7
7
  remote: http://rubygems.org/
data/README.md CHANGED
@@ -12,6 +12,8 @@ What can Evalir do?
12
12
  * Precision at rank k
13
13
  * Average Precision
14
14
  * Precision-Recall curve
15
+ * Reciprocal Rank
16
+ * [Mean Reciprocal Rank](http://en.wikipedia.org/wiki/Mean_reciprocal_rank)
15
17
  * [Mean Average Precision (MAP)](http://en.wikipedia.org/wiki/Information_retrieval#Mean_average_precision)
16
18
  * [F-measure](http://en.wikipedia.org/wiki/Information_retrieval#F-measure)
17
19
  * [R-Precision](http://en.wikipedia.org/wiki/Information_retrieval#R-Precision)
@@ -44,6 +46,7 @@ For example, we have the aforementioned information need and query, and a list o
44
46
  puts "F-3: #{e.f_measure(3)}"
45
47
  puts "Precision at rank 10: #{e.precision_at_rank(10)}"
46
48
  puts "Average Precision: #{e.average_precision}"
49
+ puts "NDCG @ 5: #{e.ndcg_at(5)}"
47
50
 
48
51
  When you have several information needs and want to compute aggregate statistics, use an EvaliratorCollection like this:
49
52
 
@@ -51,7 +54,9 @@ When you have several information needs and want to compute aggregate statistics
51
54
  queries.each do |query|
52
55
  relevant = get_relevant_docids(query)
53
56
  results = get_results(query)
54
- e << Evalir.Evalirator.new(relevant, results)
57
+ e.add(relevant, results)
55
58
  end
59
+
56
60
  puts "MAP: #{e.mean_average_precision}"
57
- puts "Precision-Recall Curve: #{e.precision_recall_curve}"
61
+ puts "Precision-Recall Curve: #{e.precision_recall_curve}"
62
+ puts "Avg. NDCG @ 3: #{e.average_ndcg_at(3)}"
@@ -149,6 +149,17 @@ module Evalir
149
149
  avg
150
150
  end
151
151
 
152
+ # The reciprocal rank, meaning
153
+ # 1 divided by the rank of the
154
+ # most highly ranked relevant
155
+ # result.
156
+ def reciprocal_rank
157
+ @search_hits.each_with_index do |h,i|
158
+ return 1.0 / (i + 1) if @relevant_docids.include? h
159
+ end
160
+ return 0.0
161
+ end
162
+
152
163
  # Discounted Cumulative Gain at
153
164
  # rank k. For a relevant search
154
165
  # result at position x, its con-
@@ -167,5 +178,26 @@ module Evalir
167
178
  end
168
179
  dcg
169
180
  end
181
+
182
+ # Normalized Discounted Cumulative
183
+ # Gain at rank <em>k</em>. This is
184
+ # the #dcg_at normalized by the optimal
185
+ # dcg value at rank k.
186
+ def ndcg_at(k, logbase=2)
187
+ dcg = dcg_at(k, logbase)
188
+ dcg > 0 ? dcg / ideal_dcg_at(k, logbase) : 0
189
+ end
190
+
191
+ private
192
+ def ideal_dcg_at(k, logbase=2)
193
+ idcg = 0.0
194
+ n = @true_positives
195
+ (1..k).each do |i|
196
+ break unless n > 0
197
+ idcg += i == 1 ? 1.0 : 1.0 / Math.log(i, logbase)
198
+ n -= 1
199
+ end
200
+ idcg
201
+ end
170
202
  end
171
203
  end
@@ -43,11 +43,11 @@ module Evalir
43
43
  # a fancy way of saying 'average average
44
44
  # precision'!
45
45
  def mean_average_precision
46
- avg = 0.0
47
- @evalirators.each do |e|
48
- avg += (e.average_precision / @evalirators.size)
49
- end
50
- avg
46
+ @evalirators.reduce(0.0) {|avg,e| avg + (e.average_precision / @evalirators.size)}
47
+ end
48
+
49
+ def mean_reciprocal_rank
50
+ self.reduce(0.0) { |avg,e| avg + (e.reciprocal_rank / self.size)}
51
51
  end
52
52
 
53
53
  # Gets the data for the precision-recall
@@ -55,17 +55,25 @@ module Evalir
55
55
  # <em>to</em>], with a step size of <em>step</em>.
56
56
  # This is the average over all evalirators.
57
57
  def precision_recall_curve(from = 0, to = 100, step = 10)
58
+ raise "From must be in the interval [0, 100)" unless (from >= 0 and from < 100)
59
+ raise "To must be in the interval (from, 100]" unless (to > from and to <= 100)
60
+ raise "Invalid step size - (to-from) must be divisible by step." unless ((to - from) % step) == 0
58
61
  return nil if @evalirators.empty?
59
-
60
- #n = self.size.to_f
61
- x = 1
62
+
63
+ steps = ((to - from) / step) + 1
62
64
  curves = self.lazy_map { |e| e.precision_recall_curve(from, to, step) }
63
- return curves.reduce do |acc, data|
64
- x += 1
65
+ curves.reduce([0] * steps) do |acc, data|
65
66
  data.each_with_index.map do |d,i|
66
- acc[i] = (acc[i] + d) / x
67
+ acc[i] += d / self.size
67
68
  end
68
69
  end
69
70
  end
71
+
72
+ # Gets the average Normalized Discounted
73
+ # Cumulative Gain over all queries.
74
+ def average_ndcg_at(k, logbase = 2)
75
+ values = self.lazy_map {|e| e.ndcg_at(k, logbase)}
76
+ values.reduce(0.0) { |acc, v| acc + (v / self.size) }
77
+ end
70
78
  end
71
79
  end
@@ -1,3 +1,3 @@
1
1
  module Evalir
2
- VERSION = "0.0.1"
2
+ VERSION = "0.0.2"
3
3
  end
@@ -16,9 +16,18 @@ class EvaliratorCollectionTest < Test::Unit::TestCase
16
16
  assert_equal(2, @e.count)
17
17
  end
18
18
 
19
+ def test_mean_reciprocal_rank
20
+ expected = (1.0 + 0.5) / 2
21
+ assert_equal(expected, @e.mean_reciprocal_rank)
22
+ end
23
+
19
24
  def test_precision_recall_curve
20
25
  expected = [1.0, 0.5, 0.5, 0.5, 0.375, 0.4, 0.417, 0.429, 0.375, 0.389, 0.4]
21
26
  actual = @e.precision_recall_curve.collect { |f| f.round(3) }
22
27
  assert_equal(expected, actual)
23
28
  end
29
+
30
+ def test_average_ndcg_at_3
31
+ assert_equal(0.5, @e.average_ndcg_at(3).round(2))
32
+ end
24
33
  end
@@ -36,6 +36,10 @@ class EvaliratorRankedTest < Test::Unit::TestCase
36
36
  assert_equal(0.4, @e.r_precision)
37
37
  end
38
38
 
39
+ def test_reciprocal_rank
40
+ assert_equal(1.0, @e.reciprocal_rank)
41
+ end
42
+
39
43
  def test_average_precision
40
44
  e1 = Evalir::Evalirator.new([1,3,4,5,6,10], [1,2,3,4,5,6,7,8,9,10])
41
45
  assert_equal(0.78, e1.average_precision.round(2))
@@ -48,5 +52,21 @@ class EvaliratorRankedTest < Test::Unit::TestCase
48
52
  expected = 1.0 + (1.0/Math.log(3,2))
49
53
  assert_equal(expected, @e.dcg_at(5))
50
54
  end
55
+
56
+ def test_ndcg_at_3
57
+ dcg = 1.0 + (1.0/Math.log(3,2))
58
+ idcg = 2.0 + (1.0/Math.log(3,2))
59
+ assert_equal(dcg/idcg, @e.ndcg_at(3))
60
+ end
61
+
62
+ def test_dcg_when_no_relevant
63
+ e = Evalir::Evalirator.new([1,2,3],[4,5,6])
64
+ assert_equal(0.0, e.dcg_at(3))
65
+ end
66
+
67
+ def test_ndcg_when_no_relevant
68
+ e = Evalir::Evalirator.new([1,2,3], [4,5,6])
69
+ assert_equal(0.0, e.ndcg_at(3))
70
+ end
51
71
  end
52
72
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: evalir
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.1
4
+ version: 0.0.2
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ date: 2011-09-30 00:00:00.000000000Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: rake
16
- requirement: &70244820710980 !ruby/object:Gem::Requirement
16
+ requirement: &70127782700140 !ruby/object:Gem::Requirement
17
17
  none: false
18
18
  requirements:
19
19
  - - ! '>='
@@ -21,7 +21,7 @@ dependencies:
21
21
  version: '0'
22
22
  type: :development
23
23
  prerelease: false
24
- version_requirements: *70244820710980
24
+ version_requirements: *70127782700140
25
25
  description: Evalir is used to measure search relevance at Companybook, and offers
26
26
  a number of standard measurements, from the basic precision and recall to single
27
27
  value summaries such as NDCG and MAP.
@@ -58,7 +58,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
58
58
  version: '0'
59
59
  segments:
60
60
  - 0
61
- hash: 1697956995838933814
61
+ hash: 3533848118518243422
62
62
  required_rubygems_version: !ruby/object:Gem::Requirement
63
63
  none: false
64
64
  requirements:
@@ -67,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
67
67
  version: '0'
68
68
  segments:
69
69
  - 0
70
- hash: 1697956995838933814
70
+ hash: 3533848118518243422
71
71
  requirements: []
72
72
  rubyforge_project:
73
73
  rubygems_version: 1.8.10