evalir 0.0.1 → 0.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile.lock +1 -1
- data/README.md +7 -2
- data/lib/evalir/evalirator.rb +32 -0
- data/lib/evalir/evalirator_collection.rb +19 -11
- data/lib/evalir/version.rb +1 -1
- data/test/test_evalirator_collection.rb +9 -0
- data/test/test_evalirator_ranked.rb +20 -0
- metadata +5 -5
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -12,6 +12,8 @@ What can Evalir do?
|
|
12
12
|
* Precision at rank k
|
13
13
|
* Average Precision
|
14
14
|
* Precision-Recall curve
|
15
|
+
* Reciprocal Rank
|
16
|
+
* [Mean Reciprocal Rank](http://en.wikipedia.org/wiki/Mean_reciprocal_rank)
|
15
17
|
* [Mean Average Precision (MAP)](http://en.wikipedia.org/wiki/Information_retrieval#Mean_average_precision)
|
16
18
|
* [F-measure](http://en.wikipedia.org/wiki/Information_retrieval#F-measure)
|
17
19
|
* [R-Precision](http://en.wikipedia.org/wiki/Information_retrieval#R-Precision)
|
@@ -44,6 +46,7 @@ For example, we have the aforementioned information need and query, and a list o
|
|
44
46
|
puts "F-3: #{e.f_measure(3)}"
|
45
47
|
puts "Precision at rank 10: #{e.precision_at_rank(10)}"
|
46
48
|
puts "Average Precision: #{e.average_precision}"
|
49
|
+
puts "NDCG @ 5: #{e.ndcg_at(5)}"
|
47
50
|
|
48
51
|
When you have several information needs and want to compute aggregate statistics, use an EvaliratorCollection like this:
|
49
52
|
|
@@ -51,7 +54,9 @@ When you have several information needs and want to compute aggregate statistics
|
|
51
54
|
queries.each do |query|
|
52
55
|
relevant = get_relevant_docids(query)
|
53
56
|
results = get_results(query)
|
54
|
-
e
|
57
|
+
e.add(relevant, results)
|
55
58
|
end
|
59
|
+
|
56
60
|
puts "MAP: #{e.mean_average_precision}"
|
57
|
-
puts "Precision-Recall Curve: #{e.precision_recall_curve}"
|
61
|
+
puts "Precision-Recall Curve: #{e.precision_recall_curve}"
|
62
|
+
puts "Avg. NDCG @ 3: #{e.average_ndcg_at(3)}"
|
data/lib/evalir/evalirator.rb
CHANGED
@@ -149,6 +149,17 @@ module Evalir
|
|
149
149
|
avg
|
150
150
|
end
|
151
151
|
|
152
|
+
# The reciprocal rank, meaning
|
153
|
+
# 1 divided by the rank of the
|
154
|
+
# most highly ranked relevant
|
155
|
+
# result.
|
156
|
+
def reciprocal_rank
|
157
|
+
@search_hits.each_with_index do |h,i|
|
158
|
+
return 1.0 / (i + 1) if @relevant_docids.include? h
|
159
|
+
end
|
160
|
+
return 0.0
|
161
|
+
end
|
162
|
+
|
152
163
|
# Discounted Cumulative Gain at
|
153
164
|
# rank k. For a relevant search
|
154
165
|
# result at position x, its con-
|
@@ -167,5 +178,26 @@ module Evalir
|
|
167
178
|
end
|
168
179
|
dcg
|
169
180
|
end
|
181
|
+
|
182
|
+
# Normalized Discounted Cumulative
|
183
|
+
# Gain at rank <em>k</em>. This is
|
184
|
+
# the #dcg_at normalized by the optimal
|
185
|
+
# dcg value at rank k.
|
186
|
+
def ndcg_at(k, logbase=2)
|
187
|
+
dcg = dcg_at(k, logbase)
|
188
|
+
dcg > 0 ? dcg / ideal_dcg_at(k, logbase) : 0
|
189
|
+
end
|
190
|
+
|
191
|
+
private
|
192
|
+
def ideal_dcg_at(k, logbase=2)
|
193
|
+
idcg = 0.0
|
194
|
+
n = @true_positives
|
195
|
+
(1..k).each do |i|
|
196
|
+
break unless n > 0
|
197
|
+
idcg += i == 1 ? 1.0 : 1.0 / Math.log(i, logbase)
|
198
|
+
n -= 1
|
199
|
+
end
|
200
|
+
idcg
|
201
|
+
end
|
170
202
|
end
|
171
203
|
end
|
@@ -43,11 +43,11 @@ module Evalir
|
|
43
43
|
# a fancy way of saying 'average average
|
44
44
|
# precision'!
|
45
45
|
def mean_average_precision
|
46
|
-
avg
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
avg
|
46
|
+
@evalirators.reduce(0.0) {|avg,e| avg + (e.average_precision / @evalirators.size)}
|
47
|
+
end
|
48
|
+
|
49
|
+
def mean_reciprocal_rank
|
50
|
+
self.reduce(0.0) { |avg,e| avg + (e.reciprocal_rank / self.size)}
|
51
51
|
end
|
52
52
|
|
53
53
|
# Gets the data for the precision-recall
|
@@ -55,17 +55,25 @@ module Evalir
|
|
55
55
|
# <em>to</em>], with a step size of <em>step</em>.
|
56
56
|
# This is the average over all evalirators.
|
57
57
|
def precision_recall_curve(from = 0, to = 100, step = 10)
|
58
|
+
raise "From must be in the interval [0, 100)" unless (from >= 0 and from < 100)
|
59
|
+
raise "To must be in the interval (from, 100]" unless (to > from and to <= 100)
|
60
|
+
raise "Invalid step size - (to-from) must be divisible by step." unless ((to - from) % step) == 0
|
58
61
|
return nil if @evalirators.empty?
|
59
|
-
|
60
|
-
|
61
|
-
x = 1
|
62
|
+
|
63
|
+
steps = ((to - from) / step) + 1
|
62
64
|
curves = self.lazy_map { |e| e.precision_recall_curve(from, to, step) }
|
63
|
-
|
64
|
-
x += 1
|
65
|
+
curves.reduce([0] * steps) do |acc, data|
|
65
66
|
data.each_with_index.map do |d,i|
|
66
|
-
acc[i]
|
67
|
+
acc[i] += d / self.size
|
67
68
|
end
|
68
69
|
end
|
69
70
|
end
|
71
|
+
|
72
|
+
# Gets the average Normalized Discounted
|
73
|
+
# Cumulative Gain over all queries.
|
74
|
+
def average_ndcg_at(k, logbase = 2)
|
75
|
+
values = self.lazy_map {|e| e.ndcg_at(k, logbase)}
|
76
|
+
values.reduce(0.0) { |acc, v| acc + (v / self.size) }
|
77
|
+
end
|
70
78
|
end
|
71
79
|
end
|
data/lib/evalir/version.rb
CHANGED
@@ -16,9 +16,18 @@ class EvaliratorCollectionTest < Test::Unit::TestCase
|
|
16
16
|
assert_equal(2, @e.count)
|
17
17
|
end
|
18
18
|
|
19
|
+
def test_mean_reciprocal_rank
|
20
|
+
expected = (1.0 + 0.5) / 2
|
21
|
+
assert_equal(expected, @e.mean_reciprocal_rank)
|
22
|
+
end
|
23
|
+
|
19
24
|
def test_precision_recall_curve
|
20
25
|
expected = [1.0, 0.5, 0.5, 0.5, 0.375, 0.4, 0.417, 0.429, 0.375, 0.389, 0.4]
|
21
26
|
actual = @e.precision_recall_curve.collect { |f| f.round(3) }
|
22
27
|
assert_equal(expected, actual)
|
23
28
|
end
|
29
|
+
|
30
|
+
def test_average_ndcg_at_3
|
31
|
+
assert_equal(0.5, @e.average_ndcg_at(3).round(2))
|
32
|
+
end
|
24
33
|
end
|
@@ -36,6 +36,10 @@ class EvaliratorRankedTest < Test::Unit::TestCase
|
|
36
36
|
assert_equal(0.4, @e.r_precision)
|
37
37
|
end
|
38
38
|
|
39
|
+
def test_reciprocal_rank
|
40
|
+
assert_equal(1.0, @e.reciprocal_rank)
|
41
|
+
end
|
42
|
+
|
39
43
|
def test_average_precision
|
40
44
|
e1 = Evalir::Evalirator.new([1,3,4,5,6,10], [1,2,3,4,5,6,7,8,9,10])
|
41
45
|
assert_equal(0.78, e1.average_precision.round(2))
|
@@ -48,5 +52,21 @@ class EvaliratorRankedTest < Test::Unit::TestCase
|
|
48
52
|
expected = 1.0 + (1.0/Math.log(3,2))
|
49
53
|
assert_equal(expected, @e.dcg_at(5))
|
50
54
|
end
|
55
|
+
|
56
|
+
def test_ndcg_at_3
|
57
|
+
dcg = 1.0 + (1.0/Math.log(3,2))
|
58
|
+
idcg = 2.0 + (1.0/Math.log(3,2))
|
59
|
+
assert_equal(dcg/idcg, @e.ndcg_at(3))
|
60
|
+
end
|
61
|
+
|
62
|
+
def test_dcg_when_no_relevant
|
63
|
+
e = Evalir::Evalirator.new([1,2,3],[4,5,6])
|
64
|
+
assert_equal(0.0, e.dcg_at(3))
|
65
|
+
end
|
66
|
+
|
67
|
+
def test_ndcg_when_no_relevant
|
68
|
+
e = Evalir::Evalirator.new([1,2,3], [4,5,6])
|
69
|
+
assert_equal(0.0, e.ndcg_at(3))
|
70
|
+
end
|
51
71
|
end
|
52
72
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evalir
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-09-30 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &70127782700140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70127782700140
|
25
25
|
description: Evalir is used to measure search relevance at Companybook, and offers
|
26
26
|
a number of standard measurements, from the basic precision and recall to single
|
27
27
|
value summaries such as NDCG and MAP.
|
@@ -58,7 +58,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
58
58
|
version: '0'
|
59
59
|
segments:
|
60
60
|
- 0
|
61
|
-
hash:
|
61
|
+
hash: 3533848118518243422
|
62
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
63
|
none: false
|
64
64
|
requirements:
|
@@ -67,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
67
|
version: '0'
|
68
68
|
segments:
|
69
69
|
- 0
|
70
|
-
hash:
|
70
|
+
hash: 3533848118518243422
|
71
71
|
requirements: []
|
72
72
|
rubyforge_project:
|
73
73
|
rubygems_version: 1.8.10
|