evalir 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile.lock +1 -1
- data/README.md +7 -2
- data/lib/evalir/evalirator.rb +32 -0
- data/lib/evalir/evalirator_collection.rb +19 -11
- data/lib/evalir/version.rb +1 -1
- data/test/test_evalirator_collection.rb +9 -0
- data/test/test_evalirator_ranked.rb +20 -0
- metadata +5 -5
data/Gemfile.lock
CHANGED
data/README.md
CHANGED
@@ -12,6 +12,8 @@ What can Evalir do?
|
|
12
12
|
* Precision at rank k
|
13
13
|
* Average Precision
|
14
14
|
* Precision-Recall curve
|
15
|
+
* Reciprocal Rank
|
16
|
+
* [Mean Reciprocal Rank](http://en.wikipedia.org/wiki/Mean_reciprocal_rank)
|
15
17
|
* [Mean Average Precision (MAP)](http://en.wikipedia.org/wiki/Information_retrieval#Mean_average_precision)
|
16
18
|
* [F-measure](http://en.wikipedia.org/wiki/Information_retrieval#F-measure)
|
17
19
|
* [R-Precision](http://en.wikipedia.org/wiki/Information_retrieval#R-Precision)
|
@@ -44,6 +46,7 @@ For example, we have the aforementioned information need and query, and a list o
|
|
44
46
|
puts "F-3: #{e.f_measure(3)}"
|
45
47
|
puts "Precision at rank 10: #{e.precision_at_rank(10)}"
|
46
48
|
puts "Average Precision: #{e.average_precision}"
|
49
|
+
puts "NDCG @ 5: #{e.ndcg_at(5)}"
|
47
50
|
|
48
51
|
When you have several information needs and want to compute aggregate statistics, use an EvaliratorCollection like this:
|
49
52
|
|
@@ -51,7 +54,9 @@ When you have several information needs and want to compute aggregate statistics
|
|
51
54
|
queries.each do |query|
|
52
55
|
relevant = get_relevant_docids(query)
|
53
56
|
results = get_results(query)
|
54
|
-
e
|
57
|
+
e.add(relevant, results)
|
55
58
|
end
|
59
|
+
|
56
60
|
puts "MAP: #{e.mean_average_precision}"
|
57
|
-
puts "Precision-Recall Curve: #{e.precision_recall_curve}"
|
61
|
+
puts "Precision-Recall Curve: #{e.precision_recall_curve}"
|
62
|
+
puts "Avg. NDCG @ 3: #{e.average_ndcg_at(3)}"
|
data/lib/evalir/evalirator.rb
CHANGED
@@ -149,6 +149,17 @@ module Evalir
|
|
149
149
|
avg
|
150
150
|
end
|
151
151
|
|
152
|
+
# The reciprocal rank: 1 divided by the (1-based) rank of
# the most highly ranked relevant search hit. Returns 0.0
# when none of the search hits is relevant (including when
# there are no search hits at all).
def reciprocal_rank
  # find_index yields the 0-based position of the first hit
  # that is a relevant docid, or nil when there is none.
  first_relevant = @search_hits.find_index { |hit| @relevant_docids.include?(hit) }
  first_relevant ? 1.0 / (first_relevant + 1) : 0.0
end
|
162
|
+
|
152
163
|
# Discounted Cumulative Gain at
|
153
164
|
# rank k. For a relevant search
|
154
165
|
# result at position x, its con-
|
@@ -167,5 +178,26 @@ module Evalir
|
|
167
178
|
end
|
168
179
|
dcg
|
169
180
|
end
|
181
|
+
|
182
|
+
# Normalized Discounted Cumulative Gain at rank <em>k</em>.
# This is the #dcg_at value divided by the ideal DCG value
# at rank k, both computed with logarithm base
# <em>logbase</em>. Returns 0.0 when the DCG is not
# positive; in that case the ideal DCG is not computed.
def ndcg_at(k, logbase=2)
  dcg = dcg_at(k, logbase)
  # Return a Float literal here so the zero case does not
  # hand an Integer to callers that go on to divide it.
  return 0.0 unless dcg > 0

  dcg / ideal_dcg_at(k, logbase)
end
|
190
|
+
|
191
|
+
private
|
192
|
+
# DCG of an ideal ranking at rank k, using logarithm base
# logbase: position 1 contributes 1.0 and every position
# i > 1 contributes 1 / log(i, logbase). At most
# @true_positives positions (and never more than k) are
# counted. Returns 0.0 when that limit is below 1.
# NOTE(review): assumes @true_positives is an Integer count -
# confirm against the constructor.
def ideal_dcg_at(k, logbase=2)
  positions = [k, @true_positives].min
  (1..positions).reduce(0.0) do |idcg, rank|
    # Rank 1 is special-cased because log(1) is 0.
    idcg + (rank == 1 ? 1.0 : 1.0 / Math.log(rank, logbase))
  end
end
|
170
202
|
end
|
171
203
|
end
|
data/lib/evalir/evalirator_collection.rb
CHANGED
@@ -43,11 +43,11 @@ module Evalir
|
|
43
43
|
# a fancy way of saying 'average average
|
44
44
|
# precision'!
|
45
45
|
def mean_average_precision
  # Divide by a Float so an evalirator that reports an
  # Integer average precision is not truncated by integer
  # division. The size is read once, outside the fold.
  count = @evalirators.size.to_f
  # Each evalirator contributes average_precision / count;
  # an empty collection yields the 0.0 seed.
  @evalirators.reduce(0.0) { |mean, e| mean + (e.average_precision / count) }
end
|
48
|
+
|
49
|
+
# Mean Reciprocal Rank: the average of #reciprocal_rank
# over all evalirators in this collection. Returns 0.0
# for an empty collection.
def mean_reciprocal_rank
  # Read the size once and as a Float, so the division can
  # never be an Integer division.
  count = size.to_f
  reduce(0.0) { |mean, e| mean + (e.reciprocal_rank / count) }
end
|
52
52
|
|
53
53
|
# Gets the data for the precision-recall
|
@@ -55,17 +55,25 @@ module Evalir
|
|
55
55
|
# <em>to</em>], with a step size of <em>step</em>.
|
56
56
|
# This is the average over all evalirators.
|
57
57
|
def precision_recall_curve(from = 0, to = 100, step = 10)
  # Reject invalid ranges up front. These checks run before
  # the empty-collection check, so bad arguments raise even
  # when there is nothing to average.
  raise "From must be in the interval [0, 100)" unless from >= 0 && from < 100
  raise "To must be in the interval (from, 100]" unless to > from && to <= 100
  raise "Invalid step size - (to-from) must be divisible by step." unless ((to - from) % step) == 0
  return nil if @evalirators.empty?

  # One data point per step, with both end points included.
  steps = ((to - from) / step) + 1
  count = size.to_f
  curves = lazy_map { |e| e.precision_recall_curve(from, to, step) }

  # Sum every curve point-wise, pre-divided by the number of
  # evalirators, so the result is the average curve. The
  # accumulator is returned explicitly so the fold does not
  # depend on the return value of the inner iteration.
  curves.reduce([0.0] * steps) do |acc, data|
    data.each_with_index { |d, i| acc[i] += d / count }
    acc
  end
end
|
71
|
+
|
72
|
+
# Gets the average Normalized Discounted Cumulative Gain
# at rank <em>k</em> over all evalirators in this
# collection, using logarithm base <em>logbase</em>.
# Returns 0.0 when the collection yields no values.
def average_ndcg_at(k, logbase = 2)
  # Read the size once and as a Float: an evalirator may
  # report an Integer 0, and Integer / Integer would
  # truncate.
  count = size.to_f
  values = lazy_map { |e| e.ndcg_at(k, logbase) }
  values.reduce(0.0) { |mean, v| mean + (v / count) }
end
|
70
78
|
end
|
71
79
|
end
|
data/lib/evalir/version.rb
CHANGED
data/test/test_evalirator_collection.rb
CHANGED
@@ -16,9 +16,18 @@ class EvaliratorCollectionTest < Test::Unit::TestCase
|
|
16
16
|
assert_equal(2, @e.count)
|
17
17
|
end
|
18
18
|
|
19
|
+
# The collection's mean reciprocal rank must equal the mean
# of 1.0 and 0.5 (presumably the reciprocal ranks of the two
# fixture evalirators - the fixture is set up elsewhere).
def test_mean_reciprocal_rank
  assert_equal((1.0 + 0.5) / 2, @e.mean_reciprocal_rank)
end
|
23
|
+
|
19
24
|
# The averaged precision-recall curve, rounded to three
# decimals, must match the expected data points.
def test_precision_recall_curve
  rounded = @e.precision_recall_curve.map { |point| point.round(3) }
  assert_equal([1.0, 0.5, 0.5, 0.5, 0.375, 0.4, 0.417, 0.429, 0.375, 0.389, 0.4], rounded)
end
|
29
|
+
|
30
|
+
# The average NDCG at rank 3 over the fixture collection,
# rounded to two decimals, must be 0.5.
def test_average_ndcg_at_3
  rounded = @e.average_ndcg_at(3).round(2)
  assert_equal(0.5, rounded)
end
|
24
33
|
end
|
data/test/test_evalirator_ranked.rb
CHANGED
@@ -36,6 +36,10 @@ class EvaliratorRankedTest < Test::Unit::TestCase
|
|
36
36
|
assert_equal(0.4, @e.r_precision)
|
37
37
|
end
|
38
38
|
|
39
|
+
# The fixture evalirator must report a reciprocal rank of 1.0.
def test_reciprocal_rank
  actual = @e.reciprocal_rank
  assert_equal(1.0, actual)
end
|
42
|
+
|
39
43
|
def test_average_precision
|
40
44
|
e1 = Evalir::Evalirator.new([1,3,4,5,6,10], [1,2,3,4,5,6,7,8,9,10])
|
41
45
|
assert_equal(0.78, e1.average_precision.round(2))
|
@@ -48,5 +52,21 @@ class EvaliratorRankedTest < Test::Unit::TestCase
|
|
48
52
|
expected = 1.0 + (1.0/Math.log(3,2))
|
49
53
|
assert_equal(expected, @e.dcg_at(5))
|
50
54
|
end
|
55
|
+
|
56
|
+
# NDCG at rank 3 must equal the expected DCG divided by the
# expected ideal DCG, both spelled out by hand below.
def test_ndcg_at_3
  third_rank_gain = 1.0 / Math.log(3, 2)
  expected_dcg = 1.0 + third_rank_gain
  expected_idcg = 2.0 + third_rank_gain
  assert_equal(expected_dcg / expected_idcg, @e.ndcg_at(3))
end
|
61
|
+
|
62
|
+
# The two docid lists are disjoint, so DCG at rank 3 must
# be 0.0.
def test_dcg_when_no_relevant
  evalirator = Evalir::Evalirator.new([1, 2, 3], [4, 5, 6])
  actual = evalirator.dcg_at(3)
  assert_equal(0.0, actual)
end
|
66
|
+
|
67
|
+
# The two docid lists are disjoint, so NDCG at rank 3 must
# compare equal to 0.0.
def test_ndcg_when_no_relevant
  evalirator = Evalir::Evalirator.new([1, 2, 3], [4, 5, 6])
  actual = evalirator.ndcg_at(3)
  assert_equal(0.0, actual)
end
|
51
71
|
end
|
52
72
|
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: evalir
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.1
|
4
|
+
version: 0.0.2
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -13,7 +13,7 @@ date: 2011-09-30 00:00:00.000000000Z
|
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: rake
|
16
|
-
requirement: &
|
16
|
+
requirement: &70127782700140 !ruby/object:Gem::Requirement
|
17
17
|
none: false
|
18
18
|
requirements:
|
19
19
|
- - ! '>='
|
@@ -21,7 +21,7 @@ dependencies:
|
|
21
21
|
version: '0'
|
22
22
|
type: :development
|
23
23
|
prerelease: false
|
24
|
-
version_requirements: *
|
24
|
+
version_requirements: *70127782700140
|
25
25
|
description: Evalir is used to measure search relevance at Companybook, and offers
|
26
26
|
a number of standard measurements, from the basic precision and recall to single
|
27
27
|
value summaries such as NDCG and MAP.
|
@@ -58,7 +58,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
58
58
|
version: '0'
|
59
59
|
segments:
|
60
60
|
- 0
|
61
|
-
hash:
|
61
|
+
hash: 3533848118518243422
|
62
62
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
63
63
|
none: false
|
64
64
|
requirements:
|
@@ -67,7 +67,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
67
67
|
version: '0'
|
68
68
|
segments:
|
69
69
|
- 0
|
70
|
-
hash:
|
70
|
+
hash: 3533848118518243422
|
71
71
|
requirements: []
|
72
72
|
rubyforge_project:
|
73
73
|
rubygems_version: 1.8.10
|