retreval 0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -0
- data/README.md +321 -0
- data/TODO +5 -0
- data/bin/retreval +5 -0
- data/example/gold_standard.yml +48 -0
- data/example/query_results.yml +23 -0
- data/lib/retreval/gold_standard.rb +424 -0
- data/lib/retreval/options.rb +66 -0
- data/lib/retreval/query_result.rb +511 -0
- data/lib/retreval/runner.rb +121 -0
- data/output_avg_precision.yml +2 -0
- data/output_statistics.yml +82 -0
- data/retreval.gemspec +16 -0
- data/test/test_gold_standard.rb +111 -0
- data/test/test_query_result.rb +166 -0
- metadata +390 -0
@@ -0,0 +1,121 @@
|
|
1
|
+
require_relative 'options'
|
2
|
+
require_relative 'gold_standard'
|
3
|
+
require_relative 'query_result'
|
4
|
+
require 'yaml'
|
5
|
+
require 'pp'
|
6
|
+
|
7
|
+
# Retreval loads and defines gold standards, adds query results and
# calculates the most common metrics used in information retrieval evaluation.
module Retreval

  # Drives the library when it is called from the command line: loads the
  # gold standard and the query result set named in the options, computes the
  # per-query statistics and writes them to YAML files.
  #
  # Progress messages are controlled by the global <tt>$verbose</tt> flag.
  class Runner

    # Creates a new Runner and parses the command-line arguments.
    #
    # args - passed unchanged to Options.new.
    def initialize(args)
      @options = Options.new(args)
    end

    # Loads the gold standard file named in the options into @gold_standard,
    # using the loader that matches the configured format ("yaml" or "plain").
    # Does nothing when no gold standard file was given.
    #
    # Raises RuntimeError if the format is neither "yaml" nor "plain".
    def load_gold_standard
      file = @options.gold_standard_file
      return if file.nil?

      print "Loading gold standard file '#{file}' ... " if $verbose
      @gold_standard = GoldStandard.new
      case @options.format
      when "yaml"
        @gold_standard.load_from_yaml_file file
      when "plain"
        @gold_standard.load_from_plaintext_file file
      else
        raise "I don't understand the format '#{@options.format}'"
      end

      if $verbose
        print "done\n"
        print "Gold standard loaded from #{file} contains:\n" \
              "- #{@gold_standard.queries.count} queries,\n" \
              "- #{@gold_standard.documents.count} documents,\n" \
              "- #{@gold_standard.judgements.count} judgements, made by\n" \
              "- #{@gold_standard.users.count} users\n\n"
      end
    end

    # Loads the query result set file named in the options into
    # @query_result_set, attaching the previously loaded gold standard
    # (which is nil if load_gold_standard loaded nothing).
    # Does nothing when no query result set file was given.
    #
    # Raises RuntimeError if the format is neither "yaml" nor "plain".
    def load_query_result_set
      file = @options.query_result_set_file
      return if file.nil?

      print "Loading query result set from file '#{file}' ... " if $verbose
      @query_result_set = QueryResultSet.new :gold_standard => @gold_standard
      case @options.format
      when "yaml"
        @query_result_set.load_from_yaml_file file
      when "plain"
        # NOTE(review): the "plain" format also goes through the YAML loader
        # here, unlike load_gold_standard. This looks like a copy-paste slip;
        # confirm whether QueryResultSet offers a plaintext loader before
        # changing it.
        @query_result_set.load_from_yaml_file file
      else
        raise "I don't understand the format '#{@options.format}'"
      end

      if $verbose
        print "done\n"
        print "Query results loaded from #{file} contain:\n" \
              "- #{@query_result_set.query_results.count} query results\n\n"
      end
    end

    # Computes the statistics and the average precision of every query result
    # and writes both collections, keyed by query string, to
    # "<output>_statistics.yml" and "<output>_avg_precision.yml".
    #
    # This method terminates the process by calling +exit+ once all results
    # have been processed.
    #
    # Raises RuntimeError if no query result set has been loaded.
    def begin_calculations
      # Fail with a clear message instead of a NoMethodError on nil.
      if @query_result_set.nil?
        raise "No query result set was loaded, there is nothing to calculate!"
      end

      @statistics = {}
      @average_precision = {}

      results = @query_result_set.query_results
      total = results.count

      results.each_with_index do |result, index|
        print "Cleaning up results and removing documents without judgements ... \n" if $verbose
        result.cleanup

        print "Calculating statistics for result #{index + 1} of #{total} ... "
        querystring = result.query.querystring
        @statistics[querystring] = result.statistics
        @average_precision[querystring] = result.average_precision
        print "Done.\n"

        result.print_ranked_table if $verbose

        # Both files are rewritten after every result, so they always reflect
        # the results processed so far.
        write_to_yaml_file :data => @statistics, :filename => "statistics.yml"
        write_to_yaml_file :data => @average_precision, :filename => "avg_precision.yml"
      end

      if $verbose
        print "Finished calculating all results. Exiting.\n"
        print "The mean average precision was #{@query_result_set.mean_average_precision}\n"
      end
      exit
    end

    # Writes an object to a YAML file. The file name is prefixed with the
    # configured output name and an underscore.
    # Called by:
    #     write_to_yaml_file :data => my_data, :filename => "my_data_file.yml"
    #
    # args - Hash with the keys :data (object to serialize) and :filename.
    #
    # Raises RuntimeError if either :data or :filename is missing.
    def write_to_yaml_file(args)
      data = args[:data]
      filename = args[:filename]

      if data.nil? || filename.nil?
        raise "Must pass filename and data in order to write to file!"
      end

      path = "#{@options.output}_#{filename}"
      File.open(path, "w") { |f| f.write data.to_yaml }
    end

    # Called when the script is executed from the command line: loads both
    # input files and starts the calculations (which end the process).
    def run
      load_gold_standard
      load_query_result_set
      begin_calculations
    end
  end
end
|
@@ -0,0 +1,82 @@
|
|
1
|
+
---
|
2
|
+
Example Query:
|
3
|
+
- :precision: 1.0
|
4
|
+
:recall: 0.25
|
5
|
+
:false_negatives: 3
|
6
|
+
:false_positives: 0
|
7
|
+
:true_negatives: 6
|
8
|
+
:true_positives: 1
|
9
|
+
:document: ict21307
|
10
|
+
:relevant: true
|
11
|
+
- :precision: 0.5
|
12
|
+
:recall: 0.25
|
13
|
+
:false_negatives: 3
|
14
|
+
:false_positives: 1
|
15
|
+
:true_negatives: 5
|
16
|
+
:true_positives: 1
|
17
|
+
:document: ict21309
|
18
|
+
:relevant: false
|
19
|
+
- :precision: 0.3333333333333333
|
20
|
+
:recall: 0.25
|
21
|
+
:false_negatives: 3
|
22
|
+
:false_positives: 2
|
23
|
+
:true_negatives: 4
|
24
|
+
:true_positives: 1
|
25
|
+
:document: ict21311
|
26
|
+
:relevant: false
|
27
|
+
- :precision: 0.25
|
28
|
+
:recall: 0.25
|
29
|
+
:false_negatives: 3
|
30
|
+
:false_positives: 3
|
31
|
+
:true_negatives: 3
|
32
|
+
:true_positives: 1
|
33
|
+
:document: ict21313
|
34
|
+
:relevant: false
|
35
|
+
- :precision: 0.4
|
36
|
+
:recall: 0.5
|
37
|
+
:false_negatives: 2
|
38
|
+
:false_positives: 3
|
39
|
+
:true_negatives: 3
|
40
|
+
:true_positives: 2
|
41
|
+
:document: ict21315
|
42
|
+
:relevant: true
|
43
|
+
- :precision: 0.3333333333333333
|
44
|
+
:recall: 0.5
|
45
|
+
:false_negatives: 2
|
46
|
+
:false_positives: 4
|
47
|
+
:true_negatives: 2
|
48
|
+
:true_positives: 2
|
49
|
+
:document: ict21317
|
50
|
+
:relevant: false
|
51
|
+
- :precision: 0.2857142857142857
|
52
|
+
:recall: 0.5
|
53
|
+
:false_negatives: 2
|
54
|
+
:false_positives: 5
|
55
|
+
:true_negatives: 1
|
56
|
+
:true_positives: 2
|
57
|
+
:document: ict21319
|
58
|
+
:relevant: false
|
59
|
+
- :precision: 0.25
|
60
|
+
:recall: 0.5
|
61
|
+
:false_negatives: 2
|
62
|
+
:false_positives: 6
|
63
|
+
:true_negatives: 0
|
64
|
+
:true_positives: 2
|
65
|
+
:document: ict21321
|
66
|
+
:relevant: false
|
67
|
+
- :precision: 0.3333333333333333
|
68
|
+
:recall: 0.75
|
69
|
+
:false_negatives: 1
|
70
|
+
:false_positives: 6
|
71
|
+
:true_negatives: 0
|
72
|
+
:true_positives: 3
|
73
|
+
:document: ict21323
|
74
|
+
:relevant: true
|
75
|
+
- :precision: 0.4
|
76
|
+
:recall: 1.0
|
77
|
+
:false_negatives: 0
|
78
|
+
:false_positives: 6
|
79
|
+
:true_negatives: 0
|
80
|
+
:true_positives: 4
|
81
|
+
:document: ict21325
|
82
|
+
:relevant: true
|
data/retreval.gemspec
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Gem specification for retreval.
Gem::Specification.new do |spec|
  # The README next to this gemspec doubles as the long description.
  readme_path = File.join(File.dirname(__FILE__), 'README.md')

  # Identity
  spec.name     = "retreval"
  spec.version  = "0.1"
  spec.platform = Gem::Platform::RUBY
  spec.required_ruby_version = '>=1.9'

  # Description
  spec.summary     = "A Ruby API for Evaluating Retrieval Results"
  spec.description = File.read(readme_path)
  # spec.requirements = [ 'Nothing special' ]

  # Author
  spec.author   = "Werner Robitza"
  spec.email    = "werner.robitza@univie.ac.at"
  spec.homepage = "http://github.com/slhck/retreval"

  # Contents: every path below the current directory is packaged.
  spec.files       = Dir.glob('**/**')
  spec.executables = %w[retreval]
  spec.test_files  = Dir.glob("test/test*.rb")

  # NOTE(review): has_rdoc is reported as deprecated by newer RubyGems
  # releases - confirm against the targeted RubyGems version before removing.
  spec.has_rdoc = true
end
|
@@ -0,0 +1,111 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'retreval/gold_standard'
|
3
|
+
|
4
|
+
# Some basic unit tests for the GoldStandard
class TestGoldStandard < Test::Unit::TestCase

  # Tolerance used when comparing computed kappa values. They are floating
  # point results, so they are not compared for exact equality.
  KAPPA_DELTA = 1e-9

  # Creates a fresh GoldStandard in @gold_standard holding one judgement:
  # user "John Doe" judged "doc1" relevant for "query1".
  def add_test_judgement
    @gold_standard = Retreval::GoldStandard.new
    @gold_standard.add_judgement :document => "doc1", :query => "query1", :relevant => true, :user => "John Doe"
  end

  # Tests whether the Document is correctly included
  def test_document
    add_test_judgement
    assert(@gold_standard.contains_document?(:id => "doc1"))
  end

  # Tests whether the Query is correctly included
  def test_query
    add_test_judgement
    assert(@gold_standard.contains_query?(:querystring => "query1"))
  end

  # Tests whether the User is correctly included
  def test_user
    add_test_judgement
    assert(@gold_standard.contains_user?(:id => "John Doe"))
  end

  # Tests whether the Judgement is correctly included
  def test_judgement
    add_test_judgement
    assert(@gold_standard.contains_judgement?(:document => "doc1", :query => "query1"))
  end

  # Tests whether the Judgement (i.e. the relevancy) is correctly added
  def test_relevant
    add_test_judgement
    assert(@gold_standard.relevant?(:document => "doc1", :query => "query1"))
  end

  # Tests if the kappa measure is calculated correctly.
  # See http://nlp.stanford.edu/IR-book/html/htmledition/assessing-relevance-1.html
  # for the examples in this test
  def test_kappa_ir_book
    build_two_judge_standard [
      [1..300,   true,  true],
      [301..320, true,  false],
      [321..330, false, true],
      [331..400, false, false]
    ]

    assert_in_delta(0.7759103641456584, @gold_standard.kappa, KAPPA_DELTA, "IR Book kappa test failed!")
  end

  # Tests if the kappa measure is calculated correctly for a second, smaller
  # agreement table of 50 judged documents (the "Wikipedia" example, going by
  # the test name).
  def test_kappa_wikipedia
    build_two_judge_standard [
      [1..20,  true,  true],
      [21..25, true,  false],
      [26..35, false, true],
      [36..50, false, false]
    ]

    assert_in_delta(0.3939393939393937, @gold_standard.kappa, KAPPA_DELTA, "Wikipedia kappa test failed!")
  end

  private

  # Creates a fresh GoldStandard in @gold_standard in which the users "Alice"
  # and "Bob" each judge the pairs ("doc<i>", "query<i>").
  #
  # groups - Array of [range, alice_relevant, bob_relevant] triples; every
  #          index i in range receives Alice's judgement first, then Bob's.
  def build_two_judge_standard(groups)
    @gold_standard = Retreval::GoldStandard.new

    groups.each do |range, alice_relevant, bob_relevant|
      range.each do |i|
        @gold_standard.add_judgement :document => "doc#{i}", :query => "query#{i}", :user => "Alice", :relevant => alice_relevant
        @gold_standard.add_judgement :document => "doc#{i}", :query => "query#{i}", :user => "Bob", :relevant => bob_relevant
      end
    end
  end
end
|
@@ -0,0 +1,166 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
require 'retreval/gold_standard'
|
3
|
+
require 'retreval/query_result'
|
4
|
+
|
5
|
+
# Some basic unit tests for QueryResult
# Unranked results include 4 documents of 10, which are all retrieved.
# The ranked results are evaluated from this table:
# Index Relevant Precision Recall Document ID
# 1     [X]      1.000     0.250  doc1
# 2     [X]      1.000     0.500  doc2
# 3     [ ]      0.667     0.500  doc5
# 4     [X]      0.750     0.750  doc3
# 5     [ ]      0.600     0.750  doc6
# 6     [X]      0.667     1.000  doc4
# 7     [ ]      0.571     1.000  doc7
# 8     [ ]      0.500     1.000  doc8
# 9     [ ]      0.444     1.000  doc9
# 10    [ ]      0.400     1.000  doc10
class TestQueryResult < Test::Unit::TestCase

  # Tolerance used when comparing computed precision/recall values. They are
  # floating point results, so they are not compared for exact equality.
  DELTA = 1e-9

  # Order in which the documents are returned by the ranked query result.
  RANKED_DOCUMENTS = %w[doc1 doc2 doc5 doc3 doc6 doc4 doc7 doc8 doc9 doc10]

  # Creates a fresh GoldStandard in @gold_standard with 10 judgements for
  # "query1": doc1..doc4 are relevant, doc5..doc10 are not.
  def add_test_judgements
    @gold_standard = Retreval::GoldStandard.new

    (1..10).each do |i|
      @gold_standard.add_judgement :document => "doc#{i}", :query => "query1", :relevant => (i <= 4)
    end
  end

  # Creates the unranked query result to be tested against in @query_result;
  # it contains doc1..doc10.
  def add_unranked_query_result
    @query_result = Retreval::UnrankedQueryResult.new :query => "query1", :gold_standard => @gold_standard

    (1..10).each do |i|
      @query_result.add_document :id => "doc#{i}"
    end
  end

  # Creates the ranked query result to be tested against in @query_result;
  # documents are added in the order shown in the table above.
  def add_ranked_query_result
    @query_result = Retreval::RankedQueryResult.new :query => "query1", :gold_standard => @gold_standard

    RANKED_DOCUMENTS.each do |document_id|
      @query_result.add_document :id => document_id
    end
  end

  # Tests the unranked precision
  def test_unranked_precision
    add_test_judgements
    add_unranked_query_result
    assert_in_delta(0.4, @query_result.statistics[:precision], DELTA)
  end

  # Tests if the unranked recall is calculated correctly
  def test_unranked_recall
    add_test_judgements
    add_unranked_query_result
    assert_in_delta(1.0, @query_result.statistics[:recall], DELTA)
  end

  # Tests if the ranked precisions are calculated correctly
  def test_ranked_precision
    add_test_judgements
    add_ranked_query_result
    expected_precision = [
      1,
      1,
      0.6666666666666666,
      0.75,
      0.6,
      0.6666666666666666,
      0.5714285714285714,
      0.5,
      0.4444444444444444,
      0.4
    ]
    actual_precision = @query_result.statistics.map { |rank| rank[:precision] }

    assert_values_close(expected_precision, actual_precision, "ranked precision")
  end

  # Tests if the ranked recalls are calculated correctly
  def test_ranked_recall
    add_test_judgements
    add_ranked_query_result
    expected_recall = [
      0.25,
      0.5,
      0.5,
      0.75,
      0.75,
      1,
      1,
      1,
      1,
      1
    ]
    actual_recall = @query_result.statistics.map { |rank| rank[:recall] }

    assert_values_close(expected_recall, actual_recall, "ranked recall")
  end

  # Tests the correct calculation of the eleven point precision as outlined here:
  # http://nlp.stanford.edu/IR-book/html/htmledition/evaluation-of-ranked-retrieval-results-1.html
  def test_eleven_point_precision
    add_test_judgements
    add_ranked_query_result
    expected_results = [
      1.0,
      1.0,
      1.0,
      0.6666666666666666,
      0.6666666666666666,
      0.6666666666666666,
      0.6,
      0.6,
      0.4,
      0.4,
      0.4
    ]
    # Each yielded element is indexed with [1] to obtain the precision value.
    actual_results = @query_result.eleven_point_precision.map { |point| point[1] }

    assert_values_close(expected_results, actual_results, "eleven point precision")
  end

  private

  # Asserts that +actual+ holds exactly as many values as +expected+ and that
  # each pair of values agrees within DELTA. The length check keeps the test
  # from passing when the library returns fewer values than expected.
  #
  # expected - Array of expected numbers.
  # actual   - Array of computed numbers, in the same order.
  # label    - String naming the series, used in failure messages.
  def assert_values_close(expected, actual, label)
    assert_equal(expected.size, actual.size, "#{label}: unexpected number of values")

    expected.zip(actual).each_with_index do |(wanted, got), index|
      assert_in_delta(wanted, got, DELTA, "#{label}: mismatch at index #{index}")
    end
  end
end
|