retreval 0.1 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/TODO +1 -1
- data/doc/CHANGELOG.html +124 -0
- data/doc/Retreval.html +212 -0
- data/doc/Retreval/Document.html +297 -0
- data/doc/Retreval/GoldStandard.html +1013 -0
- data/doc/Retreval/Judgement.html +353 -0
- data/doc/Retreval/Options.html +406 -0
- data/doc/Retreval/Query.html +296 -0
- data/doc/Retreval/QueryResult.html +746 -0
- data/doc/Retreval/QueryResultSet.html +429 -0
- data/doc/Retreval/RankedQueryResult.html +556 -0
- data/doc/Retreval/ResultDocument.html +253 -0
- data/doc/Retreval/Runner.html +506 -0
- data/doc/Retreval/UnrankedQueryResult.html +278 -0
- data/doc/Retreval/User.html +354 -0
- data/doc/TODO.html +124 -0
- data/doc/TestGoldStandard.html +551 -0
- data/doc/TestQueryResult.html +606 -0
- data/doc/bin/retreval.html +54 -0
- data/doc/created.rid +10 -0
- data/doc/index.html +218 -0
- data/doc/lib/retreval/gold_standard_rb.html +52 -0
- data/doc/lib/retreval/options_rb.html +56 -0
- data/doc/lib/retreval/query_result_rb.html +54 -0
- data/doc/lib/retreval/runner_rb.html +56 -0
- data/doc/rdoc.css +706 -0
- data/doc/test/test_gold_standard_rb.html +56 -0
- data/doc/test/test_query_result_rb.html +58 -0
- data/lib/retreval/gold_standard.rb +6 -9
- data/lib/retreval/query_result.rb +5 -2
- data/retreval.gemspec +1 -1
- metadata +29 -2
@@ -0,0 +1,56 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: test_gold_standard.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-04-05 15:32:54 +0200</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>test/unit</li>
|
35
|
+
|
36
|
+
<li>retreval/gold_standard</li>
|
37
|
+
|
38
|
+
</ul>
|
39
|
+
</dd>
|
40
|
+
|
41
|
+
|
42
|
+
|
43
|
+
</dl>
|
44
|
+
</div>
|
45
|
+
|
46
|
+
<div id="documentation">
|
47
|
+
|
48
|
+
<div class="description">
|
49
|
+
<h2>Description</h2>
|
50
|
+
|
51
|
+
</div>
|
52
|
+
|
53
|
+
</div>
|
54
|
+
</body>
|
55
|
+
</html>
|
56
|
+
|
@@ -0,0 +1,58 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
6
|
+
<head>
|
7
|
+
<meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
|
8
|
+
|
9
|
+
<title>File: test_query_result.rb [RDoc Documentation]</title>
|
10
|
+
|
11
|
+
<link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
|
12
|
+
|
13
|
+
<script src="../js/jquery.js" type="text/javascript"
|
14
|
+
charset="utf-8"></script>
|
15
|
+
<script src="../js/thickbox-compressed.js" type="text/javascript"
|
16
|
+
charset="utf-8"></script>
|
17
|
+
<script src="../js/quicksearch.js" type="text/javascript"
|
18
|
+
charset="utf-8"></script>
|
19
|
+
<script src="../js/darkfish.js" type="text/javascript"
|
20
|
+
charset="utf-8"></script>
|
21
|
+
</head>
|
22
|
+
|
23
|
+
<body class="file file-popup">
|
24
|
+
<div id="metadata">
|
25
|
+
<dl>
|
26
|
+
<dt class="modified-date">Last Modified</dt>
|
27
|
+
<dd class="modified-date">2011-04-05 15:32:58 +0200</dd>
|
28
|
+
|
29
|
+
|
30
|
+
<dt class="requires">Requires</dt>
|
31
|
+
<dd class="requires">
|
32
|
+
<ul>
|
33
|
+
|
34
|
+
<li>test/unit</li>
|
35
|
+
|
36
|
+
<li>retreval/gold_standard</li>
|
37
|
+
|
38
|
+
<li>retreval/query_result</li>
|
39
|
+
|
40
|
+
</ul>
|
41
|
+
</dd>
|
42
|
+
|
43
|
+
|
44
|
+
|
45
|
+
</dl>
|
46
|
+
</div>
|
47
|
+
|
48
|
+
<div id="documentation">
|
49
|
+
|
50
|
+
<div class="description">
|
51
|
+
<h2>Description</h2>
|
52
|
+
|
53
|
+
</div>
|
54
|
+
|
55
|
+
</div>
|
56
|
+
</body>
|
57
|
+
</html>
|
58
|
+
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Retreval
|
2
2
|
|
3
|
-
# A
|
4
|
-
# cartesian product of
|
3
|
+
# A Gold Standard is composed of several Judgements for the
|
4
|
+
# cartesian product of Documents and Queries
|
5
5
|
class GoldStandard
|
6
6
|
|
7
7
|
attr_reader :documents, :judgements, :queries, :users
|
@@ -110,8 +110,8 @@ module Retreval
|
|
110
110
|
end
|
111
111
|
|
112
112
|
|
113
|
-
# Adds a judgement (
|
114
|
-
# All of those are strings in the public interface.
|
113
|
+
# Adds a judgement (Document, Query, relevancy) to the GoldStandard.
|
114
|
+
# All of those are represented as strings in the public interface.
|
115
115
|
# The user ID is an optional parameter that can be used to measure kappa later.
|
116
116
|
# Call this with:
|
117
117
|
# add_judgement :document => doc_id, :query => query_string, :relevant => boolean, :user => John
|
@@ -136,7 +136,6 @@ module Retreval
|
|
136
136
|
|
137
137
|
# If there is no judgement for this combination, just add the document/query pair
|
138
138
|
if relevant.nil?
|
139
|
-
# TODO: improve efficiency by introducing hashes !
|
140
139
|
@documents[document_id] = document
|
141
140
|
@queries << query unless @queries.include?(query)
|
142
141
|
return
|
@@ -155,8 +154,7 @@ module Retreval
|
|
155
154
|
@queries << query unless @queries.include?(query)
|
156
155
|
@judgements << judgement
|
157
156
|
else
|
158
|
-
|
159
|
-
raise "Need at least a Document, and a Query for creating the new entry."
|
157
|
+
raise ArgumentError.new("Need at least a Document, and a Query for creating the new entry.")
|
160
158
|
end
|
161
159
|
|
162
160
|
end
|
@@ -200,7 +198,6 @@ module Retreval
|
|
200
198
|
document = Document.new :id => args[:document]
|
201
199
|
|
202
200
|
#TODO: a hash could improve performance here as well
|
203
|
-
|
204
201
|
@judgements.each { |judgement| return true if judgement.document == document and judgement.query == query }
|
205
202
|
|
206
203
|
false
|
@@ -240,7 +237,7 @@ module Retreval
|
|
240
237
|
# See: http://nlp.stanford.edu/IR-book/html/htmledition/assessing-relevance-1.html
|
241
238
|
def kappa
|
242
239
|
|
243
|
-
#
|
240
|
+
# TODO This isn't very pretty, maybe there's a more ruby-esque way to do this?
|
244
241
|
sum = 0
|
245
242
|
count = 0
|
246
243
|
|
@@ -230,9 +230,10 @@ module Retreval
|
|
230
230
|
# Use the gold standard we initially received
|
231
231
|
standard = @gold_standard
|
232
232
|
|
233
|
-
# If there is an unranked result to be calculated
|
233
|
+
# If there is an unranked result to be calculated ...
|
234
234
|
if resultset.nil?
|
235
235
|
unranked = true
|
236
|
+
# ... we'll just take the array of documents for this result
|
236
237
|
resultset = OpenStruct.new
|
237
238
|
resultset.documents = @documents.values
|
238
239
|
resultset.query = @query
|
@@ -250,7 +251,9 @@ module Retreval
|
|
250
251
|
|
251
252
|
# Get the document sets we are working on
|
252
253
|
retrieved_documents = resultset.documents # => This is an Array all the time
|
253
|
-
|
254
|
+
# This means that we will filter out all retrieved documents from the Gold Standard
|
255
|
+
# TODO: This can definitely be improved, but how?
|
256
|
+
not_retrieved_documents = standard.documents.reject { |key, doc| retrieved_documents.include? doc } # => This is a Hash as it refers to the Gold Standard
|
254
257
|
|
255
258
|
# Check whether each of the retrieved documents is relevant or not ...
|
256
259
|
retrieved_documents.each do |doc|
|
data/retreval.gemspec
CHANGED
@@ -3,7 +3,7 @@ Gem::Specification.new do |s|
|
|
3
3
|
s.summary = "A Ruby API for Evaluating Retrieval Results"
|
4
4
|
s.description = File.read(File.join(File.dirname(__FILE__), 'README.md'))
|
5
5
|
# s.requirements = [ 'Nothing special' ]
|
6
|
-
s.version = "0.1"
|
6
|
+
s.version = "0.1.1"
|
7
7
|
s.author = "Werner Robitza"
|
8
8
|
s.email = "werner.robitza@univie.ac.at"
|
9
9
|
s.homepage = "http://github.com/slhck/retreval"
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: retreval
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version:
|
5
|
+
version: 0.1.1
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Werner Robitza
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-04-
|
13
|
+
date: 2011-04-06 00:00:00 Z
|
14
14
|
dependencies: []
|
15
15
|
|
16
16
|
description: |-
|
@@ -345,6 +345,33 @@ extra_rdoc_files: []
|
|
345
345
|
files:
|
346
346
|
- bin/retreval
|
347
347
|
- CHANGELOG
|
348
|
+
- doc/bin/retreval.html
|
349
|
+
- doc/CHANGELOG.html
|
350
|
+
- doc/created.rid
|
351
|
+
- doc/index.html
|
352
|
+
- doc/lib/retreval/gold_standard_rb.html
|
353
|
+
- doc/lib/retreval/options_rb.html
|
354
|
+
- doc/lib/retreval/query_result_rb.html
|
355
|
+
- doc/lib/retreval/runner_rb.html
|
356
|
+
- doc/rdoc.css
|
357
|
+
- doc/Retreval/Document.html
|
358
|
+
- doc/Retreval/GoldStandard.html
|
359
|
+
- doc/Retreval/Judgement.html
|
360
|
+
- doc/Retreval/Options.html
|
361
|
+
- doc/Retreval/Query.html
|
362
|
+
- doc/Retreval/QueryResult.html
|
363
|
+
- doc/Retreval/QueryResultSet.html
|
364
|
+
- doc/Retreval/RankedQueryResult.html
|
365
|
+
- doc/Retreval/ResultDocument.html
|
366
|
+
- doc/Retreval/Runner.html
|
367
|
+
- doc/Retreval/UnrankedQueryResult.html
|
368
|
+
- doc/Retreval/User.html
|
369
|
+
- doc/Retreval.html
|
370
|
+
- doc/test/test_gold_standard_rb.html
|
371
|
+
- doc/test/test_query_result_rb.html
|
372
|
+
- doc/TestGoldStandard.html
|
373
|
+
- doc/TestQueryResult.html
|
374
|
+
- doc/TODO.html
|
348
375
|
- example/gold_standard.yml
|
349
376
|
- example/query_results.yml
|
350
377
|
- lib/retreval/gold_standard.rb
|