retreval 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,56 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
8
+
9
+ <title>File: test_gold_standard.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="../js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="../js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="../js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-04-05 15:32:54 +0200</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>test/unit</li>
35
+
36
+ <li>retreval/gold_standard</li>
37
+
38
+ </ul>
39
+ </dd>
40
+
41
+
42
+
43
+ </dl>
44
+ </div>
45
+
46
+ <div id="documentation">
47
+
48
+ <div class="description">
49
+ <h2>Description</h2>
50
+
51
+ </div>
52
+
53
+ </div>
54
+ </body>
55
+ </html>
56
+
@@ -0,0 +1,58 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
6
+ <head>
7
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
8
+
9
+ <title>File: test_query_result.rb [RDoc Documentation]</title>
10
+
11
+ <link type="text/css" media="screen" href="../rdoc.css" rel="stylesheet" />
12
+
13
+ <script src="../js/jquery.js" type="text/javascript"
14
+ charset="utf-8"></script>
15
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
16
+ charset="utf-8"></script>
17
+ <script src="../js/quicksearch.js" type="text/javascript"
18
+ charset="utf-8"></script>
19
+ <script src="../js/darkfish.js" type="text/javascript"
20
+ charset="utf-8"></script>
21
+ </head>
22
+
23
+ <body class="file file-popup">
24
+ <div id="metadata">
25
+ <dl>
26
+ <dt class="modified-date">Last Modified</dt>
27
+ <dd class="modified-date">2011-04-05 15:32:58 +0200</dd>
28
+
29
+
30
+ <dt class="requires">Requires</dt>
31
+ <dd class="requires">
32
+ <ul>
33
+
34
+ <li>test/unit</li>
35
+
36
+ <li>retreval/gold_standard</li>
37
+
38
+ <li>retreval/query_result</li>
39
+
40
+ </ul>
41
+ </dd>
42
+
43
+
44
+
45
+ </dl>
46
+ </div>
47
+
48
+ <div id="documentation">
49
+
50
+ <div class="description">
51
+ <h2>Description</h2>
52
+
53
+ </div>
54
+
55
+ </div>
56
+ </body>
57
+ </html>
58
+
@@ -1,7 +1,7 @@
1
1
  module Retreval
2
2
 
3
- # A gold standard is composed of several judgements for the
4
- # cartesian product of documents and queries
3
+ # A Gold Standard is composed of several Judgements for the
4
+ # Cartesian product of Documents and Queries
5
5
  class GoldStandard
6
6
 
7
7
  attr_reader :documents, :judgements, :queries, :users
@@ -110,8 +110,8 @@ module Retreval
110
110
  end
111
111
 
112
112
 
113
- # Adds a judgement (document, query, relevancy) to the gold standard.
114
- # All of those are strings in the public interface.
113
+ # Adds a judgement (Document, Query, relevancy) to the GoldStandard.
114
+ # All of those are represented as strings in the public interface.
115
115
  # The user ID is an optional parameter that can be used to measure kappa later.
116
116
  # Call this with:
117
117
  # add_judgement :document => doc_id, :query => query_string, :relevant => boolean, :user => John
@@ -136,7 +136,6 @@ module Retreval
136
136
 
137
137
  # If there is no judgement for this combination, just add the document/query pair
138
138
  if relevant.nil?
139
- # TODO: improve efficiency by introducing hashes !
140
139
  @documents[document_id] = document
141
140
  @queries << query unless @queries.include?(query)
142
141
  return
@@ -155,8 +154,7 @@ module Retreval
155
154
  @queries << query unless @queries.include?(query)
156
155
  @judgements << judgement
157
156
  else
158
- #TOOD I think there is somethink like an ArgumentExcpetion in Ruby; use that if applicable
159
- raise "Need at least a Document, and a Query for creating the new entry."
157
+ raise ArgumentError.new("Need at least a Document, and a Query for creating the new entry.")
160
158
  end
161
159
 
162
160
  end
@@ -200,7 +198,6 @@ module Retreval
200
198
  document = Document.new :id => args[:document]
201
199
 
202
200
  #TODO: a hash could improve performance here as well
203
-
204
201
  @judgements.each { |judgement| return true if judgement.document == document and judgement.query == query }
205
202
 
206
203
  false
@@ -240,7 +237,7 @@ module Retreval
240
237
  # See: http://nlp.stanford.edu/IR-book/html/htmledition/assessing-relevance-1.html
241
238
  def kappa
242
239
 
243
- # FIXME: This isn't very pretty, maybe there's a more ruby-esque way to do this?
240
+ # TODO: This isn't very pretty, maybe there's a more ruby-esque way to do this?
244
241
  sum = 0
245
242
  count = 0
246
243
 
@@ -230,9 +230,10 @@ module Retreval
230
230
  # Use the gold standard we initially received
231
231
  standard = @gold_standard
232
232
 
233
- # If there is an unranked result to be calculated, we will
233
+ # If there is an unranked result to be calculated ..
234
234
  if resultset.nil?
235
235
  unranked = true
236
+ # ... we'll just take the array of documents for this result
236
237
  resultset = OpenStruct.new
237
238
  resultset.documents = @documents.values
238
239
  resultset.query = @query
@@ -250,7 +251,9 @@ module Retreval
250
251
 
251
252
  # Get the document sets we are working on
252
253
  retrieved_documents = resultset.documents # => This is an Array all the time
253
- not_retrieved_documents = standard.documents.reject { |key, doc| retrieved_documents.include? doc } # => This is a Hash
254
+ # This means that we will filter out all retrieved documents from the Gold Standard
255
+ # TODO: This can definitely be improved, but how?
256
+ not_retrieved_documents = standard.documents.reject { |key, doc| retrieved_documents.include? doc } # => This is a Hash as it refers to the Gold Standard
254
257
 
255
258
  # Check whether each of the retrieved documents is relevant or not ...
256
259
  retrieved_documents.each do |doc|
@@ -3,7 +3,7 @@ Gem::Specification.new do |s|
3
3
  s.summary = "A Ruby API for Evaluating Retrieval Results"
4
4
  s.description = File.read(File.join(File.dirname(__FILE__), 'README.md'))
5
5
  # s.requirements = [ 'Nothing special' ]
6
- s.version = "0.1"
6
+ s.version = "0.1.1"
7
7
  s.author = "Werner Robitza"
8
8
  s.email = "werner.robitza@univie.ac.at"
9
9
  s.homepage = "http://github.com/slhck/retreval"
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: retreval
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: "0.1"
5
+ version: 0.1.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - Werner Robitza
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-04-05 00:00:00 Z
13
+ date: 2011-04-06 00:00:00 Z
14
14
  dependencies: []
15
15
 
16
16
  description: |-
@@ -345,6 +345,33 @@ extra_rdoc_files: []
345
345
  files:
346
346
  - bin/retreval
347
347
  - CHANGELOG
348
+ - doc/bin/retreval.html
349
+ - doc/CHANGELOG.html
350
+ - doc/created.rid
351
+ - doc/index.html
352
+ - doc/lib/retreval/gold_standard_rb.html
353
+ - doc/lib/retreval/options_rb.html
354
+ - doc/lib/retreval/query_result_rb.html
355
+ - doc/lib/retreval/runner_rb.html
356
+ - doc/rdoc.css
357
+ - doc/Retreval/Document.html
358
+ - doc/Retreval/GoldStandard.html
359
+ - doc/Retreval/Judgement.html
360
+ - doc/Retreval/Options.html
361
+ - doc/Retreval/Query.html
362
+ - doc/Retreval/QueryResult.html
363
+ - doc/Retreval/QueryResultSet.html
364
+ - doc/Retreval/RankedQueryResult.html
365
+ - doc/Retreval/ResultDocument.html
366
+ - doc/Retreval/Runner.html
367
+ - doc/Retreval/UnrankedQueryResult.html
368
+ - doc/Retreval/User.html
369
+ - doc/Retreval.html
370
+ - doc/test/test_gold_standard_rb.html
371
+ - doc/test/test_query_result_rb.html
372
+ - doc/TestGoldStandard.html
373
+ - doc/TestQueryResult.html
374
+ - doc/TODO.html
348
375
  - example/gold_standard.yml
349
376
  - example/query_results.yml
350
377
  - lib/retreval/gold_standard.rb