retreval 0.1 → 0.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1013 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
7
+
8
+ <title>Class: Retreval::GoldStandard</title>
9
+
10
+ <link rel="stylesheet" href="../rdoc.css" type="text/css" media="screen" />
11
+
12
+ <script src="../js/jquery.js" type="text/javascript"
13
+ charset="utf-8"></script>
14
+ <script src="../js/thickbox-compressed.js" type="text/javascript"
15
+ charset="utf-8"></script>
16
+ <script src="../js/quicksearch.js" type="text/javascript"
17
+ charset="utf-8"></script>
18
+ <script src="../js/darkfish.js" type="text/javascript"
19
+ charset="utf-8"></script>
20
+
21
+ </head>
22
+ <body class="class">
23
+
24
+ <div id="metadata">
25
+ <div id="home-metadata">
26
+ <div id="home-section" class="section">
27
+ <h3 class="section-header">
28
+ <a href="../index.html">Home</a>
29
+ <a href="../index.html#classes">Classes</a>
30
+ <a href="../index.html#methods">Methods</a>
31
+ </h3>
32
+ </div>
33
+ </div>
34
+
35
+ <div id="file-metadata">
36
+ <div id="file-list-section" class="section">
37
+ <h3 class="section-header">In Files</h3>
38
+ <div class="section-body">
39
+ <ul>
40
+
41
+ <li><a href="../lib/retreval/gold_standard_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
42
+ class="thickbox" title="lib/retreval/gold_standard.rb">lib/retreval/gold_standard.rb</a></li>
43
+
44
+ </ul>
45
+ </div>
46
+ </div>
47
+
48
+
49
+ </div>
50
+
51
+ <div id="class-metadata">
52
+
53
+ <!-- Parent Class -->
54
+
55
+ <div id="parent-class-section" class="section">
56
+ <h3 class="section-header">Parent</h3>
57
+
58
+ <p class="link">Object</p>
59
+
60
+ </div>
61
+
62
+
63
+ <!-- Namespace Contents -->
64
+
65
+
66
+ <!-- Method Quickref -->
67
+
68
+ <div id="method-list-section" class="section">
69
+ <h3 class="section-header">Methods</h3>
70
+ <ul class="link-list">
71
+
72
+ <li><a href="#method-c-new">::new</a></li>
73
+
74
+ <li><a href="#method-i-%3C%3C">#&lt;&lt;</a></li>
75
+
76
+ <li><a href="#method-i-add_judgement">#add_judgement</a></li>
77
+
78
+ <li><a href="#method-i-contains_document%3F">#contains_document?</a></li>
79
+
80
+ <li><a href="#method-i-contains_judgement%3F">#contains_judgement?</a></li>
81
+
82
+ <li><a href="#method-i-contains_query%3F">#contains_query?</a></li>
83
+
84
+ <li><a href="#method-i-contains_user%3F">#contains_user?</a></li>
85
+
86
+ <li><a href="#method-i-kappa">#kappa</a></li>
87
+
88
+ <li><a href="#method-i-load_from_plaintext_file">#load_from_plaintext_file</a></li>
89
+
90
+ <li><a href="#method-i-load_from_yaml_file">#load_from_yaml_file</a></li>
91
+
92
+ <li><a href="#method-i-pairwise_kappa">#pairwise_kappa</a></li>
93
+
94
+ <li><a href="#method-i-relevant%3F">#relevant?</a></li>
95
+
96
+ </ul>
97
+ </div>
98
+
99
+
100
+ <!-- Included Modules -->
101
+
102
+ </div>
103
+
104
+ <div id="project-metadata">
105
+
106
+
107
+ <div id="fileindex-section" class="section project-section">
108
+ <h3 class="section-header">Files</h3>
109
+ <ul>
110
+
111
+ <li class="file"><a href="../CHANGELOG.html">CHANGELOG</a></li>
112
+
113
+ <li class="file"><a href="../TODO.html">TODO</a></li>
114
+
115
+ </ul>
116
+ </div>
117
+
118
+
119
+ <div id="classindex-section" class="section project-section">
120
+ <h3 class="section-header">Class Index
121
+ <span class="search-toggle"><img src="../images/find.png"
122
+ height="16" width="16" alt="[+]"
123
+ title="show/hide quicksearch" /></span></h3>
124
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
125
+ <fieldset>
126
+ <legend>Quicksearch</legend>
127
+ <input type="text" name="quicksearch" value=""
128
+ class="quicksearch-field" />
129
+ </fieldset>
130
+ </form>
131
+
132
+ <ul class="link-list">
133
+
134
+ <li><a href="../Retreval.html">Retreval</a></li>
135
+
136
+ <li><a href="../Retreval/Document.html">Retreval::Document</a></li>
137
+
138
+ <li><a href="../Retreval/GoldStandard.html">Retreval::GoldStandard</a></li>
139
+
140
+ <li><a href="../Retreval/Judgement.html">Retreval::Judgement</a></li>
141
+
142
+ <li><a href="../Retreval/Options.html">Retreval::Options</a></li>
143
+
144
+ <li><a href="../Retreval/Query.html">Retreval::Query</a></li>
145
+
146
+ <li><a href="../Retreval/QueryResult.html">Retreval::QueryResult</a></li>
147
+
148
+ <li><a href="../Retreval/QueryResultSet.html">Retreval::QueryResultSet</a></li>
149
+
150
+ <li><a href="../Retreval/RankedQueryResult.html">Retreval::RankedQueryResult</a></li>
151
+
152
+ <li><a href="../Retreval/ResultDocument.html">Retreval::ResultDocument</a></li>
153
+
154
+ <li><a href="../Retreval/Runner.html">Retreval::Runner</a></li>
155
+
156
+ <li><a href="../Retreval/UnrankedQueryResult.html">Retreval::UnrankedQueryResult</a></li>
157
+
158
+ <li><a href="../Retreval/User.html">Retreval::User</a></li>
159
+
160
+ <li><a href="../TestGoldStandard.html">TestGoldStandard</a></li>
161
+
162
+ <li><a href="../TestQueryResult.html">TestQueryResult</a></li>
163
+
164
+ </ul>
165
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
166
+ </div>
167
+
168
+
169
+ </div>
170
+ </div>
171
+
172
+ <div id="documentation">
173
+ <h1 class="class">Retreval::GoldStandard</h1>
174
+
175
+ <div id="description">
176
+ <p>
177
+ A Gold Standard is composed of several Judgements for the cartesian
178
+ product of Documents and Queries.
179
+ </p>
180
+
181
+ </div>
182
+
183
+ <!-- Constants -->
184
+
185
+
186
+ <!-- Attributes -->
187
+
188
+ <div id="attribute-method-details" class="method-section section">
189
+ <h3 class="section-header">Attributes</h3>
190
+
191
+
192
+ <div id="documents-attribute-method" class="method-detail">
193
+ <a name="documents"></a>
194
+
195
+ <div class="method-heading attribute-method-heading">
196
+ <span class="method-name">documents</span><span
197
+ class="attribute-access-type">[R]</span>
198
+ </div>
199
+
200
+ <div class="method-description">
201
+
202
+
203
+
204
+ </div>
205
+ </div>
206
+
207
+ <div id="judgements-attribute-method" class="method-detail">
208
+ <a name="judgements"></a>
209
+
210
+ <div class="method-heading attribute-method-heading">
211
+ <span class="method-name">judgements</span><span
212
+ class="attribute-access-type">[R]</span>
213
+ </div>
214
+
215
+ <div class="method-description">
216
+
217
+
218
+
219
+ </div>
220
+ </div>
221
+
222
+ <div id="queries-attribute-method" class="method-detail">
223
+ <a name="queries"></a>
224
+
225
+ <div class="method-heading attribute-method-heading">
226
+ <span class="method-name">queries</span><span
227
+ class="attribute-access-type">[R]</span>
228
+ </div>
229
+
230
+ <div class="method-description">
231
+
232
+
233
+
234
+ </div>
235
+ </div>
236
+
237
+ <div id="users-attribute-method" class="method-detail">
238
+ <a name="users"></a>
239
+
240
+ <div class="method-heading attribute-method-heading">
241
+ <span class="method-name">users</span><span
242
+ class="attribute-access-type">[R]</span>
243
+ </div>
244
+
245
+ <div class="method-description">
246
+
247
+
248
+
249
+ </div>
250
+ </div>
251
+
252
+ </div>
253
+
254
+
255
+ <!-- Methods -->
256
+
257
+ <div id="public-class-method-details" class="method-section section">
258
+ <h3 class="section-header">Public Class Methods</h3>
259
+
260
+
261
+ <div id="new-method" class="method-detail ">
262
+ <a name="method-c-new"></a>
263
+
264
+ <div class="method-heading">
265
+
266
+ <span class="method-name">new</span><span
267
+ class="method-args">(args = {})</span>
268
+ <span class="method-click-advice">click to toggle source</span>
269
+
270
+ </div>
271
+
272
+ <div class="method-description">
273
+
274
+ <p>
275
+ Creates a new gold standard. One can optionally construct the gold standard
276
+ with triples given. This would be a hash like:
277
+ </p>
278
+ <pre>
279
+ triples = {
280
+ :document =&gt; &quot;Document ID&quot;,
281
+ :query =&gt; &quot;Some query&quot;,
282
+ :relevant =&gt; &quot;true&quot;
283
+ }
284
+ </pre>
285
+ <p>
286
+ Called via:
287
+ </p>
288
+ <pre>
289
+ GoldStandard.new :triples =&gt; an_array_of_triples</pre>
290
+
291
+
292
+
293
+ <div class="method-source-code"
294
+ id="new-source">
295
+ <pre>
296
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 19</span>
297
+ 19: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">args</span> = {})
298
+ 20: <span class="ruby-ivar">@documents</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
299
+ 21: <span class="ruby-ivar">@queries</span> = <span class="ruby-constant">Array</span>.<span class="ruby-identifier">new</span>
300
+ 22: <span class="ruby-ivar">@judgements</span> = <span class="ruby-constant">Array</span>.<span class="ruby-identifier">new</span>
301
+ 23: <span class="ruby-ivar">@users</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
302
+ 24:
303
+ 25: <span class="ruby-comment cmt"># one can also construct a Gold Standard with everything already loaded</span>
304
+ 26: <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:triples</span>].<span class="ruby-identifier">nil?</span>
305
+ 27: <span class="ruby-identifier">args</span>[<span class="ruby-value">:triples</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">triple</span><span class="ruby-operator">|</span>
306
+ 28: <span class="ruby-identifier">add_judgement</span>(<span class="ruby-identifier">triple</span>)
307
+ 29: <span class="ruby-keyword kw">end</span>
308
+ 30: <span class="ruby-keyword kw">end</span>
309
+ 31: <span class="ruby-keyword kw">end</span></pre>
310
+ </div>
311
+
312
+ </div>
313
+
314
+
315
+
316
+
317
+ </div>
318
+
319
+
320
+ </div>
321
+
322
+ <div id="public-instance-method-details" class="method-section section">
323
+ <h3 class="section-header">Public Instance Methods</h3>
324
+
325
+
326
+ <div id="--method" class="method-detail ">
327
+ <a name="method-i-%3C%3C"></a>
328
+
329
+ <div class="method-heading">
330
+
331
+ <span class="method-name">&lt;&lt;</span><span
332
+ class="method-args">(args)</span>
333
+ <span class="method-click-advice">click to toggle source</span>
334
+
335
+ </div>
336
+
337
+ <div class="method-description">
338
+
339
+ <p>
340
+ This is essentially the same as adding a <a
341
+ href="Judgement.html">Judgement</a>; we can use this operator too.
342
+ </p>
343
+
344
+
345
+
346
+ <div class="method-source-code"
347
+ id="--source">
348
+ <pre>
349
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 163</span>
350
+ 163: <span class="ruby-keyword kw">def</span> <span class="ruby-operator">&lt;&lt;</span>(<span class="ruby-identifier">args</span>)
351
+ 164: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">add_judgement</span> <span class="ruby-identifier">args</span>
352
+ 165: <span class="ruby-keyword kw">end</span></pre>
353
+ </div>
354
+
355
+ </div>
356
+
357
+
358
+
359
+
360
+ </div>
361
+
362
+
363
+ <div id="add-judgement-method" class="method-detail ">
364
+ <a name="method-i-add_judgement"></a>
365
+
366
+ <div class="method-heading">
367
+
368
+ <span class="method-name">add_judgement</span><span
369
+ class="method-args">(args)</span>
370
+ <span class="method-click-advice">click to toggle source</span>
371
+
372
+ </div>
373
+
374
+ <div class="method-description">
375
+
376
+ <p>
377
+ Adds a judgement (<a href="Document.html">Document</a>, <a
378
+ href="Query.html">Query</a>, relevancy) to the <a
379
+ href="GoldStandard.html">GoldStandard</a>. All of those are represented as
380
+ strings in the public interface. The user ID is an optional parameter that
381
+ can be used to measure kappa later. Call this with:
382
+ </p>
383
+ <pre>
384
+ add_judgement :document =&gt; doc_id, :query =&gt; query_string, :relevant =&gt; boolean, :user =&gt; John</pre>
385
+
386
+
387
+
388
+ <div class="method-source-code"
389
+ id="add-judgement-source">
390
+ <pre>
391
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 118</span>
392
+ 118: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_judgement</span>(<span class="ruby-identifier">args</span>)
393
+ 119: <span class="ruby-identifier">document_id</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:document</span>]
394
+ 120: <span class="ruby-identifier">query_string</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:query</span>]
395
+ 121: <span class="ruby-identifier">relevant</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:relevant</span>]
396
+ 122: <span class="ruby-identifier">user_id</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:user</span>]
397
+ 123:
398
+ 124:
399
+ 125: <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">document_id</span>.<span class="ruby-identifier">nil?</span> <span class="ruby-keyword kw">or</span> <span class="ruby-identifier">query_string</span>.<span class="ruby-identifier">nil?</span>
400
+ 126: <span class="ruby-identifier">document</span> = <span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:id</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">document_id</span>
401
+ 127: <span class="ruby-identifier">query</span> = <span class="ruby-constant">Query</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:querystring</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">query_string</span>
402
+ 128:
403
+ 129:
404
+ 130: <span class="ruby-comment cmt"># If the user exists, load it, otherwise create a new one</span>
405
+ 131: <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@users</span>.<span class="ruby-identifier">has_key?</span>(<span class="ruby-identifier">user_id</span>)
406
+ 132: <span class="ruby-identifier">user</span> = <span class="ruby-ivar">@users</span>[<span class="ruby-identifier">user_id</span>]
407
+ 133: <span class="ruby-keyword kw">else</span>
408
+ 134: <span class="ruby-identifier">user</span> = <span class="ruby-constant">User</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:id</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">user_id</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">user_id</span>.<span class="ruby-identifier">nil?</span>
409
+ 135: <span class="ruby-keyword kw">end</span>
410
+ 136:
411
+ 137: <span class="ruby-comment cmt"># If there is no judgement for this combination, just add the document/query pair</span>
412
+ 138: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">relevant</span>.<span class="ruby-identifier">nil?</span>
413
+ 139: <span class="ruby-ivar">@documents</span>[<span class="ruby-identifier">document_id</span>] = <span class="ruby-identifier">document</span>
414
+ 140: <span class="ruby-ivar">@queries</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">query</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-ivar">@queries</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">query</span>)
415
+ 141: <span class="ruby-keyword kw">return</span>
416
+ 142: <span class="ruby-keyword kw">end</span>
417
+ 143:
418
+ 144: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">user_id</span>.<span class="ruby-identifier">nil?</span>
419
+ 145: <span class="ruby-identifier">judgement</span> = <span class="ruby-constant">Judgement</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:document</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">document</span>, <span class="ruby-value">:query</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">query</span>, <span class="ruby-value">:relevant</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">relevant</span>
420
+ 146: <span class="ruby-keyword kw">else</span>
421
+ 147: <span class="ruby-identifier">judgement</span> = <span class="ruby-constant">Judgement</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:document</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">document</span>, <span class="ruby-value">:query</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">query</span>, <span class="ruby-value">:relevant</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">relevant</span>, <span class="ruby-value">:user</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">user</span>
422
+ 148:
423
+ 149: <span class="ruby-identifier">user</span>.<span class="ruby-identifier">add_judgement</span>(<span class="ruby-identifier">judgement</span>)
424
+ 150: <span class="ruby-ivar">@users</span>[<span class="ruby-identifier">user_id</span>] = <span class="ruby-identifier">user</span>
425
+ 151: <span class="ruby-keyword kw">end</span>
426
+ 152:
427
+ 153: <span class="ruby-ivar">@documents</span>[<span class="ruby-identifier">document_id</span>] = <span class="ruby-identifier">document</span>
428
+ 154: <span class="ruby-ivar">@queries</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">query</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-ivar">@queries</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">query</span>)
429
+ 155: <span class="ruby-ivar">@judgements</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-identifier">judgement</span>
430
+ 156: <span class="ruby-keyword kw">else</span>
431
+ 157: <span class="ruby-identifier">raise</span> <span class="ruby-constant">ArgumentError</span>.<span class="ruby-identifier">new</span>(<span class="ruby-value str">&quot;Need at least a Document, and a Query for creating the new entry.&quot;</span>)
432
+ 158: <span class="ruby-keyword kw">end</span>
433
+ 159:
434
+ 160: <span class="ruby-keyword kw">end</span></pre>
435
+ </div>
436
+
437
+ </div>
438
+
439
+
440
+
441
+
442
+ </div>
443
+
444
+
445
+ <div id="contains-document--method" class="method-detail ">
446
+ <a name="method-i-contains_document%3F"></a>
447
+
448
+ <div class="method-heading">
449
+
450
+ <span class="method-name">contains_document?</span><span
451
+ class="method-args">(args)</span>
452
+ <span class="method-click-advice">click to toggle source</span>
453
+
454
+ </div>
455
+
456
+ <div class="method-description">
457
+
458
+ <p>
459
+ Returns true if this <a href="GoldStandard.html">GoldStandard</a> contains
460
+ this <a href="Document.html">Document</a>. Called by:
461
+ </p>
462
+ <pre>
463
+ contains_document? :id =&gt; &quot;document ID&quot;</pre>
464
+
465
+
466
+
467
+ <div class="method-source-code"
468
+ id="contains-document--source">
469
+ <pre>
470
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 210</span>
471
+ 210: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">contains_document?</span>(<span class="ruby-identifier">args</span>)
472
+ 211: <span class="ruby-identifier">document_id</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:id</span>]
473
+ 212: <span class="ruby-ivar">@documents</span>.<span class="ruby-identifier">key?</span> <span class="ruby-identifier">document_id</span>
474
+ 213: <span class="ruby-keyword kw">end</span></pre>
475
+ </div>
476
+
477
+ </div>
478
+
479
+
480
+
481
+
482
+ </div>
483
+
484
+
485
+ <div id="contains-judgement--method" class="method-detail ">
486
+ <a name="method-i-contains_judgement%3F"></a>
487
+
488
+ <div class="method-heading">
489
+
490
+ <span class="method-name">contains_judgement?</span><span
491
+ class="method-args">(args)</span>
492
+ <span class="method-click-advice">click to toggle source</span>
493
+
494
+ </div>
495
+
496
+ <div class="method-description">
497
+
498
+ <p>
499
+ Returns true if this <a href="GoldStandard.html">GoldStandard</a> contains
500
+ a <a href="Judgement.html">Judgement</a> for this <a
501
+ href="Query.html">Query</a> / <a href="Document.html">Document</a> pair.
502
+ This is called by:
503
+ </p>
504
+ <pre>
505
+ contains_judgement? :document =&gt; &quot;the document ID&quot;, :query =&gt; &quot;the query&quot;</pre>
506
+
507
+
508
+
509
+ <div class="method-source-code"
510
+ id="contains-judgement--source">
511
+ <pre>
512
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 196</span>
513
+ 196: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">contains_judgement?</span>(<span class="ruby-identifier">args</span>)
514
+ 197: <span class="ruby-identifier">query</span> = <span class="ruby-constant">Query</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:querystring</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:query</span>]
515
+ 198: <span class="ruby-identifier">document</span> = <span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:id</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:document</span>]
516
+ 199:
517
+ 200: <span class="ruby-comment cmt">#TODO: a hash could improve performance here as well</span>
518
+ 201: <span class="ruby-ivar">@judgements</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">judgement</span><span class="ruby-operator">|</span> <span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">true</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">judgement</span>.<span class="ruby-identifier">document</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">document</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">judgement</span>.<span class="ruby-identifier">query</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">query</span> }
519
+ 202:
520
+ 203: <span class="ruby-keyword kw">false</span>
521
+ 204: <span class="ruby-keyword kw">end</span></pre>
522
+ </div>
523
+
524
+ </div>
525
+
526
+
527
+
528
+
529
+ </div>
530
+
531
+
532
+ <div id="contains-query--method" class="method-detail ">
533
+ <a name="method-i-contains_query%3F"></a>
534
+
535
+ <div class="method-heading">
536
+
537
+ <span class="method-name">contains_query?</span><span
538
+ class="method-args">(args)</span>
539
+ <span class="method-click-advice">click to toggle source</span>
540
+
541
+ </div>
542
+
543
+ <div class="method-description">
544
+
545
+ <p>
546
+ Returns true if this <a href="GoldStandard.html">GoldStandard</a> contains
547
+ this <a href="Query.html">Query</a> string. Called by:
548
+ </p>
549
+ <pre>
550
+ contains_query? :querystring =&gt; &quot;the query&quot;</pre>
551
+
552
+
553
+
554
+ <div class="method-source-code"
555
+ id="contains-query--source">
556
+ <pre>
557
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 219</span>
558
+ 219: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">contains_query?</span>(<span class="ruby-identifier">args</span>)
559
+ 220: <span class="ruby-identifier">querystring</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:querystring</span>]
560
+ 221: <span class="ruby-identifier">query</span> = <span class="ruby-constant">Query</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:querystring</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">querystring</span>
561
+ 222: <span class="ruby-ivar">@queries</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">query</span>
562
+ 223: <span class="ruby-keyword kw">end</span></pre>
563
+ </div>
564
+
565
+ </div>
566
+
567
+
568
+
569
+
570
+ </div>
571
+
572
+
573
+ <div id="contains-user--method" class="method-detail ">
574
+ <a name="method-i-contains_user%3F"></a>
575
+
576
+ <div class="method-heading">
577
+
578
+ <span class="method-name">contains_user?</span><span
579
+ class="method-args">(args)</span>
580
+ <span class="method-click-advice">click to toggle source</span>
581
+
582
+ </div>
583
+
584
+ <div class="method-description">
585
+
586
+ <p>
587
+ Returns true if this <a href="GoldStandard.html">GoldStandard</a> contains
588
+ this <a href="User.html">User</a>. Called by:
589
+ </p>
590
+ <pre>
591
+ contains_user? :id =&gt; &quot;John Doe&quot;</pre>
592
+
593
+
594
+
595
+ <div class="method-source-code"
596
+ id="contains-user--source">
597
+ <pre>
598
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 229</span>
599
+ 229: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">contains_user?</span>(<span class="ruby-identifier">args</span>)
600
+ 230: <span class="ruby-identifier">user_id</span> = <span class="ruby-identifier">args</span>[<span class="ruby-value">:id</span>]
601
+ 231: <span class="ruby-ivar">@users</span>.<span class="ruby-identifier">key?</span> <span class="ruby-identifier">user_id</span>
602
+ 232: <span class="ruby-keyword kw">end</span></pre>
603
+ </div>
604
+
605
+ </div>
606
+
607
+
608
+
609
+
610
+ </div>
611
+
612
+
613
+ <div id="kappa-method" class="method-detail ">
614
+ <a name="method-i-kappa"></a>
615
+
616
+ <div class="method-heading">
617
+
618
+ <span class="method-name">kappa</span><span
619
+ class="method-args">()</span>
620
+ <span class="method-click-advice">click to toggle source</span>
621
+
622
+ </div>
623
+
624
+ <div class="method-description">
625
+
626
+ <p>
627
+ Calculates and returns the Kappa measure for this <a
628
+ href="GoldStandard.html">GoldStandard</a>. It shows to which degree the
629
+ judges agree in their decisions. See: <a
630
+ href="http://nlp.stanford.edu/IR-book/html/htmledition/assessing-relevance-1.html">nlp.stanford.edu/IR-book/html/htmledition/assessing-relevance-1.html</a>
631
+ </p>
632
+
633
+
634
+
635
+ <div class="method-source-code"
636
+ id="kappa-source">
637
+ <pre>
638
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 238</span>
639
+ 238: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">kappa</span>
640
+ 239:
641
+ 240: <span class="ruby-comment cmt"># TODO This isn't very pretty, maybe there's a more ruby-esque way to do this?</span>
642
+ 241: <span class="ruby-identifier">sum</span> = <span class="ruby-value">0</span>
643
+ 242: <span class="ruby-identifier">count</span> = <span class="ruby-value">0</span>
644
+ 243:
645
+ 244: <span class="ruby-comment cmt"># A repeated_combination yields all the pairwise combinations of</span>
646
+ 245: <span class="ruby-comment cmt"># users to generate the pairwise kappa statistic. Elements are also</span>
647
+ 246: <span class="ruby-comment cmt"># paired with themselves, so we need to remove those.</span>
648
+ 247: <span class="ruby-ivar">@users</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">repeated_combination</span>(<span class="ruby-value">2</span>) <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">combination</span><span class="ruby-operator">|</span>
649
+ 248: <span class="ruby-identifier">user1</span>, <span class="ruby-identifier">user2</span> = <span class="ruby-identifier">combination</span>[<span class="ruby-value">0</span>], <span class="ruby-identifier">combination</span>[<span class="ruby-value">1</span>]
650
+ 249: <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">user1</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">user2</span>
651
+ 250: <span class="ruby-identifier">kappa</span> = <span class="ruby-identifier">pairwise_kappa</span>(<span class="ruby-identifier">user1</span>, <span class="ruby-identifier">user2</span>)
652
+ 251: <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">kappa</span>.<span class="ruby-identifier">nil?</span>
653
+ 252: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Kappa for User #{user1.id} and #{user2.id}: #{kappa}&quot;</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$verbose</span>
654
+ 253: <span class="ruby-identifier">sum</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">kappa</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">kappa</span>.<span class="ruby-identifier">nil?</span>
655
+ 254: <span class="ruby-identifier">count</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
656
+ 255: <span class="ruby-keyword kw">end</span>
657
+ 256: <span class="ruby-keyword kw">end</span>
658
+ 257: <span class="ruby-keyword kw">end</span>
659
+ 258:
660
+ 259: <span class="ruby-ivar">@kappa</span> = <span class="ruby-identifier">sum</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">count</span>.<span class="ruby-identifier">to_f</span>
661
+ 260: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Average pairwise kappa: #{@kappa}&quot;</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$verbose</span>
662
+ 261: <span class="ruby-keyword kw">return</span> <span class="ruby-ivar">@kappa</span>
663
+ 262: <span class="ruby-keyword kw">end</span></pre>
664
+ </div>
665
+
666
+ </div>
667
+
668
+
669
+
670
+
671
+ </div>
672
+
673
+
674
+ <div id="load-from-plaintext-file-method" class="method-detail ">
675
+ <a name="method-i-load_from_plaintext_file"></a>
676
+
677
+ <div class="method-heading">
678
+
679
+ <span class="method-name">load_from_plaintext_file</span><span
680
+ class="method-args">(file)</span>
681
+ <span class="method-click-advice">click to toggle source</span>
682
+
683
+ </div>
684
+
685
+ <div class="method-description">
686
+
687
+ <p>
688
+ Parses a plaintext file adhering to the following standard: Every line of
689
+ text should include a triple that designates the judgement. The symbols
690
+ should be separated by a tabulator. E.g.
691
+ </p>
692
+ <pre>
693
+ my_query my_document_1 false
694
+ my_query my_document_2 true
695
+ </pre>
696
+ <p>
697
+ You can also add the user&#8217;s ID in the fourth column.
698
+ </p>
699
+
700
+
701
+
702
+ <div class="method-source-code"
703
+ id="load-from-plaintext-file-source">
704
+ <pre>
705
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 96</span>
706
+ 96: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">load_from_plaintext_file</span>(<span class="ruby-identifier">file</span>)
707
+ 97: <span class="ruby-keyword kw">begin</span>
708
+ 98: <span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">file</span>).<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">line</span><span class="ruby-operator">|</span>
709
+ 99: <span class="ruby-identifier">line</span>.<span class="ruby-identifier">chomp!</span>
710
+ 100: <span class="ruby-identifier">info</span> = <span class="ruby-identifier">line</span>.<span class="ruby-identifier">split</span>(<span class="ruby-value str">&quot;\t&quot;</span>)
711
+ 101: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">info</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">==</span> <span class="ruby-value">3</span>
712
+ 102: <span class="ruby-identifier">add_judgement</span> <span class="ruby-value">:query</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">0</span>], <span class="ruby-value">:document</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">1</span>], <span class="ruby-value">:relevant</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">2</span>]
713
+ 103: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">info</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">==</span> <span class="ruby-value">4</span>
714
+ 104: <span class="ruby-identifier">add_judgement</span> <span class="ruby-value">:query</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">0</span>], <span class="ruby-value">:document</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">1</span>], <span class="ruby-value">:relevant</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">2</span>], <span class="ruby-value">:user</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">info</span>[<span class="ruby-value">3</span>]
715
+ 105: <span class="ruby-keyword kw">end</span>
716
+ 106: <span class="ruby-keyword kw">end</span>
717
+ 107: <span class="ruby-keyword kw">rescue</span> <span class="ruby-constant">Exception</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">e</span>
718
+ 108: <span class="ruby-identifier">raise</span> <span class="ruby-value str">&quot;Error while parsing the document: &quot;</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">message</span>
719
+ 109: <span class="ruby-keyword kw">end</span>
720
+ 110: <span class="ruby-keyword kw">end</span></pre>
721
+ </div>
722
+
723
+ </div>
724
+
725
+
726
+
727
+
728
+ </div>
729
+
730
+
731
+ <div id="load-from-yaml-file-method" class="method-detail ">
732
+ <a name="method-i-load_from_yaml_file"></a>
733
+
734
+ <div class="method-heading">
735
+
736
+ <span class="method-name">load_from_yaml_file</span><span
737
+ class="method-args">(file)</span>
738
+ <span class="method-click-advice">click to toggle source</span>
739
+
740
+ </div>
741
+
742
+ <div class="method-description">
743
+
744
+ <p>
745
+ Parses a YAML file adhering to the following generic standard:
746
+ </p>
747
+ <ul>
748
+ <li><p>
749
+ &#8220;query&#8221; denotes the query
750
+ </p>
751
+ </li>
752
+ <li><p>
753
+ &#8220;documents&#8221; these are the documents judged for this query
754
+ </p>
755
+ </li>
756
+ <li><p>
757
+ &#8220;id&#8221; the ID of the document (e.g. its filename, etc.)
758
+ </p>
759
+ </li>
760
+ <li><p>
761
+ &#8220;judgements&#8221; an array of judgements, each one with:
762
+ </p>
763
+ </li>
764
+ <li><p>
765
+ &#8220;relevant&#8221; a boolean value of the judgement (relevant or not)
766
+ </p>
767
+ </li>
768
+ <li><p>
769
+ &#8220;user&#8221; an optional identifier of the user
770
+ </p>
771
+ </li>
772
+ </ul>
773
+ <p>
774
+ Example file:
775
+ </p>
776
+ <pre>
777
+ - query: 12th air force germany 1957
778
+ documents:
779
+ - id: g5701s.ict21311
780
+ judgements: []
781
+
782
+ - id: g5701s.ict21313
783
+ judgements:
784
+ - relevant: false
785
+ user: 2</pre>
786
+
787
+
788
+
789
+ <div class="method-source-code"
790
+ id="load-from-yaml-file-source">
791
+ <pre>
792
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 53</span>
793
+ 53: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">load_from_yaml_file</span>(<span class="ruby-identifier">file</span>)
794
+ 54: <span class="ruby-keyword kw">begin</span>
795
+ 55: <span class="ruby-identifier">ydoc</span> = <span class="ruby-constant">YAML</span>.<span class="ruby-identifier">load</span>(<span class="ruby-constant">File</span>.<span class="ruby-identifier">open</span>(<span class="ruby-identifier">file</span>, <span class="ruby-value str">&quot;r&quot;</span>))
796
+ 56: <span class="ruby-identifier">ydoc</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">entry</span><span class="ruby-operator">|</span>
797
+ 57:
798
+ 58: <span class="ruby-comment cmt"># The query is first in the hierarchy</span>
799
+ 59: <span class="ruby-identifier">query</span> = <span class="ruby-identifier">entry</span>[<span class="ruby-value str">&quot;query&quot;</span>]
800
+ 60:
801
+ 61: <span class="ruby-comment cmt"># Every query contains several documents</span>
802
+ 62: <span class="ruby-identifier">documents</span> = <span class="ruby-identifier">entry</span>[<span class="ruby-value str">&quot;documents&quot;</span>]
803
+ 63: <span class="ruby-identifier">documents</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">doc</span><span class="ruby-operator">|</span>
804
+ 64:
805
+ 65: <span class="ruby-identifier">document</span> = <span class="ruby-identifier">doc</span>[<span class="ruby-value str">&quot;id&quot;</span>]
806
+ 66:
807
+ 67: <span class="ruby-comment cmt"># Only count the map if it has judgements</span>
808
+ 68: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">doc</span>[<span class="ruby-value str">&quot;judgements&quot;</span>].<span class="ruby-identifier">empty?</span>
809
+ 69: <span class="ruby-identifier">add_judgement</span> <span class="ruby-value">:document</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">document</span>, <span class="ruby-value">:query</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">query</span>, <span class="ruby-value">:relevant</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-keyword kw">nil</span>, <span class="ruby-value">:user</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-keyword kw">nil</span>
810
+ 70: <span class="ruby-keyword kw">else</span>
811
+ 71: <span class="ruby-identifier">doc</span>[<span class="ruby-value str">&quot;judgements&quot;</span>].<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">judgement</span><span class="ruby-operator">|</span>
812
+ 72: <span class="ruby-identifier">relevant</span> = <span class="ruby-identifier">judgement</span>[<span class="ruby-value str">&quot;relevant&quot;</span>]
813
+ 73: <span class="ruby-identifier">user</span> = <span class="ruby-identifier">judgement</span>[<span class="ruby-value str">&quot;user&quot;</span>]
814
+ 74:
815
+ 75: <span class="ruby-identifier">add_judgement</span> <span class="ruby-value">:document</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">document</span>, <span class="ruby-value">:query</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">query</span>, <span class="ruby-value">:relevant</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">relevant</span>, <span class="ruby-value">:user</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">user</span>
816
+ 76: <span class="ruby-keyword kw">end</span>
817
+ 77: <span class="ruby-keyword kw">end</span>
818
+ 78:
819
+ 79: <span class="ruby-keyword kw">end</span>
820
+ 80: <span class="ruby-keyword kw">end</span>
821
+ 81:
822
+ 82: <span class="ruby-keyword kw">rescue</span> <span class="ruby-constant">Exception</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">e</span>
823
+ 83: <span class="ruby-identifier">raise</span> <span class="ruby-value str">&quot;Error while parsing the YAML document: &quot;</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">e</span>.<span class="ruby-identifier">message</span>
824
+ 84: <span class="ruby-keyword kw">end</span>
825
+ 85: <span class="ruby-keyword kw">end</span></pre>
826
+ </div>
827
+
828
+ </div>
829
+
830
+
831
+
832
+
833
+ </div>
834
+
835
+
836
+ <div id="relevant--method" class="method-detail ">
837
+ <a name="method-i-relevant%3F"></a>
838
+
839
+ <div class="method-heading">
840
+
841
+ <span class="method-name">relevant?</span><span
842
+ class="method-args">(args)</span>
843
+ <span class="method-click-advice">click to toggle source</span>
844
+
845
+ </div>
846
+
847
+ <div class="method-description">
848
+
849
+ <p>
850
+ Returns true if a <a href="Document.html">Document</a> is relevant for a <a
851
+ href="Query.html">Query</a>, according to this <a
852
+ href="GoldStandard.html">GoldStandard</a>. Called by:
853
+ </p>
854
+ <pre>
855
+ relevant? :document =&gt; &quot;document ID&quot;, :query =&gt; &quot;query&quot;</pre>
856
+
857
+
858
+
859
+ <div class="method-source-code"
860
+ id="relevant--source">
861
+ <pre>
862
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 170</span>
863
+ 170: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">relevant?</span>(<span class="ruby-identifier">args</span>)
864
+ 171: <span class="ruby-identifier">query</span> = <span class="ruby-constant">Query</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:querystring</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:query</span>]
865
+ 172: <span class="ruby-identifier">document</span> = <span class="ruby-constant">Document</span>.<span class="ruby-identifier">new</span> <span class="ruby-value">:id</span> =<span class="ruby-operator">&gt;</span> <span class="ruby-identifier">args</span>[<span class="ruby-value">:document</span>]
866
+ 173:
867
+ 174: <span class="ruby-identifier">relevant_count</span> = <span class="ruby-value">0</span>
868
+ 175: <span class="ruby-identifier">nonrelevant_count</span> = <span class="ruby-value">0</span>
869
+ 176:
870
+ 177: <span class="ruby-comment cmt">#TODO: looks quite inefficient. Would a hash with document-query-pairs as key help?</span>
871
+ 178: <span class="ruby-ivar">@judgements</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">judgement</span><span class="ruby-operator">|</span>
872
+ 179: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">judgement</span>.<span class="ruby-identifier">document</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">document</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">judgement</span>.<span class="ruby-identifier">query</span> <span class="ruby-operator">==</span> <span class="ruby-identifier">query</span>
873
+ 180: <span class="ruby-identifier">judgement</span>.<span class="ruby-identifier">relevant</span> <span class="ruby-operator">?</span> <span class="ruby-identifier">relevant_count</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span> <span class="ruby-operator">:</span> <span class="ruby-identifier">nonrelevant_count</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
874
+ 181: <span class="ruby-keyword kw">end</span>
875
+ 182: <span class="ruby-keyword kw">end</span>
876
+ 183:
877
+ 184: <span class="ruby-comment cmt"># If we didn't find any judgements, just leave it as false</span>
878
+ 185: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">relevant_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">relevant_count</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
879
+ 186: <span class="ruby-keyword kw">false</span>
880
+ 187: <span class="ruby-keyword kw">else</span>
881
+ 188: <span class="ruby-identifier">relevant_count</span> <span class="ruby-operator">&gt;=</span> <span class="ruby-identifier">nonrelevant_count</span>
882
+ 189: <span class="ruby-keyword kw">end</span>
883
+ 190: <span class="ruby-keyword kw">end</span></pre>
884
+ </div>
885
+
886
+ </div>
887
+
888
+
889
+
890
+
891
+ </div>
892
+
893
+
894
+ </div>
895
+
896
+ <div id="private-instance-method-details" class="method-section section">
897
+ <h3 class="section-header">Private Instance Methods</h3>
898
+
899
+
900
+ <div id="pairwise-kappa-method" class="method-detail ">
901
+ <a name="method-i-pairwise_kappa"></a>
902
+
903
+ <div class="method-heading">
904
+
905
+ <span class="method-name">pairwise_kappa</span><span
906
+ class="method-args">(user1, user2)</span>
907
+ <span class="method-click-advice">click to toggle source</span>
908
+
909
+ </div>
910
+
911
+ <div class="method-description">
912
+
913
+ <p>
914
+ Calculates the pairwise kappa statistic for two users. The two user
915
+ objects need at least one <a href="Judgement.html">Judgement</a> in common.
916
+ Note that the kappa statistic is not really meaningful when there are too
917
+ few judgements in common!
918
+ </p>
919
+
920
+
921
+
922
+ <div class="method-source-code"
923
+ id="pairwise-kappa-source">
924
+ <pre>
925
+ <span class="ruby-comment cmt"># File lib/retreval/gold_standard.rb, line 270</span>
926
+ 270: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">pairwise_kappa</span>(<span class="ruby-identifier">user1</span>, <span class="ruby-identifier">user2</span>)
927
+ 271:
928
+ 272: <span class="ruby-identifier">user1_judgements</span> = <span class="ruby-identifier">user1</span>.<span class="ruby-identifier">judgements</span>.<span class="ruby-identifier">reject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">judgement</span><span class="ruby-operator">|</span> <span class="ruby-keyword kw">not</span> <span class="ruby-identifier">user2</span>.<span class="ruby-identifier">judgements</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">judgement</span>) }
929
+ 273: <span class="ruby-identifier">user2_judgements</span> = <span class="ruby-identifier">user2</span>.<span class="ruby-identifier">judgements</span>.<span class="ruby-identifier">reject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">judgement</span><span class="ruby-operator">|</span> <span class="ruby-keyword kw">not</span> <span class="ruby-identifier">user1</span>.<span class="ruby-identifier">judgements</span>.<span class="ruby-identifier">include?</span>(<span class="ruby-identifier">judgement</span>) }
930
+ 274:
931
+ 275: <span class="ruby-identifier">total_count</span> = <span class="ruby-identifier">user1_judgements</span>.<span class="ruby-identifier">count</span>
932
+ 276:
933
+ 277: <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">user1_judgements</span>.<span class="ruby-identifier">empty?</span> <span class="ruby-keyword kw">or</span> <span class="ruby-identifier">user1_judgements</span>.<span class="ruby-identifier">empty?</span>
934
+ 278:
935
+ 279: <span class="ruby-identifier">positive_agreements</span> = <span class="ruby-value">0</span> <span class="ruby-comment cmt"># =&gt; when both judges agree positively (relevant)</span>
936
+ 280: <span class="ruby-identifier">negative_agreements</span> = <span class="ruby-value">0</span> <span class="ruby-comment cmt"># =&gt; when both judges agree negatively (nonrelevant)</span>
937
+ 281: <span class="ruby-identifier">negative_disagreements</span> = <span class="ruby-value">0</span> <span class="ruby-comment cmt"># =&gt; when the second judge disagrees by using &quot;nonrelevant&quot;</span>
938
+ 282: <span class="ruby-identifier">positive_disagreements</span> = <span class="ruby-value">0</span> <span class="ruby-comment cmt"># =&gt; when the second judge disagrees by using &quot;relevant&quot;</span>
939
+ 283:
940
+ 284: <span class="ruby-keyword kw">for</span> <span class="ruby-identifier">i</span> <span class="ruby-keyword kw">in</span> <span class="ruby-value">0</span><span class="ruby-operator">..</span>(<span class="ruby-identifier">user1_judgements</span>.<span class="ruby-identifier">count</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>)
941
+ 285: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">user1_judgements</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">relevant</span> <span class="ruby-operator">==</span> <span class="ruby-keyword kw">true</span>
942
+ 286: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">user2_judgements</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">relevant</span> <span class="ruby-operator">==</span> <span class="ruby-keyword kw">true</span>
943
+ 287: <span class="ruby-identifier">positive_agreements</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
944
+ 288: <span class="ruby-keyword kw">else</span>
945
+ 289: <span class="ruby-identifier">negative_disagreements</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
946
+ 290: <span class="ruby-keyword kw">end</span>
947
+ 291: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">user1_judgements</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">relevant</span> <span class="ruby-operator">==</span> <span class="ruby-keyword kw">false</span>
948
+ 292: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">user2_judgements</span>[<span class="ruby-identifier">i</span>].<span class="ruby-identifier">relevant</span> <span class="ruby-operator">==</span> <span class="ruby-keyword kw">false</span>
949
+ 293: <span class="ruby-identifier">negative_agreements</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
950
+ 294: <span class="ruby-keyword kw">else</span>
951
+ 295: <span class="ruby-identifier">positive_disagreements</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
952
+ 296: <span class="ruby-keyword kw">end</span>
953
+ 297: <span class="ruby-keyword kw">end</span>
954
+ 298: <span class="ruby-keyword kw">end</span>
955
+ 299:
956
+ 300: <span class="ruby-comment cmt"># The proportion the judges agreed:</span>
957
+ 301: <span class="ruby-identifier">p_agreed</span> = (<span class="ruby-identifier">positive_agreements</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">negative_agreements</span>) <span class="ruby-operator">/</span> <span class="ruby-identifier">total_count</span>.<span class="ruby-identifier">to_f</span>
958
+ 302:
959
+ 303: <span class="ruby-comment cmt"># The pooled marginals:</span>
960
+ 304: <span class="ruby-identifier">p_nonrelevant</span> = (<span class="ruby-identifier">positive_disagreements</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">negative_agreements</span> * <span class="ruby-value">2</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">negative_disagreements</span>) <span class="ruby-operator">/</span> (<span class="ruby-identifier">total_count</span>.<span class="ruby-identifier">to_f</span> * <span class="ruby-value">2</span>)
961
+ 305: <span class="ruby-comment cmt"># This one is the opposite of P(nonrelevant):</span>
962
+ 306: <span class="ruby-comment cmt"># p_relevant = (positive_agreements * 2 + negative_disagreements + positive_disagreements) / (total_count.to_f * 2)</span>
963
+ 307: <span class="ruby-identifier">p_relevant</span> = <span class="ruby-value">1</span> <span class="ruby-operator">-</span> <span class="ruby-identifier">p_nonrelevant</span>
964
+ 308:
965
+ 309: <span class="ruby-comment cmt"># The probability that the judges agreed by chance</span>
966
+ 310: <span class="ruby-identifier">p_agreement_by_chance</span> = <span class="ruby-identifier">p_nonrelevant</span> <span class="ruby-operator">**</span> <span class="ruby-value">2</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">p_relevant</span> <span class="ruby-operator">**</span> <span class="ruby-value">2</span>
967
+ 311:
968
+ 312:
969
+ 313: <span class="ruby-comment cmt"># Finally, the pairwise kappa value</span>
970
+ 314: <span class="ruby-comment cmt"># If there'd be a division by zero, we avoid it and return 0 right away</span>
971
+ 315: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">p_agreed</span> <span class="ruby-operator">-</span> <span class="ruby-identifier">p_agreement_by_chance</span> <span class="ruby-operator">==</span> <span class="ruby-value">0</span>
972
+ 316: <span class="ruby-keyword kw">return</span> <span class="ruby-value">0</span>
973
+ 317: <span class="ruby-comment cmt"># In any other case, the kappa value is correct and we can return it</span>
974
+ 318: <span class="ruby-keyword kw">else</span>
975
+ 319: <span class="ruby-identifier">kappa</span> = (<span class="ruby-identifier">p_agreed</span> <span class="ruby-operator">-</span> <span class="ruby-identifier">p_agreement_by_chance</span>) <span class="ruby-operator">/</span> (<span class="ruby-value">1</span> <span class="ruby-operator">-</span> <span class="ruby-identifier">p_agreement_by_chance</span>)
976
+ 320: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">kappa</span>
977
+ 321: <span class="ruby-keyword kw">end</span>
978
+ 322: <span class="ruby-keyword kw">end</span>
979
+ 323:
980
+ 324: <span class="ruby-comment cmt"># If there are no common judgements, there is no kappa value to calculate</span>
981
+ 325: <span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">nil</span>
982
+ 326: <span class="ruby-keyword kw">end</span></pre>
983
+ </div>
984
+
985
+ </div>
986
+
987
+
988
+
989
+
990
+ </div>
991
+
992
+
993
+ </div>
994
+
995
+
996
+ </div>
997
+
998
+
999
+ <div id="rdoc-debugging-section-dump" class="debugging-section">
1000
+
1001
+ <p>Disabled; run with --debug to generate this.</p>
1002
+
1003
+ </div>
1004
+
1005
+ <div id="validator-badges">
1006
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
1007
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
1008
+ Rdoc Generator</a> 1.1.6</small>.</p>
1009
+ </div>
1010
+
1011
+ </body>
1012
+ </html>
1013
+