classifier 1.2.0 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (96) hide show
  1. data/LICENSE +361 -273
  2. data/README +6 -5
  3. data/Rakefile +12 -2
  4. data/bin/summarize.rb +11 -0
  5. data/doc/classes/Array.html +139 -0
  6. data/doc/classes/Array.src/M000003.html +18 -0
  7. data/doc/classes/Classifier.html +5 -5
  8. data/doc/classes/Classifier/Bayes.html +43 -43
  9. data/doc/classes/Classifier/Bayes.src/{M000023.html → M000038.html} +0 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000024.html → M000039.html} +0 -0
  11. data/doc/classes/Classifier/Bayes.src/{M000025.html → M000040.html} +0 -0
  12. data/doc/classes/Classifier/Bayes.src/{M000026.html → M000041.html} +0 -0
  13. data/doc/classes/Classifier/Bayes.src/{M000027.html → M000042.html} +0 -0
  14. data/doc/classes/Classifier/Bayes.src/{M000028.html → M000043.html} +0 -0
  15. data/doc/classes/Classifier/Bayes.src/{M000029.html → M000044.html} +0 -0
  16. data/doc/classes/Classifier/ContentNode.html +23 -28
  17. data/doc/classes/Classifier/ContentNode.src/M000046.html +19 -0
  18. data/doc/classes/Classifier/ContentNode.src/{M000032.html → M000047.html} +1 -1
  19. data/doc/classes/Classifier/ContentNode.src/{M000033.html → M000048.html} +1 -1
  20. data/doc/classes/Classifier/ContentNode.src/M000049.html +49 -0
  21. data/doc/classes/Classifier/LSI.html +158 -68
  22. data/doc/classes/Classifier/LSI.src/M000022.html +6 -17
  23. data/doc/classes/Classifier/LSI.src/{M000012.html → M000023.html} +2 -2
  24. data/doc/classes/Classifier/LSI.src/{M000013.html → M000024.html} +3 -2
  25. data/doc/classes/Classifier/LSI.src/{M000014.html → M000025.html} +1 -1
  26. data/doc/classes/Classifier/LSI.src/M000026.html +19 -0
  27. data/doc/classes/Classifier/LSI.src/{M000015.html → M000027.html} +1 -1
  28. data/doc/classes/Classifier/LSI.src/{M000016.html → M000028.html} +1 -1
  29. data/doc/classes/Classifier/LSI.src/M000029.html +19 -0
  30. data/doc/classes/Classifier/LSI.src/M000030.html +43 -0
  31. data/doc/classes/Classifier/LSI.src/M000031.html +23 -0
  32. data/doc/classes/Classifier/LSI.src/{M000018.html → M000032.html} +7 -3
  33. data/doc/classes/Classifier/LSI.src/{M000019.html → M000033.html} +6 -2
  34. data/doc/classes/Classifier/LSI.src/{M000020.html → M000034.html} +2 -4
  35. data/doc/classes/Classifier/LSI.src/{M000021.html → M000035.html} +1 -1
  36. data/doc/classes/Classifier/LSI.src/M000036.html +31 -0
  37. data/doc/classes/Classifier/LSI.src/M000037.html +21 -0
  38. data/doc/classes/Classifier/WordList.html +37 -22
  39. data/doc/classes/Classifier/WordList.src/{M000007.html → M000017.html} +2 -2
  40. data/doc/classes/Classifier/WordList.src/{M000008.html → M000018.html} +1 -1
  41. data/doc/classes/Classifier/WordList.src/{M000009.html → M000019.html} +1 -1
  42. data/doc/classes/Classifier/WordList.src/M000020.html +18 -0
  43. data/doc/classes/Classifier/WordList.src/{M000010.html → M000021.html} +1 -1
  44. data/doc/classes/GSL.html +2 -1
  45. data/doc/classes/GSL/Matrix.html +126 -0
  46. data/doc/classes/GSL/Vector.html +10 -10
  47. data/doc/classes/GSL/Vector.src/{M000005.html → M000015.html} +0 -0
  48. data/doc/classes/GSL/Vector.src/{M000006.html → M000016.html} +0 -0
  49. data/doc/classes/Matrix.html +184 -0
  50. data/doc/classes/Matrix.src/M000004.html +18 -0
  51. data/doc/classes/Matrix.src/M000005.html +76 -0
  52. data/doc/classes/Matrix.src/M000006.html +18 -0
  53. data/doc/classes/Object.html +7 -7
  54. data/doc/classes/Object.src/{M000001.html → M000007.html} +1 -1
  55. data/doc/classes/String.html +90 -20
  56. data/doc/classes/String.src/{M000002.html → M000008.html} +0 -0
  57. data/doc/classes/String.src/{M000003.html → M000009.html} +0 -0
  58. data/doc/classes/String.src/{M000004.html → M000010.html} +0 -0
  59. data/doc/classes/String.src/M000011.html +18 -0
  60. data/doc/classes/String.src/M000012.html +18 -0
  61. data/doc/classes/String.src/M000013.html +18 -0
  62. data/doc/classes/String.src/M000014.html +18 -0
  63. data/doc/classes/Vector.html +154 -0
  64. data/doc/classes/Vector.src/M000001.html +22 -0
  65. data/doc/classes/Vector.src/M000002.html +25 -0
  66. data/doc/created.rid +1 -1
  67. data/doc/files/README.html +14 -8
  68. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  69. data/doc/files/lib/classifier/{string_extensions_rb.html → extensions/string_rb.html} +5 -5
  70. data/doc/files/lib/classifier/extensions/vector_rb.html +120 -0
  71. data/doc/files/lib/classifier/extensions/vector_serialize_rb.html +1 -1
  72. data/doc/files/lib/classifier/extensions/word_hash_rb.html +1 -1
  73. data/doc/files/lib/classifier/lsi/content_node_rb.html +2 -2
  74. data/doc/files/lib/classifier/lsi/summary_rb.html +115 -0
  75. data/doc/files/lib/classifier/{extensions → lsi}/word_list_rb.html +3 -3
  76. data/doc/files/lib/classifier/lsi_rb.html +5 -3
  77. data/doc/files/lib/classifier_rb.html +2 -2
  78. data/doc/fr_class_index.html +4 -0
  79. data/doc/fr_file_index.html +4 -2
  80. data/doc/fr_method_index.html +49 -34
  81. data/doc/index.html +2 -2
  82. data/lib/classifier.rb +1 -1
  83. data/lib/classifier/{string_extensions.rb → extensions/string.rb} +0 -0
  84. data/lib/classifier/extensions/vector.rb +106 -0
  85. data/lib/classifier/extensions/vector_serialize.rb +6 -0
  86. data/lib/classifier/lsi.rb +101 -31
  87. data/lib/classifier/lsi/content_node.rb +28 -23
  88. data/lib/classifier/lsi/summary.rb +31 -0
  89. data/lib/classifier/{extensions → lsi}/word_list.rb +7 -2
  90. data/test/{string_extensions → extensions}/word_hash_test.rb +0 -0
  91. data/test/lsi/lsi_test.rb +36 -1
  92. metadata +68 -41
  93. data/doc/classes/Classifier/ContentNode.src/M000031.html +0 -21
  94. data/doc/classes/Classifier/ContentNode.src/M000034.html +0 -41
  95. data/doc/classes/Classifier/LSI.src/M000011.html +0 -20
  96. data/doc/classes/Classifier/LSI.src/M000017.html +0 -32
@@ -83,7 +83,7 @@
83
83
  <div id="description">
84
84
  <p>
85
85
  This is an internal data structure class for the <a href="LSI.html">LSI</a>
86
- node. Save for <a href="ContentNode.html#M000034">raw_vector_with</a>, it
86
+ node. Save for <a href="ContentNode.html#M000049">raw_vector_with</a>, it
87
87
  should be fairly straightforward to understand. You should never have to
88
88
  use it directly.
89
89
  </p>
@@ -97,10 +97,10 @@ use it directly.
97
97
  <h3 class="section-bar">Methods</h3>
98
98
 
99
99
  <div class="name-list">
100
- <a href="#M000031">new</a>&nbsp;&nbsp;
101
- <a href="#M000034">raw_vector_with</a>&nbsp;&nbsp;
102
- <a href="#M000033">search_norm</a>&nbsp;&nbsp;
103
- <a href="#M000032">search_vector</a>&nbsp;&nbsp;
100
+ <a href="#M000046">new</a>&nbsp;&nbsp;
101
+ <a href="#M000049">raw_vector_with</a>&nbsp;&nbsp;
102
+ <a href="#M000048">search_norm</a>&nbsp;&nbsp;
103
+ <a href="#M000047">search_vector</a>&nbsp;&nbsp;
104
104
  </div>
105
105
  </div>
106
106
 
@@ -145,14 +145,9 @@ use it directly.
145
145
  <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
146
146
  <td class="context-item-desc"></td>
147
147
  </tr>
148
- <tr class="top-aligned-row context-row">
149
- <td class="context-item-name">source</td>
150
- <td class="context-item-value">&nbsp;[R]&nbsp;</td>
151
- <td class="context-item-desc"></td>
152
- </tr>
153
148
  <tr class="top-aligned-row context-row">
154
149
  <td class="context-item-name">word_hash</td>
155
- <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
150
+ <td class="context-item-value">&nbsp;[R]&nbsp;</td>
156
151
  <td class="context-item-desc"></td>
157
152
  </tr>
158
153
  </table>
@@ -165,13 +160,13 @@ use it directly.
165
160
  <div id="methods">
166
161
  <h3 class="section-bar">Public Class methods</h3>
167
162
 
168
- <div id="method-M000031" class="method-detail">
169
- <a name="M000031"></a>
163
+ <div id="method-M000046" class="method-detail">
164
+ <a name="M000046"></a>
170
165
 
171
166
  <div class="method-heading">
172
- <a href="ContentNode.src/M000031.html" target="Code" class="method-signature"
173
- onclick="popupCode('ContentNode.src/M000031.html');return false;">
174
- <span class="method-name">new</span><span class="method-args">( source, categories=nil, text_proc=nil )</span>
167
+ <a href="ContentNode.src/M000046.html" target="Code" class="method-signature"
168
+ onclick="popupCode('ContentNode.src/M000046.html');return false;">
169
+ <span class="method-name">new</span><span class="method-args">( word_hash, *categories )</span>
175
170
  </a>
176
171
  </div>
177
172
 
@@ -185,12 +180,12 @@ source.to_s
185
180
 
186
181
  <h3 class="section-bar">Public Instance methods</h3>
187
182
 
188
- <div id="method-M000034" class="method-detail">
189
- <a name="M000034"></a>
183
+ <div id="method-M000049" class="method-detail">
184
+ <a name="M000049"></a>
190
185
 
191
186
  <div class="method-heading">
192
- <a href="ContentNode.src/M000034.html" target="Code" class="method-signature"
193
- onclick="popupCode('ContentNode.src/M000034.html');return false;">
187
+ <a href="ContentNode.src/M000049.html" target="Code" class="method-signature"
188
+ onclick="popupCode('ContentNode.src/M000049.html');return false;">
194
189
  <span class="method-name">raw_vector_with</span><span class="method-args">( word_list )</span>
195
190
  </a>
196
191
  </div>
@@ -203,12 +198,12 @@ mapping the vector space.
203
198
  </div>
204
199
  </div>
205
200
 
206
- <div id="method-M000033" class="method-detail">
207
- <a name="M000033"></a>
201
+ <div id="method-M000048" class="method-detail">
202
+ <a name="M000048"></a>
208
203
 
209
204
  <div class="method-heading">
210
- <a href="ContentNode.src/M000033.html" target="Code" class="method-signature"
211
- onclick="popupCode('ContentNode.src/M000033.html');return false;">
205
+ <a href="ContentNode.src/M000048.html" target="Code" class="method-signature"
206
+ onclick="popupCode('ContentNode.src/M000048.html');return false;">
212
207
  <span class="method-name">search_norm</span><span class="method-args">()</span>
213
208
  </a>
214
209
  </div>
@@ -220,12 +215,12 @@ Use this to fetch the appropriate search vector in normalized form.
220
215
  </div>
221
216
  </div>
222
217
 
223
- <div id="method-M000032" class="method-detail">
224
- <a name="M000032"></a>
218
+ <div id="method-M000047" class="method-detail">
219
+ <a name="M000047"></a>
225
220
 
226
221
  <div class="method-heading">
227
- <a href="ContentNode.src/M000032.html" target="Code" class="method-signature"
228
- onclick="popupCode('ContentNode.src/M000032.html');return false;">
222
+ <a href="ContentNode.src/M000047.html" target="Code" class="method-signature"
223
+ onclick="popupCode('ContentNode.src/M000047.html');return false;">
229
224
  <span class="method-name">search_vector</span><span class="method-args">()</span>
230
225
  </a>
231
226
  </div>
@@ -0,0 +1,19 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>new (Classifier::ContentNode)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 18</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>( <span class="ruby-identifier">word_hash</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">categories</span> )
15
+ <span class="ruby-ivar">@categories</span> = <span class="ruby-identifier">categories</span> <span class="ruby-operator">||</span> []
16
+ <span class="ruby-ivar">@word_hash</span> = <span class="ruby-identifier">word_hash</span>
17
+ <span class="ruby-keyword kw">end</span></pre>
18
+ </body>
19
+ </html>
@@ -10,7 +10,7 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 27</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 24</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">search_vector</span>
15
15
  <span class="ruby-ivar">@lsi_vector</span> <span class="ruby-operator">||</span> <span class="ruby-ivar">@raw_vector</span>
16
16
  <span class="ruby-keyword kw">end</span></pre>
@@ -10,7 +10,7 @@
10
10
  <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
11
  </head>
12
12
  <body class="standalone-code">
13
- <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 32</span>
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 29</span>
14
14
  <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">search_norm</span>
15
15
  <span class="ruby-ivar">@lsi_norm</span> <span class="ruby-operator">||</span> <span class="ruby-ivar">@raw_norm</span>
16
16
  <span class="ruby-keyword kw">end</span></pre>
@@ -0,0 +1,49 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>raw_vector_with (Classifier::ContentNode)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi/content_node.rb, line 35</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">raw_vector_with</span>( <span class="ruby-identifier">word_list</span> )
15
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$GSL</span>
16
+ <span class="ruby-identifier">vec</span> = <span class="ruby-constant">Vector</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">word_list</span>.<span class="ruby-identifier">size</span>)
17
+ <span class="ruby-keyword kw">else</span>
18
+ <span class="ruby-identifier">vec</span> = <span class="ruby-constant">Array</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">word_list</span>.<span class="ruby-identifier">size</span>, <span class="ruby-value">0</span>)
19
+ <span class="ruby-keyword kw">end</span>
20
+
21
+ <span class="ruby-ivar">@word_hash</span>.<span class="ruby-identifier">each_key</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span><span class="ruby-operator">|</span>
22
+ <span class="ruby-identifier">vec</span>[<span class="ruby-identifier">word_list</span>[<span class="ruby-identifier">word</span>]] = <span class="ruby-ivar">@word_hash</span>[<span class="ruby-identifier">word</span>] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">word_list</span>[<span class="ruby-identifier">word</span>]
23
+ <span class="ruby-keyword kw">end</span>
24
+
25
+ <span class="ruby-comment cmt"># Perform the scaling transform</span>
26
+ <span class="ruby-identifier">total_words</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">sum</span>
27
+
28
+ <span class="ruby-comment cmt"># Perform first-order association transform if this vector has more</span>
29
+ <span class="ruby-comment cmt"># than one word in it. </span>
30
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">total_words</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">1.0</span>
31
+ <span class="ruby-identifier">weighted_total</span> = <span class="ruby-value">0</span><span class="ruby-value">.0</span>
32
+ <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">term</span><span class="ruby-operator">|</span>
33
+ <span class="ruby-keyword kw">if</span> ( <span class="ruby-identifier">term</span> <span class="ruby-operator">&gt;</span> <span class="ruby-value">0</span> )
34
+ <span class="ruby-identifier">weighted_total</span> <span class="ruby-operator">+=</span> (( <span class="ruby-identifier">term</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">total_words</span> ) <span class="ruby-operator">*</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>( <span class="ruby-identifier">term</span> <span class="ruby-operator">/</span> <span class="ruby-identifier">total_words</span> ))
35
+ <span class="ruby-keyword kw">end</span>
36
+ <span class="ruby-keyword kw">end</span>
37
+ <span class="ruby-identifier">vec</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">val</span><span class="ruby-operator">|</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>( <span class="ruby-identifier">val</span> <span class="ruby-operator">+</span> <span class="ruby-value">1</span> ) <span class="ruby-operator">/</span> <span class="ruby-operator">-</span><span class="ruby-identifier">weighted_total</span> }
38
+ <span class="ruby-keyword kw">end</span>
39
+
40
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$GSL</span>
41
+ <span class="ruby-ivar">@raw_norm</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">normalize</span>
42
+ <span class="ruby-ivar">@raw_vector</span> = <span class="ruby-identifier">vec</span>
43
+ <span class="ruby-keyword kw">else</span>
44
+ <span class="ruby-ivar">@raw_norm</span> = <span class="ruby-constant">Vector</span>[<span class="ruby-operator">*</span><span class="ruby-identifier">vec</span>].<span class="ruby-identifier">normalize</span>
45
+ <span class="ruby-ivar">@raw_vector</span> = <span class="ruby-constant">Vector</span>[<span class="ruby-operator">*</span><span class="ruby-identifier">vec</span>]
46
+ <span class="ruby-keyword kw">end</span>
47
+ <span class="ruby-keyword kw">end</span></pre>
48
+ </body>
49
+ </html>
@@ -97,18 +97,22 @@ href="http://en.wikipedia.org/wiki/Latent_Semantic_Indexing">Wikipedia</a>.
97
97
  <h3 class="section-bar">Methods</h3>
98
98
 
99
99
  <div class="name-list">
100
- <a href="#M000014">&lt;&lt;</a>&nbsp;&nbsp;
101
- <a href="#M000013">add_item</a>&nbsp;&nbsp;
102
- <a href="#M000017">build_index</a>&nbsp;&nbsp;
103
- <a href="#M000022">classify</a>&nbsp;&nbsp;
104
- <a href="#M000021">find_related</a>&nbsp;&nbsp;
105
- <a href="#M000016">items</a>&nbsp;&nbsp;
106
- <a href="#M000012">needs_rebuild?</a>&nbsp;&nbsp;
107
- <a href="#M000011">new</a>&nbsp;&nbsp;
108
- <a href="#M000018">proximity_array_for_content</a>&nbsp;&nbsp;
109
- <a href="#M000019">proximity_norms_for_content</a>&nbsp;&nbsp;
110
- <a href="#M000015">remove_item</a>&nbsp;&nbsp;
111
- <a href="#M000020">search</a>&nbsp;&nbsp;
100
+ <a href="#M000025">&lt;&lt;</a>&nbsp;&nbsp;
101
+ <a href="#M000024">add_item</a>&nbsp;&nbsp;
102
+ <a href="#M000030">build_index</a>&nbsp;&nbsp;
103
+ <a href="#M000029">categories_for</a>&nbsp;&nbsp;
104
+ <a href="#M000026">categories_for</a>&nbsp;&nbsp;
105
+ <a href="#M000036">classify</a>&nbsp;&nbsp;
106
+ <a href="#M000035">find_related</a>&nbsp;&nbsp;
107
+ <a href="#M000037">highest_ranked_stems</a>&nbsp;&nbsp;
108
+ <a href="#M000031">highest_relative_content</a>&nbsp;&nbsp;
109
+ <a href="#M000028">items</a>&nbsp;&nbsp;
110
+ <a href="#M000023">needs_rebuild?</a>&nbsp;&nbsp;
111
+ <a href="#M000022">new</a>&nbsp;&nbsp;
112
+ <a href="#M000032">proximity_array_for_content</a>&nbsp;&nbsp;
113
+ <a href="#M000033">proximity_norms_for_content</a>&nbsp;&nbsp;
114
+ <a href="#M000027">remove_item</a>&nbsp;&nbsp;
115
+ <a href="#M000034">search</a>&nbsp;&nbsp;
112
116
  </div>
113
117
  </div>
114
118
 
@@ -128,6 +132,11 @@ href="http://en.wikipedia.org/wiki/Latent_Semantic_Indexing">Wikipedia</a>.
128
132
 
129
133
  <div class="name-list">
130
134
  <table>
135
+ <tr class="top-aligned-row context-row">
136
+ <td class="context-item-name">auto_rebuild</td>
137
+ <td class="context-item-value">&nbsp;[RW]&nbsp;</td>
138
+ <td class="context-item-desc"></td>
139
+ </tr>
131
140
  <tr class="top-aligned-row context-row">
132
141
  <td class="context-item-name">word_list</td>
133
142
  <td class="context-item-value">&nbsp;[R]&nbsp;</td>
@@ -143,12 +152,12 @@ href="http://en.wikipedia.org/wiki/Latent_Semantic_Indexing">Wikipedia</a>.
143
152
  <div id="methods">
144
153
  <h3 class="section-bar">Public Class methods</h3>
145
154
 
146
- <div id="method-M000011" class="method-detail">
147
- <a name="M000011"></a>
155
+ <div id="method-M000022" class="method-detail">
156
+ <a name="M000022"></a>
148
157
 
149
158
  <div class="method-heading">
150
- <a href="LSI.src/M000011.html" target="Code" class="method-signature"
151
- onclick="popupCode('LSI.src/M000011.html');return false;">
159
+ <a href="LSI.src/M000022.html" target="Code" class="method-signature"
160
+ onclick="popupCode('LSI.src/M000022.html');return false;">
152
161
  <span class="method-name">new</span><span class="method-args">(options = {})</span>
153
162
  </a>
154
163
  </div>
@@ -156,7 +165,7 @@ href="http://en.wikipedia.org/wiki/Latent_Semantic_Indexing">Wikipedia</a>.
156
165
  <div class="method-description">
157
166
  <p>
158
167
  Create a fresh index. If you want to call <a
159
- href="LSI.html#M000017">build_index</a> manually, use
168
+ href="LSI.html#M000030">build_index</a> manually, use
160
169
  </p>
161
170
  <pre>
162
171
  Classifier::LSI.new :auto_rebuild =&gt; false
@@ -166,31 +175,31 @@ href="LSI.html#M000017">build_index</a> manually, use
166
175
 
167
176
  <h3 class="section-bar">Public Instance methods</h3>
168
177
 
169
- <div id="method-M000014" class="method-detail">
170
- <a name="M000014"></a>
178
+ <div id="method-M000025" class="method-detail">
179
+ <a name="M000025"></a>
171
180
 
172
181
  <div class="method-heading">
173
- <a href="LSI.src/M000014.html" target="Code" class="method-signature"
174
- onclick="popupCode('LSI.src/M000014.html');return false;">
182
+ <a href="LSI.src/M000025.html" target="Code" class="method-signature"
183
+ onclick="popupCode('LSI.src/M000025.html');return false;">
175
184
  <span class="method-name">&lt;&lt;</span><span class="method-args">( item )</span>
176
185
  </a>
177
186
  </div>
178
187
 
179
188
  <div class="method-description">
180
189
  <p>
181
- A less flexible shorthand for <a href="LSI.html#M000013">add_item</a> that
190
+ A less flexible shorthand for <a href="LSI.html#M000024">add_item</a> that
182
191
  assumes you are passing in a string with no categories. item will be duck
183
192
  typed via to_s .
184
193
  </p>
185
194
  </div>
186
195
  </div>
187
196
 
188
- <div id="method-M000013" class="method-detail">
189
- <a name="M000013"></a>
197
+ <div id="method-M000024" class="method-detail">
198
+ <a name="M000024"></a>
190
199
 
191
200
  <div class="method-heading">
192
- <a href="LSI.src/M000013.html" target="Code" class="method-signature"
193
- onclick="popupCode('LSI.src/M000013.html');return false;">
201
+ <a href="LSI.src/M000024.html" target="Code" class="method-signature"
202
+ onclick="popupCode('LSI.src/M000024.html');return false;">
194
203
  <span class="method-name">add_item</span><span class="method-args">( item, *categories, &amp;block )</span>
195
204
  </a>
196
205
  </div>
@@ -216,12 +225,12 @@ For example:
216
225
  </div>
217
226
  </div>
218
227
 
219
- <div id="method-M000017" class="method-detail">
220
- <a name="M000017"></a>
228
+ <div id="method-M000030" class="method-detail">
229
+ <a name="M000030"></a>
221
230
 
222
231
  <div class="method-heading">
223
- <a href="LSI.src/M000017.html" target="Code" class="method-signature"
224
- onclick="popupCode('LSI.src/M000017.html');return false;">
232
+ <a href="LSI.src/M000030.html" target="Code" class="method-signature"
233
+ onclick="popupCode('LSI.src/M000030.html');return false;">
225
234
  <span class="method-name">build_index</span><span class="method-args">( cutoff=0.75 )</span>
226
235
  </a>
227
236
  </div>
@@ -229,7 +238,7 @@ For example:
229
238
  <div class="method-description">
230
239
  <p>
231
240
  This function rebuilds the index if <a
232
- href="LSI.html#M000012">needs_rebuild?</a> returns true. For very large
241
+ href="LSI.html#M000023">needs_rebuild?</a> returns true. For very large
233
242
  document spaces, this indexing operation may take some time to complete, so
234
243
  it may be wise to place the operation in another thread.
235
244
  </p>
@@ -249,12 +258,50 @@ engine.
249
258
  </div>
250
259
  </div>
251
260
 
252
- <div id="method-M000022" class="method-detail">
253
- <a name="M000022"></a>
261
+ <div id="method-M000029" class="method-detail">
262
+ <a name="M000029"></a>
254
263
 
255
264
  <div class="method-heading">
256
- <a href="LSI.src/M000022.html" target="Code" class="method-signature"
257
- onclick="popupCode('LSI.src/M000022.html');return false;">
265
+ <a href="LSI.src/M000029.html" target="Code" class="method-signature"
266
+ onclick="popupCode('LSI.src/M000029.html');return false;">
267
+ <span class="method-name">categories_for</span><span class="method-args">(item)</span>
268
+ </a>
269
+ </div>
270
+
271
+ <div class="method-description">
272
+ <p>
273
+ Returns the categories for a given indexed item. You are free to add and
274
+ remove items from this as you see fit. It does not invalidate an index to
275
+ change its categories.
276
+ </p>
277
+ </div>
278
+ </div>
279
+
280
+ <div id="method-M000026" class="method-detail">
281
+ <a name="M000026"></a>
282
+
283
+ <div class="method-heading">
284
+ <a href="LSI.src/M000026.html" target="Code" class="method-signature"
285
+ onclick="popupCode('LSI.src/M000026.html');return false;">
286
+ <span class="method-name">categories_for</span><span class="method-args">(item)</span>
287
+ </a>
288
+ </div>
289
+
290
+ <div class="method-description">
291
+ <p>
292
+ Returns the categories for a given indexed item. You are free to add and
293
+ remove items from this as you see fit. It does not invalidate an index to
294
+ change its categories.
295
+ </p>
296
+ </div>
297
+ </div>
298
+
299
+ <div id="method-M000036" class="method-detail">
300
+ <a name="M000036"></a>
301
+
302
+ <div class="method-heading">
303
+ <a href="LSI.src/M000036.html" target="Code" class="method-signature"
304
+ onclick="popupCode('LSI.src/M000036.html');return false;">
258
305
  <span class="method-name">classify</span><span class="method-args">( doc, cutoff=0.30, &amp;block )</span>
259
306
  </a>
260
307
  </div>
@@ -263,7 +310,7 @@ engine.
263
310
  <p>
264
311
  This function uses a voting system to categorize documents, based on the
265
312
  categories of other documents. It uses the same logic as the <a
266
- href="LSI.html#M000021">find_related</a> function to find related
313
+ href="LSI.html#M000035">find_related</a> function to find related
267
314
  documents, then returns the most obvious category from this list.
268
315
  </p>
269
316
  <p>
@@ -274,12 +321,12 @@ the document is in. This may not always make sense.
274
321
  </div>
275
322
  </div>
276
323
 
277
- <div id="method-M000021" class="method-detail">
278
- <a name="M000021"></a>
324
+ <div id="method-M000035" class="method-detail">
325
+ <a name="M000035"></a>
279
326
 
280
327
  <div class="method-heading">
281
- <a href="LSI.src/M000021.html" target="Code" class="method-signature"
282
- onclick="popupCode('LSI.src/M000021.html');return false;">
328
+ <a href="LSI.src/M000035.html" target="Code" class="method-signature"
329
+ onclick="popupCode('LSI.src/M000035.html');return false;">
283
330
  <span class="method-name">find_related</span><span class="method-args">( doc, max_nearest=3, &amp;block )</span>
284
331
  </a>
285
332
  </div>
@@ -301,12 +348,55 @@ each other in an essay.
301
348
  </div>
302
349
  </div>
303
350
 
304
- <div id="method-M000016" class="method-detail">
305
- <a name="M000016"></a>
351
+ <div id="method-M000037" class="method-detail">
352
+ <a name="M000037"></a>
353
+
354
+ <div class="method-heading">
355
+ <a href="LSI.src/M000037.html" target="Code" class="method-signature"
356
+ onclick="popupCode('LSI.src/M000037.html');return false;">
357
+ <span class="method-name">highest_ranked_stems</span><span class="method-args">( doc, count=3 )</span>
358
+ </a>
359
+ </div>
360
+
361
+ <div class="method-description">
362
+ <p>
363
+ Prototype, only works on indexed documents. I have no clue if this is going
364
+ to work, but in theory it&#8217;s supposed to.
365
+ </p>
366
+ </div>
367
+ </div>
368
+
369
+ <div id="method-M000031" class="method-detail">
370
+ <a name="M000031"></a>
371
+
372
+ <div class="method-heading">
373
+ <a href="LSI.src/M000031.html" target="Code" class="method-signature"
374
+ onclick="popupCode('LSI.src/M000031.html');return false;">
375
+ <span class="method-name">highest_relative_content</span><span class="method-args">( max_chunks=10 )</span>
376
+ </a>
377
+ </div>
378
+
379
+ <div class="method-description">
380
+ <p>
381
+ This method returns max_chunks entries, ordered by their average semantic
382
+ rating. Essentially, the average distance of each entry from all other
383
+ entries is calculated, the highest are returned.
384
+ </p>
385
+ <p>
386
+ This can be used to build a summary service, or to provide more information
387
+ about your dataset&#8217;s general content. For example, if you were to use
388
+ categorize on the results of this data, you could gather information on
389
+ what your dataset is generally about.
390
+ </p>
391
+ </div>
392
+ </div>
393
+
394
+ <div id="method-M000028" class="method-detail">
395
+ <a name="M000028"></a>
306
396
 
307
397
  <div class="method-heading">
308
- <a href="LSI.src/M000016.html" target="Code" class="method-signature"
309
- onclick="popupCode('LSI.src/M000016.html');return false;">
398
+ <a href="LSI.src/M000028.html" target="Code" class="method-signature"
399
+ onclick="popupCode('LSI.src/M000028.html');return false;">
310
400
  <span class="method-name">items</span><span class="method-args">()</span>
311
401
  </a>
312
402
  </div>
@@ -318,12 +408,12 @@ Returns an array of items that are indexed.
318
408
  </div>
319
409
  </div>
320
410
 
321
- <div id="method-M000012" class="method-detail">
322
- <a name="M000012"></a>
411
+ <div id="method-M000023" class="method-detail">
412
+ <a name="M000023"></a>
323
413
 
324
414
  <div class="method-heading">
325
- <a href="LSI.src/M000012.html" target="Code" class="method-signature"
326
- onclick="popupCode('LSI.src/M000012.html');return false;">
415
+ <a href="LSI.src/M000023.html" target="Code" class="method-signature"
416
+ onclick="popupCode('LSI.src/M000023.html');return false;">
327
417
  <span class="method-name">needs_rebuild?</span><span class="method-args">()</span>
328
418
  </a>
329
419
  </div>
@@ -337,12 +427,12 @@ classification and cluster detection.
337
427
  </div>
338
428
  </div>
339
429
 
340
- <div id="method-M000018" class="method-detail">
341
- <a name="M000018"></a>
430
+ <div id="method-M000032" class="method-detail">
431
+ <a name="M000032"></a>
342
432
 
343
433
  <div class="method-heading">
344
- <a href="LSI.src/M000018.html" target="Code" class="method-signature"
345
- onclick="popupCode('LSI.src/M000018.html');return false;">
434
+ <a href="LSI.src/M000032.html" target="Code" class="method-signature"
435
+ onclick="popupCode('LSI.src/M000032.html');return false;">
346
436
  <span class="method-name">proximity_array_for_content</span><span class="method-args">( doc, &amp;block )</span>
347
437
  </a>
348
438
  </div>
@@ -350,7 +440,7 @@ classification and cluster detection.
350
440
  <div class="method-description">
351
441
  <p>
352
442
  This function is the primitive that <a
353
- href="LSI.html#M000021">find_related</a> and classify build upon. It
443
+ href="LSI.html#M000035">find_related</a> and classify build upon. It
354
444
  returns an array of 2-element arrays. The first element of this array is a
355
445
  document, and the second is its &quot;score&quot;, defining how
356
446
  &quot;close&quot; it is to other indexed items.
@@ -363,25 +453,25 @@ meaningful between indexes.
363
453
  <p>
364
454
  The parameter doc is the content to compare. If that content is not
365
455
  indexed, you can pass an optional block to define how to create the text
366
- data. See <a href="LSI.html#M000013">add_item</a> for examples of how this
456
+ data. See <a href="LSI.html#M000024">add_item</a> for examples of how this
367
457
  works.
368
458
  </p>
369
459
  </div>
370
460
  </div>
371
461
 
372
- <div id="method-M000019" class="method-detail">
373
- <a name="M000019"></a>
462
+ <div id="method-M000033" class="method-detail">
463
+ <a name="M000033"></a>
374
464
 
375
465
  <div class="method-heading">
376
- <a href="LSI.src/M000019.html" target="Code" class="method-signature"
377
- onclick="popupCode('LSI.src/M000019.html');return false;">
466
+ <a href="LSI.src/M000033.html" target="Code" class="method-signature"
467
+ onclick="popupCode('LSI.src/M000033.html');return false;">
378
468
  <span class="method-name">proximity_norms_for_content</span><span class="method-args">( doc, &amp;block )</span>
379
469
  </a>
380
470
  </div>
381
471
 
382
472
  <div class="method-description">
383
473
  <p>
384
- Similar to <a href="LSI.html#M000018">proximity_array_for_content</a>, this
474
+ Similar to <a href="LSI.html#M000032">proximity_array_for_content</a>, this
385
475
  function takes similar arguments and returns a similar array. However, it
386
476
  uses the normalized calculated vectors instead of their full versions. This
387
477
  is useful when you&#8217;re trying to perform operations on content that is
@@ -391,12 +481,12 @@ primitive.
391
481
  </div>
392
482
  </div>
393
483
 
394
- <div id="method-M000015" class="method-detail">
395
- <a name="M000015"></a>
484
+ <div id="method-M000027" class="method-detail">
485
+ <a name="M000027"></a>
396
486
 
397
487
  <div class="method-heading">
398
- <a href="LSI.src/M000015.html" target="Code" class="method-signature"
399
- onclick="popupCode('LSI.src/M000015.html');return false;">
488
+ <a href="LSI.src/M000027.html" target="Code" class="method-signature"
489
+ onclick="popupCode('LSI.src/M000027.html');return false;">
400
490
  <span class="method-name">remove_item</span><span class="method-args">( item )</span>
401
491
  </a>
402
492
  </div>
@@ -408,12 +498,12 @@ Removes an item from the database, if it is indexed.
408
498
  </div>
409
499
  </div>
410
500
 
411
- <div id="method-M000020" class="method-detail">
412
- <a name="M000020"></a>
501
+ <div id="method-M000034" class="method-detail">
502
+ <a name="M000034"></a>
413
503
 
414
504
  <div class="method-heading">
415
- <a href="LSI.src/M000020.html" target="Code" class="method-signature"
416
- onclick="popupCode('LSI.src/M000020.html');return false;">
505
+ <a href="LSI.src/M000034.html" target="Code" class="method-signature"
506
+ onclick="popupCode('LSI.src/M000034.html');return false;">
417
507
  <span class="method-name">search</span><span class="method-args">( string, max_nearest=3 )</span>
418
508
  </a>
419
509
  </div>
@@ -421,7 +511,7 @@ Removes an item from the database, if it is indexed.
421
511
  <div class="method-description">
422
512
  <p>
423
513
  This function allows for text-based search of your index. Unlike other
424
- functions like <a href="LSI.html#M000021">find_related</a> and classify,
514
+ functions like <a href="LSI.html#M000035">find_related</a> and classify,
425
515
  search only takes short strings. It will also ignore factors like repeated
426
516
  words. It is best for short, google-like search terms. A search will first
427
517
  prioritize lexical relationships, then semantic ones.