classifier 1.1.1 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (77)
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>new (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 26</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">options</span> = {})
15
+ <span class="ruby-ivar">@auto_rebuild</span> = <span class="ruby-keyword kw">true</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">options</span>[<span class="ruby-identifier">:auto_rebuild</span>] <span class="ruby-operator">==</span> <span class="ruby-keyword kw">false</span>
16
+ <span class="ruby-ivar">@word_list</span>, <span class="ruby-ivar">@items</span> = <span class="ruby-constant">WordList</span>.<span class="ruby-identifier">new</span>, {}
17
+ <span class="ruby-ivar">@version</span>, <span class="ruby-ivar">@built_at_version</span> = <span class="ruby-value">0</span>, <span class="ruby-value">-1</span>
18
+ <span class="ruby-keyword kw">end</span></pre>
19
+ </body>
20
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>needs_rebuild? (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 35</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">needs_rebuild?</span>
15
+ <span class="ruby-ivar">@version</span> <span class="ruby-operator">!=</span> <span class="ruby-ivar">@built_at_version</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>add_item (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 52</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_item</span>( <span class="ruby-identifier">item</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">item</span>] = <span class="ruby-constant">ContentNode</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">item</span>, <span class="ruby-identifier">categories</span>, <span class="ruby-identifier">block</span>)
16
+ <span class="ruby-ivar">@version</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
17
+ <span class="ruby-identifier">build_index</span> <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@auto_rebuild</span>
18
+ <span class="ruby-keyword kw">end</span></pre>
19
+ </body>
20
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>&lt;&lt; (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 62</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-operator">&lt;&lt;</span>( <span class="ruby-identifier">item</span> )
15
+ <span class="ruby-identifier">add_item</span> <span class="ruby-identifier">item</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>remove_item (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 68</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_item</span>( <span class="ruby-identifier">item</span> )
15
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">contain?</span> <span class="ruby-identifier">item</span>
16
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">remove</span> <span class="ruby-identifier">item</span>
17
+ <span class="ruby-ivar">@version</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
18
+ <span class="ruby-keyword kw">end</span>
19
+ <span class="ruby-keyword kw">end</span></pre>
20
+ </body>
21
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>items (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 76</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">items</span>
15
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,32 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>build_index (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 94</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">build_index</span>( <span class="ruby-identifier">cutoff</span>=<span class="ruby-value">0</span><span class="ruby-value">.75</span> )
15
+ <span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+ <span class="ruby-identifier">make_word_list</span>
17
+
18
+ <span class="ruby-identifier">doc_list</span> = <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">values</span>
19
+ <span class="ruby-identifier">tda</span> = <span class="ruby-identifier">doc_list</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">node</span><span class="ruby-operator">|</span> <span class="ruby-identifier">node</span>.<span class="ruby-identifier">raw_vector_with</span>( <span class="ruby-ivar">@word_list</span> ) }
20
+ <span class="ruby-identifier">tdm</span> = <span class="ruby-constant">GSL</span><span class="ruby-operator">::</span><span class="ruby-constant">Matrix</span>.<span class="ruby-identifier">new</span>( <span class="ruby-operator">*</span><span class="ruby-identifier">tda</span> ).<span class="ruby-identifier">trans</span>
21
+ <span class="ruby-identifier">ntdm</span> = <span class="ruby-identifier">build_reduced_matrix</span>(<span class="ruby-identifier">tdm</span>, <span class="ruby-identifier">cutoff</span>)
22
+
23
+ <span class="ruby-identifier">ntdm</span>.<span class="ruby-identifier">size</span>[<span class="ruby-value">1</span>].<span class="ruby-identifier">times</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">col</span><span class="ruby-operator">|</span>
24
+ <span class="ruby-identifier">vec</span> = <span class="ruby-constant">GSL</span><span class="ruby-operator">::</span><span class="ruby-constant">Vector</span>.<span class="ruby-identifier">new</span>( <span class="ruby-identifier">ntdm</span>.<span class="ruby-identifier">column</span>(<span class="ruby-identifier">col</span>) ).<span class="ruby-identifier">row</span>
25
+ <span class="ruby-identifier">doc_list</span>[<span class="ruby-identifier">col</span>].<span class="ruby-identifier">lsi_vector</span> = <span class="ruby-identifier">vec</span>
26
+ <span class="ruby-identifier">doc_list</span>[<span class="ruby-identifier">col</span>].<span class="ruby-identifier">lsi_norm</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">normalize</span>
27
+ <span class="ruby-keyword kw">end</span>
28
+
29
+ <span class="ruby-ivar">@built_at_version</span> = <span class="ruby-ivar">@version</span>
30
+ <span class="ruby-keyword kw">end</span></pre>
31
+ </body>
32
+ </html>
@@ -0,0 +1,26 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>proximity_array_for_content (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 124</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">proximity_array_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-keyword kw">return</span> [] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+
17
+ <span class="ruby-identifier">content_node</span> = <span class="ruby-identifier">node_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
18
+ <span class="ruby-identifier">result</span> =
19
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">collect</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">item</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">val</span> = <span class="ruby-identifier">content_node</span>.<span class="ruby-identifier">search_vector</span> <span class="ruby-operator">*</span> <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">item</span>].<span class="ruby-identifier">search_vector</span>.<span class="ruby-identifier">col</span>
21
+ [<span class="ruby-identifier">item</span>, <span class="ruby-identifier">val</span>]
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-identifier">result</span>.<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">reverse</span>
24
+ <span class="ruby-keyword kw">end</span></pre>
25
+ </body>
26
+ </html>
@@ -0,0 +1,26 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>proximity_norms_for_content (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 141</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">proximity_norms_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-keyword kw">return</span> [] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+
17
+ <span class="ruby-identifier">content_node</span> = <span class="ruby-identifier">node_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
18
+ <span class="ruby-identifier">result</span> =
19
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">collect</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">item</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">val</span> = <span class="ruby-identifier">content_node</span>.<span class="ruby-identifier">search_norm</span> <span class="ruby-operator">*</span> <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">item</span>].<span class="ruby-identifier">search_norm</span>.<span class="ruby-identifier">col</span>
21
+ [<span class="ruby-identifier">item</span>, <span class="ruby-identifier">val</span>]
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-identifier">result</span>.<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">reverse</span>
24
+ <span class="ruby-keyword kw">end</span></pre>
25
+ </body>
26
+ </html>
@@ -0,0 +1,23 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>search (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 160</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">search</span>( <span class="ruby-identifier">string</span>, <span class="ruby-identifier">max_nearest</span>=<span class="ruby-value">3</span> )
15
+ <span class="ruby-keyword kw">return</span> [] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+
17
+ <span class="ruby-identifier">carry</span> =
18
+ <span class="ruby-identifier">proximity_norms_for_content</span>( <span class="ruby-identifier">string</span> )
19
+ <span class="ruby-identifier">result</span> = <span class="ruby-identifier">carry</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">0</span>] }
20
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">result</span>[<span class="ruby-value">0</span><span class="ruby-operator">..</span><span class="ruby-identifier">max_nearest</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>]
21
+ <span class="ruby-keyword kw">end</span></pre>
22
+ </body>
23
+ </html>
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>find_related (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 178</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">find_related</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-identifier">max_nearest</span>=<span class="ruby-value">3</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-identifier">carry</span> =
16
+ <span class="ruby-identifier">proximity_array_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> ).<span class="ruby-identifier">reject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">pair</span><span class="ruby-operator">|</span> <span class="ruby-identifier">pair</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-identifier">doc</span> }
17
+ <span class="ruby-identifier">result</span> = <span class="ruby-identifier">carry</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">0</span>] }
18
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">result</span>[<span class="ruby-value">0</span><span class="ruby-operator">..</span><span class="ruby-identifier">max_nearest</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>]
19
+ <span class="ruby-keyword kw">end</span></pre>
20
+ </body>
21
+ </html>
@@ -0,0 +1,31 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>classify (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 194</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-identifier">cutoff</span>=<span class="ruby-value">0</span><span class="ruby-value">.30</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-identifier">icutoff</span> = (<span class="ruby-ivar">@items</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">*</span> <span class="ruby-identifier">cutoff</span>).<span class="ruby-identifier">round</span>
16
+ <span class="ruby-identifier">carry</span> = <span class="ruby-identifier">proximity_array_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
17
+ <span class="ruby-identifier">carry</span> = <span class="ruby-identifier">carry</span>[<span class="ruby-value">0</span><span class="ruby-operator">..</span><span class="ruby-identifier">icutoff</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>]
18
+ <span class="ruby-identifier">votes</span> = {}
19
+ <span class="ruby-identifier">carry</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">pair</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">categories</span> = <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">pair</span>[<span class="ruby-value">0</span>]].<span class="ruby-identifier">categories</span>
21
+ <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span>
22
+ <span class="ruby-identifier">votes</span>[<span class="ruby-identifier">category</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span><span class="ruby-value">.0</span>
23
+ <span class="ruby-identifier">votes</span>[<span class="ruby-identifier">category</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">pair</span>[<span class="ruby-value">1</span>]
24
+ <span class="ruby-keyword kw">end</span>
25
+ <span class="ruby-keyword kw">end</span>
26
+
27
+ <span class="ruby-identifier">ranking</span> = <span class="ruby-identifier">votes</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">votes</span>[<span class="ruby-identifier">x</span>] }
28
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">ranking</span>[<span class="ruby-value">-1</span>]
29
+ <span class="ruby-keyword kw">end</span></pre>
30
+ </body>
31
+ </html>
@@ -0,0 +1,202 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Classifier::WordList</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Classifier::WordList</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/extensions/word_list_rb.html">
59
+ lib/classifier/extensions/word_list.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ <a href="../Object.html">
69
+ Object
70
+ </a>
71
+ </td>
72
+ </tr>
73
+ </table>
74
+ </div>
75
+ <!-- banner header -->
76
+
77
+ <div id="bodyContent">
78
+
79
+
80
+
81
+ <div id="contextContent">
82
+
83
+ <div id="description">
84
+ <p>
85
+ This class keeps a word =&gt; index mapping. It is used to map stemmed
86
+ words to dimensions of a vector.
87
+ </p>
88
+
89
+ </div>
90
+
91
+
92
+ </div>
93
+
94
+ <div id="method-list">
95
+ <h3 class="section-bar">Methods</h3>
96
+
97
+ <div class="name-list">
98
+ <a href="#M000009">[]</a>&nbsp;&nbsp;
99
+ <a href="#M000008">add_word</a>&nbsp;&nbsp;
100
+ <a href="#M000007">new</a>&nbsp;&nbsp;
101
+ <a href="#M000010">size</a>&nbsp;&nbsp;
102
+ </div>
103
+ </div>
104
+
105
+ </div>
106
+
107
+
108
+ <!-- if includes -->
109
+
110
+ <div id="section">
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+ <!-- if method_list -->
120
+ <div id="methods">
121
+ <h3 class="section-bar">Public Class methods</h3>
122
+
123
+ <div id="method-M000007" class="method-detail">
124
+ <a name="M000007"></a>
125
+
126
+ <div class="method-heading">
127
+ <a href="WordList.src/M000007.html" target="Code" class="method-signature"
128
+ onclick="popupCode('WordList.src/M000007.html');return false;">
129
+ <span class="method-name">new</span><span class="method-args">()</span>
130
+ </a>
131
+ </div>
132
+
133
+ <div class="method-description">
134
+ </div>
135
+ </div>
136
+
137
+ <h3 class="section-bar">Public Instance methods</h3>
138
+
139
+ <div id="method-M000009" class="method-detail">
140
+ <a name="M000009"></a>
141
+
142
+ <div class="method-heading">
143
+ <a href="WordList.src/M000009.html" target="Code" class="method-signature"
144
+ onclick="popupCode('WordList.src/M000009.html');return false;">
145
+ <span class="method-name">[]</span><span class="method-args">(lookup)</span>
146
+ </a>
147
+ </div>
148
+
149
+ <div class="method-description">
150
+ <p>
151
+ Returns the dimension of the word or nil if the word is not in the space.
152
+ </p>
153
+ </div>
154
+ </div>
155
+
156
+ <div id="method-M000008" class="method-detail">
157
+ <a name="M000008"></a>
158
+
159
+ <div class="method-heading">
160
+ <a href="WordList.src/M000008.html" target="Code" class="method-signature"
161
+ onclick="popupCode('WordList.src/M000008.html');return false;">
162
+ <span class="method-name">add_word</span><span class="method-args">(word)</span>
163
+ </a>
164
+ </div>
165
+
166
+ <div class="method-description">
167
+ <p>
168
+ Adds a word (if it is new) and assigns it a unique dimension.
169
+ </p>
170
+ </div>
171
+ </div>
172
+
173
+ <div id="method-M000010" class="method-detail">
174
+ <a name="M000010"></a>
175
+
176
+ <div class="method-heading">
177
+ <a href="WordList.src/M000010.html" target="Code" class="method-signature"
178
+ onclick="popupCode('WordList.src/M000010.html');return false;">
179
+ <span class="method-name">size</span><span class="method-args">()</span>
180
+ </a>
181
+ </div>
182
+
183
+ <div class="method-description">
184
+ <p>
185
+ Returns the number of words mapped.
186
+ </p>
187
+ </div>
188
+ </div>
189
+
190
+
191
+ </div>
192
+
193
+
194
+ </div>
195
+
196
+
197
+ <div id="validator-badges">
198
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
199
+ </div>
200
+
201
+ </body>
202
+ </html>