classifier 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (77) hide show
  1. data/LICENSE +341 -0
  2. data/README +59 -6
  3. data/Rakefile +16 -4
  4. data/bin/bayes.rb +8 -2
  5. data/doc/classes/Classifier.html +15 -10
  6. data/doc/classes/Classifier/Bayes.html +68 -38
  7. data/doc/classes/Classifier/Bayes.src/{M000005.html → M000023.html} +1 -1
  8. data/doc/classes/Classifier/Bayes.src/{M000006.html → M000024.html} +1 -1
  9. data/doc/classes/Classifier/Bayes.src/M000025.html +30 -0
  10. data/doc/classes/Classifier/Bayes.src/{M000007.html → M000026.html} +1 -1
  11. data/doc/classes/Classifier/Bayes.src/{M000008.html → M000027.html} +1 -1
  12. data/doc/classes/Classifier/Bayes.src/{M000009.html → M000028.html} +4 -4
  13. data/doc/classes/Classifier/Bayes.src/{M000010.html → M000029.html} +2 -2
  14. data/doc/classes/Classifier/ContentNode.html +252 -0
  15. data/doc/classes/Classifier/ContentNode.src/M000031.html +21 -0
  16. data/doc/classes/Classifier/ContentNode.src/M000032.html +18 -0
  17. data/doc/classes/Classifier/ContentNode.src/M000033.html +18 -0
  18. data/doc/classes/Classifier/ContentNode.src/M000034.html +41 -0
  19. data/doc/classes/Classifier/LSI.html +449 -0
  20. data/doc/classes/Classifier/LSI.src/M000011.html +20 -0
  21. data/doc/classes/Classifier/LSI.src/M000012.html +18 -0
  22. data/doc/classes/Classifier/LSI.src/M000013.html +20 -0
  23. data/doc/classes/Classifier/LSI.src/M000014.html +18 -0
  24. data/doc/classes/Classifier/LSI.src/M000015.html +21 -0
  25. data/doc/classes/Classifier/LSI.src/M000016.html +18 -0
  26. data/doc/classes/Classifier/LSI.src/M000017.html +32 -0
  27. data/doc/classes/Classifier/LSI.src/M000018.html +26 -0
  28. data/doc/classes/Classifier/LSI.src/M000019.html +26 -0
  29. data/doc/classes/Classifier/LSI.src/M000020.html +23 -0
  30. data/doc/classes/Classifier/LSI.src/M000021.html +21 -0
  31. data/doc/classes/Classifier/LSI.src/M000022.html +31 -0
  32. data/doc/classes/Classifier/WordList.html +202 -0
  33. data/doc/classes/Classifier/WordList.src/M000007.html +18 -0
  34. data/doc/classes/Classifier/WordList.src/M000008.html +19 -0
  35. data/doc/classes/Classifier/WordList.src/M000009.html +19 -0
  36. data/doc/classes/Classifier/WordList.src/M000010.html +18 -0
  37. data/doc/classes/GSL.html +111 -0
  38. data/doc/classes/GSL/Vector.html +156 -0
  39. data/doc/classes/GSL/Vector.src/M000005.html +18 -0
  40. data/doc/classes/GSL/Vector.src/M000006.html +19 -0
  41. data/doc/classes/Object.html +139 -0
  42. data/doc/classes/Object.src/M000001.html +16 -0
  43. data/doc/classes/String.html +95 -9
  44. data/doc/classes/{Classifier/WordHash.src/M000001.html → String.src/M000002.html} +3 -3
  45. data/doc/classes/String.src/M000003.html +18 -0
  46. data/doc/classes/String.src/M000004.html +18 -0
  47. data/doc/created.rid +1 -1
  48. data/doc/files/README.html +102 -12
  49. data/doc/files/lib/classifier/bayes_rb.html +1 -1
  50. data/doc/files/lib/classifier/{string_extensions/porter_stemmer_rb.html → extensions/vector_serialize_rb.html} +4 -15
  51. data/doc/files/lib/classifier/{string_extensions → extensions}/word_hash_rb.html +2 -2
  52. data/doc/files/lib/classifier/extensions/word_list_rb.html +115 -0
  53. data/doc/files/lib/classifier/lsi/content_node_rb.html +115 -0
  54. data/doc/files/lib/classifier/lsi_rb.html +125 -0
  55. data/doc/files/lib/classifier/string_extensions_rb.html +2 -3
  56. data/doc/files/lib/classifier_rb.html +3 -1
  57. data/doc/fr_class_index.html +6 -2
  58. data/doc/fr_file_index.html +5 -2
  59. data/doc/fr_method_index.html +34 -11
  60. data/lib/classifier.rb +3 -1
  61. data/lib/classifier/bayes.rb +34 -9
  62. data/lib/classifier/extensions/vector_serialize.rb +14 -0
  63. data/lib/classifier/extensions/word_hash.rb +125 -0
  64. data/lib/classifier/extensions/word_list.rb +31 -0
  65. data/lib/classifier/lsi.rb +248 -0
  66. data/lib/classifier/lsi/content_node.rb +67 -0
  67. data/lib/classifier/string_extensions.rb +10 -5
  68. data/test/bayes/bayesian_test.rb +2 -2
  69. data/test/lsi/lsi_test.rb +88 -0
  70. data/test/string_extensions/word_hash_test.rb +7 -5
  71. metadata +79 -24
  72. data/doc/classes/Classifier/Stemmable.html +0 -243
  73. data/doc/classes/Classifier/Stemmable.src/M000003.html +0 -102
  74. data/doc/classes/Classifier/WordHash.html +0 -178
  75. data/doc/classes/Classifier/WordHash.src/M000002.html +0 -28
  76. data/lib/classifier/string_extensions/porter_stemmer.rb +0 -199
  77. data/lib/classifier/string_extensions/word_hash.rb +0 -119
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>new (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 26</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-identifier">options</span> = {})
15
+ <span class="ruby-ivar">@auto_rebuild</span> = <span class="ruby-keyword kw">true</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">options</span>[<span class="ruby-identifier">:auto_rebuild</span>] <span class="ruby-operator">==</span> <span class="ruby-keyword kw">false</span>
16
+ <span class="ruby-ivar">@word_list</span>, <span class="ruby-ivar">@items</span> = <span class="ruby-constant">WordList</span>.<span class="ruby-identifier">new</span>, {}
17
+ <span class="ruby-ivar">@version</span>, <span class="ruby-ivar">@built_at_version</span> = <span class="ruby-value">0</span>, <span class="ruby-value">-1</span>
18
+ <span class="ruby-keyword kw">end</span></pre>
19
+ </body>
20
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>needs_rebuild? (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 35</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">needs_rebuild?</span>
15
+ <span class="ruby-ivar">@version</span> <span class="ruby-operator">!=</span> <span class="ruby-ivar">@built_at_version</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>add_item (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 52</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_item</span>( <span class="ruby-identifier">item</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">item</span>] = <span class="ruby-constant">ContentNode</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">item</span>, <span class="ruby-identifier">categories</span>, <span class="ruby-identifier">block</span>)
16
+ <span class="ruby-ivar">@version</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
17
+ <span class="ruby-identifier">build_index</span> <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@auto_rebuild</span>
18
+ <span class="ruby-keyword kw">end</span></pre>
19
+ </body>
20
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>&lt;&lt; (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 62</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-operator">&lt;&lt;</span>( <span class="ruby-identifier">item</span> )
15
+ <span class="ruby-identifier">add_item</span> <span class="ruby-identifier">item</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>remove_item (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 68</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">remove_item</span>( <span class="ruby-identifier">item</span> )
15
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">contain?</span> <span class="ruby-identifier">item</span>
16
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">remove</span> <span class="ruby-identifier">item</span>
17
+ <span class="ruby-ivar">@version</span> <span class="ruby-operator">+=</span> <span class="ruby-value">1</span>
18
+ <span class="ruby-keyword kw">end</span>
19
+ <span class="ruby-keyword kw">end</span></pre>
20
+ </body>
21
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>items (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 76</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">items</span>
15
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,32 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>build_index (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 94</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">build_index</span>( <span class="ruby-identifier">cutoff</span>=<span class="ruby-value">0</span><span class="ruby-value">.75</span> )
15
+ <span class="ruby-keyword kw">return</span> <span class="ruby-keyword kw">unless</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+ <span class="ruby-identifier">make_word_list</span>
17
+
18
+ <span class="ruby-identifier">doc_list</span> = <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">values</span>
19
+ <span class="ruby-identifier">tda</span> = <span class="ruby-identifier">doc_list</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">node</span><span class="ruby-operator">|</span> <span class="ruby-identifier">node</span>.<span class="ruby-identifier">raw_vector_with</span>( <span class="ruby-ivar">@word_list</span> ) }
20
+ <span class="ruby-identifier">tdm</span> = <span class="ruby-constant">GSL</span><span class="ruby-operator">::</span><span class="ruby-constant">Matrix</span>.<span class="ruby-identifier">new</span>( <span class="ruby-operator">*</span><span class="ruby-identifier">tda</span> ).<span class="ruby-identifier">trans</span>
21
+ <span class="ruby-identifier">ntdm</span> = <span class="ruby-identifier">build_reduced_matrix</span>(<span class="ruby-identifier">tdm</span>, <span class="ruby-identifier">cutoff</span>)
22
+
23
+ <span class="ruby-identifier">ntdm</span>.<span class="ruby-identifier">size</span>[<span class="ruby-value">1</span>].<span class="ruby-identifier">times</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">col</span><span class="ruby-operator">|</span>
24
+ <span class="ruby-identifier">vec</span> = <span class="ruby-constant">GSL</span><span class="ruby-operator">::</span><span class="ruby-constant">Vector</span>.<span class="ruby-identifier">new</span>( <span class="ruby-identifier">ntdm</span>.<span class="ruby-identifier">column</span>(<span class="ruby-identifier">col</span>) ).<span class="ruby-identifier">row</span>
25
+ <span class="ruby-identifier">doc_list</span>[<span class="ruby-identifier">col</span>].<span class="ruby-identifier">lsi_vector</span> = <span class="ruby-identifier">vec</span>
26
+ <span class="ruby-identifier">doc_list</span>[<span class="ruby-identifier">col</span>].<span class="ruby-identifier">lsi_norm</span> = <span class="ruby-identifier">vec</span>.<span class="ruby-identifier">normalize</span>
27
+ <span class="ruby-keyword kw">end</span>
28
+
29
+ <span class="ruby-ivar">@built_at_version</span> = <span class="ruby-ivar">@version</span>
30
+ <span class="ruby-keyword kw">end</span></pre>
31
+ </body>
32
+ </html>
@@ -0,0 +1,26 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>proximity_array_for_content (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 124</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">proximity_array_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-keyword kw">return</span> [] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+
17
+ <span class="ruby-identifier">content_node</span> = <span class="ruby-identifier">node_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
18
+ <span class="ruby-identifier">result</span> =
19
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">collect</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">item</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">val</span> = <span class="ruby-identifier">content_node</span>.<span class="ruby-identifier">search_vector</span> <span class="ruby-operator">*</span> <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">item</span>].<span class="ruby-identifier">search_vector</span>.<span class="ruby-identifier">col</span>
21
+ [<span class="ruby-identifier">item</span>, <span class="ruby-identifier">val</span>]
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-identifier">result</span>.<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">reverse</span>
24
+ <span class="ruby-keyword kw">end</span></pre>
25
+ </body>
26
+ </html>
@@ -0,0 +1,26 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>proximity_norms_for_content (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 141</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">proximity_norms_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-keyword kw">return</span> [] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+
17
+ <span class="ruby-identifier">content_node</span> = <span class="ruby-identifier">node_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
18
+ <span class="ruby-identifier">result</span> =
19
+ <span class="ruby-ivar">@items</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">collect</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">item</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">val</span> = <span class="ruby-identifier">content_node</span>.<span class="ruby-identifier">search_norm</span> <span class="ruby-operator">*</span> <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">item</span>].<span class="ruby-identifier">search_norm</span>.<span class="ruby-identifier">col</span>
21
+ [<span class="ruby-identifier">item</span>, <span class="ruby-identifier">val</span>]
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-identifier">result</span>.<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">1</span>] }.<span class="ruby-identifier">reverse</span>
24
+ <span class="ruby-keyword kw">end</span></pre>
25
+ </body>
26
+ </html>
@@ -0,0 +1,23 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>search (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 160</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">search</span>( <span class="ruby-identifier">string</span>, <span class="ruby-identifier">max_nearest</span>=<span class="ruby-value">3</span> )
15
+ <span class="ruby-keyword kw">return</span> [] <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">needs_rebuild?</span>
16
+
17
+ <span class="ruby-identifier">carry</span> =
18
+ <span class="ruby-identifier">proximity_norms_for_content</span>( <span class="ruby-identifier">string</span> )
19
+ <span class="ruby-identifier">result</span> = <span class="ruby-identifier">carry</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">0</span>] }
20
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">result</span>[<span class="ruby-value">0</span><span class="ruby-operator">..</span><span class="ruby-identifier">max_nearest</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>]
21
+ <span class="ruby-keyword kw">end</span></pre>
22
+ </body>
23
+ </html>
@@ -0,0 +1,21 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>find_related (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 178</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">find_related</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-identifier">max_nearest</span>=<span class="ruby-value">3</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-identifier">carry</span> =
16
+ <span class="ruby-identifier">proximity_array_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> ).<span class="ruby-identifier">reject</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">pair</span><span class="ruby-operator">|</span> <span class="ruby-identifier">pair</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-identifier">doc</span> }
17
+ <span class="ruby-identifier">result</span> = <span class="ruby-identifier">carry</span>.<span class="ruby-identifier">collect</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">x</span>[<span class="ruby-value">0</span>] }
18
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">result</span>[<span class="ruby-value">0</span><span class="ruby-operator">..</span><span class="ruby-identifier">max_nearest</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>]
19
+ <span class="ruby-keyword kw">end</span></pre>
20
+ </body>
21
+ </html>
@@ -0,0 +1,31 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>classify (Classifier::LSI)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/lsi.rb, line 194</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-identifier">cutoff</span>=<span class="ruby-value">0</span><span class="ruby-value">.30</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
15
+ <span class="ruby-identifier">icutoff</span> = (<span class="ruby-ivar">@items</span>.<span class="ruby-identifier">size</span> <span class="ruby-operator">*</span> <span class="ruby-identifier">cutoff</span>).<span class="ruby-identifier">round</span>
16
+ <span class="ruby-identifier">carry</span> = <span class="ruby-identifier">proximity_array_for_content</span>( <span class="ruby-identifier">doc</span>, <span class="ruby-operator">&amp;</span><span class="ruby-identifier">block</span> )
17
+ <span class="ruby-identifier">carry</span> = <span class="ruby-identifier">carry</span>[<span class="ruby-value">0</span><span class="ruby-operator">..</span><span class="ruby-identifier">icutoff</span><span class="ruby-operator">-</span><span class="ruby-value">1</span>]
18
+ <span class="ruby-identifier">votes</span> = {}
19
+ <span class="ruby-identifier">carry</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">pair</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">categories</span> = <span class="ruby-ivar">@items</span>[<span class="ruby-identifier">pair</span>[<span class="ruby-value">0</span>]].<span class="ruby-identifier">categories</span>
21
+ <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span>
22
+ <span class="ruby-identifier">votes</span>[<span class="ruby-identifier">category</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span><span class="ruby-value">.0</span>
23
+ <span class="ruby-identifier">votes</span>[<span class="ruby-identifier">category</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">pair</span>[<span class="ruby-value">1</span>]
24
+ <span class="ruby-keyword kw">end</span>
25
+ <span class="ruby-keyword kw">end</span>
26
+
27
+ <span class="ruby-identifier">ranking</span> = <span class="ruby-identifier">votes</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">x</span><span class="ruby-operator">|</span> <span class="ruby-identifier">votes</span>[<span class="ruby-identifier">x</span>] }
28
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">ranking</span>[<span class="ruby-value">-1</span>]
29
+ <span class="ruby-keyword kw">end</span></pre>
30
+ </body>
31
+ </html>
@@ -0,0 +1,202 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Class: Classifier::WordList</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Class</strong></td>
53
+ <td class="class-name-in-header">Classifier::WordList</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/extensions/word_list_rb.html">
59
+ lib/classifier/extensions/word_list.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ <tr class="top-aligned-row">
66
+ <td><strong>Parent:</strong></td>
67
+ <td>
68
+ <a href="../Object.html">
69
+ Object
70
+ </a>
71
+ </td>
72
+ </tr>
73
+ </table>
74
+ </div>
75
+ <!-- banner header -->
76
+
77
+ <div id="bodyContent">
78
+
79
+
80
+
81
+ <div id="contextContent">
82
+
83
+ <div id="description">
84
+ <p>
85
+ This class keeps a word =&gt; index mapping. It is used to map stemmed
86
+ words to dimensions of a vector.
87
+ </p>
88
+
89
+ </div>
90
+
91
+
92
+ </div>
93
+
94
+ <div id="method-list">
95
+ <h3 class="section-bar">Methods</h3>
96
+
97
+ <div class="name-list">
98
+ <a href="#M000009">[]</a>&nbsp;&nbsp;
99
+ <a href="#M000008">add_word</a>&nbsp;&nbsp;
100
+ <a href="#M000007">new</a>&nbsp;&nbsp;
101
+ <a href="#M000010">size</a>&nbsp;&nbsp;
102
+ </div>
103
+ </div>
104
+
105
+ </div>
106
+
107
+
108
+ <!-- if includes -->
109
+
110
+ <div id="section">
111
+
112
+
113
+
114
+
115
+
116
+
117
+
118
+
119
+ <!-- if method_list -->
120
+ <div id="methods">
121
+ <h3 class="section-bar">Public Class methods</h3>
122
+
123
+ <div id="method-M000007" class="method-detail">
124
+ <a name="M000007"></a>
125
+
126
+ <div class="method-heading">
127
+ <a href="WordList.src/M000007.html" target="Code" class="method-signature"
128
+ onclick="popupCode('WordList.src/M000007.html');return false;">
129
+ <span class="method-name">new</span><span class="method-args">()</span>
130
+ </a>
131
+ </div>
132
+
133
+ <div class="method-description">
134
+ </div>
135
+ </div>
136
+
137
+ <h3 class="section-bar">Public Instance methods</h3>
138
+
139
+ <div id="method-M000009" class="method-detail">
140
+ <a name="M000009"></a>
141
+
142
+ <div class="method-heading">
143
+ <a href="WordList.src/M000009.html" target="Code" class="method-signature"
144
+ onclick="popupCode('WordList.src/M000009.html');return false;">
145
+ <span class="method-name">[]</span><span class="method-args">(lookup)</span>
146
+ </a>
147
+ </div>
148
+
149
+ <div class="method-description">
150
+ <p>
151
+ Returns the dimension of the word or nil if the word is not in the space.
152
+ </p>
153
+ </div>
154
+ </div>
155
+
156
+ <div id="method-M000008" class="method-detail">
157
+ <a name="M000008"></a>
158
+
159
+ <div class="method-heading">
160
+ <a href="WordList.src/M000008.html" target="Code" class="method-signature"
161
+ onclick="popupCode('WordList.src/M000008.html');return false;">
162
+ <span class="method-name">add_word</span><span class="method-args">(word)</span>
163
+ </a>
164
+ </div>
165
+
166
+ <div class="method-description">
167
+ <p>
168
+ Adds a word (if it is new) and assigns it a unique dimension.
169
+ </p>
170
+ </div>
171
+ </div>
172
+
173
+ <div id="method-M000010" class="method-detail">
174
+ <a name="M000010"></a>
175
+
176
+ <div class="method-heading">
177
+ <a href="WordList.src/M000010.html" target="Code" class="method-signature"
178
+ onclick="popupCode('WordList.src/M000010.html');return false;">
179
+ <span class="method-name">size</span><span class="method-args">()</span>
180
+ </a>
181
+ </div>
182
+
183
+ <div class="method-description">
184
+ <p>
185
+ Returns the number of words mapped.
186
+ </p>
187
+ </div>
188
+ </div>
189
+
190
+
191
+ </div>
192
+
193
+
194
+ </div>
195
+
196
+
197
+ <div id="validator-badges">
198
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
199
+ </div>
200
+
201
+ </body>
202
+ </html>