classifier 1.1 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34) hide show
  1. data/README +2 -2
  2. data/Rakefile +2 -4
  3. data/doc/classes/Classifier.html +135 -0
  4. data/doc/classes/Classifier/Bayes.html +287 -0
  5. data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
  6. data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
  7. data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
  8. data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
  9. data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
  10. data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
  11. data/doc/classes/Classifier/Stemmable.html +243 -0
  12. data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
  13. data/doc/classes/Classifier/WordHash.html +178 -0
  14. data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
  15. data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
  16. data/doc/classes/String.html +119 -0
  17. data/doc/created.rid +1 -0
  18. data/doc/files/README.html +156 -0
  19. data/doc/files/lib/classifier/bayes_rb.html +115 -0
  20. data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
  21. data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
  22. data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
  23. data/doc/files/lib/classifier_rb.html +123 -0
  24. data/doc/fr_class_index.html +31 -0
  25. data/doc/fr_file_index.html +32 -0
  26. data/doc/fr_method_index.html +37 -0
  27. data/doc/index.html +24 -0
  28. data/doc/rdoc-style.css +208 -0
  29. data/lib/classifier/bayes.rb +63 -12
  30. data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
  31. data/lib/classifier/string_extensions/word_hash.rb +96 -3
  32. data/test/bayes/bayesian_test.rb +13 -0
  33. data/test/string_extensions/word_hash_test.rb +7 -3
  34. metadata +36 -1
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>new (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 11</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>)
15
+ <span class="ruby-ivar">@categories</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
+ <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
17
+ <span class="ruby-ivar">@total_words</span> = <span class="ruby-value">0</span>
18
+ <span class="ruby-keyword kw">end</span></pre>
19
+ </body>
20
+ </html>
@@ -0,0 +1,23 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>train (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 24</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
16
+ <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
17
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
18
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
19
+ <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
20
+ <span class="ruby-keyword kw">end</span>
21
+ <span class="ruby-keyword kw">end</span></pre>
22
+ </body>
23
+ </html>
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>classifications (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 38</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
15
+ <span class="ruby-identifier">score</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
+ <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span>, <span class="ruby-identifier">category_words</span><span class="ruby-operator">|</span>
17
+ <span class="ruby-identifier">score</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>] = <span class="ruby-value">0</span>
18
+ <span class="ruby-identifier">total</span> = <span class="ruby-identifier">category_words</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span>(<span class="ruby-value">0</span>) {<span class="ruby-operator">|</span><span class="ruby-identifier">sum</span>, <span class="ruby-identifier">element</span><span class="ruby-operator">|</span> <span class="ruby-identifier">sum</span><span class="ruby-operator">+</span><span class="ruby-identifier">element</span>}
19
+ <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">s</span> = <span class="ruby-identifier">category_words</span>.<span class="ruby-identifier">has_key?</span>(<span class="ruby-identifier">word</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">category_words</span>[<span class="ruby-identifier">word</span>] <span class="ruby-operator">:</span> <span class="ruby-value">0</span><span class="ruby-value">.1</span>
21
+ <span class="ruby-identifier">score</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">s</span><span class="ruby-operator">/</span><span class="ruby-identifier">total</span>.<span class="ruby-identifier">to_f</span>)
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-keyword kw">end</span>
24
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">score</span>
25
+ <span class="ruby-keyword kw">end</span></pre>
26
+ </body>
27
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>classify (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 56</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
15
+ (<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">a</span>[<span class="ruby-value">1</span>] })[<span class="ruby-value">0</span>][<span class="ruby-value">0</span>]
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,25 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>method_missing (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 67</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">method_missing</span>(<span class="ruby-identifier">name</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">args</span>)
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/train_([\w]+)/</span>, <span class="ruby-value str">'\1'</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
16
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">category</span>
17
+ <span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">train</span> <span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>}
18
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/train_([\w]+)/</span>
19
+ <span class="ruby-identifier">raise</span> <span class="ruby-constant">StandardError</span>, <span class="ruby-node">&quot;No such category: #{category}&quot;</span>
20
+ <span class="ruby-keyword kw">else</span>
21
+ <span class="ruby-keyword kw">super</span> <span class="ruby-comment cmt">#raise StandardError, &quot;No such method: #{name}&quot;</span>
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-keyword kw">end</span></pre>
24
+ </body>
25
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>add_category (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 96</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_category</span>(<span class="ruby-identifier">category</span>)
15
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,243 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Module: Classifier::Stemmable</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Module</strong></td>
53
+ <td class="class-name-in-header">Classifier::Stemmable</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
59
+ lib/classifier/string_extensions/porter_stemmer.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ </table>
66
+ </div>
67
+ <!-- banner header -->
68
+
69
+ <div id="bodyContent">
70
+
71
+
72
+
73
+ <div id="contextContent">
74
+
75
+ <div id="description">
76
+ <p>
77
+ Porter stemmer in Ruby.
78
+ </p>
79
+ <p>
80
+ This is the Porter stemming algorithm, ported to Ruby from the version
81
+ coded up in Perl. It&#8217;s easy to follow against the rules in the
82
+ original paper in:
83
+ </p>
84
+ <pre>
85
+ Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
86
+ no. 3, pp 130-137,
87
+ </pre>
88
+ <p>
89
+ See also <a
90
+ href="http://www.tartarus.org/~martin/PorterStemmer">www.tartarus.org/~martin/PorterStemmer</a>
91
+ </p>
92
+ <p>
93
+ Send comments to raypereda@hotmail.com
94
+ </p>
95
+
96
+ </div>
97
+
98
+
99
+ </div>
100
+
101
+ <div id="method-list">
102
+ <h3 class="section-bar">Methods</h3>
103
+
104
+ <div class="name-list">
105
+ <a href="#M000004">stem</a>&nbsp;&nbsp;
106
+ <a href="#M000003">stem_porter</a>&nbsp;&nbsp;
107
+ </div>
108
+ </div>
109
+
110
+ </div>
111
+
112
+
113
+ <!-- if includes -->
114
+
115
+ <div id="section">
116
+
117
+
118
+ <div id="constants-list">
119
+ <h3 class="section-bar">Constants</h3>
120
+
121
+ <div class="name-list">
122
+ <table summary="Constants">
123
+ <tr class="top-aligned-row context-row">
124
+ <td class="context-item-name">STEP_2_LIST</td>
125
+ <td>=</td>
126
+ <td class="context-item-value">{
127
  'ational'=&gt;'ate', 'tional'=&gt;'tion', 'enci'=&gt;'ence', 'anci'=&gt;'ance',
1
128
  'izer'=&gt;'ize', 'bli'=&gt;'ble',
2
129
  'alli'=&gt;'al', 'entli'=&gt;'ent', 'eli'=&gt;'e', 'ousli'=&gt;'ous',
3
130
  'ization'=&gt;'ize', 'ation'=&gt;'ate',
4
131
  'ator'=&gt;'ate', 'alism'=&gt;'al', 'iveness'=&gt;'ive', 'fulness'=&gt;'ful',
5
132
  'ousness'=&gt;'ous', 'aliti'=&gt;'al',
6
133
  'iviti'=&gt;'ive', 'biliti'=&gt;'ble', 'logi'=&gt;'log'</td>
134
+ </tr>
135
+ <tr class="top-aligned-row context-row">
136
+ <td class="context-item-name">STEP_3_LIST</td>
137
+ <td>=</td>
138
+ <td class="context-item-value">{
7
139
  'icate'=&gt;'ic', 'ative'=&gt;'', 'alize'=&gt;'al', 'iciti'=&gt;'ic',
8
140
  'ical'=&gt;'ic', 'ful'=&gt;'', 'ness'=&gt;''</td>
141
+ </tr>
142
+ <tr class="top-aligned-row context-row">
143
+ <td class="context-item-name">SUFFIX_1_REGEXP</td>
144
+ <td>=</td>
145
+ <td class="context-item-value">/(
9
146
  ational |
10
147
  tional |
11
148
  enci |
12
149
  anci |
13
150
  izer |
14
151
  bli |
15
152
  alli |
16
153
  entli |
17
154
  eli |
18
155
  ousli |
19
156
  ization |
20
157
  ation |
21
158
  ator |
22
159
  alism |
23
160
  iveness |
24
161
  fulness |
25
162
  ousness |
26
163
  aliti |
27
164
  iviti |
28
165
  biliti |
29
166
  logi)$/x</td>
167
+ </tr>
168
+ <tr class="top-aligned-row context-row">
169
+ <td class="context-item-name">SUFFIX_2_REGEXP</td>
170
+ <td>=</td>
171
+ <td class="context-item-value">/(
30
172
  al |
31
173
  ance |
32
174
  ence |
33
175
  er |
34
176
  ic |
35
177
  able |
36
178
  ible |
37
179
  ant |
38
180
  ement |
39
181
  ment |
40
182
  ent |
41
183
  ou |
42
184
  ism |
43
185
  ate |
44
186
  iti |
45
187
  ous |
46
188
  ive |
47
189
  ize)$/x</td>
190
+ </tr>
191
+ <tr class="top-aligned-row context-row">
192
+ <td class="context-item-name">C</td>
193
+ <td>=</td>
194
+ <td class="context-item-value">&quot;[^aeiou]&quot;</td>
195
+ </tr>
196
+ <tr class="top-aligned-row context-row">
197
+ <td class="context-item-name">V</td>
198
+ <td>=</td>
199
+ <td class="context-item-value">&quot;[aeiouy]&quot;</td>
200
+ </tr>
201
+ <tr class="top-aligned-row context-row">
202
+ <td class="context-item-name">CC</td>
203
+ <td>=</td>
204
+ <td class="context-item-value">&quot;#{C}(?&gt;[^aeiouy]*)&quot;</td>
205
+ </tr>
206
+ <tr class="top-aligned-row context-row">
207
+ <td class="context-item-name">VV</td>
208
+ <td>=</td>
209
+ <td class="context-item-value">&quot;#{V}(?&gt;[aeiou]*)&quot;</td>
210
+ </tr>
211
+ <tr class="top-aligned-row context-row">
212
+ <td class="context-item-name">MGR0</td>
213
+ <td>=</td>
214
+ <td class="context-item-value">/^(#{CC})?#{VV}#{CC}/o</td>
215
+ </tr>
216
+ <tr class="top-aligned-row context-row">
217
+ <td class="context-item-name">MEQ1</td>
218
+ <td>=</td>
219
+ <td class="context-item-value">/^(#{CC})?#{VV}#{CC}(#{VV})?$/o</td>
220
+ </tr>
221
+ <tr class="top-aligned-row context-row">
222
+ <td class="context-item-name">MGR1</td>
223
+ <td>=</td>
224
+ <td class="context-item-value">/^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o</td>
225
+ </tr>
226
+ <tr class="top-aligned-row context-row">
227
+ <td class="context-item-name">VOWEL_IN_STEM</td>
228
+ <td>=</td>
229
+ <td class="context-item-value">/^(#{CC})?#{V}/o</td>
230
+ </tr>
231
+ </table>
232
+ </div>
233
+ </div>
234
+
235
+
236
+
237
+
238
+
239
+
240
+ <!-- if method_list -->
241
+ <div id="methods">
242
+ <h3 class="section-bar">Public Instance methods</h3>
243
+
244
+ <div id="method-M000004" class="method-detail">
245
+ <a name="M000004"></a>
246
+
247
+ <div class="method-heading">
248
+ <span class="method-name">stem</span><span class="method-args">()</span>
249
+ </div>
250
+
251
+ <div class="method-description">
252
+ <p>
253
+ Alias for <a href="Stemmable.html#M000003">stem_porter</a>
254
+ </p>
255
+ </div>
256
+ </div>
257
+
258
+ <div id="method-M000003" class="method-detail">
259
+ <a name="M000003"></a>
260
+
261
+ <div class="method-heading">
262
+ <a href="Stemmable.src/M000003.html" target="Code" class="method-signature"
263
+ onclick="popupCode('Stemmable.src/M000003.html');return false;">
264
+ <span class="method-name">stem_porter</span><span class="method-args">()</span>
265
+ </a>
266
+ </div>
267
+
268
+ <div class="method-description">
269
+ <p>
270
+ Stems the word contained in the current object. E.g.,
271
+ </p>
272
+ <pre>
273
+ &quot;actually&quot;.stem_porter
274
+ =&gt; &quot;actual&quot;
275
+ </pre>
276
+ </div>
277
+ </div>
278
+
279
+
280
+ </div>
281
+
282
+
283
+ </div>
284
+
285
+
286
+ <div id="validator-badges">
287
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
288
+ </div>
289
+
290
+ </body>
291
+ </html>
@@ -0,0 +1,102 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>stem_porter (Classifier::Stemmable)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/string_extensions/porter_stemmer.rb, line 102</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">stem_porter</span>
15
+
16
+ <span class="ruby-comment cmt"># make a copy of the given object and convert it to a string.</span>
17
+ <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>.<span class="ruby-identifier">to_str</span>
18
+
19
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">w</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;</span> <span class="ruby-value">3</span>
20
+
21
+ <span class="ruby-comment cmt"># now map initial y to Y so that the patterns never treat it as vowel</span>
22
+ <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] = <span class="ruby-value str">'Y'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-value">?y</span>
23
+
24
+ <span class="ruby-comment cmt"># Step 1a</span>
25
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ss|i)es$/</span>
26
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
27
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/([^s])s$/</span>
28
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
29
+ <span class="ruby-keyword kw">end</span>
30
+
31
+ <span class="ruby-comment cmt"># Step 1b</span>
32
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/eed$/</span>
33
+ <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$`</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
34
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ed|ing)$/</span>
35
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
36
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">VOWEL_IN_STEM</span>
37
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
38
+ <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">w</span>
39
+ <span class="ruby-keyword kw">when</span> <span class="ruby-regexp re">/(at|bl|iz)$/</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-value str">&quot;e&quot;</span>
40
+ <span class="ruby-keyword kw">when</span> <span class="ruby-regexp re">/([^aeiouylsz])\1$/</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
41
+ <span class="ruby-keyword kw">when</span> <span class="ruby-node">/^#{CC}#{V}[^aeiouwxy]$/o</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-value str">&quot;e&quot;</span>
42
+ <span class="ruby-keyword kw">end</span>
43
+ <span class="ruby-keyword kw">end</span>
44
+ <span class="ruby-keyword kw">end</span>
45
+
46
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/y$/</span>
47
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
48
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;i&quot;</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">VOWEL_IN_STEM</span>
49
+ <span class="ruby-keyword kw">end</span>
50
+
51
+ <span class="ruby-comment cmt"># Step 2</span>
52
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">SUFFIX_1_REGEXP</span>
53
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
54
+ <span class="ruby-identifier">suffix</span> = <span class="ruby-identifier">$1</span>
55
+ <span class="ruby-comment cmt"># print &quot;stem= &quot; + stem + &quot;\n&quot; + &quot;suffix=&quot; + suffix + &quot;\n&quot;</span>
56
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
57
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-constant">STEP_2_LIST</span>[<span class="ruby-identifier">suffix</span>]
58
+ <span class="ruby-keyword kw">end</span>
59
+ <span class="ruby-keyword kw">end</span>
60
+
61
+ <span class="ruby-comment cmt"># Step 3</span>
62
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(icate|ative|alize|iciti|ical|ful|ness)$/</span>
63
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
64
+ <span class="ruby-identifier">suffix</span> = <span class="ruby-identifier">$1</span>
65
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
66
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-constant">STEP_3_LIST</span>[<span class="ruby-identifier">suffix</span>]
67
+ <span class="ruby-keyword kw">end</span>
68
+ <span class="ruby-keyword kw">end</span>
69
+
70
+ <span class="ruby-comment cmt"># Step 4</span>
71
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">SUFFIX_2_REGEXP</span>
72
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
73
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
74
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
75
+ <span class="ruby-keyword kw">end</span>
76
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)(ion)$/</span>
77
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
78
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
79
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
80
+ <span class="ruby-keyword kw">end</span>
81
+ <span class="ruby-keyword kw">end</span>
82
+
83
+ <span class="ruby-comment cmt"># Step 5</span>
84
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
85
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
86
+ <span class="ruby-keyword kw">if</span> (<span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>) <span class="ruby-operator">||</span>
87
+ (<span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MEQ1</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">!~</span> <span class="ruby-node">/^#{CC}#{V}[^aeiouwxy]$/o</span>)
88
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
89
+ <span class="ruby-keyword kw">end</span>
90
+ <span class="ruby-keyword kw">end</span>
91
+
92
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
93
+ <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
94
+ <span class="ruby-keyword kw">end</span>
95
+
96
+ <span class="ruby-comment cmt"># and turn initial Y back to y</span>
97
+ <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] = <span class="ruby-value str">'y'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-value">?Y</span>
98
+
99
+ <span class="ruby-identifier">w</span>
100
+ <span class="ruby-keyword kw">end</span></pre>
101
+ </body>
102
+ </html>