classifier 1.1 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (34) hide show
  1. data/README +2 -2
  2. data/Rakefile +2 -4
  3. data/doc/classes/Classifier.html +135 -0
  4. data/doc/classes/Classifier/Bayes.html +287 -0
  5. data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
  6. data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
  7. data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
  8. data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
  9. data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
  10. data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
  11. data/doc/classes/Classifier/Stemmable.html +243 -0
  12. data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
  13. data/doc/classes/Classifier/WordHash.html +178 -0
  14. data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
  15. data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
  16. data/doc/classes/String.html +119 -0
  17. data/doc/created.rid +1 -0
  18. data/doc/files/README.html +156 -0
  19. data/doc/files/lib/classifier/bayes_rb.html +115 -0
  20. data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
  21. data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
  22. data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
  23. data/doc/files/lib/classifier_rb.html +123 -0
  24. data/doc/fr_class_index.html +31 -0
  25. data/doc/fr_file_index.html +32 -0
  26. data/doc/fr_method_index.html +37 -0
  27. data/doc/index.html +24 -0
  28. data/doc/rdoc-style.css +208 -0
  29. data/lib/classifier/bayes.rb +63 -12
  30. data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
  31. data/lib/classifier/string_extensions/word_hash.rb +96 -3
  32. data/test/bayes/bayesian_test.rb +13 -0
  33. data/test/string_extensions/word_hash_test.rb +7 -3
  34. metadata +36 -1
@@ -0,0 +1,20 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>new (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 11</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>)
15
+ <span class="ruby-ivar">@categories</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
+ <span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
17
+ <span class="ruby-ivar">@total_words</span> = <span class="ruby-value">0</span>
18
+ <span class="ruby-keyword kw">end</span></pre>
19
+ </body>
20
+ </html>
@@ -0,0 +1,23 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>train (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 24</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
16
+ <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
17
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
18
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
19
+ <span class="ruby-ivar">@total_words</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
20
+ <span class="ruby-keyword kw">end</span>
21
+ <span class="ruby-keyword kw">end</span></pre>
22
+ </body>
23
+ </html>
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>classifications (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 38</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
15
+ <span class="ruby-identifier">score</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
+ <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span>, <span class="ruby-identifier">category_words</span><span class="ruby-operator">|</span>
17
+ <span class="ruby-identifier">score</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>] = <span class="ruby-value">0</span>
18
+ <span class="ruby-identifier">total</span> = <span class="ruby-identifier">category_words</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span>(<span class="ruby-value">0</span>) {<span class="ruby-operator">|</span><span class="ruby-identifier">sum</span>, <span class="ruby-identifier">element</span><span class="ruby-operator">|</span> <span class="ruby-identifier">sum</span><span class="ruby-operator">+</span><span class="ruby-identifier">element</span>}
19
+ <span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
20
+ <span class="ruby-identifier">s</span> = <span class="ruby-identifier">category_words</span>.<span class="ruby-identifier">has_key?</span>(<span class="ruby-identifier">word</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">category_words</span>[<span class="ruby-identifier">word</span>] <span class="ruby-operator">:</span> <span class="ruby-value">0</span><span class="ruby-value">.1</span>
21
+ <span class="ruby-identifier">score</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">s</span><span class="ruby-operator">/</span><span class="ruby-identifier">total</span>.<span class="ruby-identifier">to_f</span>)
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-keyword kw">end</span>
24
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">score</span>
25
+ <span class="ruby-keyword kw">end</span></pre>
26
+ </body>
27
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>classify (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 56</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
15
+ (<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">a</span>[<span class="ruby-value">1</span>] })[<span class="ruby-value">0</span>][<span class="ruby-value">0</span>]
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,25 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>method_missing (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 67</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">method_missing</span>(<span class="ruby-identifier">name</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">args</span>)
15
+ <span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/train_([\w]+)/</span>, <span class="ruby-value str">'\1'</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
16
+ <span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">category</span>
17
+ <span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">train</span> <span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>}
18
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/train_([\w]+)/</span>
19
+ <span class="ruby-identifier">raise</span> <span class="ruby-constant">StandardError</span>, <span class="ruby-node">&quot;No such category: #{category}&quot;</span>
20
+ <span class="ruby-keyword kw">else</span>
21
+ <span class="ruby-keyword kw">super</span> <span class="ruby-comment cmt">#raise StandardError, &quot;No such method: #{name}&quot;</span>
22
+ <span class="ruby-keyword kw">end</span>
23
+ <span class="ruby-keyword kw">end</span></pre>
24
+ </body>
25
+ </html>
@@ -0,0 +1,18 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>add_category (Classifier::Bayes)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 96</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_category</span>(<span class="ruby-identifier">category</span>)
15
+ <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">&quot;_&quot;</span>,<span class="ruby-value str">&quot; &quot;</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
16
+ <span class="ruby-keyword kw">end</span></pre>
17
+ </body>
18
+ </html>
@@ -0,0 +1,243 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
7
+ <head>
8
+ <title>Module: Classifier::Stemmable</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <meta http-equiv="Content-Script-Type" content="text/javascript" />
11
+ <link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
12
+ <script type="text/javascript">
13
+ // <![CDATA[
14
+
15
+ function popupCode( url ) {
16
+ window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
17
+ }
18
+
19
+ function toggleCode( id ) {
20
+ if ( document.getElementById )
21
+ elem = document.getElementById( id );
22
+ else if ( document.all )
23
+ elem = eval( "document.all." + id );
24
+ else
25
+ return false;
26
+
27
+ elemStyle = elem.style;
28
+
29
+ if ( elemStyle.display != "block" ) {
30
+ elemStyle.display = "block"
31
+ } else {
32
+ elemStyle.display = "none"
33
+ }
34
+
35
+ return true;
36
+ }
37
+
38
+ // Make codeblocks hidden by default
39
+ document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
40
+
41
+ // ]]>
42
+ </script>
43
+
44
+ </head>
45
+ <body>
46
+
47
+
48
+
49
+ <div id="classHeader">
50
+ <table class="header-table">
51
+ <tr class="top-aligned-row">
52
+ <td><strong>Module</strong></td>
53
+ <td class="class-name-in-header">Classifier::Stemmable</td>
54
+ </tr>
55
+ <tr class="top-aligned-row">
56
+ <td><strong>In:</strong></td>
57
+ <td>
58
+ <a href="../../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
59
+ lib/classifier/string_extensions/porter_stemmer.rb
60
+ </a>
61
+ <br />
62
+ </td>
63
+ </tr>
64
+
65
+ </table>
66
+ </div>
67
+ <!-- banner header -->
68
+
69
+ <div id="bodyContent">
70
+
71
+
72
+
73
+ <div id="contextContent">
74
+
75
+ <div id="description">
76
+ <p>
77
+ Porter stemmer in Ruby.
78
+ </p>
79
+ <p>
80
+ This is the Porter stemming algorithm, ported to Ruby from the version
81
+ coded up in Perl. It&#8217;s easy to follow against the rules in the
82
+ original paper in:
83
+ </p>
84
+ <pre>
85
+ Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
86
+ no. 3, pp 130-137,
87
+ </pre>
88
+ <p>
89
+ See also <a
90
+ href="http://www.tartarus.org/~martin/PorterStemmer">www.tartarus.org/~martin/PorterStemmer</a>
91
+ </p>
92
+ <p>
93
+ Send comments to raypereda@hotmail.com
94
+ </p>
95
+
96
+ </div>
97
+
98
+
99
+ </div>
100
+
101
+ <div id="method-list">
102
+ <h3 class="section-bar">Methods</h3>
103
+
104
+ <div class="name-list">
105
+ <a href="#M000004">stem</a>&nbsp;&nbsp;
106
+ <a href="#M000003">stem_porter</a>&nbsp;&nbsp;
107
+ </div>
108
+ </div>
109
+
110
+ </div>
111
+
112
+
113
+ <!-- if includes -->
114
+
115
+ <div id="section">
116
+
117
+
118
+ <div id="constants-list">
119
+ <h3 class="section-bar">Constants</h3>
120
+
121
+ <div class="name-list">
122
+ <table summary="Constants">
123
+ <tr class="top-aligned-row context-row">
124
+ <td class="context-item-name">STEP_2_LIST</td>
125
+ <td>=</td>
126
+ <td class="context-item-value">{
127
  'ational'=&gt;'ate', 'tional'=&gt;'tion', 'enci'=&gt;'ence', 'anci'=&gt;'ance',
1
128
  'izer'=&gt;'ize', 'bli'=&gt;'ble',
2
129
  'alli'=&gt;'al', 'entli'=&gt;'ent', 'eli'=&gt;'e', 'ousli'=&gt;'ous',
3
130
  'ization'=&gt;'ize', 'ation'=&gt;'ate',
4
131
  'ator'=&gt;'ate', 'alism'=&gt;'al', 'iveness'=&gt;'ive', 'fulness'=&gt;'ful',
5
132
  'ousness'=&gt;'ous', 'aliti'=&gt;'al',
6
133
  'iviti'=&gt;'ive', 'biliti'=&gt;'ble', 'logi'=&gt;'log'</td>
134
+ </tr>
135
+ <tr class="top-aligned-row context-row">
136
+ <td class="context-item-name">STEP_3_LIST</td>
137
+ <td>=</td>
138
+ <td class="context-item-value">{
7
139
  'icate'=&gt;'ic', 'ative'=&gt;'', 'alize'=&gt;'al', 'iciti'=&gt;'ic',
8
140
  'ical'=&gt;'ic', 'ful'=&gt;'', 'ness'=&gt;''</td>
141
+ </tr>
142
+ <tr class="top-aligned-row context-row">
143
+ <td class="context-item-name">SUFFIX_1_REGEXP</td>
144
+ <td>=</td>
145
+ <td class="context-item-value">/(
9
146
  ational |
10
147
  tional |
11
148
  enci |
12
149
  anci |
13
150
  izer |
14
151
  bli |
15
152
  alli |
16
153
  entli |
17
154
  eli |
18
155
  ousli |
19
156
  ization |
20
157
  ation |
21
158
  ator |
22
159
  alism |
23
160
  iveness |
24
161
  fulness |
25
162
  ousness |
26
163
  aliti |
27
164
  iviti |
28
165
  biliti |
29
166
  logi)$/x</td>
167
+ </tr>
168
+ <tr class="top-aligned-row context-row">
169
+ <td class="context-item-name">SUFFIX_2_REGEXP</td>
170
+ <td>=</td>
171
+ <td class="context-item-value">/(
30
172
  al |
31
173
  ance |
32
174
  ence |
33
175
  er |
34
176
  ic |
35
177
  able |
36
178
  ible |
37
179
  ant |
38
180
  ement |
39
181
  ment |
40
182
  ent |
41
183
  ou |
42
184
  ism |
43
185
  ate |
44
186
  iti |
45
187
  ous |
46
188
  ive |
47
189
  ize)$/x</td>
190
+ </tr>
191
+ <tr class="top-aligned-row context-row">
192
+ <td class="context-item-name">C</td>
193
+ <td>=</td>
194
+ <td class="context-item-value">&quot;[^aeiou]&quot;</td>
195
+ </tr>
196
+ <tr class="top-aligned-row context-row">
197
+ <td class="context-item-name">V</td>
198
+ <td>=</td>
199
+ <td class="context-item-value">&quot;[aeiouy]&quot;</td>
200
+ </tr>
201
+ <tr class="top-aligned-row context-row">
202
+ <td class="context-item-name">CC</td>
203
+ <td>=</td>
204
+ <td class="context-item-value">&quot;#{C}(?&gt;[^aeiouy]*)&quot;</td>
205
+ </tr>
206
+ <tr class="top-aligned-row context-row">
207
+ <td class="context-item-name">VV</td>
208
+ <td>=</td>
209
+ <td class="context-item-value">&quot;#{V}(?&gt;[aeiou]*)&quot;</td>
210
+ </tr>
211
+ <tr class="top-aligned-row context-row">
212
+ <td class="context-item-name">MGR0</td>
213
+ <td>=</td>
214
+ <td class="context-item-value">/^(#{CC})?#{VV}#{CC}/o</td>
215
+ </tr>
216
+ <tr class="top-aligned-row context-row">
217
+ <td class="context-item-name">MEQ1</td>
218
+ <td>=</td>
219
+ <td class="context-item-value">/^(#{CC})?#{VV}#{CC}(#{VV})?$/o</td>
220
+ </tr>
221
+ <tr class="top-aligned-row context-row">
222
+ <td class="context-item-name">MGR1</td>
223
+ <td>=</td>
224
+ <td class="context-item-value">/^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o</td>
225
+ </tr>
226
+ <tr class="top-aligned-row context-row">
227
+ <td class="context-item-name">VOWEL_IN_STEM</td>
228
+ <td>=</td>
229
+ <td class="context-item-value">/^(#{CC})?#{V}/o</td>
230
+ </tr>
231
+ </table>
232
+ </div>
233
+ </div>
234
+
235
+
236
+
237
+
238
+
239
+
240
+ <!-- if method_list -->
241
+ <div id="methods">
242
+ <h3 class="section-bar">Public Instance methods</h3>
243
+
244
+ <div id="method-M000004" class="method-detail">
245
+ <a name="M000004"></a>
246
+
247
+ <div class="method-heading">
248
+ <span class="method-name">stem</span><span class="method-args">()</span>
249
+ </div>
250
+
251
+ <div class="method-description">
252
+ <p>
253
+ Alias for <a href="Stemmable.html#M000003">stem_porter</a>
254
+ </p>
255
+ </div>
256
+ </div>
257
+
258
+ <div id="method-M000003" class="method-detail">
259
+ <a name="M000003"></a>
260
+
261
+ <div class="method-heading">
262
+ <a href="Stemmable.src/M000003.html" target="Code" class="method-signature"
263
+ onclick="popupCode('Stemmable.src/M000003.html');return false;">
264
+ <span class="method-name">stem_porter</span><span class="method-args">()</span>
265
+ </a>
266
+ </div>
267
+
268
+ <div class="method-description">
269
+ <p>
270
+ Stems the word contained in the current object. E.g.,
271
+ </p>
272
+ <pre>
273
+ &quot;actually&quot;.stem_porter
274
+ =&gt; &quot;actual&quot;
275
+ </pre>
276
+ </div>
277
+ </div>
278
+
279
+
280
+ </div>
281
+
282
+
283
+ </div>
284
+
285
+
286
+ <div id="validator-badges">
287
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
288
+ </div>
289
+
290
+ </body>
291
+ </html>
@@ -0,0 +1,102 @@
1
+ <?xml version="1.0" encoding="iso-8859-1"?>
2
+ <!DOCTYPE html
3
+ PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
4
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
5
+
6
+ <html>
7
+ <head>
8
+ <title>stem_porter (Classifier::Stemmable)</title>
9
+ <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
10
+ <link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
11
+ </head>
12
+ <body class="standalone-code">
13
+ <pre><span class="ruby-comment cmt"># File lib/classifier/string_extensions/porter_stemmer.rb, line 102</span>
14
+ <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">stem_porter</span>
15
+
16
+ <span class="ruby-comment cmt"># make a copy of the given object and convert it to a string.</span>
17
+ <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>.<span class="ruby-identifier">to_str</span>
18
+
19
+ <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">w</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;</span> <span class="ruby-value">3</span>
20
+
21
+ <span class="ruby-comment cmt"># now map initial y to Y so that the patterns never treat it as vowel</span>
22
+ <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] = <span class="ruby-value str">'Y'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-value">?y</span>
23
+
24
+ <span class="ruby-comment cmt"># Step 1a</span>
25
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ss|i)es$/</span>
26
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
27
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/([^s])s$/</span>
28
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
29
+ <span class="ruby-keyword kw">end</span>
30
+
31
+ <span class="ruby-comment cmt"># Step 1b</span>
32
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/eed$/</span>
33
+ <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$`</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
34
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ed|ing)$/</span>
35
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
36
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">VOWEL_IN_STEM</span>
37
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
38
+ <span class="ruby-keyword kw">case</span> <span class="ruby-identifier">w</span>
39
+ <span class="ruby-keyword kw">when</span> <span class="ruby-regexp re">/(at|bl|iz)$/</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-value str">&quot;e&quot;</span>
40
+ <span class="ruby-keyword kw">when</span> <span class="ruby-regexp re">/([^aeiouylsz])\1$/</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
41
+ <span class="ruby-keyword kw">when</span> <span class="ruby-node">/^#{CC}#{V}[^aeiouwxy]$/o</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">&lt;&lt;</span> <span class="ruby-value str">&quot;e&quot;</span>
42
+ <span class="ruby-keyword kw">end</span>
43
+ <span class="ruby-keyword kw">end</span>
44
+ <span class="ruby-keyword kw">end</span>
45
+
46
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/y$/</span>
47
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
48
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;i&quot;</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">VOWEL_IN_STEM</span>
49
+ <span class="ruby-keyword kw">end</span>
50
+
51
+ <span class="ruby-comment cmt"># Step 2</span>
52
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">SUFFIX_1_REGEXP</span>
53
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
54
+ <span class="ruby-identifier">suffix</span> = <span class="ruby-identifier">$1</span>
55
+ <span class="ruby-comment cmt"># print &quot;stem= &quot; + stem + &quot;\n&quot; + &quot;suffix=&quot; + suffix + &quot;\n&quot;</span>
56
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
57
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-constant">STEP_2_LIST</span>[<span class="ruby-identifier">suffix</span>]
58
+ <span class="ruby-keyword kw">end</span>
59
+ <span class="ruby-keyword kw">end</span>
60
+
61
+ <span class="ruby-comment cmt"># Step 3</span>
62
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(icate|ative|alize|iciti|ical|ful|ness)$/</span>
63
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
64
+ <span class="ruby-identifier">suffix</span> = <span class="ruby-identifier">$1</span>
65
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
66
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-constant">STEP_3_LIST</span>[<span class="ruby-identifier">suffix</span>]
67
+ <span class="ruby-keyword kw">end</span>
68
+ <span class="ruby-keyword kw">end</span>
69
+
70
+ <span class="ruby-comment cmt"># Step 4</span>
71
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">SUFFIX_2_REGEXP</span>
72
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
73
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
74
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
75
+ <span class="ruby-keyword kw">end</span>
76
+ <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)(ion)$/</span>
77
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
78
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
79
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
80
+ <span class="ruby-keyword kw">end</span>
81
+ <span class="ruby-keyword kw">end</span>
82
+
83
+ <span class="ruby-comment cmt"># Step 5</span>
84
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
85
+ <span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
86
+ <span class="ruby-keyword kw">if</span> (<span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>) <span class="ruby-operator">||</span>
87
+ (<span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MEQ1</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">!~</span> <span class="ruby-node">/^#{CC}#{V}[^aeiouwxy]$/o</span>)
88
+ <span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
89
+ <span class="ruby-keyword kw">end</span>
90
+ <span class="ruby-keyword kw">end</span>
91
+
92
+ <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-operator">&amp;&amp;</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
93
+ <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
94
+ <span class="ruby-keyword kw">end</span>
95
+
96
+ <span class="ruby-comment cmt"># and turn initial Y back to y</span>
97
+ <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] = <span class="ruby-value str">'y'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-value">?Y</span>
98
+
99
+ <span class="ruby-identifier">w</span>
100
+ <span class="ruby-keyword kw">end</span></pre>
101
+ </body>
102
+ </html>