classifier 1.1 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README +2 -2
- data/Rakefile +2 -4
- data/doc/classes/Classifier.html +135 -0
- data/doc/classes/Classifier/Bayes.html +287 -0
- data/doc/classes/Classifier/Bayes.src/M000005.html +20 -0
- data/doc/classes/Classifier/Bayes.src/M000006.html +23 -0
- data/doc/classes/Classifier/Bayes.src/M000007.html +27 -0
- data/doc/classes/Classifier/Bayes.src/M000008.html +18 -0
- data/doc/classes/Classifier/Bayes.src/M000009.html +25 -0
- data/doc/classes/Classifier/Bayes.src/M000010.html +18 -0
- data/doc/classes/Classifier/Stemmable.html +243 -0
- data/doc/classes/Classifier/Stemmable.src/M000003.html +102 -0
- data/doc/classes/Classifier/WordHash.html +178 -0
- data/doc/classes/Classifier/WordHash.src/M000001.html +18 -0
- data/doc/classes/Classifier/WordHash.src/M000002.html +28 -0
- data/doc/classes/String.html +119 -0
- data/doc/created.rid +1 -0
- data/doc/files/README.html +156 -0
- data/doc/files/lib/classifier/bayes_rb.html +115 -0
- data/doc/files/lib/classifier/string_extensions/porter_stemmer_rb.html +112 -0
- data/doc/files/lib/classifier/string_extensions/word_hash_rb.html +115 -0
- data/doc/files/lib/classifier/string_extensions_rb.html +123 -0
- data/doc/files/lib/classifier_rb.html +123 -0
- data/doc/fr_class_index.html +31 -0
- data/doc/fr_file_index.html +32 -0
- data/doc/fr_method_index.html +37 -0
- data/doc/index.html +24 -0
- data/doc/rdoc-style.css +208 -0
- data/lib/classifier/bayes.rb +63 -12
- data/lib/classifier/string_extensions/porter_stemmer.rb +18 -15
- data/lib/classifier/string_extensions/word_hash.rb +96 -3
- data/test/bayes/bayesian_test.rb +13 -0
- data/test/string_extensions/word_hash_test.rb +7 -3
- metadata +36 -1
@@ -0,0 +1,20 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>new (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 11</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">initialize</span>(<span class="ruby-operator">*</span><span class="ruby-identifier">categories</span>)
|
15
|
+
<span class="ruby-ivar">@categories</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
|
+
<span class="ruby-identifier">categories</span>.<span class="ruby-identifier">each</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">category</span><span class="ruby-operator">|</span> <span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">"_"</span>,<span class="ruby-value str">" "</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span> }
|
17
|
+
<span class="ruby-ivar">@total_words</span> = <span class="ruby-value">0</span>
|
18
|
+
<span class="ruby-keyword kw">end</span></pre>
|
19
|
+
</body>
|
20
|
+
</html>
|
@@ -0,0 +1,23 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>train (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 24</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">train</span>(<span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>)
|
15
|
+
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">"_"</span>,<span class="ruby-value str">" "</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
|
16
|
+
<span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
17
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">||=</span> <span class="ruby-value">0</span>
|
18
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>][<span class="ruby-identifier">word</span>] <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
19
|
+
<span class="ruby-ivar">@total_words</span> <span class="ruby-operator">+=</span> <span class="ruby-identifier">count</span>
|
20
|
+
<span class="ruby-keyword kw">end</span>
|
21
|
+
<span class="ruby-keyword kw">end</span></pre>
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -0,0 +1,27 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>classifications (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 38</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>)
|
15
|
+
<span class="ruby-identifier">score</span> = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
|
+
<span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">category</span>, <span class="ruby-identifier">category_words</span><span class="ruby-operator">|</span>
|
17
|
+
<span class="ruby-identifier">score</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>] = <span class="ruby-value">0</span>
|
18
|
+
<span class="ruby-identifier">total</span> = <span class="ruby-identifier">category_words</span>.<span class="ruby-identifier">values</span>.<span class="ruby-identifier">inject</span>(<span class="ruby-value">0</span>) {<span class="ruby-operator">|</span><span class="ruby-identifier">sum</span>, <span class="ruby-identifier">element</span><span class="ruby-operator">|</span> <span class="ruby-identifier">sum</span><span class="ruby-operator">+</span><span class="ruby-identifier">element</span>}
|
19
|
+
<span class="ruby-identifier">text</span>.<span class="ruby-identifier">word_hash</span>.<span class="ruby-identifier">each</span> <span class="ruby-keyword kw">do</span> <span class="ruby-operator">|</span><span class="ruby-identifier">word</span>, <span class="ruby-identifier">count</span><span class="ruby-operator">|</span>
|
20
|
+
<span class="ruby-identifier">s</span> = <span class="ruby-identifier">category_words</span>.<span class="ruby-identifier">has_key?</span>(<span class="ruby-identifier">word</span>) <span class="ruby-operator">?</span> <span class="ruby-identifier">category_words</span>[<span class="ruby-identifier">word</span>] <span class="ruby-operator">:</span> <span class="ruby-value">0</span><span class="ruby-value">.1</span>
|
21
|
+
<span class="ruby-identifier">score</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>] <span class="ruby-operator">+=</span> <span class="ruby-constant">Math</span>.<span class="ruby-identifier">log</span>(<span class="ruby-identifier">s</span><span class="ruby-operator">/</span><span class="ruby-identifier">total</span>.<span class="ruby-identifier">to_f</span>)
|
22
|
+
<span class="ruby-keyword kw">end</span>
|
23
|
+
<span class="ruby-keyword kw">end</span>
|
24
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-identifier">score</span>
|
25
|
+
<span class="ruby-keyword kw">end</span></pre>
|
26
|
+
</body>
|
27
|
+
</html>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>classify (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 56</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">classify</span>(<span class="ruby-identifier">text</span>)
|
15
|
+
(<span class="ruby-identifier">classifications</span>(<span class="ruby-identifier">text</span>).<span class="ruby-identifier">sort_by</span> { <span class="ruby-operator">|</span><span class="ruby-identifier">a</span><span class="ruby-operator">|</span> <span class="ruby-operator">-</span><span class="ruby-identifier">a</span>[<span class="ruby-value">1</span>] })[<span class="ruby-value">0</span>][<span class="ruby-value">0</span>]
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,25 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>method_missing (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 67</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">method_missing</span>(<span class="ruby-identifier">name</span>, <span class="ruby-operator">*</span><span class="ruby-identifier">args</span>)
|
15
|
+
<span class="ruby-identifier">category</span> = <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/train_([\w]+)/</span>, <span class="ruby-value str">'\1'</span>).<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">"_"</span>,<span class="ruby-value str">" "</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>
|
16
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-ivar">@categories</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">category</span>
|
17
|
+
<span class="ruby-identifier">args</span>.<span class="ruby-identifier">each</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">text</span><span class="ruby-operator">|</span> <span class="ruby-identifier">train</span> <span class="ruby-identifier">category</span>, <span class="ruby-identifier">text</span>}
|
18
|
+
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">name</span>.<span class="ruby-identifier">to_s</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/train_([\w]+)/</span>
|
19
|
+
<span class="ruby-identifier">raise</span> <span class="ruby-constant">StandardError</span>, <span class="ruby-node">"No such category: #{category}"</span>
|
20
|
+
<span class="ruby-keyword kw">else</span>
|
21
|
+
<span class="ruby-keyword kw">super</span> <span class="ruby-comment cmt">#raise StandardError, "No such method: #{name}"</span>
|
22
|
+
<span class="ruby-keyword kw">end</span>
|
23
|
+
<span class="ruby-keyword kw">end</span></pre>
|
24
|
+
</body>
|
25
|
+
</html>
|
@@ -0,0 +1,18 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>add_category (Classifier::Bayes)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/bayes.rb, line 96</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">add_category</span>(<span class="ruby-identifier">category</span>)
|
15
|
+
<span class="ruby-ivar">@categories</span>[<span class="ruby-identifier">category</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-value str">"_"</span>,<span class="ruby-value str">" "</span>).<span class="ruby-identifier">capitalize</span>.<span class="ruby-identifier">intern</span>] = <span class="ruby-constant">Hash</span>.<span class="ruby-identifier">new</span>
|
16
|
+
<span class="ruby-keyword kw">end</span></pre>
|
17
|
+
</body>
|
18
|
+
</html>
|
@@ -0,0 +1,243 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
|
7
|
+
<head>
|
8
|
+
<title>Module: Classifier::Stemmable</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<meta http-equiv="Content-Script-Type" content="text/javascript" />
|
11
|
+
<link rel="stylesheet" href="../.././rdoc-style.css" type="text/css" media="screen" />
|
12
|
+
<script type="text/javascript">
|
13
|
+
// <![CDATA[
|
14
|
+
|
15
|
+
function popupCode( url ) {
|
16
|
+
window.open(url, "Code", "resizable=yes,scrollbars=yes,toolbar=no,status=no,height=150,width=400")
|
17
|
+
}
|
18
|
+
|
19
|
+
function toggleCode( id ) {
|
20
|
+
if ( document.getElementById )
|
21
|
+
elem = document.getElementById( id );
|
22
|
+
else if ( document.all )
|
23
|
+
elem = eval( "document.all." + id );
|
24
|
+
else
|
25
|
+
return false;
|
26
|
+
|
27
|
+
elemStyle = elem.style;
|
28
|
+
|
29
|
+
if ( elemStyle.display != "block" ) {
|
30
|
+
elemStyle.display = "block"
|
31
|
+
} else {
|
32
|
+
elemStyle.display = "none"
|
33
|
+
}
|
34
|
+
|
35
|
+
return true;
|
36
|
+
}
|
37
|
+
|
38
|
+
// Make codeblocks hidden by default
|
39
|
+
document.writeln( "<style type=\"text/css\">div.method-source-code { display: none }</style>" )
|
40
|
+
|
41
|
+
// ]]>
|
42
|
+
</script>
|
43
|
+
|
44
|
+
</head>
|
45
|
+
<body>
|
46
|
+
|
47
|
+
|
48
|
+
|
49
|
+
<div id="classHeader">
|
50
|
+
<table class="header-table">
|
51
|
+
<tr class="top-aligned-row">
|
52
|
+
<td><strong>Module</strong></td>
|
53
|
+
<td class="class-name-in-header">Classifier::Stemmable</td>
|
54
|
+
</tr>
|
55
|
+
<tr class="top-aligned-row">
|
56
|
+
<td><strong>In:</strong></td>
|
57
|
+
<td>
|
58
|
+
<a href="../../files/lib/classifier/string_extensions/porter_stemmer_rb.html">
|
59
|
+
lib/classifier/string_extensions/porter_stemmer.rb
|
60
|
+
</a>
|
61
|
+
<br />
|
62
|
+
</td>
|
63
|
+
</tr>
|
64
|
+
|
65
|
+
</table>
|
66
|
+
</div>
|
67
|
+
<!-- banner header -->
|
68
|
+
|
69
|
+
<div id="bodyContent">
|
70
|
+
|
71
|
+
|
72
|
+
|
73
|
+
<div id="contextContent">
|
74
|
+
|
75
|
+
<div id="description">
|
76
|
+
<p>
|
77
|
+
Porter stemmer in Ruby.
|
78
|
+
</p>
|
79
|
+
<p>
|
80
|
+
This is the Porter stemming algorithm, ported to Ruby from the version
|
81
|
+
coded up in Perl. It’s easy to follow against the rules in the
|
82
|
+
original paper in:
|
83
|
+
</p>
|
84
|
+
<pre>
|
85
|
+
Porter, 1980, An algorithm for suffix stripping, Program, Vol. 14,
|
86
|
+
no. 3, pp 130-137,
|
87
|
+
</pre>
|
88
|
+
<p>
|
89
|
+
See also <a
|
90
|
+
href="http://www.tartarus.org/~martin/PorterStemmer">www.tartarus.org/~martin/PorterStemmer</a>
|
91
|
+
</p>
|
92
|
+
<p>
|
93
|
+
Send comments to raypereda@hotmail.com
|
94
|
+
</p>
|
95
|
+
|
96
|
+
</div>
|
97
|
+
|
98
|
+
|
99
|
+
</div>
|
100
|
+
|
101
|
+
<div id="method-list">
|
102
|
+
<h3 class="section-bar">Methods</h3>
|
103
|
+
|
104
|
+
<div class="name-list">
|
105
|
+
<a href="#M000004">stem</a>
|
106
|
+
<a href="#M000003">stem_porter</a>
|
107
|
+
</div>
|
108
|
+
</div>
|
109
|
+
|
110
|
+
</div>
|
111
|
+
|
112
|
+
|
113
|
+
<!-- if includes -->
|
114
|
+
|
115
|
+
<div id="section">
|
116
|
+
|
117
|
+
|
118
|
+
<div id="constants-list">
|
119
|
+
<h3 class="section-bar">Constants</h3>
|
120
|
+
|
121
|
+
<div class="name-list">
|
122
|
+
<table summary="Constants">
|
123
|
+
<tr class="top-aligned-row context-row">
|
124
|
+
<td class="context-item-name">STEP_2_LIST</td>
|
125
|
+
<td>=</td>
|
126
|
+
<td class="context-item-value">{
|
127
|
'ational'=>'ate', 'tional'=>'tion', 'enci'=>'ence', 'anci'=>'ance',
|
1
128
|
'izer'=>'ize', 'bli'=>'ble',
|
2
129
|
'alli'=>'al', 'entli'=>'ent', 'eli'=>'e', 'ousli'=>'ous',
|
3
130
|
'ization'=>'ize', 'ation'=>'ate',
|
4
131
|
'ator'=>'ate', 'alism'=>'al', 'iveness'=>'ive', 'fulness'=>'ful',
|
5
132
|
'ousness'=>'ous', 'aliti'=>'al',
|
6
133
|
'iviti'=>'ive', 'biliti'=>'ble', 'logi'=>'log'</td>
|
134
|
+
</tr>
|
135
|
+
<tr class="top-aligned-row context-row">
|
136
|
+
<td class="context-item-name">STEP_3_LIST</td>
|
137
|
+
<td>=</td>
|
138
|
+
<td class="context-item-value">{
|
7
139
|
'icate'=>'ic', 'ative'=>'', 'alize'=>'al', 'iciti'=>'ic',
|
8
140
|
'ical'=>'ic', 'ful'=>'', 'ness'=>''</td>
|
141
|
+
</tr>
|
142
|
+
<tr class="top-aligned-row context-row">
|
143
|
+
<td class="context-item-name">SUFFIX_1_REGEXP</td>
|
144
|
+
<td>=</td>
|
145
|
+
<td class="context-item-value">/(
|
9
146
|
ational |
|
10
147
|
tional |
|
11
148
|
enci |
|
12
149
|
anci |
|
13
150
|
izer |
|
14
151
|
bli |
|
15
152
|
alli |
|
16
153
|
entli |
|
17
154
|
eli |
|
18
155
|
ousli |
|
19
156
|
ization |
|
20
157
|
ation |
|
21
158
|
ator |
|
22
159
|
alism |
|
23
160
|
iveness |
|
24
161
|
fulness |
|
25
162
|
ousness |
|
26
163
|
aliti |
|
27
164
|
iviti |
|
28
165
|
biliti |
|
29
166
|
logi)$/x</td>
|
167
|
+
</tr>
|
168
|
+
<tr class="top-aligned-row context-row">
|
169
|
+
<td class="context-item-name">SUFFIX_2_REGEXP</td>
|
170
|
+
<td>=</td>
|
171
|
+
<td class="context-item-value">/(
|
30
172
|
al |
|
31
173
|
ance |
|
32
174
|
ence |
|
33
175
|
er |
|
34
176
|
ic |
|
35
177
|
able |
|
36
178
|
ible |
|
37
179
|
ant |
|
38
180
|
ement |
|
39
181
|
ment |
|
40
182
|
ent |
|
41
183
|
ou |
|
42
184
|
ism |
|
43
185
|
ate |
|
44
186
|
iti |
|
45
187
|
ous |
|
46
188
|
ive |
|
47
189
|
ize)$/x</td>
|
190
|
+
</tr>
|
191
|
+
<tr class="top-aligned-row context-row">
|
192
|
+
<td class="context-item-name">C</td>
|
193
|
+
<td>=</td>
|
194
|
+
<td class="context-item-value">"[^aeiou]"</td>
|
195
|
+
</tr>
|
196
|
+
<tr class="top-aligned-row context-row">
|
197
|
+
<td class="context-item-name">V</td>
|
198
|
+
<td>=</td>
|
199
|
+
<td class="context-item-value">"[aeiouy]"</td>
|
200
|
+
</tr>
|
201
|
+
<tr class="top-aligned-row context-row">
|
202
|
+
<td class="context-item-name">CC</td>
|
203
|
+
<td>=</td>
|
204
|
+
<td class="context-item-value">"#{C}(?>[^aeiouy]*)"</td>
|
205
|
+
</tr>
|
206
|
+
<tr class="top-aligned-row context-row">
|
207
|
+
<td class="context-item-name">VV</td>
|
208
|
+
<td>=</td>
|
209
|
+
<td class="context-item-value">"#{V}(?>[aeiou]*)"</td>
|
210
|
+
</tr>
|
211
|
+
<tr class="top-aligned-row context-row">
|
212
|
+
<td class="context-item-name">MGR0</td>
|
213
|
+
<td>=</td>
|
214
|
+
<td class="context-item-value">/^(#{CC})?#{VV}#{CC}/o</td>
|
215
|
+
</tr>
|
216
|
+
<tr class="top-aligned-row context-row">
|
217
|
+
<td class="context-item-name">MEQ1</td>
|
218
|
+
<td>=</td>
|
219
|
+
<td class="context-item-value">/^(#{CC})?#{VV}#{CC}(#{VV})?$/o</td>
|
220
|
+
</tr>
|
221
|
+
<tr class="top-aligned-row context-row">
|
222
|
+
<td class="context-item-name">MGR1</td>
|
223
|
+
<td>=</td>
|
224
|
+
<td class="context-item-value">/^(#{CC})?#{VV}#{CC}#{VV}#{CC}/o</td>
|
225
|
+
</tr>
|
226
|
+
<tr class="top-aligned-row context-row">
|
227
|
+
<td class="context-item-name">VOWEL_IN_STEM</td>
|
228
|
+
<td>=</td>
|
229
|
+
<td class="context-item-value">/^(#{CC})?#{V}/o</td>
|
230
|
+
</tr>
|
231
|
+
</table>
|
232
|
+
</div>
|
233
|
+
</div>
|
234
|
+
|
235
|
+
|
236
|
+
|
237
|
+
|
238
|
+
|
239
|
+
|
240
|
+
<!-- if method_list -->
|
241
|
+
<div id="methods">
|
242
|
+
<h3 class="section-bar">Public Instance methods</h3>
|
243
|
+
|
244
|
+
<div id="method-M000004" class="method-detail">
|
245
|
+
<a name="M000004"></a>
|
246
|
+
|
247
|
+
<div class="method-heading">
|
248
|
+
<span class="method-name">stem</span><span class="method-args">()</span>
|
249
|
+
</div>
|
250
|
+
|
251
|
+
<div class="method-description">
|
252
|
+
<p>
|
253
|
+
Alias for <a href="Stemmable.html#M000003">stem_porter</a>
|
254
|
+
</p>
|
255
|
+
</div>
|
256
|
+
</div>
|
257
|
+
|
258
|
+
<div id="method-M000003" class="method-detail">
|
259
|
+
<a name="M000003"></a>
|
260
|
+
|
261
|
+
<div class="method-heading">
|
262
|
+
<a href="Stemmable.src/M000003.html" target="Code" class="method-signature"
|
263
|
+
onclick="popupCode('Stemmable.src/M000003.html');return false;">
|
264
|
+
<span class="method-name">stem_porter</span><span class="method-args">()</span>
|
265
|
+
</a>
|
266
|
+
</div>
|
267
|
+
|
268
|
+
<div class="method-description">
|
269
|
+
<p>
|
270
|
+
Stems the word contained in the current object. E.g.,
|
271
|
+
</p>
|
272
|
+
<pre>
|
273
|
+
"actually".stem_porter
|
274
|
+
=> "actual"
|
275
|
+
</pre>
|
276
|
+
</div>
|
277
|
+
</div>
|
278
|
+
|
279
|
+
|
280
|
+
</div>
|
281
|
+
|
282
|
+
|
283
|
+
</div>
|
284
|
+
|
285
|
+
|
286
|
+
<div id="validator-badges">
|
287
|
+
<p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
|
288
|
+
</div>
|
289
|
+
|
290
|
+
</body>
|
291
|
+
</html>
|
@@ -0,0 +1,102 @@
|
|
1
|
+
<?xml version="1.0" encoding="iso-8859-1"?>
|
2
|
+
<!DOCTYPE html
|
3
|
+
PUBLIC "-//W3C//DTD XHTML 1.0 Transitional//EN"
|
4
|
+
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd">
|
5
|
+
|
6
|
+
<html>
|
7
|
+
<head>
|
8
|
+
<title>stem_porter (Classifier::Stemmable)</title>
|
9
|
+
<meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1" />
|
10
|
+
<link rel="stylesheet" href="../../.././rdoc-style.css" type="text/css" media="screen" />
|
11
|
+
</head>
|
12
|
+
<body class="standalone-code">
|
13
|
+
<pre><span class="ruby-comment cmt"># File lib/classifier/string_extensions/porter_stemmer.rb, line 102</span>
|
14
|
+
<span class="ruby-keyword kw">def</span> <span class="ruby-identifier">stem_porter</span>
|
15
|
+
|
16
|
+
<span class="ruby-comment cmt"># make a copy of the given object and convert it to a string.
|
17
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>.<span class="ruby-identifier">to_str</span>
|
18
|
+
|
19
|
+
<span class="ruby-keyword kw">return</span> <span class="ruby-identifier">w</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator"><</span> <span class="ruby-value">3</span>
|
20
|
+
|
21
|
+
<span class="ruby-comment cmt"># now map initial y to Y so that the patterns never treat it as vowel
|
22
|
+
<span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] = <span class="ruby-value str">'Y'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-value">?y</span>
|
23
|
+
|
24
|
+
<span class="ruby-comment cmt"># Step 1a
|
25
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ss|i)es$/</span>
|
26
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
|
27
|
+
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/([^s])s$/</span>
|
28
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
|
29
|
+
<span class="ruby-keyword kw">end</span>
|
30
|
+
|
31
|
+
<span class="ruby-comment cmt"># Step 1b
|
32
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/eed$/</span>
|
33
|
+
<span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">$`</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
|
34
|
+
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ed|ing)$/</span>
|
35
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
|
36
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">VOWEL_IN_STEM</span>
|
37
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
|
38
|
+
<span class="ruby-keyword kw">case</span> <span class="ruby-identifier">w</span>
|
39
|
+
<span class="ruby-keyword kw">when</span> <span class="ruby-regexp re">/(at|bl|iz)$/</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span> <span class="ruby-operator"><<</span> <span class="ruby-value str">"e"</span>
|
40
|
+
<span class="ruby-keyword kw">when</span> <span class="ruby-regexp re">/([^aeiouylsz])\1$/</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
|
41
|
+
<span class="ruby-keyword kw">when</span> <span class="ruby-node">/^#{CC}#{V}[^aeiouwxy]$/o</span> <span class="ruby-keyword kw">then</span> <span class="ruby-identifier">w</span> <span class="ruby-operator"><<</span> <span class="ruby-value str">"e"</span>
|
42
|
+
<span class="ruby-keyword kw">end</span>
|
43
|
+
<span class="ruby-keyword kw">end</span>
|
44
|
+
<span class="ruby-keyword kw">end</span>
|
45
|
+
|
46
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/y$/</span>
|
47
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
|
48
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-value str">"i"</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">VOWEL_IN_STEM</span>
|
49
|
+
<span class="ruby-keyword kw">end</span>
|
50
|
+
|
51
|
+
<span class="ruby-comment cmt"># Step 2
|
52
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">SUFFIX_1_REGEXP</span>
|
53
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
|
54
|
+
<span class="ruby-identifier">suffix</span> = <span class="ruby-identifier">$1</span>
|
55
|
+
<span class="ruby-comment cmt"># print "stem= " + stem + "\n" + "suffix=" + suffix + "\n"
|
56
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
|
57
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-constant">STEP_2_LIST</span>[<span class="ruby-identifier">suffix</span>]
|
58
|
+
<span class="ruby-keyword kw">end</span>
|
59
|
+
<span class="ruby-keyword kw">end</span>
|
60
|
+
|
61
|
+
<span class="ruby-comment cmt"># Step 3
|
62
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(icate|ative|alize|iciti|ical|ful|ness)$/</span>
|
63
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
|
64
|
+
<span class="ruby-identifier">suffix</span> = <span class="ruby-identifier">$1</span>
|
65
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR0</span>
|
66
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span> <span class="ruby-operator">+</span> <span class="ruby-constant">STEP_3_LIST</span>[<span class="ruby-identifier">suffix</span>]
|
67
|
+
<span class="ruby-keyword kw">end</span>
|
68
|
+
<span class="ruby-keyword kw">end</span>
|
69
|
+
|
70
|
+
<span class="ruby-comment cmt"># Step 4
|
71
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">SUFFIX_2_REGEXP</span>
|
72
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
|
73
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
|
74
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
|
75
|
+
<span class="ruby-keyword kw">end</span>
|
76
|
+
<span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)(ion)$/</span>
|
77
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span> <span class="ruby-operator">+</span> <span class="ruby-identifier">$1</span>
|
78
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
|
79
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
|
80
|
+
<span class="ruby-keyword kw">end</span>
|
81
|
+
<span class="ruby-keyword kw">end</span>
|
82
|
+
|
83
|
+
<span class="ruby-comment cmt"># Step 5
|
84
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
|
85
|
+
<span class="ruby-identifier">stem</span> = <span class="ruby-identifier">$`</span>
|
86
|
+
<span class="ruby-keyword kw">if</span> (<span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>) <span class="ruby-operator">||</span>
|
87
|
+
(<span class="ruby-identifier">stem</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MEQ1</span> <span class="ruby-operator">&&</span> <span class="ruby-identifier">stem</span> <span class="ruby-operator">!~</span> <span class="ruby-node">/^#{CC}#{V}[^aeiouwxy]$/o</span>)
|
88
|
+
<span class="ruby-identifier">w</span> = <span class="ruby-identifier">stem</span>
|
89
|
+
<span class="ruby-keyword kw">end</span>
|
90
|
+
<span class="ruby-keyword kw">end</span>
|
91
|
+
|
92
|
+
<span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-operator">&&</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-constant">MGR1</span>
|
93
|
+
<span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
|
94
|
+
<span class="ruby-keyword kw">end</span>
|
95
|
+
|
96
|
+
<span class="ruby-comment cmt"># and turn initial Y back to y
|
97
|
+
<span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] = <span class="ruby-value str">'y'</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span>[<span class="ruby-value">0</span>] <span class="ruby-operator">==</span> <span class="ruby-value">?Y</span>
|
98
|
+
|
99
|
+
<span class="ruby-identifier">w</span>
|
100
|
+
<span class="ruby-keyword kw">end</span></pre>
|
101
|
+
</body>
|
102
|
+
</html>
|