porter2stemmer 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/rdoc/String.html ADDED
@@ -0,0 +1,1142 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
7
+
8
+ <title>Class: String</title>
9
+
10
+ <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
11
+
12
+ <script src="./js/jquery.js" type="text/javascript"
13
+ charset="utf-8"></script>
14
+ <script src="./js/thickbox-compressed.js" type="text/javascript"
15
+ charset="utf-8"></script>
16
+ <script src="./js/quicksearch.js" type="text/javascript"
17
+ charset="utf-8"></script>
18
+ <script src="./js/darkfish.js" type="text/javascript"
19
+ charset="utf-8"></script>
20
+
21
+ </head>
22
+ <body class="class">
23
+
24
+ <div id="metadata">
25
+ <div id="home-metadata">
26
+ <div id="home-section" class="section">
27
+ <h3 class="section-header">
28
+ <a href="./index.html">Home</a>
29
+ <a href="./index.html#classes">Classes</a>
30
+ <a href="./index.html#methods">Methods</a>
31
+ </h3>
32
+ </div>
33
+ </div>
34
+
35
+ <div id="file-metadata">
36
+ <div id="file-list-section" class="section">
37
+ <h3 class="section-header">In Files</h3>
38
+ <div class="section-body">
39
+ <ul>
40
+
41
+ <li><a href="./lib/porter2stemmer/implementation_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
42
+ class="thickbox" title="lib/porter2stemmer/implementation.rb">lib/porter2stemmer/implementation.rb</a></li>
43
+
44
+ </ul>
45
+ </div>
46
+ </div>
47
+
48
+
49
+ </div>
50
+
51
+ <div id="class-metadata">
52
+
53
+ <!-- Parent Class -->
54
+
55
+ <div id="parent-class-section" class="section">
56
+ <h3 class="section-header">Parent</h3>
57
+
58
+ <p class="link">Object</p>
59
+
60
+ </div>
61
+
62
+
63
+ <!-- Namespace Contents -->
64
+
65
+
66
+ <!-- Method Quickref -->
67
+
68
+ <div id="method-list-section" class="section">
69
+ <h3 class="section-header">Methods</h3>
70
+ <ul class="link-list">
71
+
72
+ <li><a href="#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable?</a></li>
73
+
74
+ <li><a href="#method-i-porter2_is_short_word%3F">#porter2_is_short_word?</a></li>
75
+
76
+ <li><a href="#method-i-porter2_postprocess">#porter2_postprocess</a></li>
77
+
78
+ <li><a href="#method-i-porter2_preprocess">#porter2_preprocess</a></li>
79
+
80
+ <li><a href="#method-i-porter2_r1">#porter2_r1</a></li>
81
+
82
+ <li><a href="#method-i-porter2_r2">#porter2_r2</a></li>
83
+
84
+ <li><a href="#method-i-porter2_stem">#porter2_stem</a></li>
85
+
86
+ <li><a href="#method-i-porter2_stem_verbose">#porter2_stem_verbose</a></li>
87
+
88
+ <li><a href="#method-i-porter2_step0">#porter2_step0</a></li>
89
+
90
+ <li><a href="#method-i-porter2_step1a">#porter2_step1a</a></li>
91
+
92
+ <li><a href="#method-i-porter2_step1b">#porter2_step1b</a></li>
93
+
94
+ <li><a href="#method-i-porter2_step1c">#porter2_step1c</a></li>
95
+
96
+ <li><a href="#method-i-porter2_step2">#porter2_step2</a></li>
97
+
98
+ <li><a href="#method-i-porter2_step3">#porter2_step3</a></li>
99
+
100
+ <li><a href="#method-i-porter2_step4">#porter2_step4</a></li>
101
+
102
+ <li><a href="#method-i-porter2_step5">#porter2_step5</a></li>
103
+
104
+ <li><a href="#method-i-porter2_tidy">#porter2_tidy</a></li>
105
+
106
+ <li><a href="#method-i-stem">#stem</a></li>
107
+
108
+ </ul>
109
+ </div>
110
+
111
+
112
+ <!-- Included Modules -->
113
+
114
+ </div>
115
+
116
+ <div id="project-metadata">
117
+
118
+
119
+ <div id="fileindex-section" class="section project-section">
120
+ <h3 class="section-header">Files</h3>
121
+ <ul>
122
+
123
+ <li class="file"><a href="./README_rdoc.html">README.rdoc</a></li>
124
+
125
+ </ul>
126
+ </div>
127
+
128
+
129
+ <div id="classindex-section" class="section project-section">
130
+ <h3 class="section-header">Class Index
131
+ <span class="search-toggle"><img src="./images/find.png"
132
+ height="16" width="16" alt="[+]"
133
+ title="show/hide quicksearch" /></span></h3>
134
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
135
+ <fieldset>
136
+ <legend>Quicksearch</legend>
137
+ <input type="text" name="quicksearch" value=""
138
+ class="quicksearch-field" />
139
+ </fieldset>
140
+ </form>
141
+
142
+ <ul class="link-list">
143
+
144
+ <li><a href="./Porter2.html">Porter2</a></li>
145
+
146
+ <li><a href="./String.html">String</a></li>
147
+
148
+ </ul>
149
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
150
+ </div>
151
+
152
+
153
+ </div>
154
+ </div>
155
+
156
+ <div id="documentation">
157
+ <h1 class="class">String</h1>
158
+
159
+ <div id="description">
160
+ <p>
161
+ Implementation of the Porter 2 stemmer. <a
162
+ href="String.html#method-i-porter2_stem">String#porter2_stem</a> is the
163
+ main stemming procedure.
164
+ </p>
165
+
166
+ </div>
167
+
168
+ <!-- Constants -->
169
+
170
+
171
+ <!-- Attributes -->
172
+
173
+
174
+ <!-- Methods -->
175
+
176
+ <div id="public-instance-method-details" class="method-section section">
177
+ <h3 class="section-header">Public Instance Methods</h3>
178
+
179
+
180
+ <div id="porter-ends-with-short-syllable--method" class="method-detail ">
181
+ <a name="method-i-porter2_ends_with_short_syllable%3F"></a>
182
+
183
+ <div class="method-heading">
184
+
185
+ <span class="method-name">porter2_ends_with_short_syllable?</span><span
186
+ class="method-args">()</span>
187
+ <span class="method-click-advice">click to toggle source</span>
188
+
189
+ </div>
190
+
191
+ <div class="method-description">
192
+
193
+ <p>
194
+ Returns true if the word ends with a short syllable
195
+ </p>
196
+
197
+
198
+
199
+ <div class="method-source-code"
200
+ id="porter-ends-with-short-syllable--source">
201
+ <pre>
202
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 59</span>
203
+ 59: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
204
+ 60: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
205
+ 61: <span class="ruby-keyword kw">end</span></pre>
206
+ </div>
207
+
208
+ </div>
209
+
210
+
211
+
212
+
213
+ </div>
214
+
215
+
216
+ <div id="porter-is-short-word--method" class="method-detail ">
217
+ <a name="method-i-porter2_is_short_word%3F"></a>
218
+
219
+ <div class="method-heading">
220
+
221
+ <span class="method-name">porter2_is_short_word?</span><span
222
+ class="method-args">()</span>
223
+ <span class="method-click-advice">click to toggle source</span>
224
+
225
+ </div>
226
+
227
+ <div class="method-description">
228
+
229
+ <p>
230
+ A word is short if it ends in a short syllable, and R1 is null
231
+ </p>
232
+
233
+
234
+
235
+ <div class="method-source-code"
236
+ id="porter-is-short-word--source">
237
+ <pre>
238
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 65</span>
239
+ 65: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
240
+ 66: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
241
+ 67: <span class="ruby-keyword kw">end</span></pre>
242
+ </div>
243
+
244
+ </div>
245
+
246
+
247
+
248
+
249
+ </div>
250
+
251
+
252
+ <div id="porter-postprocess-method" class="method-detail ">
253
+ <a name="method-i-porter2_postprocess"></a>
254
+
255
+ <div class="method-heading">
256
+
257
+ <span class="method-name">porter2_postprocess</span><span
258
+ class="method-args">()</span>
259
+ <span class="method-click-advice">click to toggle source</span>
260
+
261
+ </div>
262
+
263
+ <div class="method-description">
264
+
265
+ <p>
266
+ Turn all Y letters into y
267
+ </p>
268
+
269
+
270
+
271
+ <div class="method-source-code"
272
+ id="porter-postprocess-source">
273
+ <pre>
274
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 261</span>
275
+ 261: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
276
+ 262: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
277
+ 263: <span class="ruby-keyword kw">end</span></pre>
278
+ </div>
279
+
280
+ </div>
281
+
282
+
283
+
284
+
285
+ </div>
286
+
287
+
288
+ <div id="porter-preprocess-method" class="method-detail ">
289
+ <a name="method-i-porter2_preprocess"></a>
290
+
291
+ <div class="method-heading">
292
+
293
+ <span class="method-name">porter2_preprocess</span><span
294
+ class="method-args">()</span>
295
+ <span class="method-click-advice">click to toggle source</span>
296
+
297
+ </div>
298
+
299
+ <div class="method-description">
300
+
301
+ <p>
302
+ Preprocess the word. Remove any initial &#8217;, if present. Then, set
303
+ initial y, or y after a vowel, to Y
304
+ </p>
305
+ <p>
306
+ (The comment to &#8216;establish the regions R1 and R2&#8217; in the
307
+ original description is an implementation optimisation that identifies
308
+ where the regions start. As no modifications are made to the word that
309
+ affect those positions, you may want to cache them now. This implementation
310
+ doesn&#8217;t do that.)
311
+ </p>
312
+
313
+
314
+
315
+ <div class="method-source-code"
316
+ id="porter-preprocess-source">
317
+ <pre>
318
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 25</span>
319
+ 25: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>
320
+ 26: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
321
+ 27:
322
+ 28: <span class="ruby-comment cmt"># remove any initial apostrophe</span>
323
+ 29: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
324
+ 30:
325
+ 31: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
326
+ 32: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">&quot;Y&quot;</span>)
327
+ 33: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
328
+ 34:
329
+ 35: <span class="ruby-identifier">w</span>
330
+ 36: <span class="ruby-keyword kw">end</span></pre>
331
+ </div>
332
+
333
+ </div>
334
+
335
+
336
+
337
+
338
+ </div>
339
+
340
+
341
+ <div id="porter-r1-method" class="method-detail ">
342
+ <a name="method-i-porter2_r1"></a>
343
+
344
+ <div class="method-heading">
345
+
346
+ <span class="method-name">porter2_r1</span><span
347
+ class="method-args">()</span>
348
+ <span class="method-click-advice">click to toggle source</span>
349
+
350
+ </div>
351
+
352
+ <div class="method-description">
353
+
354
+ <p>
355
+ R1 is the portion of the word after the first non-vowel after the first
356
+ vowel (with words beginning &#8216;gener-&#8217;, &#8216;commun-&#8217;,
357
+ and &#8216;arsen-&#8217; treated as special cases)
358
+ </p>
359
+
360
+
361
+
362
+ <div class="method-source-code"
363
+ id="porter-r1-source">
364
+ <pre>
365
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 41</span>
366
+ 41: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
367
+ 42: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?&lt;r1&gt;.*)/</span>
368
+ 43: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
369
+ 44: <span class="ruby-keyword kw">else</span>
370
+ 45: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r1&gt;.*)$/</span>
371
+ 46: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
372
+ 47: <span class="ruby-keyword kw">end</span>
373
+ 48: <span class="ruby-keyword kw">end</span></pre>
374
+ </div>
375
+
376
+ </div>
377
+
378
+
379
+
380
+
381
+ </div>
382
+
383
+
384
+ <div id="porter-r2-method" class="method-detail ">
385
+ <a name="method-i-porter2_r2"></a>
386
+
387
+ <div class="method-heading">
388
+
389
+ <span class="method-name">porter2_r2</span><span
390
+ class="method-args">()</span>
391
+ <span class="method-click-advice">click to toggle source</span>
392
+
393
+ </div>
394
+
395
+ <div class="method-description">
396
+
397
+ <p>
398
+ R2 is the portion of R1 (<a
399
+ href="String.html#method-i-porter2_r1">porter2_r1</a>) after the first
400
+ non-vowel after the first vowel
401
+ </p>
402
+
403
+
404
+
405
+ <div class="method-source-code"
406
+ id="porter-r2-source">
407
+ <pre>
408
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 52</span>
409
+ 52: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
410
+ 53: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r2&gt;.*)$/</span>
411
+ 54: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
412
+ 55: <span class="ruby-keyword kw">end</span></pre>
413
+ </div>
414
+
415
+ </div>
416
+
417
+
418
+
419
+
420
+ </div>
421
+
422
+
423
+ <div id="porter-stem-method" class="method-detail ">
424
+ <a name="method-i-porter2_stem"></a>
425
+
426
+ <div class="method-heading">
427
+
428
+ <span class="method-name">porter2_stem</span><span
429
+ class="method-args">(gb_english = false)</span>
430
+ <span class="method-click-advice">click to toggle source</span>
431
+
432
+ </div>
433
+
434
+ <div class="method-description">
435
+
436
+ <p>
437
+ Perform the stemming procedure. If <tt>gb_english</tt> is true, treat
438
+ &#8216;-ise&#8217; and similar suffixes as &#8216;-ize&#8217; in American
439
+ English.
440
+ </p>
441
+
442
+
443
+
444
+ <div class="method-source-code"
445
+ id="porter-stem-source">
446
+ <pre>
447
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 269</span>
448
+ 269: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
449
+ 270: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
450
+ 271: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
451
+ 272:
452
+ 273: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
453
+ 274:
454
+ 275: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
455
+ 276: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
456
+ 277: <span class="ruby-keyword kw">else</span>
457
+ 278: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
458
+ 279: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
459
+ 280: <span class="ruby-identifier">w1a</span>
460
+ 281: <span class="ruby-keyword kw">else</span>
461
+ 282: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
462
+ 283: <span class="ruby-keyword kw">end</span>
463
+ 284: <span class="ruby-keyword kw">end</span>
464
+ 285: <span class="ruby-keyword kw">end</span></pre>
465
+ </div>
466
+
467
+ </div>
468
+
469
+
470
+ <div class="aliases">
471
+ Also aliased as: <a href="String.html#method-i-stem">stem</a>
472
+ </div>
473
+
474
+
475
+
476
+ </div>
477
+
478
+
479
+ <div id="porter-stem-verbose-method" class="method-detail ">
480
+ <a name="method-i-porter2_stem_verbose"></a>
481
+
482
+ <div class="method-heading">
483
+
484
+ <span class="method-name">porter2_stem_verbose</span><span
485
+ class="method-args">(gb_english = false)</span>
486
+ <span class="method-click-advice">click to toggle source</span>
487
+
488
+ </div>
489
+
490
+ <div class="method-description">
491
+
492
+ <p>
493
+ A verbose version of <a
494
+ href="String.html#method-i-porter2_stem">porter2_stem</a> that prints the
495
+ output of each stage to STDOUT
496
+ </p>
497
+
498
+
499
+
500
+ <div class="method-source-code"
501
+ id="porter-stem-verbose-source">
502
+ <pre>
503
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 288</span>
504
+ 288: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
505
+ 289: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
506
+ 290: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preword: #{preword}&quot;</span>
507
+ 291: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
508
+ 292:
509
+ 293: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
510
+ 294: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preprocessed: #{word}&quot;</span>
511
+ 295:
512
+ 296: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
513
+ 297: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}&quot;</span>
514
+ 298: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
515
+ 299: <span class="ruby-keyword kw">else</span>
516
+ 300: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
517
+ 301: <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
518
+ 302: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;R1 = #{r1}, R2 = #{r2}&quot;</span>
519
+ 303:
520
+ 304: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})&quot;</span>
521
+ 305: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})&quot;</span>
522
+ 306:
523
+ 307: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
524
+ 308: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{w1a} as 1a special case&quot;</span>
525
+ 309: <span class="ruby-identifier">w1a</span>
526
+ 310: <span class="ruby-keyword kw">else</span>
527
+ 311: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})&quot;</span>
528
+ 312: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})&quot;</span>
529
+ 313: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})&quot;</span>
530
+ 314: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})&quot;</span>
531
+ 315: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})&quot;</span>
532
+ 316: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 5: #{w5}&quot;</span>
533
+ 317: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After postprocess: #{wpost}&quot;</span>
534
+ 318: <span class="ruby-identifier">wpost</span>
535
+ 319: <span class="ruby-keyword kw">end</span>
536
+ 320: <span class="ruby-keyword kw">end</span>
537
+ 321: <span class="ruby-keyword kw">end</span></pre>
538
+ </div>
539
+
540
+ </div>
541
+
542
+
543
+
544
+
545
+ </div>
546
+
547
+
548
+ <div id="porter-step--method" class="method-detail ">
549
+ <a name="method-i-porter2_step0"></a>
550
+
551
+ <div class="method-heading">
552
+
553
+ <span class="method-name">porter2_step0</span><span
554
+ class="method-args">()</span>
555
+ <span class="method-click-advice">click to toggle source</span>
556
+
557
+ </div>
558
+
559
+ <div class="method-description">
560
+
561
+ <p>
562
+ Search for the longest among the suffixes,
563
+ </p>
564
+ <ul>
565
+ <li><p>
566
+ &#8217;
567
+ </p>
568
+ </li>
569
+ <li><p>
570
+ &#8217;s
571
+ </p>
572
+ </li>
573
+ <li><p>
574
+ &#8217;s&#8217;
575
+ </p>
576
+ </li>
577
+ </ul>
578
+ <p>
579
+ and remove if found.
580
+ </p>
581
+
582
+
583
+
584
+ <div class="method-source-code"
585
+ id="porter-step--source">
586
+ <pre>
587
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 75</span>
588
+ 75: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
589
+ 76: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
590
+ 77: <span class="ruby-keyword kw">end</span></pre>
591
+ </div>
592
+
593
+ </div>
594
+
595
+
596
+
597
+
598
+ </div>
599
+
600
+
601
+ <div id="porter-step-a-method" class="method-detail ">
602
+ <a name="method-i-porter2_step1a"></a>
603
+
604
+ <div class="method-heading">
605
+
606
+ <span class="method-name">porter2_step1a</span><span
607
+ class="method-args">()</span>
608
+ <span class="method-click-advice">click to toggle source</span>
609
+
610
+ </div>
611
+
612
+ <div class="method-description">
613
+
614
+ <p>
615
+ Search for the longest among the following suffixes, and perform the action
616
+ indicated.
617
+ </p>
618
+ <table>
619
+ <tr><td valign="top">sses</td><td><p>
620
+ replace by ss
621
+ </p>
622
+ </td></tr>
623
+ <tr><td valign="top">ied, ies</td><td><p>
624
+ replace by i if preceded by more than one letter, otherwise by ie
625
+ </p>
626
+ </td></tr>
627
+ <tr><td valign="top">s</td><td><p>
628
+ delete if the preceding word part contains a vowel not immediately before
629
+ the s
630
+ </p>
631
+ </td></tr>
632
+ <tr><td valign="top">us, ss</td><td><p>
633
+ do nothing
634
+ </p>
635
+ </td></tr>
636
+ </table>
637
+
638
+
639
+
640
+ <div class="method-source-code"
641
+ id="porter-step-a-source">
642
+ <pre>
643
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 85</span>
644
+ 85: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
645
+ 86: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
646
+ 87: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
647
+ 88: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
648
+ 89: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
649
+ 90: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
650
+ 91: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
651
+ 92: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
652
+ 93: <span class="ruby-keyword kw">self</span>
653
+ 94: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
654
+ 95: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
655
+ 96: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>)
656
+ 97: <span class="ruby-keyword kw">else</span>
657
+ 98: <span class="ruby-keyword kw">self</span>
658
+ 99: <span class="ruby-keyword kw">end</span>
659
+ 100: <span class="ruby-keyword kw">else</span>
660
+ 101: <span class="ruby-keyword kw">self</span>
661
+ 102: <span class="ruby-keyword kw">end</span>
662
+ 103: <span class="ruby-keyword kw">end</span></pre>
663
+ </div>
664
+
665
+ </div>
666
+
667
+
668
+
669
+
670
+ </div>
671
+
672
+
673
+ <div id="porter-step-b-method" class="method-detail ">
674
+ <a name="method-i-porter2_step1b"></a>
675
+
676
+ <div class="method-heading">
677
+
678
+ <span class="method-name">porter2_step1b</span><span
679
+ class="method-args">(gb_english = false)</span>
680
+ <span class="method-click-advice">click to toggle source</span>
681
+
682
+ </div>
683
+
684
+ <div class="method-description">
685
+
686
+ <p>
687
+ Search for the longest among the following suffixes, and perform the action
688
+ indicated.
689
+ </p>
690
+ <table>
691
+ <tr><td valign="top">eed, eedly</td><td><p>
692
+ replace by ee if the suffix is also in R1
693
+ </p>
694
+ </td></tr>
695
+ <tr><td valign="top">ed, edly, ing, ingly</td><td><p>
696
+ delete if the preceding word part contains a vowel and, after the
697
+ deletion:
698
+ </p>
699
+ <ul>
700
+ <li><p>
701
+ if the word ends at, bl or iz: add e, or
702
+ </p>
703
+ </li>
704
+ </ul>
705
+ <ul>
706
+ <li><p>
707
+ if the word ends with a double: remove the last letter, or
708
+ </p>
709
+ </li>
710
+ </ul>
711
+ <ul>
712
+ <li><p>
713
+ if the word is short: add e
714
+ </p>
715
+ </li>
716
+ </ul>
717
+ </td></tr>
718
+ </table>
719
+ <p>
720
+ (If gb_english is <tt>true</tt>, treat the &#8216;is&#8217; suffix as
721
+ &#8216;iz&#8217; above.)
722
+ </p>
723
+
724
+
725
+
726
+ <div class="method-source-code"
727
+ id="porter-step-b-source">
728
+ <pre>
729
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 115</span>
730
+ 115: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
731
+ 116: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
732
+ 117: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
733
+ 118: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
734
+ 119: <span class="ruby-keyword kw">else</span>
735
+ 120: <span class="ruby-keyword kw">self</span>
736
+ 121: <span class="ruby-keyword kw">end</span>
737
+ 122: <span class="ruby-keyword kw">else</span>
738
+ 123: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
739
+ 124: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
740
+ 125: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
741
+ 126: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
742
+ 127: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
743
+ 128: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
744
+ 129: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
745
+ 130: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
746
+ 131: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
747
+ 132: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
748
+ 133: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
749
+ 134: <span class="ruby-keyword kw">end</span>
750
+ 135: <span class="ruby-keyword kw">end</span>
751
+ 136: <span class="ruby-identifier">w</span>
752
+ 137: <span class="ruby-keyword kw">end</span>
753
+ 138: <span class="ruby-keyword kw">end</span></pre>
754
+ </div>
755
+
756
+ </div>
757
+
758
+
759
+
760
+
761
+ </div>
762
+
763
+
764
+ <div id="porter-step-c-method" class="method-detail ">
765
+ <a name="method-i-porter2_step1c"></a>
766
+
767
+ <div class="method-heading">
768
+
769
+ <span class="method-name">porter2_step1c</span><span
770
+ class="method-args">()</span>
771
+ <span class="method-click-advice">click to toggle source</span>
772
+
773
+ </div>
774
+
775
+ <div class="method-description">
776
+
777
+ <p>
778
+ Replace a suffix of y or Y by i if it is preceded by a non-vowel which is
779
+ not the first letter of the word.
780
+ </p>
781
+
782
+
783
+
784
+ <div class="method-source-code"
785
+ id="porter-step-c-source">
786
+ <pre>
787
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 143</span>
788
+ 143: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
789
+ 144: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
790
+ 145: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
791
+ 146: <span class="ruby-keyword kw">else</span>
792
+ 147: <span class="ruby-keyword kw">self</span>
793
+ 148: <span class="ruby-keyword kw">end</span>
794
+ 149: <span class="ruby-keyword kw">end</span></pre>
795
+ </div>
796
+
797
+ </div>
798
+
799
+
800
+
801
+
802
+ </div>
803
+
804
+
805
+ <div id="porter2-step2-method" class="method-detail ">
806
+ <a name="method-i-porter2_step2"></a>
807
+
808
+ <div class="method-heading">
809
+
810
+ <span class="method-name">porter2_step2</span><span
811
+ class="method-args">(gb_english = false)</span>
812
+ <span class="method-click-advice">click to toggle source</span>
813
+
814
+ </div>
815
+
816
+ <div class="method-description">
817
+
818
+ <p>
819
+ Search for the longest among the suffixes listed in the keys of
820
+ Porter2::STEP_2_MAPS. If one is found and that suffix occurs in R1,
821
+ replace it with the value found in STEP_2_MAPS.
822
+ </p>
823
+ <p>
824
+ (Suffixes &#8216;ogi&#8217; and &#8216;li&#8217; are treated as special
825
+ cases in the procedure.)
826
+ </p>
827
+ <p>
828
+ (If gb_english is <tt>true</tt>, replace the &#8216;iser&#8217; and
829
+ &#8216;isation&#8217; suffixes with &#8216;ise&#8217;, similarly to how
830
+ &#8216;izer&#8217; and &#8216;ization&#8217; are treated.)
831
+ </p>
832
+
833
+
834
+
835
+ <div class="method-source-code"
836
+ id="porter2-step2-source">
837
+ <pre>
838
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 160</span>
839
+ 160: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
840
+ 161: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
841
+ 162: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
842
+ 163: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
843
+ 164: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;iser&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
844
+ 165: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;isation&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
845
+ 166: <span class="ruby-keyword kw">end</span>
846
+ 167: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
847
+ 168: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
848
+ 169: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
849
+ 170: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&amp;</span>])
850
+ 171: <span class="ruby-keyword kw">else</span>
851
+ 172: <span class="ruby-keyword kw">self</span>
852
+ 173: <span class="ruby-keyword kw">end</span>
853
+ 174: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
854
+ 175: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
855
+ 176: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
856
+ 177: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
857
+ 178: <span class="ruby-keyword kw">else</span>
858
+ 179: <span class="ruby-keyword kw">self</span>
859
+ 180: <span class="ruby-keyword kw">end</span>
860
+ 181: <span class="ruby-keyword kw">end</span></pre>
861
+ </div>
862
+
863
+ </div>
864
+
865
+
866
+
867
+
868
+ </div>
869
+
870
+
871
+ <div id="porter2-step3-method" class="method-detail ">
872
+ <a name="method-i-porter2_step3"></a>
873
+
874
+ <div class="method-heading">
875
+
876
+ <span class="method-name">porter2_step3</span><span
877
+ class="method-args">(gb_english = false)</span>
878
+ <span class="method-click-advice">click to toggle source</span>
879
+
880
+ </div>
881
+
882
+ <div class="method-description">
883
+
884
+ <p>
885
+ Search for the longest among the suffixes listed in the keys of
886
+ Porter2::STEP_3_MAPS. If one is found and that suffix occurs in R1,
887
+ replace it with the value found in STEP_3_MAPS.
888
+ </p>
889
+ <p>
890
+ (Suffix &#8216;ative&#8217; is treated as a special case in the procedure.)
891
+ </p>
892
+ <p>
893
+ (If gb_english is <tt>true</tt>, replace the &#8216;alise&#8217; suffix
894
+ with &#8216;al&#8217;, similarly to how &#8216;alize&#8217; is treated.)
895
+ </p>
896
+
897
+
898
+
899
+ <div class="method-source-code"
900
+ id="porter2-step3-source">
901
+ <pre>
902
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 192</span>
903
+ 192: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
904
+ 193: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
905
+ 194: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
906
+ 195: <span class="ruby-keyword kw">else</span>
907
+ 196: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
908
+ 197: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
909
+ 198: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">&quot;alise&quot;</span>] = <span class="ruby-value str">&quot;al&quot;</span>
910
+ 199: <span class="ruby-keyword kw">end</span>
911
+ 200: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
912
+ 201: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
913
+ 202: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
914
+ 203: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&amp;</span>])
915
+ 204: <span class="ruby-keyword kw">else</span>
916
+ 205: <span class="ruby-keyword kw">self</span>
917
+ 206: <span class="ruby-keyword kw">end</span>
918
+ 207: <span class="ruby-keyword kw">end</span>
919
+ 208: <span class="ruby-keyword kw">end</span></pre>
920
+ </div>
921
+
922
+ </div>
923
+
924
+
925
+
926
+
927
+ </div>
928
+
929
+
930
+ <div id="porter2-step4-method" class="method-detail ">
931
+ <a name="method-i-porter2_step4"></a>
932
+
933
+ <div class="method-heading">
934
+
935
+ <span class="method-name">porter2_step4</span><span
936
+ class="method-args">(gb_english = false)</span>
937
+ <span class="method-click-advice">click to toggle source</span>
938
+
939
+ </div>
940
+
941
+ <div class="method-description">
942
+
943
+ <p>
944
+ Search for the longest among the suffixes listed in the keys of
945
+ Porter2::STEP_4_MAPS. If one is found and that suffix occurs in R2,
946
+ replace it with the value found in STEP_4_MAPS.
947
+ </p>
948
+ <p>
949
+ (Suffix &#8216;ion&#8217; is treated as a special case in the procedure.)
950
+ </p>
951
+ <p>
952
+ (If gb_english is <tt>true</tt>, delete the &#8216;ise&#8217; suffix if
953
+ found.)
954
+ </p>
955
+
956
+
957
+
958
+ <div class="method-source-code"
959
+ id="porter2-step4-source">
960
+ <pre>
961
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 218</span>
962
+ 218: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
963
+ 219: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
964
+ 220: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
965
+ 221: <span class="ruby-keyword kw">else</span>
966
+ 222: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
967
+ 223: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
968
+ 224: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">&quot;ise&quot;</span>] = <span class="ruby-value str">&quot;&quot;</span>
969
+ 225: <span class="ruby-keyword kw">end</span>
970
+ 226: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
971
+ 227: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
972
+ 228: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
973
+ 229: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}/</span>
974
+ 230: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&amp;</span>])
975
+ 231: <span class="ruby-keyword kw">else</span>
976
+ 232: <span class="ruby-keyword kw">self</span>
977
+ 233: <span class="ruby-keyword kw">end</span>
978
+ 234: <span class="ruby-keyword kw">else</span>
979
+ 235: <span class="ruby-keyword kw">self</span>
980
+ 236: <span class="ruby-keyword kw">end</span>
981
+ 237: <span class="ruby-keyword kw">end</span>
982
+ 238: <span class="ruby-keyword kw">end</span></pre>
983
+ </div>
984
+
985
+ </div>
986
+
987
+
988
+
989
+
990
+ </div>
991
+
992
+
993
+ <div id="porter2-step5-method" class="method-detail ">
994
+ <a name="method-i-porter2_step5"></a>
995
+
996
+ <div class="method-heading">
997
+
998
+ <span class="method-name">porter2_step5</span><span
999
+ class="method-args">()</span>
1000
+ <span class="method-click-advice">click to toggle source</span>
1001
+
1002
+ </div>
1003
+
1004
+ <div class="method-description">
1005
+
1006
+ <p>
1007
+ Search for the following suffixes, and, if found, perform the action
1008
+ indicated.
1009
+ </p>
1010
+ <table>
1011
+ <tr><td valign="top">e</td><td><p>
1012
+ delete if in R2, or in R1 and not preceded by a short syllable
1013
+ </p>
1014
+ </td></tr>
1015
+ <tr><td valign="top">l</td><td><p>
1016
+ delete if in R2 and preceded by l
1017
+ </p>
1018
+ </td></tr>
1019
+ </table>
1020
+
1021
+
1022
+
1023
+ <div class="method-source-code"
1024
+ id="porter2-step5-source">
1025
+ <pre>
1026
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 244</span>
1027
+ 244: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
1028
+ 245: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
1029
+ 246: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>)
1030
+ 247: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
1031
+ 248: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
1032
+ 249: <span class="ruby-keyword kw">else</span>
1033
+ 250: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
1034
+ 251: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
1035
+ 252: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
1036
+ 253: <span class="ruby-keyword kw">else</span>
1037
+ 254: <span class="ruby-keyword kw">self</span>
1038
+ 255: <span class="ruby-keyword kw">end</span>
1039
+ 256: <span class="ruby-keyword kw">end</span>
1040
+ 257: <span class="ruby-keyword kw">end</span></pre>
1041
+ </div>
1042
+
1043
+ </div>
1044
+
1045
+
1046
+
1047
+
1048
+ </div>
1049
+
1050
+
1051
+ <div id="porter-tidy-method" class="method-detail ">
1052
+ <a name="method-i-porter2_tidy"></a>
1053
+
1054
+ <div class="method-heading">
1055
+
1056
+ <span class="method-name">porter2_tidy</span><span
1057
+ class="method-args">()</span>
1058
+ <span class="method-click-advice">click to toggle source</span>
1059
+
1060
+ </div>
1061
+
1062
+ <div class="method-description">
1063
+
1064
+ <p>
1065
+ Tidy up the word before we get down to the algorithm
1066
+ </p>
1067
+
1068
+
1069
+
1070
+ <div class="method-source-code"
1071
+ id="porter-tidy-source">
1072
+ <pre>
1073
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 7</span>
1074
+ 7: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
1075
+ 8: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
1076
+ 9:
1077
+ 10: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
1078
+ 11: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
1079
+ 12: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
1080
+ 13:
1081
+ 14: <span class="ruby-identifier">preword</span>
1082
+ 15: <span class="ruby-keyword kw">end</span></pre>
1083
+ </div>
1084
+
1085
+ </div>
1086
+
1087
+
1088
+
1089
+
1090
+ </div>
1091
+
1092
+
1093
+ <div id="stem-method" class="method-detail method-alias">
1094
+ <a name="method-i-stem"></a>
1095
+
1096
+ <div class="method-heading">
1097
+
1098
+ <span class="method-name">stem</span><span
1099
+ class="method-args">(gb_english = false)</span>
1100
+ <span class="method-click-advice">click to toggle source</span>
1101
+
1102
+ </div>
1103
+
1104
+ <div class="method-description">
1105
+
1106
+
1107
+
1108
+
1109
+
1110
+ </div>
1111
+
1112
+
1113
+
1114
+
1115
+ <div class="aliases">
1116
+ Alias for: <a href="String.html#method-i-porter2_stem">porter2_stem</a>
1117
+ </div>
1118
+
1119
+ </div>
1120
+
1121
+
1122
+ </div>
1123
+
1124
+
1125
+ </div>
1126
+
1127
+
1128
+ <div id="rdoc-debugging-section-dump" class="debugging-section">
1129
+
1130
+ <p>Disabled; run with --debug to generate this.</p>
1131
+
1132
+ </div>
1133
+
1134
+ <div id="validator-badges">
1135
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
1136
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
1137
+ Rdoc Generator</a> 1.1.6</small>.</p>
1138
+ </div>
1139
+
1140
+ </body>
1141
+ </html>
1142
+