porter2stemmer 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/rdoc/String.html ADDED
@@ -0,0 +1,1142 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN"
3
+ "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
4
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
5
+ <head>
6
+ <meta content="text/html; charset=utf-8" http-equiv="Content-Type" />
7
+
8
+ <title>Class: String</title>
9
+
10
+ <link rel="stylesheet" href="./rdoc.css" type="text/css" media="screen" />
11
+
12
+ <script src="./js/jquery.js" type="text/javascript"
13
+ charset="utf-8"></script>
14
+ <script src="./js/thickbox-compressed.js" type="text/javascript"
15
+ charset="utf-8"></script>
16
+ <script src="./js/quicksearch.js" type="text/javascript"
17
+ charset="utf-8"></script>
18
+ <script src="./js/darkfish.js" type="text/javascript"
19
+ charset="utf-8"></script>
20
+
21
+ </head>
22
+ <body class="class">
23
+
24
+ <div id="metadata">
25
+ <div id="home-metadata">
26
+ <div id="home-section" class="section">
27
+ <h3 class="section-header">
28
+ <a href="./index.html">Home</a>
29
+ <a href="./index.html#classes">Classes</a>
30
+ <a href="./index.html#methods">Methods</a>
31
+ </h3>
32
+ </div>
33
+ </div>
34
+
35
+ <div id="file-metadata">
36
+ <div id="file-list-section" class="section">
37
+ <h3 class="section-header">In Files</h3>
38
+ <div class="section-body">
39
+ <ul>
40
+
41
+ <li><a href="./lib/porter2stemmer/implementation_rb.html?TB_iframe=true&amp;height=550&amp;width=785"
42
+ class="thickbox" title="lib/porter2stemmer/implementation.rb">lib/porter2stemmer/implementation.rb</a></li>
43
+
44
+ </ul>
45
+ </div>
46
+ </div>
47
+
48
+
49
+ </div>
50
+
51
+ <div id="class-metadata">
52
+
53
+ <!-- Parent Class -->
54
+
55
+ <div id="parent-class-section" class="section">
56
+ <h3 class="section-header">Parent</h3>
57
+
58
+ <p class="link">Object</p>
59
+
60
+ </div>
61
+
62
+
63
+ <!-- Namespace Contents -->
64
+
65
+
66
+ <!-- Method Quickref -->
67
+
68
+ <div id="method-list-section" class="section">
69
+ <h3 class="section-header">Methods</h3>
70
+ <ul class="link-list">
71
+
72
+ <li><a href="#method-i-porter2_ends_with_short_syllable%3F">#porter2_ends_with_short_syllable?</a></li>
73
+
74
+ <li><a href="#method-i-porter2_is_short_word%3F">#porter2_is_short_word?</a></li>
75
+
76
+ <li><a href="#method-i-porter2_postprocess">#porter2_postprocess</a></li>
77
+
78
+ <li><a href="#method-i-porter2_preprocess">#porter2_preprocess</a></li>
79
+
80
+ <li><a href="#method-i-porter2_r1">#porter2_r1</a></li>
81
+
82
+ <li><a href="#method-i-porter2_r2">#porter2_r2</a></li>
83
+
84
+ <li><a href="#method-i-porter2_stem">#porter2_stem</a></li>
85
+
86
+ <li><a href="#method-i-porter2_stem_verbose">#porter2_stem_verbose</a></li>
87
+
88
+ <li><a href="#method-i-porter2_step0">#porter2_step0</a></li>
89
+
90
+ <li><a href="#method-i-porter2_step1a">#porter2_step1a</a></li>
91
+
92
+ <li><a href="#method-i-porter2_step1b">#porter2_step1b</a></li>
93
+
94
+ <li><a href="#method-i-porter2_step1c">#porter2_step1c</a></li>
95
+
96
+ <li><a href="#method-i-porter2_step2">#porter2_step2</a></li>
97
+
98
+ <li><a href="#method-i-porter2_step3">#porter2_step3</a></li>
99
+
100
+ <li><a href="#method-i-porter2_step4">#porter2_step4</a></li>
101
+
102
+ <li><a href="#method-i-porter2_step5">#porter2_step5</a></li>
103
+
104
+ <li><a href="#method-i-porter2_tidy">#porter2_tidy</a></li>
105
+
106
+ <li><a href="#method-i-stem">#stem</a></li>
107
+
108
+ </ul>
109
+ </div>
110
+
111
+
112
+ <!-- Included Modules -->
113
+
114
+ </div>
115
+
116
+ <div id="project-metadata">
117
+
118
+
119
+ <div id="fileindex-section" class="section project-section">
120
+ <h3 class="section-header">Files</h3>
121
+ <ul>
122
+
123
+ <li class="file"><a href="./README_rdoc.html">README.rdoc</a></li>
124
+
125
+ </ul>
126
+ </div>
127
+
128
+
129
+ <div id="classindex-section" class="section project-section">
130
+ <h3 class="section-header">Class Index
131
+ <span class="search-toggle"><img src="./images/find.png"
132
+ height="16" width="16" alt="[+]"
133
+ title="show/hide quicksearch" /></span></h3>
134
+ <form action="#" method="get" accept-charset="utf-8" class="initially-hidden">
135
+ <fieldset>
136
+ <legend>Quicksearch</legend>
137
+ <input type="text" name="quicksearch" value=""
138
+ class="quicksearch-field" />
139
+ </fieldset>
140
+ </form>
141
+
142
+ <ul class="link-list">
143
+
144
+ <li><a href="./Porter2.html">Porter2</a></li>
145
+
146
+ <li><a href="./String.html">String</a></li>
147
+
148
+ </ul>
149
+ <div id="no-class-search-results" style="display: none;">No matching classes.</div>
150
+ </div>
151
+
152
+
153
+ </div>
154
+ </div>
155
+
156
+ <div id="documentation">
157
+ <h1 class="class">String</h1>
158
+
159
+ <div id="description">
160
+ <p>
161
+ Implementation of the Porter 2 stemmer. <a
162
+ href="String.html#method-i-porter2_stem">String#porter2_stem</a> is the
163
+ main stemming procedure.
164
+ </p>
165
+
166
+ </div>
167
+
168
+ <!-- Constants -->
169
+
170
+
171
+ <!-- Attributes -->
172
+
173
+
174
+ <!-- Methods -->
175
+
176
+ <div id="public-instance-method-details" class="method-section section">
177
+ <h3 class="section-header">Public Instance Methods</h3>
178
+
179
+
180
+ <div id="porter-ends-with-short-syllable--method" class="method-detail ">
181
+ <a name="method-i-porter2_ends_with_short_syllable%3F"></a>
182
+
183
+ <div class="method-heading">
184
+
185
+ <span class="method-name">porter2_ends_with_short_syllable?</span><span
186
+ class="method-args">()</span>
187
+ <span class="method-click-advice">click to toggle source</span>
188
+
189
+ </div>
190
+
191
+ <div class="method-description">
192
+
193
+ <p>
194
+ Returns true if the word ends with a short syllable
195
+ </p>
196
+
197
+
198
+
199
+ <div class="method-source-code"
200
+ id="porter-ends-with-short-syllable--source">
201
+ <pre>
202
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 59</span>
203
+ 59: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_ends_with_short_syllable?</span>
204
+ 60: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}$/</span> <span class="ruby-operator">?</span> <span class="ruby-keyword kw">true</span> <span class="ruby-operator">:</span> <span class="ruby-keyword kw">false</span>
205
+ 61: <span class="ruby-keyword kw">end</span></pre>
206
+ </div>
207
+
208
+ </div>
209
+
210
+
211
+
212
+
213
+ </div>
214
+
215
+
216
+ <div id="porter-is-short-word--method" class="method-detail ">
217
+ <a name="method-i-porter2_is_short_word%3F"></a>
218
+
219
+ <div class="method-heading">
220
+
221
+ <span class="method-name">porter2_is_short_word?</span><span
222
+ class="method-args">()</span>
223
+ <span class="method-click-advice">click to toggle source</span>
224
+
225
+ </div>
226
+
227
+ <div class="method-description">
228
+
229
+ <p>
230
+ A word is short if it ends in a short syllable, and R1 is null
231
+ </p>
232
+
233
+
234
+
235
+ <div class="method-source-code"
236
+ id="porter-is-short-word--source">
237
+ <pre>
238
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 65</span>
239
+ 65: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_is_short_word?</span>
240
+ 66: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_ends_with_short_syllable?</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>.<span class="ruby-identifier">empty?</span>
241
+ 67: <span class="ruby-keyword kw">end</span></pre>
242
+ </div>
243
+
244
+ </div>
245
+
246
+
247
+
248
+
249
+ </div>
250
+
251
+
252
+ <div id="porter-postprocess-method" class="method-detail ">
253
+ <a name="method-i-porter2_postprocess"></a>
254
+
255
+ <div class="method-heading">
256
+
257
+ <span class="method-name">porter2_postprocess</span><span
258
+ class="method-args">()</span>
259
+ <span class="method-click-advice">click to toggle source</span>
260
+
261
+ </div>
262
+
263
+ <div class="method-description">
264
+
265
+ <p>
266
+ Turn all Y letters into y
267
+ </p>
268
+
269
+
270
+
271
+ <div class="method-source-code"
272
+ id="porter-postprocess-source">
273
+ <pre>
274
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 261</span>
275
+ 261: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_postprocess</span>
276
+ 262: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">gsub</span>(<span class="ruby-regexp re">/Y/</span>, <span class="ruby-value str">'y'</span>)
277
+ 263: <span class="ruby-keyword kw">end</span></pre>
278
+ </div>
279
+
280
+ </div>
281
+
282
+
283
+
284
+
285
+ </div>
286
+
287
+
288
+ <div id="porter-preprocess-method" class="method-detail ">
289
+ <a name="method-i-porter2_preprocess"></a>
290
+
291
+ <div class="method-heading">
292
+
293
+ <span class="method-name">porter2_preprocess</span><span
294
+ class="method-args">()</span>
295
+ <span class="method-click-advice">click to toggle source</span>
296
+
297
+ </div>
298
+
299
+ <div class="method-description">
300
+
301
+ <p>
302
+ Preprocess the word. Remove any initial apostrophe (&#8217;), if present. Then, set
303
+ initial y, or y after a vowel, to Y
304
+ </p>
305
+ <p>
306
+ (The comment to &#8216;establish the regions R1 and R2&#8217; in the
307
+ original description is an implementation optimisation that identifies
308
+ where the regions start. As no modifications are made to the word that
309
+ affect those positions, you may want to cache them now. This implementation
310
+ doesn&#8217;t do that.)
311
+ </p>
312
+
313
+
314
+
315
+ <div class="method-source-code"
316
+ id="porter-preprocess-source">
317
+ <pre>
318
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 25</span>
319
+ 25: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_preprocess</span>
320
+ 26: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
321
+ 27:
322
+ 28: <span class="ruby-comment cmt"># remove any initial apostrophe</span>
323
+ 29: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^'*(.)/</span>, <span class="ruby-value str">'\1'</span>)
324
+ 30:
325
+ 31: <span class="ruby-comment cmt"># set initial y, or y after a vowel, to Y</span>
326
+ 32: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/^y/</span>, <span class="ruby-value str">&quot;Y&quot;</span>)
327
+ 33: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-node">/(#{Porter2::V})y/</span>, <span class="ruby-value str">'\1Y'</span>)
328
+ 34:
329
+ 35: <span class="ruby-identifier">w</span>
330
+ 36: <span class="ruby-keyword kw">end</span></pre>
331
+ </div>
332
+
333
+ </div>
334
+
335
+
336
+
337
+
338
+ </div>
339
+
340
+
341
+ <div id="porter-r1-method" class="method-detail ">
342
+ <a name="method-i-porter2_r1"></a>
343
+
344
+ <div class="method-heading">
345
+
346
+ <span class="method-name">porter2_r1</span><span
347
+ class="method-args">()</span>
348
+ <span class="method-click-advice">click to toggle source</span>
349
+
350
+ </div>
351
+
352
+ <div class="method-description">
353
+
354
+ <p>
355
+ R1 is the portion of the word after the first non-vowel after the first
356
+ vowel (with words beginning &#8216;gener-&#8217;, &#8216;commun-&#8217;,
357
+ and &#8216;arsen-&#8217; treated as special cases)
358
+ </p>
359
+
360
+
361
+
362
+ <div class="method-source-code"
363
+ id="porter-r1-source">
364
+ <pre>
365
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 41</span>
366
+ 41: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r1</span>
367
+ 42: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/^(gener|commun|arsen)(?&lt;r1&gt;.*)/</span>
368
+ 43: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>)
369
+ 44: <span class="ruby-keyword kw">else</span>
370
+ 45: <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r1&gt;.*)$/</span>
371
+ 46: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r1</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
372
+ 47: <span class="ruby-keyword kw">end</span>
373
+ 48: <span class="ruby-keyword kw">end</span></pre>
374
+ </div>
375
+
376
+ </div>
377
+
378
+
379
+
380
+
381
+ </div>
382
+
383
+
384
+ <div id="porter-r2-method" class="method-detail ">
385
+ <a name="method-i-porter2_r2"></a>
386
+
387
+ <div class="method-heading">
388
+
389
+ <span class="method-name">porter2_r2</span><span
390
+ class="method-args">()</span>
391
+ <span class="method-click-advice">click to toggle source</span>
392
+
393
+ </div>
394
+
395
+ <div class="method-description">
396
+
397
+ <p>
398
+ R2 is the portion of R1 (<a
399
+ href="String.html#method-i-porter2_r1">porter2_r1</a>) after the first
400
+ non-vowel after the first vowel
401
+ </p>
402
+
403
+
404
+
405
+ <div class="method-source-code"
406
+ id="porter-r2-source">
407
+ <pre>
408
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 52</span>
409
+ 52: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_r2</span>
410
+ 53: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}#{Porter2::C}(?&lt;r2&gt;.*)$/</span>
411
+ 54: <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">last_match</span>(<span class="ruby-value">:r2</span>) <span class="ruby-operator">||</span> <span class="ruby-value str">&quot;&quot;</span>
412
+ 55: <span class="ruby-keyword kw">end</span></pre>
413
+ </div>
414
+
415
+ </div>
416
+
417
+
418
+
419
+
420
+ </div>
421
+
422
+
423
+ <div id="porter-stem-method" class="method-detail ">
424
+ <a name="method-i-porter2_stem"></a>
425
+
426
+ <div class="method-heading">
427
+
428
+ <span class="method-name">porter2_stem</span><span
429
+ class="method-args">(gb_english = false)</span>
430
+ <span class="method-click-advice">click to toggle source</span>
431
+
432
+ </div>
433
+
434
+ <div class="method-description">
435
+
436
+ <p>
437
+ Perform the stemming procedure. If <tt>gb_english</tt> is true, treat
438
+ &#8217;-ise&#8217; and similar suffixes as &#8217;-ize&#8217; in American
439
+ English.
440
+ </p>
441
+
442
+
443
+
444
+ <div class="method-source-code"
445
+ id="porter-stem-source">
446
+ <pre>
447
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 269</span>
448
+ 269: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
449
+ 270: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
450
+ 271: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
451
+ 272:
452
+ 273: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
453
+ 274:
454
+ 275: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
455
+ 276: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
456
+ 277: <span class="ruby-keyword kw">else</span>
457
+ 278: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span>.<span class="ruby-identifier">porter2_step1a</span>
458
+ 279: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
459
+ 280: <span class="ruby-identifier">w1a</span>
460
+ 281: <span class="ruby-keyword kw">else</span>
461
+ 282: <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>).<span class="ruby-identifier">porter2_step5</span>.<span class="ruby-identifier">porter2_postprocess</span>
462
+ 283: <span class="ruby-keyword kw">end</span>
463
+ 284: <span class="ruby-keyword kw">end</span>
464
+ 285: <span class="ruby-keyword kw">end</span></pre>
465
+ </div>
466
+
467
+ </div>
468
+
469
+
470
+ <div class="aliases">
471
+ Also aliased as: <a href="String.html#method-i-stem">stem</a>
472
+ </div>
473
+
474
+
475
+
476
+ </div>
477
+
478
+
479
+ <div id="porter-stem-verbose-method" class="method-detail ">
480
+ <a name="method-i-porter2_stem_verbose"></a>
481
+
482
+ <div class="method-heading">
483
+
484
+ <span class="method-name">porter2_stem_verbose</span><span
485
+ class="method-args">(gb_english = false)</span>
486
+ <span class="method-click-advice">click to toggle source</span>
487
+
488
+ </div>
489
+
490
+ <div class="method-description">
491
+
492
+ <p>
493
+ A verbose version of <a
494
+ href="String.html#method-i-porter2_stem">porter2_stem</a> that prints the
495
+ output of each stage to STDOUT
496
+ </p>
497
+
498
+
499
+
500
+ <div class="method-source-code"
501
+ id="porter-stem-verbose-source">
502
+ <pre>
503
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 288</span>
504
+ 288: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_stem_verbose</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
505
+ 289: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_tidy</span>
506
+ 290: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preword: #{preword}&quot;</span>
507
+ 291: <span class="ruby-keyword kw">return</span> <span class="ruby-identifier">preword</span> <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">length</span> <span class="ruby-operator">&lt;=</span> <span class="ruby-value">2</span>
508
+ 292:
509
+ 293: <span class="ruby-identifier">word</span> = <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">porter2_preprocess</span>
510
+ 294: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Preprocessed: #{word}&quot;</span>
511
+ 295:
512
+ 296: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>.<span class="ruby-identifier">has_key?</span> <span class="ruby-identifier">word</span>
513
+ 297: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{word} as special case #{Porter2::SPECIAL_CASES[word]}&quot;</span>
514
+ 298: <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">SPECIAL_CASES</span>[<span class="ruby-identifier">word</span>]
515
+ 299: <span class="ruby-keyword kw">else</span>
516
+ 300: <span class="ruby-identifier">r1</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r1</span>
517
+ 301: <span class="ruby-identifier">r2</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_r2</span>
518
+ 302: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;R1 = #{r1}, R2 = #{r2}&quot;</span>
519
+ 303:
520
+ 304: <span class="ruby-identifier">w0</span> = <span class="ruby-identifier">word</span>.<span class="ruby-identifier">porter2_step0</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 0: #{w0} (R1 = #{w0.porter2_r1}, R2 = #{w0.porter2_r2})&quot;</span>
521
+ 305: <span class="ruby-identifier">w1a</span> = <span class="ruby-identifier">w0</span>.<span class="ruby-identifier">porter2_step1a</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1a: #{w1a} (R1 = #{w1a.porter2_r1}, R2 = #{w1a.porter2_r2})&quot;</span>
522
+ 306:
523
+ 307: <span class="ruby-keyword kw">if</span> <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_1A_SPECIAL_CASES</span>.<span class="ruby-identifier">include?</span> <span class="ruby-identifier">w1a</span>
524
+ 308: <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;Returning #{w1a} as 1a special case&quot;</span>
525
+ 309: <span class="ruby-identifier">w1a</span>
526
+ 310: <span class="ruby-keyword kw">else</span>
527
+ 311: <span class="ruby-identifier">w1b</span> = <span class="ruby-identifier">w1a</span>.<span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1b: #{w1b} (R1 = #{w1b.porter2_r1}, R2 = #{w1b.porter2_r2})&quot;</span>
528
+ 312: <span class="ruby-identifier">w1c</span> = <span class="ruby-identifier">w1b</span>.<span class="ruby-identifier">porter2_step1c</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 1c: #{w1c} (R1 = #{w1c.porter2_r1}, R2 = #{w1c.porter2_r2})&quot;</span>
529
+ 313: <span class="ruby-identifier">w2</span> = <span class="ruby-identifier">w1c</span>.<span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 2: #{w2} (R1 = #{w2.porter2_r1}, R2 = #{w2.porter2_r2})&quot;</span>
530
+ 314: <span class="ruby-identifier">w3</span> = <span class="ruby-identifier">w2</span>.<span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 3: #{w3} (R1 = #{w3.porter2_r1}, R2 = #{w3.porter2_r2})&quot;</span>
531
+ 315: <span class="ruby-identifier">w4</span> = <span class="ruby-identifier">w3</span>.<span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span>) ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 4: #{w4} (R1 = #{w4.porter2_r1}, R2 = #{w4.porter2_r2})&quot;</span>
532
+ 316: <span class="ruby-identifier">w5</span> = <span class="ruby-identifier">w4</span>.<span class="ruby-identifier">porter2_step5</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After step 5: #{w5}&quot;</span>
533
+ 317: <span class="ruby-identifier">wpost</span> = <span class="ruby-identifier">w5</span>.<span class="ruby-identifier">porter2_postprocess</span> ; <span class="ruby-identifier">puts</span> <span class="ruby-node">&quot;After postprocess: #{wpost}&quot;</span>
534
+ 318: <span class="ruby-identifier">wpost</span>
535
+ 319: <span class="ruby-keyword kw">end</span>
536
+ 320: <span class="ruby-keyword kw">end</span>
537
+ 321: <span class="ruby-keyword kw">end</span></pre>
538
+ </div>
539
+
540
+ </div>
541
+
542
+
543
+
544
+
545
+ </div>
546
+
547
+
548
+ <div id="porter-step--method" class="method-detail ">
549
+ <a name="method-i-porter2_step0"></a>
550
+
551
+ <div class="method-heading">
552
+
553
+ <span class="method-name">porter2_step0</span><span
554
+ class="method-args">()</span>
555
+ <span class="method-click-advice">click to toggle source</span>
556
+
557
+ </div>
558
+
559
+ <div class="method-description">
560
+
561
+ <p>
562
+ Search for the longest among the suffixes,
563
+ </p>
564
+ <ul>
565
+ <li><p>
566
+ &#8217;
567
+ </p>
568
+ </li>
569
+ <li><p>
570
+ &#8217;s
571
+ </p>
572
+ </li>
573
+ <li><p>
574
+ &#8217;s&#8217;
575
+ </p>
576
+ </li>
577
+ </ul>
578
+ <p>
579
+ and remove if found.
580
+ </p>
581
+
582
+
583
+
584
+ <div class="method-source-code"
585
+ id="porter-step--source">
586
+ <pre>
587
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 75</span>
588
+ 75: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step0</span>
589
+ 76: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(.)('s'|'s|')$/</span>, <span class="ruby-value str">'\1'</span>) <span class="ruby-operator">||</span> <span class="ruby-keyword kw">self</span>
590
+ 77: <span class="ruby-keyword kw">end</span></pre>
591
+ </div>
592
+
593
+ </div>
594
+
595
+
596
+
597
+
598
+ </div>
599
+
600
+
601
+ <div id="porter-step-a-method" class="method-detail ">
602
+ <a name="method-i-porter2_step1a"></a>
603
+
604
+ <div class="method-heading">
605
+
606
+ <span class="method-name">porter2_step1a</span><span
607
+ class="method-args">()</span>
608
+ <span class="method-click-advice">click to toggle source</span>
609
+
610
+ </div>
611
+
612
+ <div class="method-description">
613
+
614
+ <p>
615
+ Search for the longest among the following suffixes, and perform the action
616
+ indicated.
617
+ </p>
618
+ <table>
619
+ <tr><td valign="top">sses</td><td><p>
620
+ replace by ss
621
+ </p>
622
+ </td></tr>
623
+ <tr><td valign="top">ied, ies</td><td><p>
624
+ replace by i if preceded by more than one letter, otherwise by ie
625
+ </p>
626
+ </td></tr>
627
+ <tr><td valign="top">s</td><td><p>
628
+ delete if the preceding word part contains a vowel not immediately before
629
+ the s
630
+ </p>
631
+ </td></tr>
632
+ <tr><td valign="top">us, ss</td><td><p>
633
+ do nothing
634
+ </p>
635
+ </td></tr>
636
+ </table>
637
+
638
+
639
+
640
+ <div class="method-source-code"
641
+ id="porter-step-a-source">
642
+ <pre>
643
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 85</span>
644
+ 85: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1a</span>
645
+ 86: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/sses$/</span>
646
+ 87: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/sses$/</span>, <span class="ruby-value str">'ss'</span>)
647
+ 88: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/..(ied|ies)$/</span>
648
+ 89: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'i'</span>)
649
+ 90: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(ied|ies)$/</span>
650
+ 91: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(ied|ies)$/</span>, <span class="ruby-value str">'ie'</span>)
651
+ 92: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(us|ss)$/</span>
652
+ 93: <span class="ruby-keyword kw">self</span>
653
+ 94: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/s$/</span>
654
+ 95: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::V}.+)s$/</span>
655
+ 96: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/s$/</span>, <span class="ruby-value str">''</span>)
656
+ 97: <span class="ruby-keyword kw">else</span>
657
+ 98: <span class="ruby-keyword kw">self</span>
658
+ 99: <span class="ruby-keyword kw">end</span>
659
+ 100: <span class="ruby-keyword kw">else</span>
660
+ 101: <span class="ruby-keyword kw">self</span>
661
+ 102: <span class="ruby-keyword kw">end</span>
662
+ 103: <span class="ruby-keyword kw">end</span></pre>
663
+ </div>
664
+
665
+ </div>
666
+
667
+
668
+
669
+
670
+ </div>
671
+
672
+
673
+ <div id="porter-step-b-method" class="method-detail ">
674
+ <a name="method-i-porter2_step1b"></a>
675
+
676
+ <div class="method-heading">
677
+
678
+ <span class="method-name">porter2_step1b</span><span
679
+ class="method-args">(gb_english = false)</span>
680
+ <span class="method-click-advice">click to toggle source</span>
681
+
682
+ </div>
683
+
684
+ <div class="method-description">
685
+
686
+ <p>
687
+ Search for the longest among the following suffixes, and perform the action
688
+ indicated.
689
+ </p>
690
+ <table>
691
+ <tr><td valign="top">eed, eedly</td><td><p>
692
+ replace by ee if the suffix is also in R1
693
+ </p>
694
+ </td></tr>
695
+ <tr><td valign="top">ed, edly, ing, ingly</td><td><p>
696
+ delete if the preceding word part contains a vowel and, after the
697
+ deletion:
698
+ </p>
699
+ <ul>
700
+ <li><p>
701
+ if the word ends at, bl or iz: add e, or
702
+ </p>
703
+ </li>
704
+ </ul>
705
+ <ul>
706
+ <li><p>
707
+ if the word ends with a double: remove the last letter, or
708
+ </p>
709
+ </li>
710
+ </ul>
711
+ <ul>
712
+ <li><p>
713
+ if the word is short: add e
714
+ </p>
715
+ </li>
716
+ </ul>
717
+ </td></tr>
718
+ </table>
719
+ <p>
720
+ (If gb_english is <tt>true</tt>, treat the &#8216;is&#8217; suffix as
721
+ &#8216;iz&#8217; above.)
722
+ </p>
723
+
724
+
725
+
726
+ <div class="method-source-code"
727
+ id="porter-step-b-source">
728
+ <pre>
729
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 115</span>
730
+ 115: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1b</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
731
+ 116: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
732
+ 117: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(eed|eedly)$/</span>
733
+ 118: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(eed|eedly)$/</span>, <span class="ruby-value str">'ee'</span>)
734
+ 119: <span class="ruby-keyword kw">else</span>
735
+ 120: <span class="ruby-keyword kw">self</span>
736
+ 121: <span class="ruby-keyword kw">end</span>
737
+ 122: <span class="ruby-keyword kw">else</span>
738
+ 123: <span class="ruby-identifier">w</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">dup</span>
739
+ 124: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::V}.*(ed|edly|ing|ingly)$/</span>
740
+ 125: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">sub!</span>(<span class="ruby-regexp re">/(ed|edly|ing|ingly)$/</span>, <span class="ruby-value str">''</span>)
741
+ 126: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(at|lb|iz)$/</span>
742
+ 127: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
743
+ 128: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/is$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">gb_english</span>
744
+ 129: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
745
+ 130: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::Double}$/</span>
746
+ 131: <span class="ruby-identifier">w</span>.<span class="ruby-identifier">chop!</span>
747
+ 132: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">w</span>.<span class="ruby-identifier">porter2_is_short_word?</span>
748
+ 133: <span class="ruby-identifier">w</span> <span class="ruby-operator">+=</span> <span class="ruby-value str">'e'</span>
749
+ 134: <span class="ruby-keyword kw">end</span>
750
+ 135: <span class="ruby-keyword kw">end</span>
751
+ 136: <span class="ruby-identifier">w</span>
752
+ 137: <span class="ruby-keyword kw">end</span>
753
+ 138: <span class="ruby-keyword kw">end</span></pre>
754
+ </div>
755
+
756
+ </div>
757
+
758
+
759
+
760
+
761
+ </div>
762
+
763
+
764
+ <div id="porter-step-c-method" class="method-detail ">
765
+ <a name="method-i-porter2_step1c"></a>
766
+
767
+ <div class="method-heading">
768
+
769
+ <span class="method-name">porter2_step1c</span><span
770
+ class="method-args">()</span>
771
+ <span class="method-click-advice">click to toggle source</span>
772
+
773
+ </div>
774
+
775
+ <div class="method-description">
776
+
777
+ <p>
778
+ Replace a suffix of y or Y by i if it is preceded by a non-vowel which is
779
+ not the first letter of the word.
780
+ </p>
781
+
782
+
783
+
784
+ <div class="method-source-code"
785
+ id="porter-step-c-source">
786
+ <pre>
787
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 143</span>
788
+ 143: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step1c</span>
789
+ 144: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/.+#{Porter2::C}(y|Y)$/</span>
790
+ 145: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/(y|Y)$/</span>, <span class="ruby-value str">'i'</span>)
791
+ 146: <span class="ruby-keyword kw">else</span>
792
+ 147: <span class="ruby-keyword kw">self</span>
793
+ 148: <span class="ruby-keyword kw">end</span>
794
+ 149: <span class="ruby-keyword kw">end</span></pre>
795
+ </div>
796
+
797
+ </div>
798
+
799
+
800
+
801
+
802
+ </div>
803
+
804
+
805
+ <div id="porter2-step2-method" class="method-detail ">
806
+ <a name="method-i-porter2_step2"></a>
807
+
808
+ <div class="method-heading">
809
+
810
+ <span class="method-name">porter2_step2</span><span
811
+ class="method-args">(gb_english = false)</span>
812
+ <span class="method-click-advice">click to toggle source</span>
813
+
814
+ </div>
815
+
816
+ <div class="method-description">
817
+
818
+ <p>
819
+ Search for the longest among the suffixes listed in the keys of
820
+ Porter2::STEP_2_MAPS. If one is found and that suffix occurs in R1,
821
+ replace it with the value found in STEP_2_MAPS.
822
+ </p>
823
+ <p>
824
+ (Suffixes &#8216;ogi&#8217; and &#8216;li&#8217; are treated as special
825
+ cases in the procedure.)
826
+ </p>
827
+ <p>
828
+ (If gb_english is <tt>true</tt>, replace the &#8216;iser&#8217; and
829
+ &#8216;isation&#8217; suffixes with &#8216;ise&#8217;, similarly to how
830
+ &#8216;izer&#8217; and &#8216;ization&#8217; are treated.)
831
+ </p>
832
+
833
+
834
+
835
+ <div class="method-source-code"
836
+ id="porter2-step2-source">
837
+ <pre>
838
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 160</span>
839
+ 160: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step2</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
840
+ 161: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
841
+ 162: <span class="ruby-identifier">s2m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_2_MAPS</span>.<span class="ruby-identifier">dup</span>
842
+ 163: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
843
+ 164: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;iser&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
844
+ 165: <span class="ruby-identifier">s2m</span>[<span class="ruby-value str">&quot;isation&quot;</span>] = <span class="ruby-value str">&quot;ise&quot;</span>
845
+ 166: <span class="ruby-keyword kw">end</span>
846
+ 167: <span class="ruby-identifier">step_2_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s2m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
847
+ 168: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_2_re</span>
848
+ 169: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
849
+ 170: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s2m</span>[<span class="ruby-node">$&amp;</span>])
850
+ 171: <span class="ruby-keyword kw">else</span>
851
+ 172: <span class="ruby-keyword kw">self</span>
852
+ 173: <span class="ruby-keyword kw">end</span>
853
+ 174: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/li$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/(#{Porter2::Valid_LI})li$/</span>
854
+ 175: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/li$/</span>, <span class="ruby-value str">''</span>)
855
+ 176: <span class="ruby-keyword kw">elsif</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ogi$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/logi$/</span>
856
+ 177: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ogi$/</span>, <span class="ruby-value str">'og'</span>)
857
+ 178: <span class="ruby-keyword kw">else</span>
858
+ 179: <span class="ruby-keyword kw">self</span>
859
+ 180: <span class="ruby-keyword kw">end</span>
860
+ 181: <span class="ruby-keyword kw">end</span></pre>
861
+ </div>
862
+
863
+ </div>
864
+
865
+
866
+
867
+
868
+ </div>
869
+
870
+
871
+ <div id="porter2-step3-method" class="method-detail ">
872
+ <a name="method-i-porter2_step3"></a>
873
+
874
+ <div class="method-heading">
875
+
876
+ <span class="method-name">porter2_step3</span><span
877
+ class="method-args">(gb_english = false)</span>
878
+ <span class="method-click-advice">click to toggle source</span>
879
+
880
+ </div>
881
+
882
+ <div class="method-description">
883
+
884
+ <p>
885
+ Search for the longest among the suffixes listed in the keys of
886
+ Porter2::STEP_3_MAPS. If one is found and that suffix occurs in R1,
887
+ replace it with the value found in STEP_3_MAPS.
888
+ </p>
889
+ <p>
890
+ (Suffix &#8216;ative&#8217; is treated as a special case in the procedure.)
891
+ </p>
892
+ <p>
893
+ (If gb_english is <tt>true</tt>, replace the &#8216;alise&#8217; suffix
894
+ with &#8216;al&#8217;, similarly to how &#8216;alize&#8217; is treated.)
895
+ </p>
896
+
897
+
898
+
899
+ <div class="method-source-code"
900
+ id="porter2-step3-source">
901
+ <pre>
902
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 192</span>
903
+ 192: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step3</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
904
+ 193: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ative$/</span>
905
+ 194: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ative$/</span>, <span class="ruby-value str">''</span>)
906
+ 195: <span class="ruby-keyword kw">else</span>
907
+ 196: <span class="ruby-identifier">s3m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_3_MAPS</span>.<span class="ruby-identifier">dup</span>
908
+ 197: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
909
+ 198: <span class="ruby-identifier">s3m</span>[<span class="ruby-value str">&quot;alise&quot;</span>] = <span class="ruby-value str">&quot;al&quot;</span>
910
+ 199: <span class="ruby-keyword kw">end</span>
911
+ 200: <span class="ruby-identifier">step_3_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s3m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
912
+ 201: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
913
+ 202: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_3_re</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}$/</span>
914
+ 203: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s3m</span>[<span class="ruby-node">$&amp;</span>])
915
+ 204: <span class="ruby-keyword kw">else</span>
916
+ 205: <span class="ruby-keyword kw">self</span>
917
+ 206: <span class="ruby-keyword kw">end</span>
918
+ 207: <span class="ruby-keyword kw">end</span>
919
+ 208: <span class="ruby-keyword kw">end</span></pre>
920
+ </div>
921
+
922
+ </div>
923
+
924
+
925
+
926
+
927
+ </div>
928
+
929
+
930
+ <div id="porter2-step4-method" class="method-detail ">
931
+ <a name="method-i-porter2_step4"></a>
932
+
933
+ <div class="method-heading">
934
+
935
+ <span class="method-name">porter2_step4</span><span
936
+ class="method-args">(gb_english = false)</span>
937
+ <span class="method-click-advice">click to toggle source</span>
938
+
939
+ </div>
940
+
941
+ <div class="method-description">
942
+
943
+ <p>
944
+ Search for the longest among the suffixes listed in the keys of
945
+ Porter2::STEP_4_MAPS. If one is found and that suffix occurs in R2,
946
+ replace it with the value found in STEP_4_MAPS.
947
+ </p>
948
+ <p>
949
+ (Suffix &#8216;ion&#8217; is treated as a special case in the procedure.)
950
+ </p>
951
+ <p>
952
+ (If gb_english is <tt>true</tt>, delete the &#8216;ise&#8217; suffix if
953
+ found.)
954
+ </p>
955
+
956
+
957
+
958
+ <div class="method-source-code"
959
+ id="porter2-step4-source">
960
+ <pre>
961
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 218</span>
962
+ 218: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step4</span>(<span class="ruby-identifier">gb_english</span> = <span class="ruby-keyword kw">false</span>)
963
+ 219: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ion$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/(s|t)ion$/</span>
964
+ 220: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ion$/</span>, <span class="ruby-value str">''</span>)
965
+ 221: <span class="ruby-keyword kw">else</span>
966
+ 222: <span class="ruby-identifier">s4m</span> = <span class="ruby-constant">Porter2</span><span class="ruby-operator">::</span><span class="ruby-constant">STEP_4_MAPS</span>.<span class="ruby-identifier">dup</span>
967
+ 223: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">gb_english</span>
968
+ 224: <span class="ruby-identifier">s4m</span>[<span class="ruby-value str">&quot;ise&quot;</span>] = <span class="ruby-value str">&quot;&quot;</span>
969
+ 225: <span class="ruby-keyword kw">end</span>
970
+ 226: <span class="ruby-identifier">step_4_re</span> = <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">union</span>(<span class="ruby-identifier">s4m</span>.<span class="ruby-identifier">keys</span>.<span class="ruby-identifier">map</span> {<span class="ruby-operator">|</span><span class="ruby-identifier">r</span><span class="ruby-operator">|</span> <span class="ruby-constant">Regexp</span>.<span class="ruby-identifier">new</span>(<span class="ruby-identifier">r</span> <span class="ruby-operator">+</span> <span class="ruby-value str">&quot;$&quot;</span>)})
971
+ 227: <span class="ruby-identifier">r2</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span>
972
+ 228: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-identifier">step_4_re</span>
973
+ 229: <span class="ruby-keyword kw">if</span> <span class="ruby-identifier">r2</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{$&amp;}/</span>
974
+ 230: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-node">/#{$&amp;}$/</span>, <span class="ruby-identifier">s4m</span>[<span class="ruby-node">$&amp;</span>])
975
+ 231: <span class="ruby-keyword kw">else</span>
976
+ 232: <span class="ruby-keyword kw">self</span>
977
+ 233: <span class="ruby-keyword kw">end</span>
978
+ 234: <span class="ruby-keyword kw">else</span>
979
+ 235: <span class="ruby-keyword kw">self</span>
980
+ 236: <span class="ruby-keyword kw">end</span>
981
+ 237: <span class="ruby-keyword kw">end</span>
982
+ 238: <span class="ruby-keyword kw">end</span></pre>
983
+ </div>
984
+
985
+ </div>
986
+
987
+
988
+
989
+
990
+ </div>
991
+
992
+
993
+ <div id="porter2-step5-method" class="method-detail ">
994
+ <a name="method-i-porter2_step5"></a>
995
+
996
+ <div class="method-heading">
997
+
998
+ <span class="method-name">porter2_step5</span><span
999
+ class="method-args">()</span>
1000
+ <span class="method-click-advice">click to toggle source</span>
1001
+
1002
+ </div>
1003
+
1004
+ <div class="method-description">
1005
+
1006
+ <p>
1007
+ Search for the following suffixes, and, if found, perform the action
1008
+ indicated.
1009
+ </p>
1010
+ <table>
1011
+ <tr><td valign="top">e</td><td><p>
1012
+ delete if in R2, or in R1 and not preceded by a short syllable
1013
+ </p>
1014
+ </td></tr>
1015
+ <tr><td valign="top">l</td><td><p>
1016
+ delete if in R2 and preceded by l
1017
+ </p>
1018
+ </td></tr>
1019
+ </table>
1020
+
1021
+
1022
+
1023
+ <div class="method-source-code"
1024
+ id="porter2-step5-source">
1025
+ <pre>
1026
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 244</span>
1027
+ 244: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_step5</span>
1028
+ 245: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/ll$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/l$/</span>
1029
+ 246: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/ll$/</span>, <span class="ruby-value str">'l'</span>)
1030
+ 247: <span class="ruby-keyword kw">elsif</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r2</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span>
1031
+ 248: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
1032
+ 249: <span class="ruby-keyword kw">else</span>
1033
+ 250: <span class="ruby-identifier">r1</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">porter2_r1</span>
1034
+ 251: <span class="ruby-keyword kw">if</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-identifier">r1</span> <span class="ruby-operator">=~</span> <span class="ruby-regexp re">/e$/</span> <span class="ruby-keyword kw">and</span> <span class="ruby-keyword kw">not</span> <span class="ruby-keyword kw">self</span> <span class="ruby-operator">=~</span> <span class="ruby-node">/#{Porter2::SHORT_SYLLABLE}e$/</span>
1035
+ 252: <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">sub</span>(<span class="ruby-regexp re">/e$/</span>, <span class="ruby-value str">''</span>)
1036
+ 253: <span class="ruby-keyword kw">else</span>
1037
+ 254: <span class="ruby-keyword kw">self</span>
1038
+ 255: <span class="ruby-keyword kw">end</span>
1039
+ 256: <span class="ruby-keyword kw">end</span>
1040
+ 257: <span class="ruby-keyword kw">end</span></pre>
1041
+ </div>
1042
+
1043
+ </div>
1044
+
1045
+
1046
+
1047
+
1048
+ </div>
1049
+
1050
+
1051
+ <div id="porter-tidy-method" class="method-detail ">
1052
+ <a name="method-i-porter2_tidy"></a>
1053
+
1054
+ <div class="method-heading">
1055
+
1056
+ <span class="method-name">porter2_tidy</span><span
1057
+ class="method-args">()</span>
1058
+ <span class="method-click-advice">click to toggle source</span>
1059
+
1060
+ </div>
1061
+
1062
+ <div class="method-description">
1063
+
1064
+ <p>
1065
+ Tidy up the word before we get down to the algorithm
1066
+ </p>
1067
+
1068
+
1069
+
1070
+ <div class="method-source-code"
1071
+ id="porter-tidy-source">
1072
+ <pre>
1073
+ <span class="ruby-comment cmt"># File lib/porter2stemmer/implementation.rb, line 7</span>
1074
+ 7: <span class="ruby-keyword kw">def</span> <span class="ruby-identifier">porter2_tidy</span>
1075
+ 8: <span class="ruby-identifier">preword</span> = <span class="ruby-keyword kw">self</span>.<span class="ruby-identifier">to_s</span>.<span class="ruby-identifier">strip</span>.<span class="ruby-identifier">downcase</span>
1076
+ 9:
1077
+ 10: <span class="ruby-comment cmt"># map apostrophe-like characters to apostrophes</span>
1078
+ 11: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/‘/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
1079
+ 12: <span class="ruby-identifier">preword</span>.<span class="ruby-identifier">gsub!</span>(<span class="ruby-regexp re">/’/</span>, <span class="ruby-value str">&quot;'&quot;</span>)
1080
+ 13:
1081
+ 14: <span class="ruby-identifier">preword</span>
1082
+ 15: <span class="ruby-keyword kw">end</span></pre>
1083
+ </div>
1084
+
1085
+ </div>
1086
+
1087
+
1088
+
1089
+
1090
+ </div>
1091
+
1092
+
1093
+ <div id="stem-method" class="method-detail method-alias">
1094
+ <a name="method-i-stem"></a>
1095
+
1096
+ <div class="method-heading">
1097
+
1098
+ <span class="method-name">stem</span><span
1099
+ class="method-args">(gb_english = false)</span>
1100
+ <span class="method-click-advice">click to toggle source</span>
1101
+
1102
+ </div>
1103
+
1104
+ <div class="method-description">
1105
+
1106
+
1107
+
1108
+
1109
+
1110
+ </div>
1111
+
1112
+
1113
+
1114
+
1115
+ <div class="aliases">
1116
+ Alias for: <a href="String.html#method-i-porter2_stem">porter2_stem</a>
1117
+ </div>
1118
+
1119
+ </div>
1120
+
1121
+
1122
+ </div>
1123
+
1124
+
1125
+ </div>
1126
+
1127
+
1128
+ <div id="rdoc-debugging-section-dump" class="debugging-section">
1129
+
1130
+ <p>Disabled; run with --debug to generate this.</p>
1131
+
1132
+ </div>
1133
+
1134
+ <div id="validator-badges">
1135
+ <p><small><a href="http://validator.w3.org/check/referer">[Validate]</a></small></p>
1136
+ <p><small>Generated with the <a href="http://deveiate.org/projects/Darkfish-Rdoc/">Darkfish
1137
+ Rdoc Generator</a> 1.1.6</small>.</p>
1138
+ </div>
1139
+
1140
+ </body>
1141
+ </html>
1142
+