indonesian_stemmer 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,3 @@
1
# Top-level namespace of the indonesian_stemmer gem.
module IndonesianStemmer
  # Release version of the gem. Frozen so the shared constant cannot be
  # mutated in place by code that reads it.
  VERSION = "0.0.1".freeze
end
@@ -0,0 +1,145 @@
1
+ require 'spec_helper'
2
+
3
# End-to-end stemming expectations, grouped by the kind of affix being removed.
#
# The table below drives the example groups: every top-level entry becomes a
# `describe` for an affix family, every nested entry becomes a `describe` for
# one affix, and every [inflected word, expected stem] pair becomes a one-line
# example calling the `should_stem` helper (not defined in this file;
# presumably provided by spec_helper).
describe IndonesianStemmer do
  [
    ["covering the inflectional particles", [
      ["'kah'", [%w(bukukah buku)]],
      ["'lah'", [%w(adalah ada)]],
      ["'pun'", [%w(bagaimanapun bagaimana)]]
    ]],

    ["covering the inflectional possessive pronouns", [
      ["'ku'",  [%w(bukuku buku)]],
      ["'mu'",  [%w(rumahmu rumah)]],
      ["'nya'", [%w(cintanya cinta)]]
    ]],

    ["covering the first order of derivational prefixes", [
      ["'meng'", [%w(mengukur ukur)]],
      ["'meny'", [%w(menyapu sapu)]],
      ["'men'",  [%w(menduga duga), %w(menuduh tuduh)]],
      ["'mem' followed by 'p'", [%w(memilih pilih), %w(memilah pilah), %w(memuji puji)]],
      ["'mem'",  [%w(membaca baca), %w(membantu bantu)]],
      ["'me'",   [%w(merusak rusak), %w(melayang layang)]],
      ["'peng'", [%w(pengukur ukur)]],
      ["'peny'", [%w(penyalin salin)]],
      ["'pen'",  [%w(penasehat nasehat), %w(penarik tarik)]],
      ["'pem' followed by 'p'", [%w(pemilih pilih), %w(pemilah pilah), %w(pemuji puji)]],
      ["'pem'",  [%w(pembaca baca)]],
      ["'di'",   [%w(diukur ukur), %w(dilihat lihat)]],
      ["'ter'",  [%w(terindah indah), %w(terhebat hebat), %w(terukur ukur), %w(tersapu sapu)]],
      ["'ke'",   [%w(kekasih kasih)]]
    ]],

    ["covering the second order of derivational prefixes", [
      ["'ber'", [%w(berlari lari)]],
      ["'bel'", [%w(belajar ajar)]],
      ["'be'",  [%w(bekerja kerja)]],
      ["'per'", [%w(perjelas jelas)]],
      ["'pel'", [%w(pelajar ajar)]],
      ["'pe'",  [%w(pekerja kerja), %w(pelari lari)]]
    ]],

    ["covering the derivational suffixes", [
      ["'kan'", [%w(tarikkan tarik), %w(ambilkan ambil)]],
      ["'an'",  [%w(makanan makan), %w(sarapan sarap)]],
      ["'i'",   [%w(ajari ajar), %w(cermati cermat)]]
    ]]
  ].each do |family_title, affix_cases|
    describe family_title do
      affix_cases.each do |affix_title, word_pairs|
        describe affix_title do
          word_pairs.each do |inflected, expected_stem|
            it { should_stem inflected, expected_stem }
          end
        end
      end
    end
  end
end
@@ -0,0 +1,630 @@
1
+ require 'spec_helper'
2
+
3
# Specs for the individual morphological steps: syllable counting, particle /
# possessive-pronoun removal, first- and second-order prefix removal, and
# suffix removal.
#
# The helpers used throughout (should_transform, should_not_transform,
# should_set_flags_to, should_not_set_flags, unset_flags) are not defined in
# this file; presumably they come from spec_helper.
describe IndonesianStemmer::MorphologicalUtility do
  describe '#total_syllables' do
    it "'memasak' should return 3" do
      IndonesianStemmer.total_syllables('memasak').should == 3
    end

    # The description states the same count the assertion checks.
    it "'mewarnai' should return 4" do
      IndonesianStemmer.total_syllables('mewarnai').should == 4
    end

    it "'permainan' should return 4" do
      IndonesianStemmer.total_syllables('permainan').should == 4
    end
  end

  describe '#remove_particle' do
    describe 'should remove these particles at the end of the word' do
      it "'kah'" do
        should_transform(:remove_particle, 'manakah', 'mana')
      end

      it "'lah'" do
        should_transform(:remove_particle, 'kembalilah', 'kembali')
      end

      it "'pun'" do
        should_transform(:remove_particle, 'bagaimanapun', 'bagaimana')
      end
    end

    describe 'should not remove these particles at the rest part of the word' do
      it "'kah'" do
        should_not_transform(:remove_particle, 'kahak')
        should_not_transform(:remove_particle, 'pernikahan')
      end

      it "'lah'" do
        should_not_transform(:remove_particle, 'lahiriah')
        should_not_transform(:remove_particle, 'kelahiran')
      end

      it "'pun'" do
        should_not_transform(:remove_particle, 'punya')
        should_not_transform(:remove_particle, 'kepunyaan')
      end
    end

    describe 'should not set any flags regardless the character position' do
      it "'kah'" do
        should_not_set_flags :remove_particle, 'manakah'
        should_not_set_flags :remove_particle, 'kahak'
        should_not_set_flags :remove_particle, 'pernikahan'
      end

      it "'lah'" do
        should_not_set_flags :remove_particle, 'kembalilah'
        should_not_set_flags :remove_particle, 'lahiriah'
        should_not_set_flags :remove_particle, 'kelahiran'
      end

      it "'pun'" do
        should_not_set_flags :remove_particle, 'bagaimanapun'
        should_not_set_flags :remove_particle, 'punya'
        should_not_set_flags :remove_particle, 'kepunyaan'
      end
    end
  end

  describe '#remove_possessive_pronoun' do
    describe 'should remove these possessive pronouns at the end of the word' do
      it "'ku'" do
        should_transform(:remove_possessive_pronoun, 'mainanku', 'mainan')
      end

      it "'mu'" do
        should_transform(:remove_possessive_pronoun, 'mobilmu', 'mobil')
      end

      it "'nya'" do
        should_transform(:remove_possessive_pronoun, 'gelasnya', 'gelas')
      end
    end

    describe 'should not remove these possessive pronouns at the rest part of the word' do
      it "'ku'" do
        should_not_transform(:remove_possessive_pronoun, 'kumakan')
        should_not_transform(:remove_possessive_pronoun, 'kekurangan')
      end

      it "'mu'" do
        should_not_transform(:remove_possessive_pronoun, 'murahan')
        should_not_transform(:remove_possessive_pronoun, 'kemurkaan')
      end

      it "'nya'" do
        should_not_transform(:remove_possessive_pronoun, 'nyapu')
        should_not_transform(:remove_possessive_pronoun, 'menyambung')
      end
    end

    describe 'should not set any flags regardless the characters position' do
      # Example names quote the affix, matching every other group in this spec.
      it "'ku'" do
        should_not_set_flags :remove_possessive_pronoun, 'mainanku'
        should_not_set_flags :remove_possessive_pronoun, 'kumakan'
        should_not_set_flags :remove_possessive_pronoun, 'kekurangan'
      end

      it "'mu'" do
        should_not_set_flags :remove_possessive_pronoun, 'mobilmu'
        should_not_set_flags :remove_possessive_pronoun, 'murahan'
        should_not_set_flags :remove_possessive_pronoun, 'kemurkaan'
      end

      it "'nya'" do
        should_not_set_flags :remove_possessive_pronoun, 'gelasnya'
        should_not_set_flags :remove_possessive_pronoun, 'nyapu'
        should_not_set_flags :remove_possessive_pronoun, 'menyambung'
      end
    end
  end

  describe '#remove_first_order_prefix' do
    describe "words with these special characters" do
      describe "at the beginning" do
        describe "followed by a vowel, should remove and substitute the last character" do
          it "'meny'" do
            should_transform(:remove_first_order_prefix, 'menyambung', 'sambung')
          end

          it "'peny'" do
            should_transform(:remove_first_order_prefix, 'penyantap', 'santap')
          end

          it "'pen'" do
            should_transform(:remove_first_order_prefix, 'penata', 'tata')
          end
        end

        describe "followed by consonant, should only remove the special characters" do
          it "'meny'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'menyxxx', 'xxx')
          end

          it "'peny'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'penyxxx', 'xxx')
          end

          it "'pen'" do
            should_transform(:remove_first_order_prefix, 'penjahat', 'jahat')
          end
        end

        describe "should set the flags correctly regardless vowel or consonant" do
          before do
            unset_flags
          end

          # Each example checks two words, so the flags are reset in between.
          it "'meny' should set the flags to REMOVED_MENG" do
            constant = 'REMOVED_MENG'
            should_set_flags_to :remove_first_order_prefix, 'menyambung', constant
            unset_flags
            should_set_flags_to :remove_first_order_prefix, 'menyxxx', constant
          end

          it "'peny' should set the flags to REMOVED_PENG" do
            constant = 'REMOVED_PENG'
            should_set_flags_to :remove_first_order_prefix, 'penyantap', constant
            unset_flags
            should_set_flags_to :remove_first_order_prefix, 'penyxxx', constant
          end

          it "'pen' should set the flags to REMOVED_PENG" do
            constant = 'REMOVED_PENG'
            should_set_flags_to :remove_first_order_prefix, 'penata', constant
            unset_flags
            should_set_flags_to :remove_first_order_prefix, 'penjahat', constant
          end
        end
      end

      describe "at the rest part of the word" do
        describe "followed by a vowel, should not do anything" do
          it "'meny'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'xxxmenyaxx', 'xxxmenyaxx')
            should_transform(:remove_first_order_prefix, 'xxxmenya', 'xxxmenya')
          end

          it "'peny'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'xxxpenyaxx', 'xxxpenyaxx')
            should_transform(:remove_first_order_prefix, 'xxxpenya', 'xxxpenya')
          end

          it "'pen'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'xxxpenexx', 'xxxpenexx')
            should_transform(:remove_first_order_prefix, 'xxxpeno', 'xxxpeno')
          end
        end

        describe "followed by consonant, should not do anything" do
          it "'meny'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'xxxmenykxx', 'xxxmenykxx')
            should_transform(:remove_first_order_prefix, 'xxxmenyk', 'xxxmenyk')
          end

          it "'peny'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'xxxpenykxx', 'xxxpenykxx')
            should_transform(:remove_first_order_prefix, 'xxxpenyk', 'xxxpenyk')
          end

          it "'pen'" do
            # TODO: Find a real indonesian word for this case
            should_transform(:remove_first_order_prefix, 'xxxpenrxx', 'xxxpenrxx')
            should_transform(:remove_first_order_prefix, 'xxxpenr', 'xxxpenr')
          end
        end

        describe "should not set any flags regardless the characters position" do
          it "'meny'" do
            # TODO: Find a real indonesian word for this case
            %w( xxxmenyaxx xxxmenya xxxmenykxx xxxmenyk ).each do |character|
              should_not_set_flags :remove_first_order_prefix, character
            end
          end

          it "'peny'" do
            # TODO: Find a real indonesian word for this case
            %w( xxxpenyaxx xxxpenya xxxpenykxx xxxpenyk ).each do |character|
              should_not_set_flags :remove_first_order_prefix, character
            end
          end

          it "'pen'" do
            # TODO: Find a real indonesian word for this case
            %w( xxxpenexx xxxpeno xxxpenrxx xxxpenr ).each do |character|
              should_not_set_flags :remove_first_order_prefix, character
            end
          end
        end
      end
    end

    describe "words with first order prefix characters" do
      describe "at the beginning," do
        describe "should remove these characters" do
          it "'meng'" do
            should_transform(:remove_first_order_prefix, 'menggambar', 'gambar')
          end

          it "'men'" do
            should_transform(:remove_first_order_prefix, 'mendaftar', 'daftar')
          end

          it "'mem'" do
            should_transform(:remove_first_order_prefix, 'membangun', 'bangun')
          end

          it "'me'" do
            should_transform(:remove_first_order_prefix, 'melukis', 'lukis')
          end

          it "'peng'" do
            should_transform(:remove_first_order_prefix, 'penggaris', 'garis')
          end

          it "'pem'" do
            should_transform(:remove_first_order_prefix, 'pembajak', 'bajak')
          end

          it "'di'" do
            should_transform(:remove_first_order_prefix, 'disayang', 'sayang')
          end

          it "'ter'" do
            should_transform(:remove_first_order_prefix, 'terucap', 'ucap')
          end

          it "'ke'" do
            should_transform(:remove_first_order_prefix, 'kemakan', 'makan')
          end
        end

        describe "should set the flags correctly" do
          before do
            unset_flags
          end

          describe "to REMOVED_MENG on these characters" do
            before do
              @constant = 'REMOVED_MENG'
            end

            it "'meng'" do
              should_set_flags_to :remove_first_order_prefix, 'menggambar', @constant
            end

            it "'men'" do
              should_set_flags_to :remove_first_order_prefix, 'mendaftar', @constant
            end

            it "'mem'" do
              should_set_flags_to :remove_first_order_prefix, 'membangun', @constant
            end

            it "'me'" do
              should_set_flags_to :remove_first_order_prefix, 'melukis', @constant
            end
          end

          describe "to REMOVED_PENG on these characters" do
            before do
              @constant = 'REMOVED_PENG'
            end

            it "'peng'" do
              should_set_flags_to :remove_first_order_prefix, 'penggaris', @constant
            end

            it "'pem'" do
              should_set_flags_to :remove_first_order_prefix, 'pembajak', @constant
            end
          end

          describe "to their respective constants on these characters" do
            it "'di'" do
              should_set_flags_to :remove_first_order_prefix, 'disayang', 'REMOVED_DI'
            end

            it "'ter'" do
              should_set_flags_to :remove_first_order_prefix, 'terucap', 'REMOVED_TER'
            end

            it "'ke'" do
              should_set_flags_to :remove_first_order_prefix, 'kemakan', 'REMOVED_KE'
            end
          end
        end
      end

      describe "at the rest part of the word," do
        describe "should not remove these characters" do
          it "'meng'" do
            should_transform(:remove_first_order_prefix, 'xxxmengxex', 'xxxmengxex')
            should_transform(:remove_first_order_prefix, 'xexmeng', 'xexmeng')
          end

          it "'men'" do
            should_transform(:remove_first_order_prefix, 'xxxmenxxx', 'xxxmenxxx')
            should_transform(:remove_first_order_prefix, 'xxxmen', 'xxxmen')
          end

          it "'mem'" do
            should_transform(:remove_first_order_prefix, 'xxxmemxxx', 'xxxmemxxx')
            should_transform(:remove_first_order_prefix, 'xxxmem', 'xxxmem')
          end

          it "'me'" do
            should_transform(:remove_first_order_prefix, 'xxxmexxx', 'xxxmexxx')
            should_transform(:remove_first_order_prefix, 'xxxme', 'xxxme')
          end

          it "'peng'" do
            should_transform(:remove_first_order_prefix, 'xxxpengxxx', 'xxxpengxxx')
            should_transform(:remove_first_order_prefix, 'xxxpeng', 'xxxpeng')
          end

          it "'pem'" do
            should_transform(:remove_first_order_prefix, 'xxxpemxxx', 'xxxpemxxx')
            should_transform(:remove_first_order_prefix, 'xxxpem', 'xxxpem')
          end

          it "'di'" do
            should_transform(:remove_first_order_prefix, 'xxxdixxx', 'xxxdixxx')
            should_transform(:remove_first_order_prefix, 'xxxdi', 'xxxdi')
          end

          it "'ter'" do
            should_transform(:remove_first_order_prefix, 'xxxterxxx', 'xxxterxxx')
            should_transform(:remove_first_order_prefix, 'xxxter', 'xxxter')
          end

          it "'ke'" do
            should_transform(:remove_first_order_prefix, 'xxxkexxx', 'xxxkexxx')
            should_transform(:remove_first_order_prefix, 'xxxke', 'xxxke')
          end
        end

        describe "should not set any flags" do
          it "'meng'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxmengxex'
            should_not_set_flags :remove_first_order_prefix, 'xexmeng'
          end

          it "'men'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxmenxxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxmen'
          end

          it "'mem'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxmemxxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxmem'
          end

          it "'me'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxmexxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxme'
          end

          it "'peng'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxpengxxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxpeng'
          end

          it "'pem'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxpemxxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxpem'
          end

          it "'di'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxdixxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxdi'
          end

          it "'ter'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxterxxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxter'
          end

          it "'ke'" do
            should_not_set_flags :remove_first_order_prefix, 'xxxkexxx'
            should_not_set_flags :remove_first_order_prefix, 'xxxke'
          end
        end
      end
    end
  end

  describe '#remove_second_order_prefix' do
    describe "regarding these irregular words" do
      describe "'belajar'" do
        before do
          @word = 'belajar'
        end

        it 'should be handled correctly' do
          should_transform :remove_second_order_prefix, @word, 'ajar'
        end

        it 'should not set any flags' do
          should_not_set_flags :remove_second_order_prefix, @word
        end
      end

      describe "'belunjur'" do
        before do
          @word = 'belunjur'
        end

        it 'should be handled correctly' do
          should_transform(:remove_second_order_prefix, @word, 'unjur')
        end

        it 'should not set any flags' do
          should_not_set_flags :remove_second_order_prefix, @word
        end
      end

      describe "'pelajar'" do
        before do
          @word = 'pelajar'
        end

        it 'should be handled correctly' do
          should_transform(:remove_second_order_prefix, @word, 'ajar')
        end

        it 'should not set any flags' do
          should_not_set_flags :remove_second_order_prefix, @word
        end
      end
    end

    describe "regarding words starting with 'be*er' where * isn't a vowel & the length > 4" do
      # TODO: Find other word(s) matching this rule
      describe "'beserta'" do
        before do
          @word = 'beserta'
          unset_flags
        end

        it "should be handled correctly" do
          should_transform :remove_second_order_prefix, @word, 'serta'
        end

        it "should set the flags to REMOVED_BER" do
          should_set_flags_to :remove_second_order_prefix, @word, 'REMOVED_BER'
        end
      end
    end

    describe "words with second order prefix characters" do
      describe "at the beginning," do
        describe "should remove these characters" do
          it "'ber'" do
            should_transform(:remove_second_order_prefix, 'bercerita', 'cerita')
          end

          it "'per'" do
            should_transform(:remove_second_order_prefix, 'perjelas', 'jelas')
          end

          it "'pe'" do
            should_transform(:remove_second_order_prefix, 'pesuruh', 'suruh')
          end
        end

        describe "should set the flags correctly" do
          before do
            unset_flags
          end

          # Descriptions name the full constant each example asserts.
          it "'ber' should set to REMOVED_BER" do
            should_set_flags_to :remove_second_order_prefix, 'bercerita', 'REMOVED_BER'
          end

          it "'per' should not set any flags" do
            should_not_set_flags :remove_second_order_prefix, 'perjelas'
          end

          it "'pe' should set to REMOVED_PE" do
            should_set_flags_to :remove_second_order_prefix, 'pesuruh', 'REMOVED_PE'
          end
        end
      end

      describe "at the rest part of the word," do
        describe "should not remove these characters" do
          it "'ber'" do
            should_not_transform(:remove_second_order_prefix, 'xxxberxxx')
            should_not_transform(:remove_second_order_prefix, 'xxxber')
          end

          it "'per'" do
            should_not_transform(:remove_second_order_prefix, 'xxxperxxx')
            should_not_transform(:remove_second_order_prefix, 'xxxper')
          end

          it "'pe'" do
            should_not_transform(:remove_second_order_prefix, 'xxxpexxx')
            should_not_transform(:remove_second_order_prefix, 'xxxpe')
          end
        end

        describe "should not set any flags" do
          it "'ber'" do
            should_not_set_flags :remove_second_order_prefix, 'xxxberxxx'
            should_not_set_flags :remove_second_order_prefix, 'xxxber'
          end

          it "'per'" do
            should_not_set_flags :remove_second_order_prefix, 'xxxperxxx'
            should_not_set_flags :remove_second_order_prefix, 'xxxper'
          end

          it "'pe'" do
            should_not_set_flags :remove_second_order_prefix, 'xxxpexxx'
            should_not_set_flags :remove_second_order_prefix, 'xxxpe'
          end
        end
      end
    end
  end

  describe '#remove_suffix' do
    describe "words with these suffix characters" do
      describe "at the end of the word, should remove the suffix characters" do
        it "'kan'" do
          should_transform(:remove_suffix, 'katakan', 'kata')
        end

        it "'an'" do
          should_transform(:remove_suffix, 'sandaran', 'sandar')
        end

        it "'i'" do
          should_transform(:remove_suffix, 'tiduri', 'tidur')
        end
      end

      describe 'at the rest part of the word, should not remove the characters' do
        it "'kan'" do
          should_not_transform(:remove_suffix, 'kanxxx')
          should_not_transform(:remove_suffix, 'xxxkanxxx')
        end

        it "'an'" do
          should_not_transform(:remove_suffix, 'anxxx')
          should_not_transform(:remove_suffix, 'xxxanxxx')
        end

        it "'i'" do
          should_not_transform(:remove_suffix, 'ixxx')
          should_not_transform(:remove_suffix, 'xxxixxx')
        end
      end

      describe "should not set any flags, regardless the characters position" do
        it "'kan'" do
          should_not_set_flags :remove_suffix, 'katakan'
        end

        it "'an'" do
          should_not_set_flags :remove_suffix, 'sandaran'
        end

        it "'i'" do
          should_not_set_flags :remove_suffix, 'tiduri'
        end
      end
    end
  end
end