indonesian_stemmer 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +17 -0
- data/.rvmrc +48 -0
- data/Gemfile +21 -0
- data/Guardfile +9 -0
- data/LICENSE.txt +22 -0
- data/README.md +29 -0
- data/Rakefile +7 -0
- data/indonesian_stemmer.gemspec +23 -0
- data/lib/indonesian_stemmer.rb +51 -0
- data/lib/indonesian_stemmer/morphological_utility.rb +183 -0
- data/lib/indonesian_stemmer/stemmer_utility.rb +27 -0
- data/lib/indonesian_stemmer/version.rb +3 -0
- data/spec/indonesian_stemmer/indonesian_stemmer_spec.rb +145 -0
- data/spec/indonesian_stemmer/morphological_utility_spec.rb +630 -0
- data/spec/indonesian_stemmer/stemmer_utility_spec.rb +59 -0
- data/spec/spec_helper.rb +47 -0
- metadata +105 -0
@@ -0,0 +1,145 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe IndonesianStemmer do
|
4
|
+
describe "covering the inflectional particles" do
|
5
|
+
describe "'kah'" do
|
6
|
+
it { should_stem 'bukukah', 'buku' }
|
7
|
+
end
|
8
|
+
|
9
|
+
describe "'lah'" do
|
10
|
+
it { should_stem 'adalah', 'ada' }
|
11
|
+
end
|
12
|
+
|
13
|
+
describe "'pun'" do
|
14
|
+
it { should_stem 'bagaimanapun', 'bagaimana' }
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe "covering the inflectional possessive pronouns" do
|
19
|
+
describe "'ku'" do
|
20
|
+
it { should_stem 'bukuku', 'buku' }
|
21
|
+
end
|
22
|
+
|
23
|
+
describe "'mu'" do
|
24
|
+
it { should_stem 'rumahmu', 'rumah' }
|
25
|
+
end
|
26
|
+
|
27
|
+
describe "'nya'" do
|
28
|
+
it { should_stem 'cintanya', 'cinta' }
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe "covering the first order of derivational prefixes" do
|
33
|
+
describe "'meng'" do
|
34
|
+
it { should_stem 'mengukur', 'ukur' }
|
35
|
+
end
|
36
|
+
|
37
|
+
describe "'meny'" do
|
38
|
+
it { should_stem 'menyapu', 'sapu' }
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "'men'" do
|
42
|
+
it { should_stem 'menduga', 'duga' }
|
43
|
+
it { should_stem 'menuduh', 'tuduh' }
|
44
|
+
end
|
45
|
+
|
46
|
+
describe "'mem' followed by 'p'" do
|
47
|
+
it { should_stem 'memilih', 'pilih'}
|
48
|
+
it { should_stem 'memilah', 'pilah'}
|
49
|
+
it { should_stem 'memuji', 'puji'}
|
50
|
+
end
|
51
|
+
|
52
|
+
describe "'mem'" do
|
53
|
+
it { should_stem 'membaca', 'baca'}
|
54
|
+
it { should_stem 'membantu', 'bantu'}
|
55
|
+
end
|
56
|
+
|
57
|
+
describe "'me'" do
|
58
|
+
it { should_stem 'merusak', 'rusak'}
|
59
|
+
it { should_stem 'melayang', 'layang'}
|
60
|
+
end
|
61
|
+
|
62
|
+
describe "'peng'" do
|
63
|
+
it { should_stem 'pengukur', 'ukur'}
|
64
|
+
end
|
65
|
+
|
66
|
+
describe "'peny'" do
|
67
|
+
it { should_stem 'penyalin', 'salin'}
|
68
|
+
end
|
69
|
+
|
70
|
+
describe "'pen'" do
|
71
|
+
it { should_stem 'penasehat', 'nasehat'}
|
72
|
+
it { should_stem 'penarik', 'tarik'}
|
73
|
+
end
|
74
|
+
|
75
|
+
describe "'pem' followed by 'p'" do
|
76
|
+
it { should_stem 'pemilih', 'pilih'}
|
77
|
+
it { should_stem 'pemilah', 'pilah'}
|
78
|
+
it { should_stem 'pemuji', 'puji'}
|
79
|
+
end
|
80
|
+
|
81
|
+
describe "'pem'" do
|
82
|
+
it { should_stem 'pembaca', 'baca'}
|
83
|
+
end
|
84
|
+
|
85
|
+
describe "'di'" do
|
86
|
+
it { should_stem 'diukur', 'ukur'}
|
87
|
+
it { should_stem 'dilihat', 'lihat'}
|
88
|
+
end
|
89
|
+
|
90
|
+
describe "'ter'" do
|
91
|
+
it { should_stem 'terindah', 'indah'}
|
92
|
+
it { should_stem 'terhebat', 'hebat'}
|
93
|
+
it { should_stem 'terukur', 'ukur'}
|
94
|
+
it { should_stem 'tersapu', 'sapu'}
|
95
|
+
end
|
96
|
+
|
97
|
+
describe "'ke'" do
|
98
|
+
it { should_stem 'kekasih', 'kasih'}
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
describe "covering the second order of derivational prefixes" do
|
103
|
+
describe "'ber'" do
|
104
|
+
it { should_stem 'berlari', 'lari'}
|
105
|
+
end
|
106
|
+
|
107
|
+
describe "'bel'" do
|
108
|
+
it { should_stem 'belajar', 'ajar'}
|
109
|
+
end
|
110
|
+
|
111
|
+
describe "'be'" do
|
112
|
+
it { should_stem 'bekerja', 'kerja'}
|
113
|
+
end
|
114
|
+
|
115
|
+
describe "'per'" do
|
116
|
+
it { should_stem 'perjelas', 'jelas'}
|
117
|
+
end
|
118
|
+
|
119
|
+
describe "'pel'" do
|
120
|
+
it { should_stem 'pelajar', 'ajar'}
|
121
|
+
end
|
122
|
+
|
123
|
+
describe "'pe'" do
|
124
|
+
it { should_stem 'pekerja', 'kerja'}
|
125
|
+
it { should_stem 'pelari', 'lari'}
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
describe "covering the derivational suffixes" do
|
130
|
+
describe "'kan'" do
|
131
|
+
it { should_stem 'tarikkan', 'tarik'}
|
132
|
+
it { should_stem 'ambilkan', 'ambil'}
|
133
|
+
end
|
134
|
+
|
135
|
+
describe "'an'" do
|
136
|
+
it { should_stem 'makanan', 'makan'}
|
137
|
+
it { should_stem 'sarapan', 'sarap'}
|
138
|
+
end
|
139
|
+
|
140
|
+
describe "'i'" do
|
141
|
+
it { should_stem 'ajari', 'ajar'}
|
142
|
+
it { should_stem 'cermati', 'cermat'}
|
143
|
+
end
|
144
|
+
end
|
145
|
+
end
|
@@ -0,0 +1,630 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe IndonesianStemmer::MorphologicalUtility do
|
4
|
+
describe '#total_syllables' do
|
5
|
+
it "'memasak' should return 3" do
|
6
|
+
IndonesianStemmer.total_syllables('memasak').should == 3
|
7
|
+
end
|
8
|
+
|
9
|
+
# The description now matches the asserted value: the example expects
# total_syllables('mewarnai') to be 4 (me-war-na-i), not 3.
it "'mewarnai' should return 4" do
  IndonesianStemmer.total_syllables('mewarnai').should == 4
end
|
12
|
+
|
13
|
+
it "'permainan' should return 4" do
|
14
|
+
IndonesianStemmer.total_syllables('permainan').should == 4
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
describe '#remove_particle' do
|
19
|
+
describe 'should remove these particles at the end of the word' do
|
20
|
+
it "'kah'" do
|
21
|
+
should_transform(:remove_particle, 'manakah', 'mana')
|
22
|
+
end
|
23
|
+
|
24
|
+
it "'lah'" do
|
25
|
+
should_transform(:remove_particle, 'kembalilah', 'kembali')
|
26
|
+
end
|
27
|
+
|
28
|
+
it "'pun'" do
|
29
|
+
should_transform(:remove_particle, 'bagaimanapun', 'bagaimana')
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
describe 'should not remove these particles at the rest part of the word' do
|
34
|
+
it "'kah'" do
|
35
|
+
should_not_transform(:remove_particle, 'kahak')
|
36
|
+
should_not_transform(:remove_particle, 'pernikahan')
|
37
|
+
end
|
38
|
+
|
39
|
+
it "'lah'" do
|
40
|
+
should_not_transform(:remove_particle, 'lahiriah')
|
41
|
+
should_not_transform(:remove_particle, 'kelahiran')
|
42
|
+
end
|
43
|
+
|
44
|
+
it "'pun'" do
|
45
|
+
should_not_transform(:remove_particle, 'punya')
|
46
|
+
should_not_transform(:remove_particle, 'kepunyaan')
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
describe 'should not set any flags regardless the character position' do
|
51
|
+
it "'kah'" do
|
52
|
+
should_not_set_flags :remove_particle, 'manakah'
|
53
|
+
should_not_set_flags :remove_particle, 'kahak'
|
54
|
+
should_not_set_flags :remove_particle, 'pernikahan'
|
55
|
+
end
|
56
|
+
|
57
|
+
it "'lah'" do
|
58
|
+
should_not_set_flags :remove_particle, 'kembalilah'
|
59
|
+
should_not_set_flags :remove_particle, 'lahiriah'
|
60
|
+
should_not_set_flags :remove_particle, 'kelahiran'
|
61
|
+
end
|
62
|
+
|
63
|
+
it "'pun'" do
|
64
|
+
should_not_set_flags :remove_particle, 'bagaimanapun'
|
65
|
+
should_not_set_flags :remove_particle, 'punya'
|
66
|
+
should_not_set_flags :remove_particle, 'kepunyaan'
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
describe '#remove_possessive_pronoun' do
|
72
|
+
describe 'should remove these possessive pronouns at the end of the word' do
|
73
|
+
it "'ku'" do
|
74
|
+
should_transform(:remove_possessive_pronoun, 'mainanku', 'mainan')
|
75
|
+
end
|
76
|
+
|
77
|
+
it "'mu'" do
|
78
|
+
should_transform(:remove_possessive_pronoun, 'mobilmu', 'mobil')
|
79
|
+
end
|
80
|
+
|
81
|
+
it "'nya'" do
|
82
|
+
should_transform(:remove_possessive_pronoun, 'gelasnya', 'gelas')
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe 'should not remove these possessive pronouns at the rest part of the word' do
|
87
|
+
it "'ku'" do
|
88
|
+
should_not_transform(:remove_possessive_pronoun, 'kumakan')
|
89
|
+
should_not_transform(:remove_possessive_pronoun, 'kekurangan')
|
90
|
+
end
|
91
|
+
|
92
|
+
it "'mu'" do
|
93
|
+
should_not_transform(:remove_possessive_pronoun, 'murahan')
|
94
|
+
should_not_transform(:remove_possessive_pronoun, 'kemurkaan')
|
95
|
+
end
|
96
|
+
|
97
|
+
it "'nya'" do
|
98
|
+
should_not_transform(:remove_possessive_pronoun, 'nyapu')
|
99
|
+
should_not_transform(:remove_possessive_pronoun, 'menyambung')
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
describe 'should not set any flags regardless the characters position' do
|
104
|
+
it 'ku' do
|
105
|
+
should_not_set_flags :remove_possessive_pronoun, 'mainanku'
|
106
|
+
should_not_set_flags :remove_possessive_pronoun, 'kumakan'
|
107
|
+
should_not_set_flags :remove_possessive_pronoun, 'kekurangan'
|
108
|
+
end
|
109
|
+
|
110
|
+
it 'mu' do
|
111
|
+
should_not_set_flags :remove_possessive_pronoun, 'mobilmu'
|
112
|
+
should_not_set_flags :remove_possessive_pronoun, 'murahan'
|
113
|
+
should_not_set_flags :remove_possessive_pronoun, 'kemurkaan'
|
114
|
+
end
|
115
|
+
|
116
|
+
it 'nya' do
|
117
|
+
should_not_set_flags :remove_possessive_pronoun, 'gelasnya'
|
118
|
+
should_not_set_flags :remove_possessive_pronoun, 'nyapu'
|
119
|
+
should_not_set_flags :remove_possessive_pronoun, 'menyambung'
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
|
124
|
+
describe '#remove_first_order_prefix' do
|
125
|
+
describe "words with these special characters" do
|
126
|
+
# Examples for the 'meny', 'peny' and 'pen' prefixes at the start of a word.
# Per the expectations below, a prefix followed by a vowel is removed and the
# stem's first consonant is restored ('menyambung' -> 'sambung',
# 'penata' -> 'tata'); a prefix followed by a consonant is simply removed.
# In both cases the REMOVED_MENG / REMOVED_PENG flag is expected to be set.
# should_transform, should_set_flags_to and unset_flags are helpers defined
# outside this block (presumably in spec_helper -- verify).
describe "at the beginning" do
  describe "followed by a vowel, should remove and substitute the last character" do
    it "'meny'" do
      should_transform(:remove_first_order_prefix, 'menyambung', 'sambung')
    end

    it "'peny'" do
      should_transform(:remove_first_order_prefix, 'penyantap', 'santap')
    end

    it "'pen'" do
      should_transform(:remove_first_order_prefix, 'penata', 'tata')
    end
  end

  describe "followed by consonant, should only remove the special characters" do
    it "'meny'" do
      # TODO: Find a real Indonesian word for this case
      should_transform(:remove_first_order_prefix, 'menyxxx', 'xxx')
    end

    it "'peny'" do
      # TODO: Find a real Indonesian word for this case
      should_transform(:remove_first_order_prefix, 'penyxxx', 'xxx')
    end

    it "'pen'" do
      should_transform(:remove_first_order_prefix, 'penjahat', 'jahat')
    end
  end

  describe "should set the flags correctly regardless vowel or consonant" do
    before do
      unset_flags
    end

    it "'meny' should set the flags to REMOVED_MENG" do
      constant = 'REMOVED_MENG'
      should_set_flags_to :remove_first_order_prefix, 'menyambung', constant
      # Reset between the vowel and consonant cases so the second assertion
      # does not observe the flag left behind by the first.
      unset_flags
      should_set_flags_to :remove_first_order_prefix, 'menyxxx', constant
    end

    it "'peny' should set the flags to REMOVED_PENG" do
      constant = 'REMOVED_PENG'
      should_set_flags_to :remove_first_order_prefix, 'penyantap', constant
      unset_flags
      should_set_flags_to :remove_first_order_prefix, 'penyxxx', constant
    end

    it "'pen' should set the flags to REMOVED_PENG" do
      constant = 'REMOVED_PENG'
      should_set_flags_to :remove_first_order_prefix, 'penata', constant
      unset_flags
      should_set_flags_to :remove_first_order_prefix, 'penjahat', constant
    end
  end
end
|
184
|
+
|
185
|
+
describe "at the rest part of the word" do
|
186
|
+
describe "followed by a vowel, should not do anything" do
|
187
|
+
it "'meny'" do
|
188
|
+
# TODO: Find a real Indonesian word for this case
|
189
|
+
should_transform(:remove_first_order_prefix, 'xxxmenyaxx', 'xxxmenyaxx')
|
190
|
+
should_transform(:remove_first_order_prefix, 'xxxmenya', 'xxxmenya')
|
191
|
+
end
|
192
|
+
|
193
|
+
it "'peny'" do
|
194
|
+
# TODO: Find a real Indonesian word for this case
|
195
|
+
should_transform(:remove_first_order_prefix, 'xxxpenyaxx', 'xxxpenyaxx')
|
196
|
+
should_transform(:remove_first_order_prefix, 'xxxpenya', 'xxxpenya')
|
197
|
+
end
|
198
|
+
|
199
|
+
it "'pen'" do
|
200
|
+
# TODO: Find a real Indonesian word for this case
|
201
|
+
should_transform(:remove_first_order_prefix, 'xxxpenexx', 'xxxpenexx')
|
202
|
+
should_transform(:remove_first_order_prefix, 'xxxpeno', 'xxxpeno')
|
203
|
+
end
|
204
|
+
end
|
205
|
+
|
206
|
+
describe "followed by consonant, should not do anything" do
|
207
|
+
it "'meny'" do
|
208
|
+
# TODO: Find a real Indonesian word for this case
|
209
|
+
should_transform(:remove_first_order_prefix, 'xxxmenykxx', 'xxxmenykxx')
|
210
|
+
should_transform(:remove_first_order_prefix, 'xxxmenyk', 'xxxmenyk')
|
211
|
+
end
|
212
|
+
|
213
|
+
it "'peny'" do
|
214
|
+
# TODO: Find a real Indonesian word for this case
|
215
|
+
should_transform(:remove_first_order_prefix, 'xxxpenykxx', 'xxxpenykxx')
|
216
|
+
should_transform(:remove_first_order_prefix, 'xxxpenyk', 'xxxpenyk')
|
217
|
+
end
|
218
|
+
|
219
|
+
it "'pen'" do
|
220
|
+
# TODO: Find a real Indonesian word for this case
|
221
|
+
should_transform(:remove_first_order_prefix, 'xxxpenrxx', 'xxxpenrxx')
|
222
|
+
should_transform(:remove_first_order_prefix, 'xxxpenr', 'xxxpenr')
|
223
|
+
end
|
224
|
+
end
|
225
|
+
|
226
|
+
describe "should not set any flags regardless the characters position" do
|
227
|
+
it "'meny'" do
|
228
|
+
# TODO: Find a real Indonesian word for this case
|
229
|
+
%w( xxxmenyaxx xxxmenya xxxmenykxx xxxmenyk ).each do |character|
|
230
|
+
should_not_set_flags :remove_first_order_prefix, character
|
231
|
+
end
|
232
|
+
end
|
233
|
+
|
234
|
+
it "'peny'" do
|
235
|
+
# TODO: Find a real Indonesian word for this case
|
236
|
+
%w( xxxpenyaxx xxxpenya xxxpenykxx xxxpenyk ).each do |character|
|
237
|
+
should_not_set_flags :remove_first_order_prefix, character
|
238
|
+
end
|
239
|
+
end
|
240
|
+
|
241
|
+
it "'pen'" do
|
242
|
+
# TODO: Find a real Indonesian word for this case
|
243
|
+
%w( xxxpenexx xxxpeno xxxpenrxx xxxpenr ).each do |character|
|
244
|
+
should_not_set_flags :remove_first_order_prefix, character
|
245
|
+
end
|
246
|
+
end
|
247
|
+
end
|
248
|
+
end
|
249
|
+
end
|
250
|
+
|
251
|
+
describe "words with first order prefix characters" do
|
252
|
+
# Examples for first-order prefixes ('meng', 'men', 'mem', 'me', 'peng',
# 'pem', 'di', 'ter', 'ke') at the start of a word. The first group expects
# the prefix to be removed; the second expects the matching REMOVED_* flag
# to be set. should_transform, should_set_flags_to and unset_flags are
# helpers defined outside this block (presumably in spec_helper -- verify).
describe "at the beginning," do
  describe "should remove these characters" do
    it "'meng'" do
      should_transform(:remove_first_order_prefix, 'menggambar', 'gambar')
    end

    it "'men'" do
      should_transform(:remove_first_order_prefix, 'mendaftar', 'daftar')
    end

    it "'mem'" do
      should_transform(:remove_first_order_prefix, 'membangun', 'bangun')
    end

    it "'me'" do
      should_transform(:remove_first_order_prefix, 'melukis', 'lukis')
    end

    it "'peng'" do
      should_transform(:remove_first_order_prefix, 'penggaris', 'garis')
    end

    it "'pem'" do
      should_transform(:remove_first_order_prefix, 'pembajak', 'bajak')
    end

    it "'di'" do
      should_transform(:remove_first_order_prefix, 'disayang', 'sayang')
    end

    it "'ter'" do
      should_transform(:remove_first_order_prefix, 'terucap', 'ucap')
    end

    it "'ke'" do
      should_transform(:remove_first_order_prefix, 'kemakan', 'makan')
    end
  end

  describe "should set the flags correctly" do
    # Start every example from a clean flag state.
    before do
      unset_flags
    end

    describe "to REMOVED_MENG on these characters" do
      before do
        @constant = 'REMOVED_MENG'
      end

      it "'meng'" do
        should_set_flags_to :remove_first_order_prefix, 'menggambar', @constant
      end

      it "'men'" do
        should_set_flags_to :remove_first_order_prefix, 'mendaftar', @constant
      end

      it "'mem'" do
        should_set_flags_to :remove_first_order_prefix, 'membangun', @constant
      end

      it "'me'" do
        should_set_flags_to :remove_first_order_prefix, 'melukis', @constant
      end
    end

    describe "to REMOVED_PENG on these characters" do
      before do
        @constant = 'REMOVED_PENG'
      end

      it "'peng'" do
        should_set_flags_to :remove_first_order_prefix, 'penggaris', @constant
      end

      it "'pem'" do
        should_set_flags_to :remove_first_order_prefix, 'pembajak', @constant
      end
    end

    describe "to their respective constants on these characters" do
      it "'di'" do
        should_set_flags_to :remove_first_order_prefix, 'disayang', 'REMOVED_DI'
      end

      it "'ter'" do
        should_set_flags_to :remove_first_order_prefix, 'terucap', 'REMOVED_TER'
      end

      it "'ke'" do
        should_set_flags_to :remove_first_order_prefix, 'kemakan', 'REMOVED_KE'
      end
    end
  end
end
|
347
|
+
|
348
|
+
describe "at the rest part of the word," do
|
349
|
+
describe "should not remove these characters" do
|
350
|
+
it "'meng'" do
|
351
|
+
should_transform(:remove_first_order_prefix, 'xxxmengxex', 'xxxmengxex')
|
352
|
+
should_transform(:remove_first_order_prefix, 'xexmeng', 'xexmeng')
|
353
|
+
end
|
354
|
+
|
355
|
+
it "'men'" do
|
356
|
+
should_transform(:remove_first_order_prefix, 'xxxmenxxx', 'xxxmenxxx')
|
357
|
+
should_transform(:remove_first_order_prefix, 'xxxmen', 'xxxmen')
|
358
|
+
end
|
359
|
+
|
360
|
+
it "'mem'" do
|
361
|
+
should_transform(:remove_first_order_prefix, 'xxxmemxxx', 'xxxmemxxx')
|
362
|
+
should_transform(:remove_first_order_prefix, 'xxxmem', 'xxxmem')
|
363
|
+
end
|
364
|
+
|
365
|
+
it "'me'" do
|
366
|
+
should_transform(:remove_first_order_prefix, 'xxxmexxx', 'xxxmexxx')
|
367
|
+
should_transform(:remove_first_order_prefix, 'xxxme', 'xxxme')
|
368
|
+
end
|
369
|
+
|
370
|
+
it "'peng'" do
|
371
|
+
should_transform(:remove_first_order_prefix, 'xxxpengxxx', 'xxxpengxxx')
|
372
|
+
should_transform(:remove_first_order_prefix, 'xxxpeng', 'xxxpeng')
|
373
|
+
end
|
374
|
+
|
375
|
+
it "'pem'" do
|
376
|
+
should_transform(:remove_first_order_prefix, 'xxxpemxxx', 'xxxpemxxx')
|
377
|
+
should_transform(:remove_first_order_prefix, 'xxxpem', 'xxxpem')
|
378
|
+
end
|
379
|
+
|
380
|
+
it "'di'" do
|
381
|
+
should_transform(:remove_first_order_prefix, 'xxxdixxx', 'xxxdixxx')
|
382
|
+
should_transform(:remove_first_order_prefix, 'xxxdi', 'xxxdi')
|
383
|
+
end
|
384
|
+
|
385
|
+
it "'ter'" do
|
386
|
+
should_transform(:remove_first_order_prefix, 'xxxterxxx', 'xxxterxxx')
|
387
|
+
should_transform(:remove_first_order_prefix, 'xxxter', 'xxxter')
|
388
|
+
end
|
389
|
+
|
390
|
+
it "'ke'" do
|
391
|
+
should_transform(:remove_first_order_prefix, 'xxxkexxx', 'xxxkexxx')
|
392
|
+
should_transform(:remove_first_order_prefix, 'xxxke', 'xxxke')
|
393
|
+
end
|
394
|
+
end
|
395
|
+
|
396
|
+
describe "should not set any flags" do
|
397
|
+
it "'meng'" do
|
398
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxmengxex'
|
399
|
+
should_not_set_flags :remove_first_order_prefix, 'xexmeng'
|
400
|
+
end
|
401
|
+
|
402
|
+
it "'men'" do
|
403
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxmenxxx'
|
404
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxmen'
|
405
|
+
end
|
406
|
+
|
407
|
+
it "'mem'" do
|
408
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxmemxxx'
|
409
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxmem'
|
410
|
+
end
|
411
|
+
|
412
|
+
it "'me'" do
|
413
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxmexxx'
|
414
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxme'
|
415
|
+
end
|
416
|
+
|
417
|
+
it "'peng'" do
|
418
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxpengxxx'
|
419
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxpeng'
|
420
|
+
end
|
421
|
+
|
422
|
+
it "'pem'" do
|
423
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxpemxxx'
|
424
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxpem'
|
425
|
+
end
|
426
|
+
|
427
|
+
it "'di'" do
|
428
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxdixxx'
|
429
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxdi'
|
430
|
+
end
|
431
|
+
|
432
|
+
it "'ter'" do
|
433
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxterxxx'
|
434
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxter'
|
435
|
+
end
|
436
|
+
|
437
|
+
it "'ke'" do
|
438
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxkexxx'
|
439
|
+
should_not_set_flags :remove_first_order_prefix, 'xxxke'
|
440
|
+
end
|
441
|
+
end
|
442
|
+
end
|
443
|
+
end
|
444
|
+
end
|
445
|
+
|
446
|
+
describe '#remove_second_order_prefix' do
|
447
|
+
describe "regarding these irregular words" do
|
448
|
+
describe "'belajar'" do
|
449
|
+
before do
|
450
|
+
@word = 'belajar'
|
451
|
+
end
|
452
|
+
|
453
|
+
it 'should be handled correctly' do
|
454
|
+
should_transform :remove_second_order_prefix, @word, 'ajar'
|
455
|
+
end
|
456
|
+
|
457
|
+
it 'should not set any flags' do
|
458
|
+
should_not_set_flags :remove_second_order_prefix, @word
|
459
|
+
end
|
460
|
+
end
|
461
|
+
|
462
|
+
describe "'belunjur'" do
|
463
|
+
before do
|
464
|
+
@word = 'belunjur'
|
465
|
+
end
|
466
|
+
|
467
|
+
it 'should be handled correctly' do
|
468
|
+
should_transform(:remove_second_order_prefix, @word, 'unjur')
|
469
|
+
end
|
470
|
+
|
471
|
+
it 'should not set any flags' do
|
472
|
+
should_not_set_flags :remove_second_order_prefix, @word
|
473
|
+
end
|
474
|
+
end
|
475
|
+
|
476
|
+
describe "'pelajar'" do
|
477
|
+
before do
|
478
|
+
@word = 'pelajar'
|
479
|
+
end
|
480
|
+
|
481
|
+
it 'should be handled correctly' do
|
482
|
+
should_transform(:remove_second_order_prefix, @word, 'ajar')
|
483
|
+
end
|
484
|
+
|
485
|
+
it 'should not set any flags' do
|
486
|
+
should_not_set_flags :remove_second_order_prefix, @word
|
487
|
+
end
|
488
|
+
end
|
489
|
+
end
|
490
|
+
|
491
|
+
describe "regarding words starting with 'be*er' where * isn't a vowel & the length > 4" do
|
492
|
+
# TODO: Find other word(s) matching this rule
|
493
|
+
describe "'beserta'" do
|
494
|
+
before do
|
495
|
+
@word = 'beserta'
|
496
|
+
unset_flags
|
497
|
+
end
|
498
|
+
|
499
|
+
it "should be handled correctly" do
|
500
|
+
should_transform :remove_second_order_prefix, @word, 'serta'
|
501
|
+
end
|
502
|
+
|
503
|
+
it "should set the flags to REMOVED_BER" do
|
504
|
+
should_set_flags_to :remove_second_order_prefix, @word, 'REMOVED_BER'
|
505
|
+
end
|
506
|
+
end
|
507
|
+
end
|
508
|
+
|
509
|
+
describe "words with second order prefix characters" do
|
510
|
+
# Examples for second-order prefixes ('ber', 'per', 'pe') at the start of a
# word. The first group expects the prefix to be removed. The second expects
# 'ber' to set REMOVED_BER, 'pe' to set REMOVED_PE, and 'per' to set no flag.
# should_transform, should_set_flags_to, should_not_set_flags and unset_flags
# are helpers defined outside this block (presumably in spec_helper -- verify).
describe "at the beginning," do
  describe "should remove these characters" do
    it "'ber'" do
      should_transform(:remove_second_order_prefix, 'bercerita', 'cerita')
    end

    it "'per'" do
      should_transform(:remove_second_order_prefix, 'perjelas', 'jelas')
    end

    it "'pe'" do
      should_transform(:remove_second_order_prefix, 'pesuruh', 'suruh')
    end
  end

  describe "should set the flags correctly" do
    # Start every example from a clean flag state.
    before do
      unset_flags
    end

    it "'ber' should set to REMOVED_BER" do
      should_set_flags_to :remove_second_order_prefix, 'bercerita', 'REMOVED_BER'
    end

    it "'per' should not set any flags" do
      should_not_set_flags :remove_second_order_prefix, 'perjelas'
    end

    it "'pe' should set to REMOVED_PE" do
      should_set_flags_to :remove_second_order_prefix, 'pesuruh', 'REMOVED_PE'
    end
  end
end
|
543
|
+
|
544
|
+
describe "at the rest part of the word," do
|
545
|
+
describe "should not remove these characters" do
|
546
|
+
it "'ber'" do
|
547
|
+
should_not_transform(:remove_second_order_prefix, 'xxxberxxx')
|
548
|
+
should_not_transform(:remove_second_order_prefix, 'xxxber')
|
549
|
+
end
|
550
|
+
|
551
|
+
it "'per'" do
|
552
|
+
should_not_transform(:remove_second_order_prefix, 'xxxperxxx')
|
553
|
+
should_not_transform(:remove_second_order_prefix, 'xxxper')
|
554
|
+
end
|
555
|
+
|
556
|
+
it "'pe'" do
|
557
|
+
should_not_transform(:remove_second_order_prefix, 'xxxpexxx')
|
558
|
+
should_not_transform(:remove_second_order_prefix, 'xxxpe')
|
559
|
+
end
|
560
|
+
end
|
561
|
+
|
562
|
+
# Examples where 'ber', 'per' or 'pe' appears somewhere other than the start
# of the word: remove_second_order_prefix is expected to leave the flags
# unset. should_not_set_flags is a helper defined outside this block
# (presumably in spec_helper -- verify).
describe "should not set any flags" do
  it "'ber'" do
    should_not_set_flags :remove_second_order_prefix, 'xxxberxxx'
    should_not_set_flags :remove_second_order_prefix, 'xxxber'
  end

  it "'per'" do
    should_not_set_flags :remove_second_order_prefix, 'xxxperxxx'
    should_not_set_flags :remove_second_order_prefix, 'xxxper'
  end

  it "'pe'" do
    should_not_set_flags :remove_second_order_prefix, 'xxxpexxx'
    should_not_set_flags :remove_second_order_prefix, 'xxxpe'
  end
end
|
578
|
+
end
|
579
|
+
end
|
580
|
+
end
|
581
|
+
|
582
|
+
describe '#remove_suffix' do
|
583
|
+
describe "words with these suffix characters" do
|
584
|
+
describe "at the end of the word, should remove the suffix characters" do
|
585
|
+
it "'kan'" do
|
586
|
+
should_transform(:remove_suffix, 'katakan', 'kata')
|
587
|
+
end
|
588
|
+
|
589
|
+
it "'an'" do
|
590
|
+
should_transform(:remove_suffix, 'sandaran', 'sandar')
|
591
|
+
end
|
592
|
+
|
593
|
+
it "'i'" do
|
594
|
+
should_transform(:remove_suffix, 'tiduri', 'tidur')
|
595
|
+
end
|
596
|
+
end
|
597
|
+
|
598
|
+
describe 'at the rest part of the word, should not remove the characters' do
|
599
|
+
it "'kan'" do
|
600
|
+
should_not_transform(:remove_suffix, 'kanxxx')
|
601
|
+
should_not_transform(:remove_suffix, 'xxxkanxxx')
|
602
|
+
end
|
603
|
+
|
604
|
+
it "'an'" do
|
605
|
+
should_not_transform(:remove_suffix, 'anxxx')
|
606
|
+
should_not_transform(:remove_suffix, 'xxxanxxx')
|
607
|
+
end
|
608
|
+
|
609
|
+
it "'i'" do
|
610
|
+
should_not_transform(:remove_suffix, 'ixxx')
|
611
|
+
should_not_transform(:remove_suffix, 'xxxixxx')
|
612
|
+
end
|
613
|
+
end
|
614
|
+
|
615
|
+
describe "should not set any flags, regardless the characters position" do
|
616
|
+
it "'kan'" do
|
617
|
+
should_not_set_flags :remove_suffix, 'katakan'
|
618
|
+
end
|
619
|
+
|
620
|
+
it "'an'" do
|
621
|
+
should_not_set_flags :remove_suffix, 'sandaran'
|
622
|
+
end
|
623
|
+
|
624
|
+
it "'i'" do
|
625
|
+
should_not_set_flags :remove_suffix, 'tiduri'
|
626
|
+
end
|
627
|
+
end
|
628
|
+
end
|
629
|
+
end
|
630
|
+
end
|