turkish_stemmer 0.1.4 → 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,19 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+ require 'turkish_stemmer'
8
+
9
+ RSpec.configure do |config|
10
+ config.treat_symbols_as_metadata_keys_with_true_values = true
11
+ config.run_all_when_everything_filtered = true
12
+ config.filter_run :focus
13
+
14
+ # Run specs in random order to surface order dependencies. If you find an
15
+ # order dependency and want to debug it, you can fix the order by providing
16
+ # the seed, which is printed after each run.
17
+ # --seed 1234
18
+ config.order = 'random'
19
+ end
@@ -0,0 +1,101 @@
1
+ evimden,ev,from my house,ev-(i)m-den,
2
+ göz,göz,eye,--,
3
+ güzelmişsin,güzel,you were beautiful,güzel-miş-sin,rumor
4
+ etkilerden,etki,from the effects,etki-ler-den,
5
+ çocukmuş,çocuk,it was child,çocuk-miş,rumor
6
+ kediymiş,kedi,it was cat,kedi-(y)miş,rumor
7
+ balığım,balık,my fish,balık-(i)m,
8
+ doktoruymuşsunuz,doktor,you were his/her/its doctor,doktor-i-(y)miş-siniz,rumor
9
+ kalelerimizdekilerden,kale,the ones that are from our castle,kale-ler-(i)miz-de-ki-ler-den,
10
+ çocuğuymuşumçasına,çocuk,as if i was his/her child ,çocuk-i-(y)miş-im-cesine,
11
+ kedileriyle,kedi,with his/her/its cats,kedi-ler-i-(y)le,kedileri+ile
12
+ çocuklarımmış,çocuk,they were my children,çocuk-ler-(i)m-miş,rumor
13
+ kitabımızdı,kitap,it was our book,kitap-(i)miz-di,
14
+ kelimelerin,kelime,"""your"" -or- ""of"" words",kelime-ler-(i)n -or- kelime-ler-in,both ways
15
+ kayısısı,kayısı,his/her/its apricot,kayısı-(s)ı,
16
+ eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
17
+ eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki -or- erik-i-(n)de-ki,both ways
18
+ eriğinden,erik,"from ""your"" -or- ""his/her/its"" plum",erik-(i)n-den -or- erik-i-(n)den,both ways
19
+ eriğine,erik,"to ""your"" -or- ""his/her/its"" plum",erik-(i)n-e -or- erik-i-(n)e,both ways
20
+ eriğinde,erik,"at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de -or- erik-i-(n)de,both ways
21
+ kayısısına,kayısı,his/her/its apricot,kayısı-(s)ı-(n)a,
22
+ kayısısında,kayısı,at his/her/its apricot,kayısı-(s)ı-(n)da,
23
+ saatlerimiz,saat,our watches/hours,saat-ler-(i)miz,
24
+ kalemimin,kalem,of my pencil,kalem-(i)m-in,
25
+ ucu,uç,nib of...,uç-i,
26
+ kalelerdekilerden,kale,from the ones that are at (the) castle,kale-ler-de-ki-ler-den,
27
+ kalelerdekilerin,kale,of the ones that are at (the) castle,kale-ler-de-ki-ler-in,
28
+ kalelerimizdekilerde,kale,at the ones that are at (the) castle,kale-ler-(i)miz-de-ki-ler-de,
29
+ kaleninkinin,kale,of the one that belongs to (the) castle,kale-(n)in-ki-nin,
30
+ kalemizinkinin,kale,of the one that belongs to our castle,kale-miz-(i)n-ki-(n)in,
31
+ kalelerindeki,kale,"the one that is at ""their castle"" -or- ""his/her/its castles""",kale-leri-(n)de-ki -or- kale-ler-i-(n)de-ki,both ways
32
+ erikleri,erik,"""their plum"" -or- ""his/her/its plums""",erik-leri -or- erik-ler-i,both ways
33
+ erikler,erik,(the) plums,erik-ler,
34
+ eriğim,erik,my plum,erik-(i)m,
35
+ eriğimiz,erik,our plum,erik-(i)miz,
36
+ eriğin,erik,your plum,erik-(i)n,
37
+ eriğiniz,erik,your plum,erik-(i)niz,2nd person in plural
38
+ eriği,erik,his/her/its plum,erik-i,
39
+ eriğini,erik,"""your"" -or- ""his/her/its"" plum",erik-(i)n-i -or- erik-i-(n)i,both ways
40
+ eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
41
+ eriğe,erik,to (the) plum,erik-e,
42
+ eriğine,erik,"to ""your"" -or- ""his/her/its"" plum",erik-(i)n-e -or- erik-i-(n)e,both ways
43
+ eriklerine,erik,"to ""their plum"" -or- ""his/her/its plums""",erik-leri-(n)e -or- erik-ler-i-(n)e,both ways
44
+ erikte,erik,at (the) plum,erik-de,
45
+ eriğinde,erik,"at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de -or- erik-i-(n)de,both ways
46
+ erikten,erik,from (the) plum,erik-den,
47
+ eriğinden,erik,"from ""your"" -or- ""his/her/its"" plum",erik-(i)n-den -or- erik-i-(n)den,both ways
48
+ eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki -or- erik-i-(n)de-ki,both ways
49
+ eriğiyle,erik,with his/her/its plum,erik-i-(y)le,
50
+ eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
51
+ eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki -or- erik-i-(n)de-ki,both ways
52
+ eriğince,erik,"after ""your"" -or- ""his/her/its"" plum",erik-(i)n-ce -or- erik-i-(n)ce,both ways
53
+ gülüm,gül,my rose,gül-(i)m,
54
+ erikteki,erik,the one that is at (the) plum,erik-de-ki,
55
+ eriktekilerden,erik,the ones that are from (the) plum,erik-de-ki-ler-den,
56
+ eriklerdeki,erik,the ones that are at (the) plum,erik-ler-de-ki,
57
+ kitabı,kitap,(the) book,kitap-i,
58
+ ağacı,ağaç,(the) tree,ağaç-i,
59
+ eriğim,erik,my plum / i am plum,erik-(i)m / erik-im,
60
+ kayısıyım,kayısı,i am apricot,kayısı-(y)ım,
61
+ eriksem,erik,if i am plum,erik-se-m,
62
+ eriksen,erik,if you are plum,erik-se-n,
63
+ erikse,erik,if he/she/it is plum,erik-se,
64
+ erikseniz,erik,if you are plum,erik-se-niz,2nd person in plural
65
+ erikseler,erik,if they are plum,erik-se-ler,
66
+ erikti,erik,he/she/it was plum,erik-di,
67
+ eriktiniz,erik,you were plum,erik-di-niz,2nd person in plural
68
+ eriktiler,erik,they were plum,erik-di-ler,
69
+ erikmiş,erik,it was plum,erik-miş,rumor
70
+ erikmişçesine,erik,as if it was plum,erik-miş-cesine,
71
+ erikmiştir,erik,it was plum,erik-miş-dir,rumor
72
+ erikmişim,erik,i was plum,erik-miş-(i)m,rumor
73
+ erikmişsin,erik,you were plum,erik-miş-sin,rumor
74
+ erikmişsindir,erik,you happened to be plum,erik-miş-sin-dir,rumor
75
+ erikmişimdir,erik,i happened to be plum,erik-miş-im-dir,rumor
76
+ erikmişiz,erik,we were plum,erik-miş-iz,rumor
77
+ erikmişizdir,erik,we happened to be plum,erik-miş-iz-dir,rumor
78
+ erikmişsiniz,erik,you were plum,erik-miş-siniz,2nd person in plural + rumor
79
+ erikmişsinizdir,erik,you happened to be plum,erik-miş-siniz-dir,2nd person in plural + rumor
80
+ erikmişler,erik,they were plum,erik-miş-ler,rumor
81
+ erikmişlerdir,erik,they happened to be plum,erik-miş-ler-dir,rumor
82
+ erikmişimcesine,erik,as if i was plum,erik-miş-im-cesine,
83
+ erikmişsincesine,erik,as if you were plum,erik-miş-sin-cesine,
84
+ erikmişizcesine,erik,as if we were plum,erik-miş-iz-cesine,
85
+ erikmişsinizcesine,erik,as if you were plum,erik-miş-siniz-cesine,2nd person in plural
86
+ erikmişlercesine,erik,as if they were plum,erik-miş-ler-cesine,
87
+ erikler,erik,plums,erik-ler,
88
+ eriğim,erik,my plum / i am plum,erik-(i)m / erik-im,
89
+ eriksin,erik,you are plum,erik-sin,
90
+ erik,erik,plum / he/she/it is plum,erik / erik,
91
+ eriğiz,erik,we are plum,erik-iz,
92
+ eriksiniz,erik,you are plum,erik-siniz,2nd person in plural
93
+ erikler,erik,they are plum,erik-ler,
94
+ eriktir,erik,it is plum,erik-dir,assumption
95
+ eriktirler,erik,they are plum,erik-dir-ler,assumption
96
+ erikken,erik,while he/she/it was plum,erik-(i)ken,
97
+ kötüymüş,kötü,he/she/it is bad,kötü-(y)miş,rumor
98
+ yüz,yüz,face / hundred,yüz,double meaning
99
+ muş,muş,--,--,this is a suffix
100
+ ad,ad,name,ad,
101
+ soyad,soyad,surname,soyad,soy+ad = lineage+name
@@ -0,0 +1,522 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+ require "pry"
4
+ require "csv"
5
+
6
+ describe TurkishStemmer do
7
+
8
+ describe ".count_syllables" do
9
+ it "counts syllables correctly" do
10
+ expect(described_class.count_syllables("erikler")).to eq 3
11
+ expect(described_class.count_syllables("çocuklarımmış")).to eq 5
12
+ end
13
+ end
14
+
15
+ describe ".vowels" do
16
+ it "returns all vowels of a word" do
17
+ expect(described_class.vowels("kötüymüş")).to eq(%w(ö ü ü))
18
+ end
19
+ end
20
+
21
+ describe ".has_roundness?" do
22
+ context "when vowel is empty" do
23
+ it "has roundness" do
24
+ expect(described_class).to have_roundness(nil, "a")
25
+ end
26
+ end
27
+
28
+ context "when candidate is empty" do
29
+ it "has roundness" do
30
+ expect(described_class).to have_roundness("a", nil)
31
+ end
32
+ end
33
+
34
+ context "when an unrounded vowel is passed" do
35
+ let(:vowel) { described_class::UNROUNDED_VOWELS.chars.to_a.sample }
36
+
37
+ context "and candidate is an unrounded vowel too" do
38
+ let(:candidate) { described_class::UNROUNDED_VOWELS.chars.to_a.sample }
39
+
40
+ it "has roundness" do
41
+ expect(described_class).to have_roundness(vowel, candidate)
42
+ end
43
+ end
44
+
45
+ context "and candidate is not an unrounded vowel" do
46
+ let(:candidate) { described_class::ROUNDED_VOWELS.chars.to_a.sample }
47
+
48
+ it "does not have roundness" do
49
+ expect(described_class).not_to have_roundness(vowel, candidate)
50
+ end
51
+ end
52
+ end
53
+
54
+ context "when a rounded vowel is passed" do
55
+ let(:vowel) { described_class::ROUNDED_VOWELS.chars.to_a.sample }
56
+
57
+ context "and one of 'a', 'e', 'u' or 'ü' is a candidate" do
58
+ let(:candidate) { described_class::FOLLOWING_ROUNDED_VOWELS.chars.to_a.sample }
59
+
60
+ it "has roundness" do
61
+ expect(described_class).to have_roundness(vowel, candidate)
62
+ end
63
+ end
64
+
65
+ context "and candidate is 'o'" do
66
+ let(:candidate) { 'o' }
67
+
68
+ it "does not have roundness" do
69
+ expect(described_class).not_to have_roundness(vowel, candidate)
70
+ end
71
+ end
72
+ end
73
+ end
74
+
75
+ describe ".has_frontness?" do
76
+ context "when vowel is empty" do
77
+ it "has frontness" do
78
+ expect(described_class).to have_frontness(nil, "a")
79
+ end
80
+ end
81
+
82
+ context "when candidate is empty" do
83
+ it "has frontness" do
84
+ expect(described_class).to have_frontness("a", nil)
85
+ end
86
+ end
87
+
88
+ context "when a front vowel is passed" do
89
+ let(:vowel) { described_class::FRONT_VOWELS.chars.to_a.sample }
90
+
91
+ context "and candidate is a front vowel" do
92
+ let(:candidate) { described_class::FRONT_VOWELS.chars.to_a.sample }
93
+
94
+ it "has frontness" do
95
+ expect(described_class).to have_frontness(vowel, candidate)
96
+ end
97
+ end
98
+
99
+ context "and candidate is a back vowel" do
100
+ let(:candidate) { described_class::BACK_VOWELS.chars.to_a.sample }
101
+
102
+ it "does not have frontness" do
103
+ expect(described_class).not_to have_frontness(vowel, candidate)
104
+ end
105
+ end
106
+ end
107
+
108
+ context "when a back vowel is passed" do
109
+ let(:vowel) { described_class::BACK_VOWELS.chars.to_a.sample }
110
+
111
+ context "and candidate is a front vowel" do
112
+ let(:candidate) { described_class::FRONT_VOWELS.chars.to_a.sample }
113
+
114
+ it "does not have frontness" do
115
+ expect(described_class).not_to have_frontness(vowel, candidate)
116
+ end
117
+ end
118
+
119
+ context "and candidate is a back vowel" do
120
+ let(:candidate) { described_class::BACK_VOWELS.chars.to_a.sample }
121
+
122
+ it "has frontness" do
123
+ expect(described_class).to have_frontness(vowel, candidate)
124
+ end
125
+ end
126
+ end
127
+ end
128
+
129
+ describe ".has_vowel_harmony?" do
130
+ it "has vowel harmony for valid Turkish words" do
131
+ expect(described_class).to have_vowel_harmony("Türkiyedir")
132
+ expect(described_class).to have_vowel_harmony("kapıdır")
133
+ expect(described_class).to have_vowel_harmony("gündür")
134
+ expect(described_class).to have_vowel_harmony("paltodur")
135
+ end
136
+
137
+ it "does not have vowel harmony for loanwords" do
138
+ expect(described_class).not_to have_vowel_harmony("kürdan")
139
+ end
140
+
141
+ it "does not have vowel harmony for exceptions" do
142
+ expect(described_class).not_to have_vowel_harmony("anne")
143
+ expect(described_class).not_to have_vowel_harmony("kardeş")
144
+ end
145
+ end
146
+
147
+ describe ".affix_morphological_stripper" do
148
+ context "when states are empty" do
149
+ it "returns the word" do
150
+ expect(
151
+ described_class.
152
+ affix_morphological_stripper("kapıdır", suffixes: :test)).
153
+ to eq(["kapıdır"])
154
+ end
155
+ end
156
+
157
+ context "when suffixes are empty" do
158
+ it "return the word" do
159
+ expect(
160
+ described_class.
161
+ affix_morphological_stripper("kapıdır", states: :test)).
162
+ to eq(["kapıdır"])
163
+ end
164
+ end
165
+
166
+ context "when there exist states and suffixes" do
167
+ let(:states) {
168
+ described_class.
169
+ load_states_or_suffixes("spec/fixtures/simple_state.yml")
170
+ }
171
+
172
+ let(:suffixes) {
173
+ described_class.
174
+ load_states_or_suffixes("spec/fixtures/simple_suffix.yml")
175
+ }
176
+
177
+ it "generates pendings for the initial state" do
178
+ described_class.should_receive(:generate_pendings).with(:a,
179
+ "word", states, suffixes).and_call_original
180
+
181
+ described_class.affix_morphological_stripper("word",
182
+ states: states, suffixes: suffixes)
183
+ end
184
+ end
185
+
186
+ context "when a transition is valid" do
187
+ let(:states) {
188
+ described_class.
189
+ load_states_or_suffixes("spec/fixtures/simple_state.yml")
190
+ }
191
+
192
+ let(:suffixes) {
193
+ described_class.
194
+ load_states_or_suffixes("spec/fixtures/simple_suffix.yml")
195
+ }
196
+
197
+ context "and the transit state is a final state" do
198
+ it "removes similar pending transitions" do
199
+ described_class.should_receive(:mark_stem).with(
200
+ "guzelim", suffixes[:s1]).and_call_original
201
+
202
+ described_class.affix_morphological_stripper(
203
+ "guzelim", states: states, suffixes: suffixes)
204
+ end
205
+
206
+ context "with no other transitions" do
207
+ it "stems the word" do
208
+ expect(
209
+ described_class.
210
+ affix_morphological_stripper("guzelim",
211
+ states: states, suffixes: suffixes)).
212
+ to eq ["guzel"]
213
+ end
214
+ end
215
+
216
+ context "with other transitions" do
217
+ let(:states) {
218
+ described_class.load_states_or_suffixes("spec/fixtures/simple_state_02.yml")
219
+ }
220
+
221
+ it "adds more pendings to check" do
222
+ described_class.should_receive(:mark_stem).with("guzelim",
223
+ suffixes[:s1]).and_call_original
224
+
225
+ described_class.affix_morphological_stripper("guzelim",
226
+ states: states, suffixes: suffixes)
227
+ end
228
+ end
229
+ end
230
+ end
231
+
232
+ context "when one suffix matches correctly with a given word" do
233
+ it "does not compare other suffixes in the same transition" do
234
+ described_class.
235
+ should_receive(:mark_stem).
236
+ with(anything, anything).
237
+ # only for suffixes [sUnUz, nUz]
238
+ exactly(2).times.
239
+ and_call_original
240
+
241
+ puts described_class.
242
+ affix_morphological_stripper("taksicisiniz",
243
+ states: described_class::NOMINAL_VERB_STATES,
244
+ suffixes: described_class::NOMINAL_VERB_SUFFIXES)
245
+ end
246
+ end
247
+ end
248
+
249
+ describe ".stem" do
250
+ context "when input is single syllable" do
251
+ it "returns the input as is" do
252
+ expect(described_class.stem("ev")).to eq "ev"
253
+ end
254
+ end
255
+
256
+ context "when input has zero syllables - one consonant" do
257
+ it "returns the input as is" do
258
+ expect(described_class.stem("p")).to eq "p"
259
+ end
260
+ end
261
+ end
262
+
263
+ describe ".last_consonant!" do
264
+ context "when last consonant is among 'b', 'c', 'd' or 'ğ'" do
265
+ it "is replaced by 'p', 'ç', 't' or 'k'" do
266
+ expect(described_class.last_consonant!('kebab')).to eq('kebap')
267
+ expect(described_class.last_consonant!('kebac')).to eq('kebaç')
268
+ expect(described_class.last_consonant!('kebad')).to eq('kebat')
269
+ expect(described_class.last_consonant!('kebağ')).to eq('kebak')
270
+ end
271
+ end
272
+
273
+ context "when word belongs to protected words" do
274
+ it "does not replace last consonant" do
275
+ expect(described_class.last_consonant!('ad')).to eq('ad')
276
+ end
277
+ end
278
+ end
279
+
280
+ describe ".mark_stem" do
281
+ let(:suffix) do
282
+ {
283
+ "name" => "-dir",
284
+ "regex" => "dir",
285
+ "optional_letter" => false,
286
+ "check_harmony" => true
287
+ }
288
+ end
289
+
290
+ context "when suffix has harmony check on" do
291
+ before do
292
+ suffix["regex"] = "dan"
293
+ end
294
+
295
+ context "and word does not obey harmony rules" do
296
+ it "does not stem a word that does not obey harmony rules" do
297
+ expect(described_class.mark_stem("kürdan", suffix)).to eq(
298
+ { stem: false, word: "kürdan", suffix_applied: nil })
299
+ end
300
+
301
+ context "and word belongs to exceptions" do
302
+ before do
303
+ suffix["regex"] = "ler"
304
+ end
305
+ it "stems the word" do
306
+ expect(described_class.mark_stem("saatler", suffix)).to eq(
307
+ { stem: true, word: "saat", suffix_applied: "ler" })
308
+ end
309
+ end
310
+ end
311
+
312
+ end
313
+
314
+ context "when suffix has harmony check off" do
315
+ before do
316
+ suffix["regex"] = "dan"
317
+ suffix["check_harmony"] = false
318
+ end
319
+
320
+ it "stems a word that does not obey harmony rules" do
321
+ expect(
322
+ described_class.
323
+ mark_stem("kürdan", suffix)).
324
+ to eq({ stem: true, word: "kür", suffix_applied: "dan" })
325
+ end
326
+ end
327
+
328
+ context "when word matches suffix" do
329
+ it "partially stems a word" do
330
+ expect(
331
+ described_class.
332
+ mark_stem("Türkiyedir", suffix)).
333
+ to eq({ stem: true, word: "Türkiye", suffix_applied: "dir" })
334
+ end
335
+
336
+
337
+ context "when suffix has (y) as optional letter" do
338
+ before do
339
+ suffix["optional_letter"] = "y|y"
340
+ suffix["regex"] = "um"
341
+ end
342
+
343
+ context "and new word has valid last 'y' symbol" do
344
+ it "stems correctly and increases the suffix" do
345
+ expect(
346
+ described_class.
347
+ mark_stem("loyum", suffix)).
348
+ to eq({ stem: true, word: "lo", suffix_applied: "yum" })
349
+ end
350
+ end
351
+
352
+ context "and new word does not have valid last 'y' symbol" do
353
+ it "does not stem the word" do
354
+ expect(
355
+ described_class.
356
+ mark_stem("lotyum", suffix)).
357
+ to eq({ stem: false, word: "lotyum", suffix_applied: nil })
358
+ end
359
+ end
360
+ end
361
+ end
362
+ end
363
+
364
+ describe ".generate_pendings" do
365
+ let(:states) { described_class::NOMINAL_VERB_STATES }
366
+ let(:suffixes) { described_class::NOMINAL_VERB_SUFFIXES }
367
+
368
+ it "raises an error if state does not exist" do
369
+ expect {
370
+ described_class.
371
+ generate_pendings(1, "satıyorsunuz", states, suffixes)
372
+ }.to raise_error(ArgumentError, "State 1 does not exist") # 1 is the state key passed above
373
+ end
374
+
375
+ context "when state key does not have transitions" do
376
+ it "returns an empty array" do
377
+ expect(
378
+ described_class.
379
+ # :f state does not have transitions
380
+ generate_pendings(:f, "satıyorsunuz", states, suffixes)).
381
+ to eq []
382
+ end
383
+ end
384
+
385
+ context "when state key has transitions" do
386
+ it "returns an array of hashes for each transition" do
387
+ expect(
388
+ described_class.
389
+ generate_pendings(:a, "satıyorsunuz", states, suffixes).first.keys).
390
+ to eq [:suffix, :to_state, :from_state, :word, :mark]
391
+ end
392
+
393
+ it "sets :from_state key to current key state" do
394
+ expect(
395
+ described_class.
396
+ generate_pendings(:a, "satıyorsunuz", states, suffixes).first[:from_state]).
397
+ to eq :a
398
+ end
399
+ end
400
+ end
401
+
402
+ describe ".valid_optional_letter?" do
403
+ context "when last letter of the word is not equal to candidate" do
404
+ it "responds with [true,nil] - indicating that there was not match" do
405
+ expect(
406
+ described_class.valid_optional_letter?("test", "r")).
407
+ to eq([true, nil])
408
+ end
409
+ end
410
+
411
+ context "when there is a vowel match" do
412
+ context "and the previous char is a vowel" do
413
+ it "responds with false" do
414
+ expect(
415
+ described_class.
416
+ valid_optional_letter?("takcicii", "i")).
417
+ to eq([false, "i"])
418
+ end
419
+ end
420
+
421
+ context "and the previous char is a consonant" do
422
+ it "responds with true" do
423
+ expect(
424
+ described_class.
425
+ valid_optional_letter?("okula", "a")).
426
+ to eq([true, "a"])
427
+ end
428
+ end
429
+ end
430
+
431
+ context "when there is a consonant match" do
432
+ context "and the previous char is a vowel" do
433
+ it "responds with true" do
434
+ expect(
435
+ described_class.
436
+ valid_optional_letter?("litiy", "y")).
437
+ to eq([true, "y"])
438
+ end
439
+ end
440
+
441
+ context "and the previous char is a consonant" do
442
+ it "responds with true" do
443
+ expect(
444
+ described_class.
445
+ valid_optional_letter?("lity", "y")).
446
+ to eq([false, "y"])
447
+ end
448
+ end
449
+ end
450
+ end
451
+
452
+ describe ".stem_post_process" do
453
+ context "when input stream has words with last consonant replacements" do
454
+ it "replaces last consonant" do
455
+ expect(described_class.stem_post_process(["kebab"], "word")).to eq("kebap")
456
+ end
457
+ end
458
+
459
+ it "flattens and uniq results" do
460
+ expect(described_class.stem_post_process(["kitap",["kitap"]], "word")).to eq("kitap")
461
+ end
462
+
463
+ it "removes no syllables words" do
464
+ expect(described_class.stem_post_process(["kitap", "k"], "word")).to eq("kitap")
465
+ end
466
+
467
+ context "when multiple stem candidates exist" do
468
+ it "returns the shortest" do
469
+ pending("fix this")
470
+ expect(described_class.stem_post_process(["kitap", "kita", "kit"], "word")).to eq "kit"
471
+ end
472
+
473
+ context "and word belongs to selection list" do
474
+ it "returns this word" do
475
+ expect(described_class.stem_post_process(
476
+ ["su", "suy", "suyu"], "suyu")).to eq "su"
477
+ end
478
+ end
479
+ end
480
+ end
481
+
482
+ describe ".proceed_to_stem?" do
483
+ context "when word has 1 or less syllables" do
484
+ it "returns false" do
485
+ expect(described_class.proceed_to_stem?("kit")).not_to be
486
+ end
487
+ end
488
+
489
+ context "when word is nil" do
490
+ it "returns false" do
491
+ expect(described_class.proceed_to_stem?(nil)).not_to be
492
+ end
493
+ end
494
+
495
+ context "when word is empty" do
496
+ it "returns false" do
497
+ expect(described_class.proceed_to_stem?("")).not_to be
498
+ end
499
+ end
500
+
501
+ context "when word is among protected words" do
502
+ it "returns false" do
503
+ expect(described_class.proceed_to_stem?("soyad")).not_to be
504
+ end
505
+ end
506
+
507
+ context "when word contains non Turkish letters" do
508
+ it "returns false" do
509
+ expect(described_class.proceed_to_stem?("τελειο")).not_to be
510
+ expect(described_class.proceed_to_stem?("&aa")).not_to be
511
+ end
512
+ end
513
+ end
514
+
515
+ context "1:1 testing with paper" do
516
+ CSV.read("spec/support/fixtures.csv").each do |row|
517
+ it "stems #{row[0]} correct" do
518
+ expect(described_class.stem(row[0].downcase)).to eq row[1].downcase
519
+ end
520
+ end
521
+ end
522
+ end
@@ -0,0 +1,36 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'turkish_stemmer/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "turkish_stemmer"
8
+ spec.version = TurkishStemmer::VERSION
9
+ spec.authors = ["Tasos Stathopoulos", "Giorgos Tsiftsis"]
10
+ spec.email = ["stathopa@skroutz.gr", "giorgos.tsiftsis@skroutz.gr"]
11
+ spec.summary = %q{A simple Turkish stemmer}
12
+ spec.description = %q{A simple Turkish stemmer}
13
+ spec.homepage = "https://github.com/skroutz/turkish_stemmer"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ #spec.files.reject! { Dir['benchmarks'] }
18
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
19
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
20
+ spec.require_paths = ["lib"]
21
+
22
+ spec.add_dependency "activesupport", [">= 3.0.0"]
23
+
24
+ spec.add_development_dependency "bundler", "~> 1.5"
25
+ spec.add_development_dependency "rake"
26
+ spec.add_development_dependency "rspec"
27
+ spec.add_development_dependency "ruby-stemmer"
28
+
29
+ if RUBY_ENGINE == "ruby"
30
+ if RUBY_VERSION >= "2.0.0"
31
+ spec.add_development_dependency "pry-byebug"
32
+ else
33
+ spec.add_development_dependency "pry"
34
+ end
35
+ end
36
+ end