turkish_stemmer 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,19 @@
1
+ # This file was generated by the `rspec --init` command. Conventionally, all
2
+ # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
+ # Require this file using `require "spec_helper"` to ensure that it is only
4
+ # loaded once.
5
+ #
6
+ # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
+ require 'turkish_stemmer'
8
+
9
+ RSpec.configure do |config|
10
+ config.treat_symbols_as_metadata_keys_with_true_values = true
11
+ config.run_all_when_everything_filtered = true
12
+ config.filter_run :focus
13
+
14
+ # Run specs in random order to surface order dependencies. If you find an
15
+ # order dependency and want to debug it, you can fix the order by providing
16
+ # the seed, which is printed after each run.
17
+ # --seed 1234
18
+ config.order = 'random'
19
+ end
@@ -0,0 +1,101 @@
1
+ evimden,ev,from my house,ev-(i)m-den,
2
+ göz,göz,eye,--,
3
+ güzelmişsin,güzel,you were beautiful,güzel-miş-sin,rumor
4
+ etkilerden,etki,from the effects,etki-ler-den,
5
+ çocukmuş,çocuk,it was child,çocuk-miş,rumor
6
+ kediymiş,kedi,it was cat,kedi-(y)miş,rumor
7
+ balığım,balık,my fish,balık-(i)m,
8
+ doktoruymuşsunuz,doktor,you were his/her/its doctor,doktor-i-(y)miş-siniz,rumor
9
+ kalelerimizdekilerden,kale,the ones that are from our castle,kale-ler-(i)miz-de-ki-ler-den,
10
+ çocuğuymuşumçasına,çocuk,as if i was his/her child,çocuk-i-(y)miş-im-cesine,
11
+ kedileriyle,kedi,with his/her/its cats,kedi-ler-i-(y)le,kedileri+ile
12
+ çocuklarımmış,çocuk,they were my children,çocuk-ler-(i)m-miş,rumor
13
+ kitabımızdı,kitap,it was our book,kitap-(i)miz-di,
14
+ kelimelerin,kelime,"""your"" -or- ""of"" words",kelime-ler-(i)n -or- kelime-ler-in,both ways
15
+ kayısısı,kayısı,his/her/its apricot,kayısı-(s)ı,
16
+ eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
17
+ eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki -or- erik-i-(n)de-ki,both ways
18
+ eriğinden,erik,"from ""your"" -or- ""his/her/its"" plum",erik-(i)n-den -or- erik-i-(n)den,both ways
19
+ eriğine,erik,"to ""your"" -or- ""his/her/its"" plum",erik-(i)n-e -or- erik-i-(n)e,both ways
20
+ eriğinde,erik,"at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de -or- erik-i-(n)de,both ways
21
+ kayısısına,kayısı,to his/her/its apricot,kayısı-(s)ı-(n)a,
22
+ kayısısında,kayısı,at his/her/its apricot,kayısı-(s)ı-(n)da,
23
+ saatlerimiz,saat,our watches/hours,saat-ler-(i)miz,
24
+ kalemimin,kalem,of my pencil,kalem-(i)m-in,
25
+ ucu,uç,nib of...,uç-i,
26
+ kalelerdekilerden,kale,from the ones that are at (the) castle,kale-ler-de-ki-ler-den,
27
+ kalelerdekilerin,kale,of the ones that are at (the) castle,kale-ler-de-ki-ler-in,
28
+ kalelerimizdekilerde,kale,at the ones that are at (the) castle,kale-ler-(i)miz-de-ki-ler-de,
29
+ kaleninkinin,kale,of the one that belongs to (the) castle,kale-(n)in-ki-nin,
30
+ kalemizinkinin,kale,of the one that belongs to our castle,kale-miz-(i)n-ki-(n)in,
31
+ kalelerindeki,kale,"the one that is at ""their castle"" -or- ""his/her/its castles""",kale-leri-(n)de-ki -or- kale-ler-i-(n)de-ki,both ways
32
+ erikleri,erik,"""their plum"" -or- ""his/her/its plums""",erik-leri -or- erik-ler-i,both ways
33
+ erikler,erik,(the) plums,erik-ler,
34
+ eriğim,erik,my plum,erik-(i)m,
35
+ eriğimiz,erik,our plum,erik-(i)miz,
36
+ eriğin,erik,your plum,erik-(i)n,
37
+ eriğiniz,erik,your plum,erik-(i)niz,2nd person in plural
38
+ eriği,erik,his/her/its plum,erik-i,
39
+ eriğini,erik,"""your"" -or- ""his/her/its"" plum",erik-(i)n-i -or- erik-i-(n)i,both ways
40
+ eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
41
+ eriğe,erik,to (the) plum,erik-e,
42
+ eriğine,erik,"to ""your"" -or- ""his/her/its"" plum",erik-(i)n-e -or- erik-i-(n)e,both ways
43
+ eriklerine,erik,"to ""their plum"" -or- ""his/her/its plums""",erik-leri-(n)e -or- erik-ler-i-(n)e,both ways
44
+ erikte,erik,at (the) plum,erik-de,
45
+ eriğinde,erik,"at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de -or- erik-i-(n)de,both ways
46
+ erikten,erik,from (the) plum,erik-den,
47
+ eriğinden,erik,"from ""your"" -or- ""his/her/its"" plum",erik-(i)n-den -or- erik-i-(n)den,both ways
48
+ eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki -or- erik-i-(n)de-ki,both ways
49
+ eriğiyle,erik,with his/her/its plum,erik-i-(y)le,
50
+ eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
51
+ eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki -or- erik-i-(n)de-ki,both ways
52
+ eriğince,erik,"after ""your"" -or- ""his/her/its"" plum",erik-(i)n-ce -or- erik-i-(n)ce,both ways
53
+ gülüm,gül,my rose,gül-(i)m,
54
+ erikteki,erik,the one that is at (the) plum,erik-de-ki,
55
+ eriktekilerden,erik,from the ones that are at (the) plum,erik-de-ki-ler-den,
56
+ eriklerdeki,erik,the ones that are at (the) plum,erik-ler-de-ki,
57
+ kitabı,kitap,(the) book,kitap-i,
58
+ ağacı,ağaç,(the) tree,ağaç-i,
59
+ eriğim,erik,my plum / i am plum,erik-(i)m / erik-im,
60
+ kayısıyım,kayısı,i am apricot,kayısı-(y)ım,
61
+ eriksem,erik,if i am plum,erik-se-m,
62
+ eriksen,erik,if you are plum,erik-se-n,
63
+ erikse,erik,if he/she/it is plum,erik-se,
64
+ erikseniz,erik,if you are plum,erik-se-niz,2nd person in plural
65
+ erikseler,erik,if they are plum,erik-se-ler,
66
+ erikti,erik,he/she/it was plum,erik-di,
67
+ eriktiniz,erik,you were plum,erik-di-niz,2nd person in plural
68
+ eriktiler,erik,they were plum,erik-di-ler,
69
+ erikmiş,erik,it was plum,erik-miş,rumor
70
+ erikmişçesine,erik,as if it was plum,erik-miş-cesine,
71
+ erikmiştir,erik,it was plum,erik-miş-dir,rumor
72
+ erikmişim,erik,i was plum,erik-miş-(i)m,rumor
73
+ erikmişsin,erik,you were plum,erik-miş-sin,rumor
74
+ erikmişsindir,erik,you happened to be plum,erik-miş-sin-dir,rumor
75
+ erikmişimdir,erik,i happened to be plum,erik-miş-im-dir,rumor
76
+ erikmişiz,erik,we were plum,erik-miş-iz,rumor
77
+ erikmişizdir,erik,we happened to be plum,erik-miş-iz-dir,rumor
78
+ erikmişsiniz,erik,you were plum,erik-miş-siniz,2nd person in plural + rumor
79
+ erikmişsinizdir,erik,you happened to be plum,erik-miş-siniz-dir,2nd person in plural + rumor
80
+ erikmişler,erik,they were plum,erik-miş-ler,rumor
81
+ erikmişlerdir,erik,they happened to be plum,erik-miş-ler-dir,rumor
82
+ erikmişimcesine,erik,as if i was plum,erik-miş-im-cesine,
83
+ erikmişsincesine,erik,as if you were plum,erik-miş-sin-cesine,
84
+ erikmişizcesine,erik,as if we were plum,erik-miş-iz-cesine,
85
+ erikmişsinizcesine,erik,as if you were plum,erik-miş-siniz-cesine,2nd person in plural
86
+ erikmişlercesine,erik,as if they were plum,erik-miş-ler-cesine,
87
+ erikler,erik,plums,erik-ler,
88
+ eriğim,erik,my plum / i am plum,erik-(i)m / erik-im,
89
+ eriksin,erik,you are plum,erik-sin,
90
+ erik,erik,plum / he/she/it is plum,erik / erik,
91
+ eriğiz,erik,we are plum,erik-iz,
92
+ eriksiniz,erik,you are plum,erik-siniz,2nd person in plural
93
+ erikler,erik,they are plum,erik-ler,
94
+ eriktir,erik,it is plum,erik-dir,assumption
95
+ eriktirler,erik,they are plum,erik-dir-ler,assumption
96
+ erikken,erik,while he/she/it was plum,erik-(i)ken,
97
+ kötüymüş,kötü,he/she/it is bad,kötü-(y)miş,rumor
98
+ yüz,yüz,face / hundred,yüz,double meaning
99
+ muş,muş,--,--,this is a suffix
100
+ ad,ad,name,ad,
101
+ soyad,soyad,surname,soyad,soy+ad = lineage+name
@@ -0,0 +1,522 @@
1
+ # coding: utf-8
2
+ require "spec_helper"
3
+ require "pry"
4
+ require "csv"
5
+
6
+ describe TurkishStemmer do
7
+
8
+ describe ".count_syllables" do
9
+ it "counts syllables correctly" do
10
+ expect(described_class.count_syllables("erikler")).to eq 3
11
+ expect(described_class.count_syllables("çocuklarımmış")).to eq 5
12
+ end
13
+ end
14
+
15
+ describe ".vowels" do
16
+ it "returns all vowels of a word" do
17
+ expect(described_class.vowels("kötüymüş")).to eq(%w(ö ü ü))
18
+ end
19
+ end
20
+
21
+ describe ".has_roundness?" do
22
+ context "when vowel is empty" do
23
+ it "has roundness" do
24
+ expect(described_class).to have_roundness(nil, "a")
25
+ end
26
+ end
27
+
28
+ context "when candidate is empty" do
29
+ it "has roundness" do
30
+ expect(described_class).to have_roundness("a", nil)
31
+ end
32
+ end
33
+
34
+ context "when an unrounded vowel is passed" do
35
+ let(:vowel) { described_class::UNROUNDED_VOWELS.chars.to_a.sample }
36
+
37
+ context "and candidate is an unrounded vowel too" do
38
+ let(:candidate) { described_class::UNROUNDED_VOWELS.chars.to_a.sample }
39
+
40
+ it "has roundness" do
41
+ expect(described_class).to have_roundness(vowel, candidate)
42
+ end
43
+ end
44
+
45
+ context "and candidate is not an unrounded vowel" do
46
+ let(:candidate) { described_class::ROUNDED_VOWELS.chars.to_a.sample }
47
+
48
+ it "does not have roundness" do
49
+ expect(described_class).not_to have_roundness(vowel, candidate)
50
+ end
51
+ end
52
+ end
53
+
54
+ context "when a rounded vowel is passed" do
55
+ let(:vowel) { described_class::ROUNDED_VOWELS.chars.to_a.sample }
56
+
57
+ context "and one of 'a', 'e', 'u' or 'ü' is a candidate" do
58
+ let(:candidate) { described_class::FOLLOWING_ROUNDED_VOWELS.chars.to_a.sample }
59
+
60
+ it "has roundness" do
61
+ expect(described_class).to have_roundness(vowel, candidate)
62
+ end
63
+ end
64
+
65
+ context "and candidate is 'o'" do
66
+ let(:candidate) { 'o' }
67
+
68
+ it "does not have roundness" do
69
+ expect(described_class).not_to have_roundness(vowel, candidate)
70
+ end
71
+ end
72
+ end
73
+ end
74
+
75
+ describe ".has_frontness?" do
76
+ context "when vowel is empty" do
77
+ it "has frontness" do
78
+ expect(described_class).to have_frontness(nil, "a")
79
+ end
80
+ end
81
+
82
+ context "when candidate is empty" do
83
+ it "has frontness" do
84
+ expect(described_class).to have_frontness("a", nil)
85
+ end
86
+ end
87
+
88
+ context "when a front vowel is passed" do
89
+ let(:vowel) { described_class::FRONT_VOWELS.chars.to_a.sample }
90
+
91
+ context "and candidate is a front vowel" do
92
+ let(:candidate) { described_class::FRONT_VOWELS.chars.to_a.sample }
93
+
94
+ it "has frontness" do
95
+ expect(described_class).to have_frontness(vowel, candidate)
96
+ end
97
+ end
98
+
99
+ context "and candidate is a back vowel" do
100
+ let(:candidate) { described_class::BACK_VOWELS.chars.to_a.sample }
101
+
102
+ it "does not have frontness" do
103
+ expect(described_class).not_to have_frontness(vowel, candidate)
104
+ end
105
+ end
106
+ end
107
+
108
+ context "when a back vowel is passed" do
109
+ let(:vowel) { described_class::BACK_VOWELS.chars.to_a.sample }
110
+
111
+ context "and candidate is a front vowel" do
112
+ let(:candidate) { described_class::FRONT_VOWELS.chars.to_a.sample }
113
+
114
+ it "does not have frontness" do
115
+ expect(described_class).not_to have_frontness(vowel, candidate)
116
+ end
117
+ end
118
+
119
+ context "and candidate is a back vowel" do
120
+ let(:candidate) { described_class::BACK_VOWELS.chars.to_a.sample }
121
+
122
+ it "has frontness" do
123
+ expect(described_class).to have_frontness(vowel, candidate)
124
+ end
125
+ end
126
+ end
127
+ end
128
+
129
+ describe ".has_vowel_harmony?" do
130
+ it "has vowel harmony for valid Turkish words" do
131
+ expect(described_class).to have_vowel_harmony("Türkiyedir")
132
+ expect(described_class).to have_vowel_harmony("kapıdır")
133
+ expect(described_class).to have_vowel_harmony("gündür")
134
+ expect(described_class).to have_vowel_harmony("paltodur")
135
+ end
136
+
137
+ it "does not have vowel harmony for loanwords" do
138
+ expect(described_class).not_to have_vowel_harmony("kürdan")
139
+ end
140
+
141
+ it "does not have vowel harmony for exceptions" do
142
+ expect(described_class).not_to have_vowel_harmony("anne")
143
+ expect(described_class).not_to have_vowel_harmony("kardeş")
144
+ end
145
+ end
146
+
147
+ describe ".affix_morphological_stripper" do
148
+ context "when states are empty" do
149
+ it "returns the word" do
150
+ expect(
151
+ described_class.
152
+ affix_morphological_stripper("kapıdır", suffixes: :test)).
153
+ to eq(["kapıdır"])
154
+ end
155
+ end
156
+
157
+ context "when suffixes are empty" do
158
+ it "return the word" do
159
+ expect(
160
+ described_class.
161
+ affix_morphological_stripper("kapıdır", states: :test)).
162
+ to eq(["kapıdır"])
163
+ end
164
+ end
165
+
166
+ context "when there exist states and suffixes" do
167
+ let(:states) {
168
+ described_class.
169
+ load_states_or_suffixes("spec/fixtures/simple_state.yml")
170
+ }
171
+
172
+ let(:suffixes) {
173
+ described_class.
174
+ load_states_or_suffixes("spec/fixtures/simple_suffix.yml")
175
+ }
176
+
177
+ it "generates pendings for the initial state" do
178
+ described_class.should_receive(:generate_pendings).with(:a,
179
+ "word", states, suffixes).and_call_original
180
+
181
+ described_class.affix_morphological_stripper("word",
182
+ states: states, suffixes: suffixes)
183
+ end
184
+ end
185
+
186
+ context "when a transition is valid" do
187
+ let(:states) {
188
+ described_class.
189
+ load_states_or_suffixes("spec/fixtures/simple_state.yml")
190
+ }
191
+
192
+ let(:suffixes) {
193
+ described_class.
194
+ load_states_or_suffixes("spec/fixtures/simple_suffix.yml")
195
+ }
196
+
197
+ context "and the transit state is a final state" do
198
+ it "removes similar pending transitions" do
199
+ described_class.should_receive(:mark_stem).with(
200
+ "guzelim", suffixes[:s1]).and_call_original
201
+
202
+ described_class.affix_morphological_stripper(
203
+ "guzelim", states: states, suffixes: suffixes)
204
+ end
205
+
206
+ context "with no other transitions" do
207
+ it "stems the word" do
208
+ expect(
209
+ described_class.
210
+ affix_morphological_stripper("guzelim",
211
+ states: states, suffixes: suffixes)).
212
+ to eq ["guzel"]
213
+ end
214
+ end
215
+
216
+ context "with other transitions" do
217
+ let(:states) {
218
+ described_class.load_states_or_suffixes("spec/fixtures/simple_state_02.yml")
219
+ }
220
+
221
+ it "adds more pendings to check" do
222
+ described_class.should_receive(:mark_stem).with("guzelim",
223
+ suffixes[:s1]).and_call_original
224
+
225
+ described_class.affix_morphological_stripper("guzelim",
226
+ states: states, suffixes: suffixes)
227
+ end
228
+ end
229
+ end
230
+ end
231
+
232
+ context "when one suffix matches correctly with a given word" do
233
+ it "does not compare other suffixes in the same transition" do
234
+ described_class.
235
+ should_receive(:mark_stem).
236
+ with(anything, anything).
237
+ # only for suffixes [sUnUz, nUz]
238
+ exactly(2).times.
239
+ and_call_original
240
+
241
+ puts described_class.
242
+ affix_morphological_stripper("taksicisiniz",
243
+ states: described_class::NOMINAL_VERB_STATES,
244
+ suffixes: described_class::NOMINAL_VERB_SUFFIXES)
245
+ end
246
+ end
247
+ end
248
+
249
+ describe ".stem" do
250
+ context "when input is single syllable" do
251
+ it "returns the input as is" do
252
+ expect(described_class.stem("ev")).to eq "ev"
253
+ end
254
+ end
255
+
256
+ context "when input has zero syllables - one consonant" do
257
+ it "returns the input as is" do
258
+ expect(described_class.stem("p")).to eq "p"
259
+ end
260
+ end
261
+ end
262
+
263
+ describe ".last_consonant!" do
264
+ context "when last consonant is among 'b', 'c', 'd' or 'ğ'" do
265
+ it "is replaced by 'p', 'ç', 't' or 'k'" do
266
+ expect(described_class.last_consonant!('kebab')).to eq('kebap')
267
+ expect(described_class.last_consonant!('kebac')).to eq('kebaç')
268
+ expect(described_class.last_consonant!('kebad')).to eq('kebat')
269
+ expect(described_class.last_consonant!('kebağ')).to eq('kebak')
270
+ end
271
+ end
272
+
273
+ context "when word belongs to protected words" do
274
+ it "does not replace last consonant" do
275
+ expect(described_class.last_consonant!('ad')).to eq('ad')
276
+ end
277
+ end
278
+ end
279
+
280
+ describe ".mark_stem" do
281
+ let(:suffix) do
282
+ {
283
+ name: "-dir",
284
+ regex: "dir",
285
+ optional_letter: false,
286
+ check_harmony: true
287
+ }
288
+ end
289
+
290
+ context "when suffix has harmony check on" do
291
+ before do
292
+ suffix[:regex] = "dan"
293
+ end
294
+
295
+ context "and word does not obey harmony rules" do
296
+ it "does not stem a word that does not obey harmony rules" do
297
+ expect(described_class.mark_stem("kürdan", suffix)).to eq(
298
+ { stem: false, word: "kürdan", suffix_applied: nil })
299
+ end
300
+
301
+ context "and word belongs to exceptions" do
302
+ before do
303
+ suffix[:regex] = "ler"
304
+ end
305
+ it "stems the word" do
306
+ expect(described_class.mark_stem("saatler", suffix)).to eq(
307
+ { stem: true, word: "saat", suffix_applied: "ler" })
308
+ end
309
+ end
310
+ end
311
+
312
+ end
313
+
314
+ context "when suffix has harmony check off" do
315
+ before do
316
+ suffix[:regex] = "dan"
317
+ suffix[:check_harmony] = false
318
+ end
319
+
320
+ it "stems a word that does not obey harmony rules" do
321
+ expect(
322
+ described_class.
323
+ mark_stem("kürdan", suffix)).
324
+ to eq({ stem: true, word: "kür", suffix_applied: "dan" })
325
+ end
326
+ end
327
+
328
+ context "when word matches suffix" do
329
+ it "partially stems a word" do
330
+ expect(
331
+ described_class.
332
+ mark_stem("Türkiyedir", suffix)).
333
+ to eq({ stem: true, word: "Türkiye", suffix_applied: "dir" })
334
+ end
335
+
336
+
337
+ context "when suffix has (y) as optional letter" do
338
+ before do
339
+ suffix[:optional_letter] = "y|y"
340
+ suffix[:regex] = "um"
341
+ end
342
+
343
+ context "and new word has valid last 'y' symbol" do
344
+ it "stems correctly and increases the suffix" do
345
+ expect(
346
+ described_class.
347
+ mark_stem("loyum", suffix)).
348
+ to eq({ stem: true, word: "lo", suffix_applied: "yum" })
349
+ end
350
+ end
351
+
352
+ context "and new word does not have valid last 'y' symbol" do
353
+ it "does not stem the word" do
354
+ expect(
355
+ described_class.
356
+ mark_stem("lotyum", suffix)).
357
+ to eq({ stem: false, word: "lotyum", suffix_applied: nil })
358
+ end
359
+ end
360
+ end
361
+ end
362
+ end
363
+
364
+ describe ".generate_pendings" do
365
+ let(:states) { described_class::NOMINAL_VERB_STATES }
366
+ let(:suffixes) { described_class::NOMINAL_VERB_SUFFIXES }
367
+
368
+ it "raises an error if state does not exist" do
369
+ expect {
370
+ described_class.
371
+ generate_pendings(1, "satıyorsunuz", states, suffixes)
372
+ }.to raise_error(ArgumentError, "State #{1} does not exist")
373
+ end
374
+
375
+ context "when state key does not have transitions" do
376
+ it "returns an empty array" do
377
+ expect(
378
+ described_class.
379
+ # :f state does not have transitions
380
+ generate_pendings(:f, "satıyorsunuz", states, suffixes)).
381
+ to eq []
382
+ end
383
+ end
384
+
385
+ context "when state key has transitions" do
386
+ it "returns an array of hashes for each transition" do
387
+ expect(
388
+ described_class.
389
+ generate_pendings(:a, "satıyorsunuz", states, suffixes).first.keys).
390
+ to eq [:suffix, :to_state, :from_state, :word, :mark]
391
+ end
392
+
393
+ it "sets :from_state key to current key state" do
394
+ expect(
395
+ described_class.
396
+ generate_pendings(:a, "satıyorsunuz", states, suffixes).first[:from_state]).
397
+ to eq :a
398
+ end
399
+ end
400
+ end
401
+
402
+ describe ".valid_optional_letter?" do
403
+ context "when last letter of the word is not equal to candidate" do
404
+ it "responds with [true,nil] - indicating that there was not match" do
405
+ expect(
406
+ described_class.valid_optional_letter?("test", "r")).
407
+ to eq([true, nil])
408
+ end
409
+ end
410
+
411
+ context "when there is a vowel match" do
412
+ context "and the previous char is a vowel" do
413
+ it "responds with false" do
414
+ expect(
415
+ described_class.
416
+ valid_optional_letter?("takcicii", "i")).
417
+ to eq([false, "i"])
418
+ end
419
+ end
420
+
421
+ context "and the previous char is a consonant" do
422
+ it "responds with true" do
423
+ expect(
424
+ described_class.
425
+ valid_optional_letter?("okula", "a")).
426
+ to eq([true, "a"])
427
+ end
428
+ end
429
+ end
430
+
431
+ context "when there is a consonant match" do
432
+ context "and the previous char is a vowel" do
433
+ it "responds with true" do
434
+ expect(
435
+ described_class.
436
+ valid_optional_letter?("litiy", "y")).
437
+ to eq([true, "y"])
438
+ end
439
+ end
440
+
441
+ context "and the previous char is a consonant" do
442
+ it "responds with true" do
443
+ expect(
444
+ described_class.
445
+ valid_optional_letter?("lity", "y")).
446
+ to eq([false, "y"])
447
+ end
448
+ end
449
+ end
450
+ end
451
+
452
+ describe ".stem_post_process" do
453
+ context "when input stream has words with last consonant replacements" do
454
+ it "replaces last consonant" do
455
+ expect(described_class.stem_post_process(["kebab"], "word")).to eq("kebap")
456
+ end
457
+ end
458
+
459
+ it "flattens and uniq results" do
460
+ expect(described_class.stem_post_process(["kitap",["kitap"]], "word")).to eq("kitap")
461
+ end
462
+
463
+ it "removes no syllables words" do
464
+ expect(described_class.stem_post_process(["kitap", "k"], "word")).to eq("kitap")
465
+ end
466
+
467
+ context "when multiple stem candidates exist" do
468
+ it "returns the shortest" do
469
+ pending("fix this")
470
+ expect(described_class.stem_post_process(["kitap", "kita", "kit"], "word")).to eq "kit"
471
+ end
472
+
473
+ context "and word belongs to selection list" do
474
+ it "returns this word" do
475
+ expect(described_class.stem_post_process(
476
+ ["su", "suy", "suyu"], "suyu")).to eq "su"
477
+ end
478
+ end
479
+ end
480
+ end
481
+
482
+ describe ".proceed_to_stem?" do
483
+ context "when word has 1 or less syllables" do
484
+ it "returns false" do
485
+ expect(described_class.proceed_to_stem?("kit")).not_to be
486
+ end
487
+ end
488
+
489
+ context "when word is nil" do
490
+ it "returns false" do
491
+ expect(described_class.proceed_to_stem?(nil)).not_to be
492
+ end
493
+ end
494
+
495
+ context "when word is empty" do
496
+ it "returns false" do
497
+ expect(described_class.proceed_to_stem?("")).not_to be
498
+ end
499
+ end
500
+
501
+ context "when word is among protected words" do
502
+ it "returns false" do
503
+ expect(described_class.proceed_to_stem?("soyad")).not_to be
504
+ end
505
+ end
506
+
507
+ context "when word contains non Turkish letters" do
508
+ it "returns false" do
509
+ expect(described_class.proceed_to_stem?("τελειο")).not_to be
510
+ expect(described_class.proceed_to_stem?("&aa")).not_to be
511
+ end
512
+ end
513
+ end
514
+
515
+ context "1:1 testing with paper" do
516
+ CSV.read("spec/support/fixtures.csv").each do |row|
517
+ it "stems #{row[0]} correct" do
518
+ expect(described_class.stem(row[0].downcase)).to eq row[1].downcase
519
+ end
520
+ end
521
+ end
522
+ end
@@ -0,0 +1,35 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'turkish_stemmer/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = "turkish_stemmer"
8
+ spec.version = TurkishStemmer::VERSION
9
+ spec.authors = ["Tasos Stathopoulos", "Giorgos Tsiftsis"]
10
+ spec.email = ["stathopa@skroutz.gr", "giorgos.tsiftsis@skroutz.gr"]
11
+ spec.summary = %q{A simple Turkish stemmer}
12
+ spec.description = %q{A simple Turkish stemmer}
13
+ spec.homepage = "https://gitlab.skroutz.gr/turkish_stemmer"
14
+ spec.license = "MIT"
15
+
16
+ spec.files = `git ls-files`.split($/)
17
+ spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
18
+ spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
19
+ spec.require_paths = ["lib"]
20
+
21
+ spec.add_dependency "hashie"
22
+
23
+ spec.add_development_dependency "bundler", "~> 1.5"
24
+ spec.add_development_dependency "rake"
25
+ spec.add_development_dependency "rspec"
26
+ spec.add_development_dependency "ruby-stemmer"
27
+
28
+ if RUBY_ENGINE == "ruby"
29
+ if RUBY_VERSION >= "2.0.0"
30
+ spec.add_development_dependency "pry-byebug"
31
+ else
32
+ spec.add_development_dependency "pry"
33
+ end
34
+ end
35
+ end