turkish_stemmer 0.1.2 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +0,0 @@
1
- module TurkishStemmer
2
- VERSION = "0.1.2"
3
- end
@@ -1,14 +0,0 @@
1
- a:
2
- transitions:
3
- - suffix: :s1
4
- state: :b
5
-
6
- - suffix: :s2
7
- state: :b
8
-
9
- final_state: true
10
-
11
- b:
12
- transitions: []
13
-
14
- final_state: true
@@ -1,21 +0,0 @@
1
- a:
2
- transitions:
3
- - suffix: :s1
4
- state: :b
5
-
6
- - suffix: :s2
7
- state: :b
8
-
9
- final_state: true
10
-
11
- b:
12
- transitions:
13
- - suffix: :s1
14
- state: :c
15
-
16
- final_state: true
17
-
18
- c:
19
- transitions: []
20
-
21
- final_state: true
@@ -1,7 +0,0 @@
1
- s1:
2
- name: "test"
3
- regex: "im"
4
-
5
- s2:
6
- name: "another"
7
- regex: "siniz"
@@ -1,7 +0,0 @@
1
- s1:
2
- name: "TEST"
3
- regex: "test"
4
-
5
- s2:
6
- name: "another"
7
- regex: "another"
data/spec/spec_helper.rb DELETED
@@ -1,19 +0,0 @@
1
- # This file was generated by the `rspec --init` command. Conventionally, all
2
- # specs live under a `spec` directory, which RSpec adds to the `$LOAD_PATH`.
3
- # Require this file using `require "spec_helper"` to ensure that it is only
4
- # loaded once.
5
- #
6
- # See http://rubydoc.info/gems/rspec-core/RSpec/Core/Configuration
7
- require 'turkish_stemmer'
8
-
9
- RSpec.configure do |config|
10
- config.treat_symbols_as_metadata_keys_with_true_values = true
11
- config.run_all_when_everything_filtered = true
12
- config.filter_run :focus
13
-
14
- # Run specs in random order to surface order dependencies. If you find an
15
- # order dependency and want to debug it, you can fix the order by providing
16
- # the seed, which is printed after each run.
17
- # --seed 1234
18
- config.order = 'random'
19
- end
@@ -1,101 +0,0 @@
1
- evimden,ev,from my house,ev-(i)m-den,
2
- göz,göz,eye,--,
3
- güzelmişsin,güzel,you were beautiful,güzel-miş-sin,rumor
4
- etkilerden,etki,from the effects,etki-ler-den,
5
- çocukmuş,çocuk,it was child,çocuk-miş,rumor
6
- kediymiş,kedi,it was cat,kedi-(y)miş,rumor
7
- balığım,balık,my fish,balık-(i)m,
8
- doktoruymuşsunuz,doktor,you were his/her/its doctor,doktor-i-(y)miş-siniz,rumor
9
- kalelerimizdekilerden,kale,the ones that are from our castle,kale-ler-(i)miz-de-ki-ler-den,
10
- çocuğuymuşumçasına,çocuk,as if i was his/her child ,çocuk-i-(y)miş-im-cesine,
11
- kedileriyle,kedi,with his/her/its cats,kedi-ler-i-(y)le,kedileri+ile
12
- çocuklarımmış,çocuk,they were my children,çocuk-ler-(i)m-miş,rumor
13
- kitabımızdı,kitap,it was our book,kitap-(i)miz-di,
14
- kelimelerin,kelime,"""your"" -or- ""of"" words",kelime-ler-(i)n -or- kelime-ler-in,both ways
15
- kayısısı,kayısı,his/her/its apricot,kayısı-(s)ı,
16
- eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
17
- eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum ",erik-(i)n-de-ki - or- erik-i-(n)de-ki,both ways
18
- eriğinden,erik,"from ""your"" -or- ""his/her/its"" plum",erik-(i)n-den -or- erik-i-(n)den,both ways
19
- eriğine,erik,"to ""your"" -or- ""his/her/its"" plum",erik-(i)n-e -or- erik-i-(n)e,both ways
20
- eriğinde,erik,"at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de -or- erik-i-(n)de,both ways
21
- kayısısına,kayısı,his/her/its apricot,kayısı-(s)ı-(n)a,
22
- kayısısında,kayısı,at his/her/its apricot,kayısı-(s)ı-(n)da,
23
- saatlerimiz,saat,our watches/hours,saat-ler-(i)miz,
24
- kalemimin,kalem,of my pencil,kalem-(i)m-in,
25
- ucu,uç,nib of...,uç-i,
26
- kalelerdekilerden,kale,from the ones that are at (the) castle,kale-ler-de-ki-ler-den,
27
- kalelerdekilerin,kale,of the ones that are at (the) castle,kale-ler-de-ki-ler-in,
28
- kalelerimizdekilerde,kale,at the ones that are at (the) castle,kale-ler-(i)miz-de-ki-ler-de,
29
- kaleninkinin,kale,of the one that belongs to (the) castle,kale-(n)in-ki-nin,
30
- kalemizinkinin,kale,of the one that belongs to our castle,kale-miz-(i)n-ki-(n)in,
31
- kalelerindeki,kale,"the one that is at ""their castle"" -or- ""his/her/its castles""",kale-leri-(n)de-ki -or- kale-ler-i-(n)de-ki,both ways
32
- erikleri,erik,"""their plum"" -or- ""his/her/its plums""",erik-leri -or- erik-ler-i,both ways
33
- erikler,erik,(the) plums,erik-ler,
34
- eriğim,erik,my plum,erik-(i)m,
35
- eriğimiz,erik,our plum,erik-(i)miz,
36
- eriğin,erik,your plum,erik-(i)n,
37
- eriğiniz,erik,your plum,erik-(i)niz,2nd person in plural
38
- eriği,erik,his/her/its plum,erik-i,
39
- eriğini,erik,"""your"" -or- ""his/her/its"" plum",erik-(i)n-i -or- erik-i-(n)i,both ways
40
- eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
41
- eriğe,erik,to (the) plum,erik-e,
42
- eriğine,erik,"to ""your"" -or- ""his/her/its"" plum",erik-(i)n-e -or- erik-i-(n)e,both ways
43
- eriklerine,erik,"to ""their plum"" -or- ""his/her/its plums""",erik-leri-(n)e -or- erik-ler-i-(n)e,both ways
44
- erikte,erik,at (the) plum,erik-de,
45
- eriğinde,erik,"at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de -or- erik-i-(n)de,both ways
46
- erikten,erik,from (the) plum,erik-den,
47
- eriğinden,erik,"from ""your"" -or- ""his/her/its"" plum",erik-(i)n-den -or- erik-i-(n)den,both ways
48
- eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki - or- erik-i-(n)de-ki,both ways
49
- eriğiyle,erik,with his/her/its plum,erik-i-(y)le,
50
- eriğinin,erik,"of ""your"" -or- ""his/her/its"" plum",erik-(i)n-in -or- erik-i-(n)in,both ways
51
- eriğindeki,erik,"the one that is at ""your"" -or- ""his/her/its"" plum",erik-(i)n-de-ki - or- erik-i-(n)de-ki,both ways
52
- eriğince,erik,"after ""your"" -or- ""his/her/its"" plum",erik-(i)n-ce -or- erik-i-(n)ce,both ways
53
- gülüm,gül,my rose,gül-(i)m,
54
- erikteki,erik,the one that is at (the) plum,erik-de-ki,
55
- eriktekilerden,erik,the ones that are from (the) plum,erik-de-ki-ler-den,
56
- eriklerdeki,erik,the ones that are at (the) plum,erik-ler-de-ki,
57
- kitabı,kitap,(the) book,kitap-i,
58
- ağacı,ağaç,(the) tree,ağaç-i,
59
- eriğim,erik,my plum / i am plum,erik-(i)m / erik-im,
60
- kayısıyım,kayısı,i am apricot,kayısı-(y)ım,
61
- eriksem,erik,if i am plum,erik-se-m,
62
- eriksen,erik,if you are plum,erik-se-n,
63
- erikse,erik,if he/she/it is plum,erik-se,
64
- erikseniz,erik,if you are plum,erik-se-niz,2nd person in plural
65
- erikseler,erik,if they are plum,erik-se-ler,
66
- erikti,erik,he/she/it was plum,erik-di,
67
- eriktiniz,erik,you were plum,erik-di-niz,2nd person in plural
68
- eriktiler,erik,they were plum,erik-di-ler,
69
- erikmiş,erik,it was plum,erik-miş,rumor
70
- erikmişçesine,erik,as if it was plum,erik-miş-cesine,
71
- erikmiştir,erik,it was plum,erik-miş-dir,rumor
72
- erikmişim,erik,i was plum,erik-miş-(i)m,rumor
73
- erikmişsin,erik,you were plum,erik-miş-sin,rumor
74
- erikmişsindir,erik,you happened to be plum,erik-miş-sin-dir,rumor
75
- erikmişimdir,erik,i happened to be plum,erik-miş-im-dir,rumor
76
- erikmişiz,erik,we were plum,erik-miş-iz,rumor
77
- erikmişizdir,erik,we happened to be plum,erik-miş-iz-dir,rumor
78
- erikmişsiniz,erik,you were plum,erik-miş-siniz,2nd person in plural + rumor
79
- erikmişsinizdir,erik,you happened to be plum,erik-miş-siniz-dir,2nd person in plural + rumor
80
- erikmişler,erik,they were plum,erik-miş-ler,rumor
81
- erikmişlerdir,erik,they happened to be plum,erik-miş-ler-dir,rumor
82
- erikmişimcesine,erik,as if i was plum,erik-miş-im-cesine,
83
- erikmişsincesine,erik,as if you were plum,erik-miş-sin-cesine,
84
- erikmişizcesine,erik,as if we were plum,erik-miş-iz-cesine,
85
- erikmişsinizcesine,erik,as if you were plum,erik-miş-siniz-cesine,2nd person in plural
86
- erikmişlercesine,erik,as if they were plum,erik-miş-ler-cesine,
87
- erikler,erik,plums,erik-ler,
88
- eriğim,erik,my plum / i am plum,erik-(i)m / erik-im,
89
- eriksin,erik,you are plum,erik-sin,
90
- erik,erik,plum / he/she/it is plum,erik / erik,
91
- eriğiz,erik,we are plum,erik-iz,
92
- eriksiniz,erik,you are plum,erik-siniz,2nd person in plural
93
- erikler,erik,they are plum,erik-ler,
94
- eriktir,erik,it is plum,erik-dir,assumption
95
- eriktirler,erik,they are plum,erik-dir-ler,assumption
96
- erikken,erik,while he/she/it was plum,erik-(i)ken,
97
- kötüymüş,kötü,he/she/it is bad,kötü-(y)miş,rumor
98
- yüz,yüz,face / hundred,yüz,double meaning
99
- muş,muş,--,--,this is a suffix
100
- ad,ad,name,ad,
101
- soyad,soyad,surname,soyad,soy+ad = lineage+name
@@ -1,522 +0,0 @@
1
- # coding: utf-8
2
- require "spec_helper"
3
- require "pry"
4
- require "csv"
5
-
6
- describe TurkishStemmer do
7
-
8
- describe ".count_syllables" do
9
- it "counts syllables correctly" do
10
- expect(described_class.count_syllables("erikler")).to eq 3
11
- expect(described_class.count_syllables("çocuklarımmış")).to eq 5
12
- end
13
- end
14
-
15
- describe ".vowels" do
16
- it "returns all vowels of a word" do
17
- expect(described_class.vowels("kötüymüş")).to eq(%w(ö ü ü))
18
- end
19
- end
20
-
21
- describe ".has_roundness?" do
22
- context "when vowel is empty" do
23
- it "has roundness" do
24
- expect(described_class).to have_roundness(nil, "a")
25
- end
26
- end
27
-
28
- context "when candidate is empty" do
29
- it "has roundness" do
30
- expect(described_class).to have_roundness("a", nil)
31
- end
32
- end
33
-
34
- context "when an unrounded vowel is passed" do
35
- let(:vowel) { described_class::UNROUNDED_VOWELS.chars.to_a.sample }
36
-
37
- context "and candidate is an unrounded vowel too" do
38
- let(:candidate) { described_class::UNROUNDED_VOWELS.chars.to_a.sample }
39
-
40
- it "has roundness" do
41
- expect(described_class).to have_roundness(vowel, candidate)
42
- end
43
- end
44
-
45
- context "and candidate is not an unrounded vowel" do
46
- let(:candidate) { described_class::ROUNDED_VOWELS.chars.to_a.sample }
47
-
48
- it "does not have roundness" do
49
- expect(described_class).not_to have_roundness(vowel, candidate)
50
- end
51
- end
52
- end
53
-
54
- context "when a rounded vowel is passed" do
55
- let(:vowel) { described_class::ROUNDED_VOWELS.chars.to_a.sample }
56
-
57
- context "and one of 'a', 'e', 'u' or 'ü' is a candidate" do
58
- let(:candidate) { described_class::FOLLOWING_ROUNDED_VOWELS.chars.to_a.sample }
59
-
60
- it "has roundness" do
61
- expect(described_class).to have_roundness(vowel, candidate)
62
- end
63
- end
64
-
65
- context "and candidate is 'o'" do
66
- let(:candidate) { 'o' }
67
-
68
- it "does not have roundness" do
69
- expect(described_class).not_to have_roundness(vowel, candidate)
70
- end
71
- end
72
- end
73
- end
74
-
75
- describe ".has_frontness?" do
76
- context "when vowel is empty" do
77
- it "has frontness" do
78
- expect(described_class).to have_frontness(nil, "a")
79
- end
80
- end
81
-
82
- context "when candidate is empty" do
83
- it "has frontness" do
84
- expect(described_class).to have_frontness("a", nil)
85
- end
86
- end
87
-
88
- context "when a front vowel is passed" do
89
- let(:vowel) { described_class::FRONT_VOWELS.chars.to_a.sample }
90
-
91
- context "and candidate is a front vowel" do
92
- let(:candidate) { described_class::FRONT_VOWELS.chars.to_a.sample }
93
-
94
- it "has frontness" do
95
- expect(described_class).to have_frontness(vowel, candidate)
96
- end
97
- end
98
-
99
- context "and candidate is a back vowel" do
100
- let(:candidate) { described_class::BACK_VOWELS.chars.to_a.sample }
101
-
102
- it "does not have frontness" do
103
- expect(described_class).not_to have_frontness(vowel, candidate)
104
- end
105
- end
106
- end
107
-
108
- context "when a back vowel is passed" do
109
- let(:vowel) { described_class::BACK_VOWELS.chars.to_a.sample }
110
-
111
- context "and candidate is a front vowel" do
112
- let(:candidate) { described_class::FRONT_VOWELS.chars.to_a.sample }
113
-
114
- it "does not have frontness" do
115
- expect(described_class).not_to have_frontness(vowel, candidate)
116
- end
117
- end
118
-
119
- context "and candidate is a back vowel" do
120
- let(:candidate) { described_class::BACK_VOWELS.chars.to_a.sample }
121
-
122
- it "has frontness" do
123
- expect(described_class).to have_frontness(vowel, candidate)
124
- end
125
- end
126
- end
127
- end
128
-
129
- describe ".has_vowel_harmony?" do
130
- it "has vowel harmony for valid Turkish words" do
131
- expect(described_class).to have_vowel_harmony("Türkiyedir")
132
- expect(described_class).to have_vowel_harmony("kapıdır")
133
- expect(described_class).to have_vowel_harmony("gündür")
134
- expect(described_class).to have_vowel_harmony("paltodur")
135
- end
136
-
137
- it "does not have vowel harmony for loanwords" do
138
- expect(described_class).not_to have_vowel_harmony("kürdan")
139
- end
140
-
141
- it "does not have vowel harmony for exceptions" do
142
- expect(described_class).not_to have_vowel_harmony("anne")
143
- expect(described_class).not_to have_vowel_harmony("kardeş")
144
- end
145
- end
146
-
147
- describe ".affix_morphological_stripper" do
148
- context "when states are empty" do
149
- it "returns the word" do
150
- expect(
151
- described_class.
152
- affix_morphological_stripper("kapıdır", suffixes: :test)).
153
- to eq(["kapıdır"])
154
- end
155
- end
156
-
157
- context "when suffixes are empty" do
158
- it "return the word" do
159
- expect(
160
- described_class.
161
- affix_morphological_stripper("kapıdır", states: :test)).
162
- to eq(["kapıdır"])
163
- end
164
- end
165
-
166
- context "when there exist states and suffixes" do
167
- let(:states) {
168
- described_class.
169
- load_states_or_suffixes("spec/fixtures/simple_state.yml")
170
- }
171
-
172
- let(:suffixes) {
173
- described_class.
174
- load_states_or_suffixes("spec/fixtures/simple_suffix.yml")
175
- }
176
-
177
- it "generates pendings for the initial state" do
178
- described_class.should_receive(:generate_pendings).with(:a,
179
- "word", states, suffixes).and_call_original
180
-
181
- described_class.affix_morphological_stripper("word",
182
- states: states, suffixes: suffixes)
183
- end
184
- end
185
-
186
- context "when a transition is valid" do
187
- let(:states) {
188
- described_class.
189
- load_states_or_suffixes("spec/fixtures/simple_state.yml")
190
- }
191
-
192
- let(:suffixes) {
193
- described_class.
194
- load_states_or_suffixes("spec/fixtures/simple_suffix.yml")
195
- }
196
-
197
- context "and the transit state is a final state" do
198
- it "removes similar pending transitions" do
199
- described_class.should_receive(:mark_stem).with(
200
- "guzelim", suffixes[:s1]).and_call_original
201
-
202
- described_class.affix_morphological_stripper(
203
- "guzelim", states: states, suffixes: suffixes)
204
- end
205
-
206
- context "with no other transitions" do
207
- it "stems the word" do
208
- expect(
209
- described_class.
210
- affix_morphological_stripper("guzelim",
211
- states: states, suffixes: suffixes)).
212
- to eq ["guzel"]
213
- end
214
- end
215
-
216
- context "with other transitions" do
217
- let(:states) {
218
- described_class.load_states_or_suffixes("spec/fixtures/simple_state_02.yml")
219
- }
220
-
221
- it "adds more pendings to check" do
222
- described_class.should_receive(:mark_stem).with("guzelim",
223
- suffixes[:s1]).and_call_original
224
-
225
- described_class.affix_morphological_stripper("guzelim",
226
- states: states, suffixes: suffixes)
227
- end
228
- end
229
- end
230
- end
231
-
232
- context "when one suffix matches correctly with a given word" do
233
- it "does not compare other suffixes in the same transition" do
234
- described_class.
235
- should_receive(:mark_stem).
236
- with(anything, anything).
237
- # only for suffixes [sUnUz, nUz]
238
- exactly(2).times.
239
- and_call_original
240
-
241
- puts described_class.
242
- affix_morphological_stripper("taksicisiniz",
243
- states: described_class::NOMINAL_VERB_STATES,
244
- suffixes: described_class::NOMINAL_VERB_SUFFIXES)
245
- end
246
- end
247
- end
248
-
249
- describe ".stem" do
250
- context "when input is single syllable" do
251
- it "returns the input as is" do
252
- expect(described_class.stem("ev")).to eq "ev"
253
- end
254
- end
255
-
256
- context "when input has zero syllables - one consonant" do
257
- it "returns the input as is" do
258
- expect(described_class.stem("p")).to eq "p"
259
- end
260
- end
261
- end
262
-
263
- describe ".last_consonant!" do
264
- context "when last consonant is among 'b', 'c', 'd' or 'ğ'" do
265
- it "is replaced by 'p', 'ç', 't' or 'k'" do
266
- expect(described_class.last_consonant!('kebab')).to eq('kebap')
267
- expect(described_class.last_consonant!('kebac')).to eq('kebaç')
268
- expect(described_class.last_consonant!('kebad')).to eq('kebat')
269
- expect(described_class.last_consonant!('kebağ')).to eq('kebak')
270
- end
271
- end
272
-
273
- context "when word belongs to protected words" do
274
- it "does not replace last consonant" do
275
- expect(described_class.last_consonant!('ad')).to eq('ad')
276
- end
277
- end
278
- end
279
-
280
- describe ".mark_stem" do
281
- let(:suffix) do
282
- {
283
- "name" => "-dir",
284
- "regex" => "dir",
285
- "optional_letter" => false,
286
- "check_harmony" => true
287
- }
288
- end
289
-
290
- context "when suffix has harmony check on" do
291
- before do
292
- suffix["regex"] = "dan"
293
- end
294
-
295
- context "and word does not obey harmony rules" do
296
- it "does not stem a word that does not obey harmony rules" do
297
- expect(described_class.mark_stem("kürdan", suffix)).to eq(
298
- { stem: false, word: "kürdan", suffix_applied: nil })
299
- end
300
-
301
- context "and word belongs to exceptions" do
302
- before do
303
- suffix["regex"] = "ler"
304
- end
305
- it "stems the word" do
306
- expect(described_class.mark_stem("saatler", suffix)).to eq(
307
- { stem: true, word: "saat", suffix_applied: "ler" })
308
- end
309
- end
310
- end
311
-
312
- end
313
-
314
- context "when suffix has harmony check off" do
315
- before do
316
- suffix["regex"] = "dan"
317
- suffix["check_harmony"] = false
318
- end
319
-
320
- it "stems a word that does not obey harmony rules" do
321
- expect(
322
- described_class.
323
- mark_stem("kürdan", suffix)).
324
- to eq({ stem: true, word: "kür", suffix_applied: "dan" })
325
- end
326
- end
327
-
328
- context "when word matches suffix" do
329
- it "partially stems a word" do
330
- expect(
331
- described_class.
332
- mark_stem("Türkiyedir", suffix)).
333
- to eq({ stem: true, word: "Türkiye", suffix_applied: "dir" })
334
- end
335
-
336
-
337
- context "when suffix has (y) as optional letter" do
338
- before do
339
- suffix["optional_letter"] = "y|y"
340
- suffix["regex"] = "um"
341
- end
342
-
343
- context "and new word has valid last 'y' symbol" do
344
- it "stems correctly and increases the suffix" do
345
- expect(
346
- described_class.
347
- mark_stem("loyum", suffix)).
348
- to eq({ stem: true, word: "lo", suffix_applied: "yum" })
349
- end
350
- end
351
-
352
- context "and new word does not have valid last 'y' symbol" do
353
- it "does not stem the word" do
354
- expect(
355
- described_class.
356
- mark_stem("lotyum", suffix)).
357
- to eq({ stem: false, word: "lotyum", suffix_applied: nil })
358
- end
359
- end
360
- end
361
- end
362
- end
363
-
364
- describe ".generate_pendings" do
365
- let(:states) { described_class::NOMINAL_VERB_STATES }
366
- let(:suffixes) { described_class::NOMINAL_VERB_SUFFIXES }
367
-
368
- it "raises an error if state does not exist" do
369
- expect {
370
- described_class.
371
- generate_pendings(1, "satıyorsunuz", states, suffixes)
372
- }.to raise_error(ArgumentError, "State #{1} does not exist")
373
- end
374
-
375
- context "when state key does not have transitions" do
376
- it "returns an empty array" do
377
- expect(
378
- described_class.
379
- # :f state does not have transitions
380
- generate_pendings(:f, "satıyorsunuz", states, suffixes)).
381
- to eq []
382
- end
383
- end
384
-
385
- context "when state key has transitions" do
386
- it "returns an array of hashes for each transition" do
387
- expect(
388
- described_class.
389
- generate_pendings(:a, "satıyorsunuz", states, suffixes).first.keys).
390
- to eq [:suffix, :to_state, :from_state, :word, :mark]
391
- end
392
-
393
- it "sets :from_state key to current key state" do
394
- expect(
395
- described_class.
396
- generate_pendings(:a, "satıyorsunuz", states, suffixes).first[:from_state]).
397
- to eq :a
398
- end
399
- end
400
- end
401
-
402
- describe ".valid_optional_letter?" do
403
- context "when last letter of the word is not equal to candidate" do
404
- it "responds with [true,nil] - indicating that there was not match" do
405
- expect(
406
- described_class.valid_optional_letter?("test", "r")).
407
- to eq([true, nil])
408
- end
409
- end
410
-
411
- context "when there is a vowel match" do
412
- context "and the previous char is a vowel" do
413
- it "responds with false" do
414
- expect(
415
- described_class.
416
- valid_optional_letter?("takcicii", "i")).
417
- to eq([false, "i"])
418
- end
419
- end
420
-
421
- context "and the previous char is a consonant" do
422
- it "responds with true" do
423
- expect(
424
- described_class.
425
- valid_optional_letter?("okula", "a")).
426
- to eq([true, "a"])
427
- end
428
- end
429
- end
430
-
431
- context "when there is a consonant match" do
432
- context "and the previous char is a vowel" do
433
- it "responds with true" do
434
- expect(
435
- described_class.
436
- valid_optional_letter?("litiy", "y")).
437
- to eq([true, "y"])
438
- end
439
- end
440
-
441
- context "and the previous char is a consonant" do
442
- it "responds with true" do
443
- expect(
444
- described_class.
445
- valid_optional_letter?("lity", "y")).
446
- to eq([false, "y"])
447
- end
448
- end
449
- end
450
- end
451
-
452
- describe ".stem_post_process" do
453
- context "when input stream has words with last consonant replacements" do
454
- it "replaces last consonant" do
455
- expect(described_class.stem_post_process(["kebab"], "word")).to eq("kebap")
456
- end
457
- end
458
-
459
- it "flattens and uniq results" do
460
- expect(described_class.stem_post_process(["kitap",["kitap"]], "word")).to eq("kitap")
461
- end
462
-
463
- it "removes no syllables words" do
464
- expect(described_class.stem_post_process(["kitap", "k"], "word")).to eq("kitap")
465
- end
466
-
467
- context "when multiple stem candidates exist" do
468
- it "returns the shortest" do
469
- pending("fix this")
470
- expect(described_class.stem_post_process(["kitap", "kita", "kit"], "word")).to eq "kit"
471
- end
472
-
473
- context "and word belongs to selection list" do
474
- it "returns this word" do
475
- expect(described_class.stem_post_process(
476
- ["su", "suy", "suyu"], "suyu")).to eq "su"
477
- end
478
- end
479
- end
480
- end
481
-
482
- describe ".proceed_to_stem?" do
483
- context "when word has 1 or less syllables" do
484
- it "returns false" do
485
- expect(described_class.proceed_to_stem?("kit")).not_to be
486
- end
487
- end
488
-
489
- context "when word is nil" do
490
- it "returns false" do
491
- expect(described_class.proceed_to_stem?(nil)).not_to be
492
- end
493
- end
494
-
495
- context "when word is empty" do
496
- it "returns false" do
497
- expect(described_class.proceed_to_stem?("")).not_to be
498
- end
499
- end
500
-
501
- context "when word is among protected words" do
502
- it "returns false" do
503
- expect(described_class.proceed_to_stem?("soyad")).not_to be
504
- end
505
- end
506
-
507
- context "when word contains non Turkish letters" do
508
- it "returns false" do
509
- expect(described_class.proceed_to_stem?("τελειο")).not_to be
510
- expect(described_class.proceed_to_stem?("&aa")).not_to be
511
- end
512
- end
513
- end
514
-
515
- context "1:1 testing with paper" do
516
- CSV.read("spec/support/fixtures.csv").each do |row|
517
- it "stems #{row[0]} correct" do
518
- expect(described_class.stem(row[0].downcase)).to eq row[1].downcase
519
- end
520
- end
521
- end
522
- end