turkish_stemmer 0.1.1 → 0.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/turkish_stemmer/version.rb +1 -1
- data/lib/turkish_stemmer.rb +12 -12
- data/spec/turkish_stemmer_spec.rb +10 -10
- data/turkish_stemmer.gemspec +1 -1
- metadata +4 -5
- data/lib/hash_extension.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4329d09e97cff22cb43a831f47e8f64ca0e5e0ae
|
4
|
+
data.tar.gz: 005c00062f4545e5169ad286cf9843cf11c9c194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b55ebf06d0c3431fc751993c6bb15c067f56fede1711cc304cffd74e55338259ad7a5251125f47b3c5fed0de2b1914e02aab55c61eb9b8ff81a51dafd7b16d15
|
7
|
+
data.tar.gz: 3048aff4dd75a1ab76a7e9c065e56be863a750ca881f6d0dcf205cc2c8d15b2247167fcb36a6b6e9fd2a7facf74e214b4ae8405b2cad2688c1ff664dbe1923da
|
data/lib/turkish_stemmer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require "turkish_stemmer/version"
|
3
3
|
require "yaml"
|
4
|
-
require "hash_extension"
|
4
|
+
require "active_support/core_ext/hash"
|
5
5
|
|
6
6
|
# Please note that we use only lowercase letters for all methods. One should
|
7
7
|
# normalize input streams before using the `stem` method.
|
@@ -248,14 +248,14 @@ module TurkishStemmer
|
|
248
248
|
raise ArgumentError, "State #{key} does not exist" if (state = states[key]).nil?
|
249
249
|
mark = options[:mark] || false
|
250
250
|
|
251
|
-
matched_transitions = state[
|
252
|
-
word.match(/(#{suffixes[transition[
|
251
|
+
matched_transitions = state["transitions"].select do |transition|
|
252
|
+
word.match(/(#{suffixes[transition["suffix"]]["regex"]})$/)
|
253
253
|
end
|
254
254
|
|
255
255
|
matched_transitions.map do |transition|
|
256
256
|
{
|
257
|
-
suffix: transition[
|
258
|
-
to_state: transition[
|
257
|
+
suffix: transition["suffix"],
|
258
|
+
to_state: transition["state"],
|
259
259
|
from_state: key,
|
260
260
|
word: word,
|
261
261
|
mark: mark
|
@@ -270,18 +270,18 @@ module TurkishStemmer
|
|
270
270
|
# @return [Hash] a stem answer record
|
271
271
|
def mark_stem(word, suffix)
|
272
272
|
stem = !PROTECTED_WORDS.include?(word) &&
|
273
|
-
(suffix[
|
273
|
+
(suffix["check_harmony"] &&
|
274
274
|
(has_vowel_harmony?(word) || VOWEL_HARMONY_EXCEPTIONS.include?(word))) ||
|
275
|
-
!suffix[
|
275
|
+
!suffix["check_harmony"]
|
276
276
|
|
277
|
-
suffix_applied = suffix[
|
277
|
+
suffix_applied = suffix["regex"]
|
278
278
|
|
279
279
|
if stem && (match = word.match(/(#{suffix_applied})$/))
|
280
280
|
new_word = word.gsub(/(#{match.to_s})$/, '')
|
281
281
|
suffix_applied = match.to_s
|
282
282
|
|
283
|
-
if suffix[
|
284
|
-
answer, match = valid_optional_letter?(new_word, suffix[
|
283
|
+
if suffix["optional_letter"]
|
284
|
+
answer, match = valid_optional_letter?(new_word, suffix["optional_letter"])
|
285
285
|
|
286
286
|
if answer && match
|
287
287
|
new_word = new_word.chop
|
@@ -397,7 +397,7 @@ module TurkishStemmer
|
|
397
397
|
puts "Word: #{word} \nAnswer: #{answer} \nInfo: #{transition} \nSuffix: #{suffix}"
|
398
398
|
end
|
399
399
|
|
400
|
-
if to_state[
|
400
|
+
if to_state["final_state"] == true
|
401
401
|
# We have a valid transition here. It is safe to remove any pendings
|
402
402
|
# with the same signature current pending
|
403
403
|
remove_pendings_like!(transition, pendings)
|
@@ -405,7 +405,7 @@ module TurkishStemmer
|
|
405
405
|
|
406
406
|
stems.push answer[:word]
|
407
407
|
|
408
|
-
unless to_state[
|
408
|
+
unless to_state["transitions"].empty?
|
409
409
|
pendings.unshift(*generate_pendings(transition[:to_state], answer[:word], states, suffixes))
|
410
410
|
end
|
411
411
|
|
@@ -280,16 +280,16 @@ describe TurkishStemmer do
|
|
280
280
|
describe ".mark_stem" do
|
281
281
|
let(:suffix) do
|
282
282
|
{
|
283
|
-
name
|
284
|
-
regex
|
285
|
-
optional_letter
|
286
|
-
check_harmony
|
283
|
+
"name" => "-dir",
|
284
|
+
"regex" => "dir",
|
285
|
+
"optional_letter" => false,
|
286
|
+
"check_harmony" => true
|
287
287
|
}
|
288
288
|
end
|
289
289
|
|
290
290
|
context "when suffix has harmony check on" do
|
291
291
|
before do
|
292
|
-
suffix[
|
292
|
+
suffix["regex"] = "dan"
|
293
293
|
end
|
294
294
|
|
295
295
|
context "and word does not obey harmony rules" do
|
@@ -300,7 +300,7 @@ describe TurkishStemmer do
|
|
300
300
|
|
301
301
|
context "and word belongs to exceptions" do
|
302
302
|
before do
|
303
|
-
suffix[
|
303
|
+
suffix["regex"] = "ler"
|
304
304
|
end
|
305
305
|
it "stems the word" do
|
306
306
|
expect(described_class.mark_stem("saatler", suffix)).to eq(
|
@@ -313,8 +313,8 @@ describe TurkishStemmer do
|
|
313
313
|
|
314
314
|
context "when suffix has harmony check off" do
|
315
315
|
before do
|
316
|
-
suffix[
|
317
|
-
suffix[
|
316
|
+
suffix["regex"] = "dan"
|
317
|
+
suffix["check_harmony"] = false
|
318
318
|
end
|
319
319
|
|
320
320
|
it "stems a word that does not obey harmony rules" do
|
@@ -336,8 +336,8 @@ describe TurkishStemmer do
|
|
336
336
|
|
337
337
|
context "when suffix has (y) as optional letter" do
|
338
338
|
before do
|
339
|
-
suffix[
|
340
|
-
suffix[
|
339
|
+
suffix["optional_letter"] = "y|y"
|
340
|
+
suffix["regex"] = "um"
|
341
341
|
end
|
342
342
|
|
343
343
|
context "and new word has valid last 'y' symbol" do
|
data/turkish_stemmer.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_dependency "
|
21
|
+
spec.add_dependency "activesupport", [">= 3.0.0"]
|
22
22
|
|
23
23
|
spec.add_development_dependency "bundler", "~> 1.5"
|
24
24
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turkish_stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.1
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tasos Stathopoulos
|
@@ -12,19 +12,19 @@ cert_chain: []
|
|
12
12
|
date: 2014-04-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: activesupport
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 3.0.0
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 3.0.0
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: bundler
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -119,7 +119,6 @@ files:
|
|
119
119
|
- config/noun_states.yml
|
120
120
|
- config/noun_suffixes.yml
|
121
121
|
- config/stemmer.yml
|
122
|
-
- lib/hash_extension.rb
|
123
122
|
- lib/turkish_stemmer.rb
|
124
123
|
- lib/turkish_stemmer/version.rb
|
125
124
|
- spec/fixtures/simple_state.yml
|