turkish_stemmer 0.1.1 → 0.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/turkish_stemmer/version.rb +1 -1
- data/lib/turkish_stemmer.rb +12 -12
- data/spec/turkish_stemmer_spec.rb +10 -10
- data/turkish_stemmer.gemspec +1 -1
- metadata +4 -5
- data/lib/hash_extension.rb +0 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 4329d09e97cff22cb43a831f47e8f64ca0e5e0ae
|
4
|
+
data.tar.gz: 005c00062f4545e5169ad286cf9843cf11c9c194
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: b55ebf06d0c3431fc751993c6bb15c067f56fede1711cc304cffd74e55338259ad7a5251125f47b3c5fed0de2b1914e02aab55c61eb9b8ff81a51dafd7b16d15
|
7
|
+
data.tar.gz: 3048aff4dd75a1ab76a7e9c065e56be863a750ca881f6d0dcf205cc2c8d15b2247167fcb36a6b6e9fd2a7facf74e214b4ae8405b2cad2688c1ff664dbe1923da
|
data/lib/turkish_stemmer.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
# coding: utf-8
|
2
2
|
require "turkish_stemmer/version"
|
3
3
|
require "yaml"
|
4
|
-
require "
|
4
|
+
require "active_support/core_ext/hash"
|
5
5
|
|
6
6
|
# Please note that we use only lowercase letters for all methods. One should
|
7
7
|
# normalize input streams before using the `stem` method.
|
@@ -248,14 +248,14 @@ module TurkishStemmer
|
|
248
248
|
raise ArgumentError, "State #{key} does not exist" if (state = states[key]).nil?
|
249
249
|
mark = options[:mark] || false
|
250
250
|
|
251
|
-
matched_transitions = state[
|
252
|
-
word.match(/(#{suffixes[transition[
|
251
|
+
matched_transitions = state["transitions"].select do |transition|
|
252
|
+
word.match(/(#{suffixes[transition["suffix"]]["regex"]})$/)
|
253
253
|
end
|
254
254
|
|
255
255
|
matched_transitions.map do |transition|
|
256
256
|
{
|
257
|
-
suffix: transition[
|
258
|
-
to_state: transition[
|
257
|
+
suffix: transition["suffix"],
|
258
|
+
to_state: transition["state"],
|
259
259
|
from_state: key,
|
260
260
|
word: word,
|
261
261
|
mark: mark
|
@@ -270,18 +270,18 @@ module TurkishStemmer
|
|
270
270
|
# @return [Hash] a stem answer record
|
271
271
|
def mark_stem(word, suffix)
|
272
272
|
stem = !PROTECTED_WORDS.include?(word) &&
|
273
|
-
(suffix[
|
273
|
+
(suffix["check_harmony"] &&
|
274
274
|
(has_vowel_harmony?(word) || VOWEL_HARMONY_EXCEPTIONS.include?(word))) ||
|
275
|
-
!suffix[
|
275
|
+
!suffix["check_harmony"]
|
276
276
|
|
277
|
-
suffix_applied = suffix[
|
277
|
+
suffix_applied = suffix["regex"]
|
278
278
|
|
279
279
|
if stem && (match = word.match(/(#{suffix_applied})$/))
|
280
280
|
new_word = word.gsub(/(#{match.to_s})$/, '')
|
281
281
|
suffix_applied = match.to_s
|
282
282
|
|
283
|
-
if suffix[
|
284
|
-
answer, match = valid_optional_letter?(new_word, suffix[
|
283
|
+
if suffix["optional_letter"]
|
284
|
+
answer, match = valid_optional_letter?(new_word, suffix["optional_letter"])
|
285
285
|
|
286
286
|
if answer && match
|
287
287
|
new_word = new_word.chop
|
@@ -397,7 +397,7 @@ module TurkishStemmer
|
|
397
397
|
puts "Word: #{word} \nAnswer: #{answer} \nInfo: #{transition} \nSuffix: #{suffix}"
|
398
398
|
end
|
399
399
|
|
400
|
-
if to_state[
|
400
|
+
if to_state["final_state"] == true
|
401
401
|
# We have a valid transition here. It is safe to remove any pendings
|
402
402
|
# with the same signature as the current pending
|
403
403
|
remove_pendings_like!(transition, pendings)
|
@@ -405,7 +405,7 @@ module TurkishStemmer
|
|
405
405
|
|
406
406
|
stems.push answer[:word]
|
407
407
|
|
408
|
-
unless to_state[
|
408
|
+
unless to_state["transitions"].empty?
|
409
409
|
pendings.unshift(*generate_pendings(transition[:to_state], answer[:word], states, suffixes))
|
410
410
|
end
|
411
411
|
|
@@ -280,16 +280,16 @@ describe TurkishStemmer do
|
|
280
280
|
describe ".mark_stem" do
|
281
281
|
let(:suffix) do
|
282
282
|
{
|
283
|
-
name
|
284
|
-
regex
|
285
|
-
optional_letter
|
286
|
-
check_harmony
|
283
|
+
"name" => "-dir",
|
284
|
+
"regex" => "dir",
|
285
|
+
"optional_letter" => false,
|
286
|
+
"check_harmony" => true
|
287
287
|
}
|
288
288
|
end
|
289
289
|
|
290
290
|
context "when suffix has harmony check on" do
|
291
291
|
before do
|
292
|
-
suffix[
|
292
|
+
suffix["regex"] = "dan"
|
293
293
|
end
|
294
294
|
|
295
295
|
context "and word does not obey harmony rules" do
|
@@ -300,7 +300,7 @@ describe TurkishStemmer do
|
|
300
300
|
|
301
301
|
context "and word belongs to exceptions" do
|
302
302
|
before do
|
303
|
-
suffix[
|
303
|
+
suffix["regex"] = "ler"
|
304
304
|
end
|
305
305
|
it "stems the word" do
|
306
306
|
expect(described_class.mark_stem("saatler", suffix)).to eq(
|
@@ -313,8 +313,8 @@ describe TurkishStemmer do
|
|
313
313
|
|
314
314
|
context "when suffix has harmony check off" do
|
315
315
|
before do
|
316
|
-
suffix[
|
317
|
-
suffix[
|
316
|
+
suffix["regex"] = "dan"
|
317
|
+
suffix["check_harmony"] = false
|
318
318
|
end
|
319
319
|
|
320
320
|
it "stems a word that does not obey harmony rules" do
|
@@ -336,8 +336,8 @@ describe TurkishStemmer do
|
|
336
336
|
|
337
337
|
context "when suffix has (y) as optional letter" do
|
338
338
|
before do
|
339
|
-
suffix[
|
340
|
-
suffix[
|
339
|
+
suffix["optional_letter"] = "y|y"
|
340
|
+
suffix["regex"] = "um"
|
341
341
|
end
|
342
342
|
|
343
343
|
context "and new word has valid last 'y' symbol" do
|
data/turkish_stemmer.gemspec
CHANGED
@@ -18,7 +18,7 @@ Gem::Specification.new do |spec|
|
|
18
18
|
spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
19
19
|
spec.require_paths = ["lib"]
|
20
20
|
|
21
|
-
spec.add_dependency "
|
21
|
+
spec.add_dependency "activesupport", [">= 3.0.0"]
|
22
22
|
|
23
23
|
spec.add_development_dependency "bundler", "~> 1.5"
|
24
24
|
spec.add_development_dependency "rake"
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: turkish_stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tasos Stathopoulos
|
@@ -12,19 +12,19 @@ cert_chain: []
|
|
12
12
|
date: 2014-04-02 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
|
-
name:
|
15
|
+
name: activesupport
|
16
16
|
requirement: !ruby/object:Gem::Requirement
|
17
17
|
requirements:
|
18
18
|
- - ">="
|
19
19
|
- !ruby/object:Gem::Version
|
20
|
-
version:
|
20
|
+
version: 3.0.0
|
21
21
|
type: :runtime
|
22
22
|
prerelease: false
|
23
23
|
version_requirements: !ruby/object:Gem::Requirement
|
24
24
|
requirements:
|
25
25
|
- - ">="
|
26
26
|
- !ruby/object:Gem::Version
|
27
|
-
version:
|
27
|
+
version: 3.0.0
|
28
28
|
- !ruby/object:Gem::Dependency
|
29
29
|
name: bundler
|
30
30
|
requirement: !ruby/object:Gem::Requirement
|
@@ -119,7 +119,6 @@ files:
|
|
119
119
|
- config/noun_states.yml
|
120
120
|
- config/noun_suffixes.yml
|
121
121
|
- config/stemmer.yml
|
122
|
-
- lib/hash_extension.rb
|
123
122
|
- lib/turkish_stemmer.rb
|
124
123
|
- lib/turkish_stemmer/version.rb
|
125
124
|
- spec/fixtures/simple_state.yml
|