sastrawi 0.1.0.pre → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -1
- data/.travis.yml +7 -5
- data/CONTRIBUTING.md +22 -0
- data/Gemfile +0 -0
- data/LICENSE.txt +1 -1
- data/README.md +53 -19
- data/Rakefile +2 -2
- data/_config.yml +1 -0
- data/bin/sastrawi +24 -0
- data/data/{kata-dasar.txt → base-word.txt} +0 -0
- data/lib/sastrawi.rb +1 -9
- data/lib/sastrawi/dictionary/array_dictionary.rb +36 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +2 -2
- data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +4 -0
- data/lib/sastrawi/stemmer/cache/array_cache.rb +2 -2
- data/lib/sastrawi/stemmer/cached_stemmer.rb +1 -1
- data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +5 -0
- data/lib/sastrawi/stemmer/context/context.rb +28 -7
- data/lib/sastrawi/stemmer/context/removal.rb +1 -1
- data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +0 -0
- data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +2 -2
- data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +10 -1
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +9 -1
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +9 -1
- data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +9 -1
- data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +1 -1
- data/lib/sastrawi/stemmer/filter/text_normalizer.rb +0 -0
- data/lib/sastrawi/stemmer/stemmer.rb +31 -15
- data/lib/sastrawi/stemmer/stemmer_factory.rb +5 -1
- data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +5 -2
- data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +102 -130
- data/lib/sastrawi/version.rb +1 -1
- data/sastrawi.gemspec +6 -5
- metadata +22 -19
File without changes
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule29
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^
|
6
|
+
contains = /^peng([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
File without changes
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule37a
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])(er[aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule37b
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])er([aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule38a
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])(el[aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule38b
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])el([aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule39a
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])(em[aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule39b
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])em([aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
File without changes
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule40a
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])(in[aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule40b
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^([
|
6
|
+
contains = /^([bcdfghjklmnpqrstvwxyz])in([aiueo])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
File without changes
|
File without changes
|
@@ -3,12 +3,12 @@ module Sastrawi
|
|
3
3
|
module Disambiguator
|
4
4
|
class DisambiguatorPrefixRule5
|
5
5
|
def disambiguate(word)
|
6
|
-
contains = /^be([
|
6
|
+
contains = /^be([bcdfghjklmnpqstvwxyz])(er[bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
|
7
7
|
|
8
8
|
if contains
|
9
9
|
matches = contains.captures
|
10
10
|
|
11
|
-
return matches[0]
|
11
|
+
return "#{matches[0]}#{matches[1]}#{matches[2]}"
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
File without changes
|