sastrawi-ruby 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.github/workflows/ci.yml +23 -0
- data/.gitignore +51 -0
- data/.travis.yml +10 -0
- data/CONTRIBUTING.md +22 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +104 -0
- data/Rakefile +6 -0
- data/_config.yml +1 -0
- data/bin/sastrawi +24 -0
- data/data/base-word.txt +29933 -0
- data/lib/sastrawi/dictionary/array_dictionary.rb +67 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +11 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +19 -0
- data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +28 -0
- data/lib/sastrawi/stemmer/cache/array_cache.rb +25 -0
- data/lib/sastrawi/stemmer/cached_stemmer.rb +33 -0
- data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +25 -0
- data/lib/sastrawi/stemmer/context/context.rb +217 -0
- data/lib/sastrawi/stemmer/context/removal.rb +17 -0
- data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +17 -0
- data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +54 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +37 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +34 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +34 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +34 -0
- data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +157 -0
- data/lib/sastrawi/stemmer/filter/text_normalizer.rb +15 -0
- data/lib/sastrawi/stemmer/stemmer.rb +101 -0
- data/lib/sastrawi/stemmer/stemmer_factory.rb +49 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +27 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +124 -0
- data/lib/sastrawi/version.rb +5 -0
- data/lib/sastrawi.rb +4 -0
- data/sastrawi.gemspec +34 -0
- metadata +179 -0
|
@@ -0,0 +1,67 @@
|
|
|
1
|
+
module Sastrawi
  module Dictionary
    ##
    # In-memory word list backed by a plain Array. Words are kept in
    # insertion order and duplicates are not filtered out.
    class ArrayDictionary
      attr_reader :words

      ##
      # Build a dictionary, optionally seeded with +words+.
      # A nil +words+ argument is treated as an empty list.

      def initialize(words = [])
        @words = []

        add_words(words)
      end

      ##
      # Check whether a word is contained in the dictionary

      def contains?(word)
        @words.include?(word)
      end

      ##
      # Count how many words in the dictionary

      def count
        @words.length
      end

      ##
      # Add multiple words to the dictionary.
      # Kernel#Array turns nil into [] so a missing list is a no-op
      # instead of a NoMethodError; arrays are passed through unchanged.

      def add_words(new_words)
        Array(new_words).each { |word| add(word) }
      end

      ##
      # Add a word to the dictionary.
      # nil and whitespace-only words are ignored; any other word is stored
      # exactly as given (it is not stripped).

      def add(word)
        return if word.nil? || word.strip.empty?

        @words.push(word)
      end

      ##
      # Add words from a text file to the dictionary.
      # Each line of the file is one word; the trailing line terminator is
      # removed and blank lines are dropped by #add. The whole file is read
      # before any word is added.

      def add_words_from_text_file(file_path)
        add_words(File.readlines(file_path).map(&:chomp))
      end

      ##
      # Remove a word from the dictionary.
      # Array#delete removes every occurrence equal to +word+.

      def remove(word)
        @words.delete(word)
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 10: "me" followed by l, r, w or y and then a vowel.
      class DisambiguatorPrefixRule10
        ##
        # Returns the matched text that follows the "me" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^me([lrwy])([aiueo])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 11: "mem" followed by b, f or v.
      class DisambiguatorPrefixRule11
        ##
        # Returns the matched text that follows the "mem" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^mem([bfv])(.*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          consonant + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 12: words beginning with "mempe".
      class DisambiguatorPrefixRule12
        ##
        # Returns "pe" plus the text matched after "mempe" (i.e. the
        # leading "mem" is dropped), or nil when +word+ does not match.
        def disambiguate(word)
          found = /^mempe(.*)$/.match(word)
          return unless found

          'pe' + found[1]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 13a: "mem" followed by a vowel, keeping the "m".
      class DisambiguatorPrefixRule13a
        ##
        # Returns "m" + vowel + remainder (the leading "me" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^mem([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "m#{vowel}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 13b: "mem" followed by a vowel, restoring a "p".
      class DisambiguatorPrefixRule13b
        ##
        # Returns "p" + vowel + remainder (the "mem" prefix is replaced
        # by "p"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^mem([aiueo])(.*)$/.match(word)
          return unless found

          'p' + found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 14: "men" followed by c, d, j, s, t or z.
      class DisambiguatorPrefixRule14
        ##
        # Returns the matched text that follows the "men" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^men([cdjstz])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 15a: "men" followed by a vowel, keeping the "n".
      class DisambiguatorPrefixRule15a
        ##
        # Returns "n" + vowel + remainder (the leading "me" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^men([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "n#{vowel}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 15b: "men" followed by a vowel, restoring a "t".
      class DisambiguatorPrefixRule15b
        ##
        # Returns "t" + vowel + remainder (the "men" prefix is replaced
        # by "t"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^men([aiueo])(.*)$/.match(word)
          return unless found

          't' + found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 16: "meng" followed by g, h, q or k.
      class DisambiguatorPrefixRule16
        ##
        # Returns the matched text that follows the "meng" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^meng([ghqk])(.*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          consonant + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 17a: "meng" followed by a vowel, dropping "meng".
      class DisambiguatorPrefixRule17a
        ##
        # Returns vowel + remainder (the text after "meng"), or nil when
        # +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^meng([aiueo])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 17b: "meng" followed by a vowel, restoring a "k".
      class DisambiguatorPrefixRule17b
        ##
        # Returns "k" + vowel + remainder (the "meng" prefix is replaced
        # by "k"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^meng([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "k#{vowel}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 17c: words beginning with "menge".
      class DisambiguatorPrefixRule17c
        ##
        # Returns the text matched after "menge", or nil when +word+ does
        # not begin with that prefix.
        def disambiguate(word)
          found = /^menge(.*)$/.match(word)
          return unless found

          found[1]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 17d: "meng" followed by a vowel, keeping the "ng".
      class DisambiguatorPrefixRule17d
        ##
        # Returns "ng" + vowel + remainder (the leading "me" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^meng([aiueo])(.*)$/.match(word)
          return unless found

          'ng' + found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 18a: "meny" followed by a vowel, keeping the "ny".
      class DisambiguatorPrefixRule18a
        ##
        # Returns "ny" + vowel + remainder (the leading "me" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^meny([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "ny#{vowel}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 18b: "meny" followed by a vowel, restoring an "s".
      class DisambiguatorPrefixRule18b
        ##
        # Returns "s" + vowel + remainder (the "meny" prefix is replaced
        # by "s"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^meny([aiueo])(.*)$/.match(word)
          return unless found

          's' + found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 19: "memp" followed by any lowercase letter other
      # than "e" or "n" (the character class lists every other letter).
      class DisambiguatorPrefixRule19
        ##
        # Returns "p" + letter + remainder (the leading "mem" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^memp([abcdfghijklmopqrstuvwxyz])(.*)$/.match(word)
          return unless found

          letter, rest = found.captures
          "p#{letter}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 1a: "ber" followed by a vowel, dropping "ber".
      class DisambiguatorPrefixRule1a
        ##
        # Returns the matched text that follows "ber" (starting at the
        # vowel), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^ber([aiueo].*)$/.match(word)
          return unless found

          found[1]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 1b: "ber" followed by a vowel, keeping the "r".
      class DisambiguatorPrefixRule1b
        ##
        # Returns "r" + the matched text after "ber" (only the leading
        # "be" is dropped), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^ber([aiueo].*)$/.match(word)
          return unless found

          'r' + found[1]
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 2: "ber" + consonant + letter + remainder, rejected
      # when the remainder itself starts with "er".
      class DisambiguatorPrefixRule2
        ##
        # Returns the matched text that follows "ber", or nil when +word+
        # does not fit the pattern or the part after the first two letters
        # begins with "er".
        def disambiguate(word)
          found = /^ber([bcdfghjklmnpqrstvwxyz])([a-z])(.*)$/.match(word)
          return unless found

          consonant, letter, tail = found.captures
          return if /^er(.*)$/.match(tail)

          [consonant, letter, tail].join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 20: "pe" followed by w or y and then a vowel.
      class DisambiguatorPrefixRule20
        ##
        # Returns the matched text that follows the "pe" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pe([wy])([aiueo])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 21a: "per" followed by a vowel, dropping "per".
      class DisambiguatorPrefixRule21a
        ##
        # Returns vowel + remainder (the text after "per"), or nil when
        # +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^per([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          vowel + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 21b: "pe" followed by "r" + vowel, keeping the "r".
      class DisambiguatorPrefixRule21b
        ##
        # Returns the matched text that follows "pe" (starting at the
        # "r"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pe(r[aiueo])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 23: "per" + consonant + letter + remainder, rejected
      # when the remainder itself starts with "er".
      class DisambiguatorPrefixRule23
        ##
        # Returns the matched text that follows "per", or nil when +word+
        # does not fit the pattern or the part after the first two letters
        # begins with "er".
        def disambiguate(word)
          found = /^per([bcdfghjklmnpqrstvwxyz])([a-z])(.*)$/.match(word)
          return unless found

          consonant, letter, tail = found.captures
          return if /^er(.*)$/.match(tail)

          [consonant, letter, tail].join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 24: "per" + consonant + letter + "er" + vowel +
      # remainder, rejected when the consonant is "r".
      class DisambiguatorPrefixRule24
        ##
        # Returns the matched text that follows "per", or nil when +word+
        # does not fit the pattern or the first letter after "per" is "r".
        def disambiguate(word)
          found = /^per([bcdfghjklmnpqrstvwxyz])([a-z])er([aiueo])(.*)$/.match(word)
          return unless found

          consonant, letter, vowel, rest = found.captures
          return if consonant == 'r'

          consonant + letter + 'er' + vowel + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 25: "pem" followed by b, f or v.
      class DisambiguatorPrefixRule25
        ##
        # Returns the matched text that follows the "pem" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pem([bfv])(.*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          consonant + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 26a: "pem" followed by a vowel, keeping the "m".
      class DisambiguatorPrefixRule26a
        ##
        # Returns "m" + vowel + remainder (the leading "pe" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pem([aiueo])(.*)$/.match(word)
          return unless found

          'm' + found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 26b: "pem" followed by a vowel, restoring a "p".
      class DisambiguatorPrefixRule26b
        ##
        # Returns "p" + vowel + remainder (the "pem" prefix is replaced
        # by "p"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pem([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "p#{vowel}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 27: "pen" followed by c, d, j, s, t or z.
      class DisambiguatorPrefixRule27
        ##
        # Returns the matched text that follows the "pen" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pen([cdjstz])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 28a: "pen" followed by a vowel, keeping the "n".
      class DisambiguatorPrefixRule28a
        ##
        # Returns "n" + vowel + remainder (the leading "pe" is dropped),
        # or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pen([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "n#{vowel}#{rest}"
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 28b: "pen" followed by a vowel, restoring a "t".
      class DisambiguatorPrefixRule28b
        ##
        # Returns "t" + vowel + remainder (the "pen" prefix is replaced
        # by "t"), or nil when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^pen([aiueo])(.*)$/.match(word)
          return unless found

          't' + found.captures.join
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 29: "peng" followed by a consonant.
      class DisambiguatorPrefixRule29
        ##
        # Returns the matched text that follows the "peng" prefix, or nil
        # when +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^peng([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          consonant + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 3: "ber" + consonant + letter + "er" + vowel +
      # remainder, rejected when the consonant is "r".
      class DisambiguatorPrefixRule3
        ##
        # Returns the matched text that follows "ber", or nil when +word+
        # does not fit the pattern or the first letter after "ber" is "r".
        def disambiguate(word)
          found = /^ber([bcdfghjklmnpqrstvwxyz])([a-z])er([aiueo])(.*)$/.match(word)
          return unless found

          consonant, letter, vowel, rest = found.captures
          return if consonant == 'r'

          consonant + letter + 'er' + vowel + rest
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
module Sastrawi
  module Morphology
    module Disambiguator
      ##
      # Prefix rule 30a: "peng" followed by a vowel, dropping "peng".
      class DisambiguatorPrefixRule30a
        ##
        # Returns vowel + remainder (the text after "peng"), or nil when
        # +word+ does not fit the pattern.
        def disambiguate(word)
          found = /^peng([aiueo])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
|