sastrawi-ruby 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/MILESTONES.md +12 -0
- data/data/base-word.txt +17 -1
- data/data/stop-words.txt +842 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +2 -0
- data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +2 -0
- data/lib/sastrawi/stemmer/cached_stemmer.rb +2 -0
- data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +2 -0
- data/lib/sastrawi/stemmer/context/context.rb +2 -0
- data/lib/sastrawi/stemmer/context/removal.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +2 -0
- data/lib/sastrawi/stemmer/stemmer.rb +8 -0
- data/lib/sastrawi/stemmer/stemmer_factory.rb +2 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +2 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +19 -107
- data/lib/sastrawi/version.rb +1 -1
- data/lib/sastrawi.rb +2 -0
- metadata +3 -1
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'sastrawi/stemmer/context/context'
|
|
2
4
|
|
|
3
5
|
require 'sastrawi/stemmer/context/visitor/visitor_provider'
|
|
@@ -84,6 +86,12 @@ module Sastrawi
|
|
|
84
86
|
|
|
85
87
|
if root_first_word == root_second_word
|
|
86
88
|
root_first_word
|
|
89
|
+
elsif @dictionary.contains?(root_second_word)
|
|
90
|
+
# Handle partial/rhyming reduplication (bolak-balik, sayur-mayur, lauk-pauk)
|
|
91
|
+
# Prefer the second word's stem when it's a dictionary word
|
|
92
|
+
root_second_word
|
|
93
|
+
elsif @dictionary.contains?(root_first_word)
|
|
94
|
+
root_first_word
|
|
87
95
|
else
|
|
88
96
|
word
|
|
89
97
|
end
|