sastrawi-ruby 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ci.yml +23 -0
  3. data/.gitignore +51 -0
  4. data/.travis.yml +10 -0
  5. data/CONTRIBUTING.md +22 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +104 -0
  9. data/Rakefile +6 -0
  10. data/_config.yml +1 -0
  11. data/bin/sastrawi +24 -0
  12. data/data/base-word.txt +29933 -0
  13. data/lib/sastrawi/dictionary/array_dictionary.rb +67 -0
  14. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +17 -0
  15. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +17 -0
  16. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +17 -0
  17. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +17 -0
  18. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +17 -0
  19. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +17 -0
  20. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +17 -0
  21. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +17 -0
  22. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +17 -0
  23. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +17 -0
  24. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +17 -0
  25. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +17 -0
  26. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +17 -0
  27. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +17 -0
  28. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +17 -0
  29. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +17 -0
  30. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +17 -0
  31. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +17 -0
  32. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +19 -0
  33. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +17 -0
  34. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +17 -0
  35. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +17 -0
  36. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +19 -0
  37. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +19 -0
  38. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +17 -0
  39. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +17 -0
  40. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +17 -0
  41. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +17 -0
  42. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +17 -0
  43. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +17 -0
  44. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +17 -0
  45. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +19 -0
  46. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +17 -0
  47. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +17 -0
  48. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +17 -0
  49. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +17 -0
  50. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +17 -0
  51. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +19 -0
  52. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +19 -0
  53. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +17 -0
  54. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +17 -0
  55. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +17 -0
  56. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +17 -0
  57. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +17 -0
  58. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +17 -0
  59. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +17 -0
  60. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +17 -0
  61. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +11 -0
  62. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +17 -0
  63. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +17 -0
  64. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +17 -0
  65. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +17 -0
  66. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +17 -0
  67. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +17 -0
  68. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +17 -0
  69. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +19 -0
  70. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +19 -0
  71. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +19 -0
  72. data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +28 -0
  73. data/lib/sastrawi/stemmer/cache/array_cache.rb +25 -0
  74. data/lib/sastrawi/stemmer/cached_stemmer.rb +33 -0
  75. data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +25 -0
  76. data/lib/sastrawi/stemmer/context/context.rb +217 -0
  77. data/lib/sastrawi/stemmer/context/removal.rb +17 -0
  78. data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +17 -0
  79. data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +54 -0
  80. data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +37 -0
  81. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +34 -0
  82. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +34 -0
  83. data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +34 -0
  84. data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +157 -0
  85. data/lib/sastrawi/stemmer/filter/text_normalizer.rb +15 -0
  86. data/lib/sastrawi/stemmer/stemmer.rb +101 -0
  87. data/lib/sastrawi/stemmer/stemmer_factory.rb +49 -0
  88. data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +27 -0
  89. data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +124 -0
  90. data/lib/sastrawi/version.rb +5 -0
  91. data/lib/sastrawi.rb +4 -0
  92. data/sastrawi.gemspec +34 -0
  93. metadata +179 -0
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 30b: rewrites "peng" + vowel as "k" + vowel.
      class DisambiguatorPrefixRule30b
        # @param word [String] the word to examine
        # @return [String, nil] the word with the leading "peng" replaced by
        #   "k" when it starts with "peng" followed by a vowel; nil otherwise
        def disambiguate(word)
          found = /^peng([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "k#{vowel}#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 30c: strips a leading "penge".
      class DisambiguatorPrefixRule30c
        # @param word [String] the word to examine
        # @return [String, nil] whatever follows the leading "penge" (possibly
        #   an empty string), or nil when the word does not start with "penge"
        def disambiguate(word)
          found = /^penge(.*)$/.match(word)
          return unless found

          found.captures.first
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 31a: rewrites "peny" + vowel as "ny" + vowel.
      class DisambiguatorPrefixRule31a
        # @param word [String] the word to examine
        # @return [String, nil] the word with the leading "pe" dropped (so it
        #   starts with "ny") when it begins with "peny" followed by a vowel;
        #   nil otherwise
        def disambiguate(word)
          found = /^peny([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "ny#{vowel}#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 31b: rewrites "peny" + vowel as "s" + vowel.
      class DisambiguatorPrefixRule31b
        # @param word [String] the word to examine
        # @return [String, nil] the word with the leading "peny" replaced by
        #   "s" when it begins with "peny" followed by a vowel; nil otherwise
        def disambiguate(word)
          found = /^peny([aiueo])(.*)$/.match(word)
          return unless found

          vowel, rest = found.captures
          "s#{vowel}#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 32: strips "pe" from words starting with
      # "pel" + vowel, with "pelajar" handled as a special case.
      class DisambiguatorPrefixRule32
        # @param word [String] the word to examine
        # @return [String, nil] "ajar" for the exact word "pelajar"; otherwise
        #   the word without its leading "pe" when it starts with "pel"
        #   followed by a vowel; nil when neither applies
        def disambiguate(word)
          # Special case checked first: the generic pattern below would
          # yield "lajar" for this word.
          return 'ajar' if word == 'pelajar'

          found = /^pe(l[aiueo])(.*)$/.match(word)
          return unless found

          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 34: strips "pe" before a consonant, unless
      # that consonant is immediately followed by "er".
      class DisambiguatorPrefixRule34
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "pe" when "pe" is
        #   followed by a consonant and the text after that consonant does not
        #   start with "er"; nil otherwise
        def disambiguate(word)
          found = /^pe([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          # Words shaped "pe" + consonant + "er..." are rejected by this rule.
          return if rest.start_with?('er')

          "#{consonant}#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 35: strips "ter" from words shaped
      # "ter" + consonant + "er" + consonant + remainder.
      class DisambiguatorPrefixRule35
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "ter" when the
        #   pattern matches (the first consonant must be one of
        #   b c d f g h j k p q s t v x z); nil otherwise
        def disambiguate(word)
          found = /^ter([bcdfghjkpqstvxz])(er[bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          # The three captures together are everything after "ter".
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 36: strips "pe" from words shaped
      # "pe" + consonant + "er" + consonant + remainder.
      class DisambiguatorPrefixRule36
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "pe" when the
        #   pattern matches (the first consonant must be one of
        #   b c d f g h j k p q s t v x z); nil otherwise
        def disambiguate(word)
          found = /^pe([bcdfghjkpqstvxz])(er[bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          # The three captures together are everything after "pe".
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 37a: recognises consonant + "er" + vowel words
      # and returns them unchanged.
      class DisambiguatorPrefixRule37a
        # @param word [String] the word to examine
        # @return [String, nil] the matched text as-is when the word starts
        #   with a consonant followed by "er" and a vowel; nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])(er[aiueo])(.*)$/.match(word)
          return unless found

          # The captures cover the whole match, so joining them rebuilds it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 37b: removes "er" from between a leading
      # consonant and the vowel that follows it.
      class DisambiguatorPrefixRule37b
        # @param word [String] the word to examine
        # @return [String, nil] the word with the "er" after its first
        #   consonant removed when the word is consonant + "er" + vowel + rest;
        #   nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])er([aiueo])(.*)$/.match(word)
          return unless found

          # "er" sits outside the capture groups, so joining drops it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 38a: recognises consonant + "el" + vowel words
      # and returns them unchanged.
      class DisambiguatorPrefixRule38a
        # @param word [String] the word to examine
        # @return [String, nil] the matched text as-is when the word starts
        #   with a consonant followed by "el" and a vowel; nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])(el[aiueo])(.*)$/.match(word)
          return unless found

          # The captures cover the whole match, so joining them rebuilds it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 38b: removes "el" from between a leading
      # consonant and the vowel that follows it.
      class DisambiguatorPrefixRule38b
        # @param word [String] the word to examine
        # @return [String, nil] the word with the "el" after its first
        #   consonant removed when the word is consonant + "el" + vowel + rest;
        #   nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])el([aiueo])(.*)$/.match(word)
          return unless found

          # "el" sits outside the capture groups, so joining drops it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 39a: recognises consonant + "em" + vowel words
      # and returns them unchanged.
      class DisambiguatorPrefixRule39a
        # @param word [String] the word to examine
        # @return [String, nil] the matched text as-is when the word starts
        #   with a consonant followed by "em" and a vowel; nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])(em[aiueo])(.*)$/.match(word)
          return unless found

          # The captures cover the whole match, so joining them rebuilds it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 39b: removes "em" from between a leading
      # consonant and the vowel that follows it.
      class DisambiguatorPrefixRule39b
        # @param word [String] the word to examine
        # @return [String, nil] the word with the "em" after its first
        #   consonant removed when the word is consonant + "em" + vowel + rest;
        #   nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])em([aiueo])(.*)$/.match(word)
          return unless found

          # "em" sits outside the capture groups, so joining drops it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 4: handles only the exact word "belajar".
      class DisambiguatorPrefixRule4
        # @param word [String] the word to examine
        # @return [String, nil] "ajar" when the word is exactly "belajar";
        #   nil for every other input
        def disambiguate(word)
          'ajar' if word == 'belajar'
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 40a: recognises consonant + "in" + vowel words
      # and returns them unchanged.
      class DisambiguatorPrefixRule40a
        # @param word [String] the word to examine
        # @return [String, nil] the matched text as-is when the word starts
        #   with a consonant followed by "in" and a vowel; nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])(in[aiueo])(.*)$/.match(word)
          return unless found

          # The captures cover the whole match, so joining them rebuilds it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Disambiguation rule 40b: removes "in" from between a leading
      # consonant and the vowel that follows it.
      class DisambiguatorPrefixRule40b
        # @param word [String] the word to examine
        # @return [String, nil] the word with the "in" after its first
        #   consonant removed when the word is consonant + "in" + vowel + rest;
        #   nil otherwise
        def disambiguate(word)
          found = /^([bcdfghjklmnpqrstvwxyz])in([aiueo])(.*)$/.match(word)
          return unless found

          # "in" sits outside the capture groups, so joining drops it.
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 41: strips a leading "ku".
      class DisambiguatorPrefixRule41
        # @param word [String] the word to examine
        # @return [String, nil] whatever follows the leading "ku" (possibly an
        #   empty string), or nil when the word does not start with "ku"
        def disambiguate(word)
          found = /^ku(.*)$/.match(word)
          return unless found

          found.captures.first
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 42: strips a leading "kau".
      class DisambiguatorPrefixRule42
        # @param word [String] the word to examine
        # @return [String, nil] whatever follows the leading "kau" (possibly
        #   an empty string), or nil when the word does not start with "kau"
        def disambiguate(word)
          found = /^kau(.*)$/.match(word)
          return unless found

          found.captures.first
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 5: strips "be" from words shaped
      # "be" + consonant (not "r") + "er" + consonant + remainder.
      class DisambiguatorPrefixRule5
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "be" when the
        #   pattern matches; nil otherwise
        def disambiguate(word)
          # Note the first character class omits "r".
          found = /^be([bcdfghjklmnpqstvwxyz])(er[bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          # The three captures together are everything after "be".
          found.captures.join
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 6a: strips "ter" when a vowel follows it.
      class DisambiguatorPrefixRule6a
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "ter" when "ter"
        #   is followed by a vowel; nil otherwise
        def disambiguate(word)
          found = /^ter([aiueo].*)$/.match(word)
          return unless found

          found.captures.first
        end
      end
    end
  end
end
@@ -0,0 +1,17 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 6b: strips only "te" when "ter" is followed
      # by a vowel, so the result keeps the "r".
      class DisambiguatorPrefixRule6b
        # @param word [String] the word to examine
        # @return [String, nil] "r" plus everything after the leading "ter"
        #   when "ter" is followed by a vowel; nil otherwise
        def disambiguate(word)
          found = /^ter([aiueo].*)$/.match(word)
          return unless found

          "r#{found.captures.first}"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 7: strips "ter" from words shaped
      # "ter" + consonant (not "r") + "er" + vowel + remainder.
      class DisambiguatorPrefixRule7
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "ter" when the
        #   pattern matches and the consonant after "ter" is not "r";
        #   nil otherwise
        def disambiguate(word)
          found = /^ter([bcdfghjklmnpqrstvwxyz])er([aiueo].*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          # The character class admits "r", so it is rejected explicitly.
          return if consonant == 'r'

          "#{consonant}er#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 8: strips "ter" before a consonant, unless
      # the consonant is "r" or is immediately followed by "er".
      class DisambiguatorPrefixRule8
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "ter" when "ter"
        #   is followed by a consonant other than "r" and the text after that
        #   consonant does not start with "er"; nil otherwise
        def disambiguate(word)
          found = /^ter([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          consonant, rest = found.captures
          # The character class admits "r", so it is rejected explicitly,
          # along with any "ter" + consonant + "er..." shape.
          return if consonant == 'r' || rest.start_with?('er')

          "#{consonant}#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,19 @@
1
module Sastrawi
  module Morphology
    module Disambiguator
      # Prefix disambiguation rule 9: strips "te" from words shaped
      # "te" + consonant (not "r") + "er" + consonant + remainder.
      class DisambiguatorPrefixRule9
        # @param word [String] the word to examine
        # @return [String, nil] the word without its leading "te" when the
        #   pattern matches and the consonant after "te" is not "r";
        #   nil otherwise
        def disambiguate(word)
          found = /^te([bcdfghjklmnpqrstvwxyz])er([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
          return unless found

          first_consonant, second_consonant, rest = found.captures
          # The character class admits "r", so it is rejected explicitly.
          return if first_consonant == 'r'

          "#{first_consonant}er#{second_consonant}#{rest}"
        end
      end
    end
  end
end
@@ -0,0 +1,28 @@
1
+ ##
2
+ # Asian J. (2007) "Effective Techniques for Indonesian Text Retrieval". page 26
3
+ # http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
4
+
5
module Sastrawi
  module Morphology
    # Specification that flags words whose leading and trailing letters match
    # one of the prefix/suffix combinations listed as invalid.
    class InvalidAffixPairSpecification
      # Prefix...suffix shapes treated as invalid pairs. Built once and frozen
      # instead of being re-created on every call.
      INVALID_AFFIX_PATTERNS = [
        /^ber(.*)i$/, /^di(.*)an$/, /^ke(.*)i$/, /^ke(.*)an$/,
        /^me(.*)an$/, /^ter(.*)an$/, /^per(.*)an$/
      ].freeze

      # @param word [String] the word to check
      # @return [Boolean] true when the word matches one of the invalid
      #   prefix/suffix patterns; false otherwise, and always false for
      #   "me...kan" words and for the exact word "ketahui"
      def satisfied_by?(word)
        # Exemptions are checked first: "me...kan" words and "ketahui" would
        # otherwise match the "me...an" and "ke...i" patterns respectively.
        return false if /^me(.*)kan$/.match(word)
        return false if word == 'ketahui'

        # any? stops at the first matching pattern.
        INVALID_AFFIX_PATTERNS.any? { |pattern| pattern.match(word) }
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
module Sastrawi
  module Stemmer
    module Cache
      # In-memory cache backed by a Hash. Keys are converted with #to_sym
      # before being stored or looked up.
      class ArrayCache
        # @return [Hash] the underlying storage, keyed by Symbol
        attr_reader :data

        # Starts with an empty cache.
        def initialize
          @data = {}
        end

        # Stores a value under the given key.
        #
        # @param key [#to_sym] cache key
        # @param value [Object] value to store
        # @return [Object] the stored value
        def set(key, value)
          @data[key.to_sym] = value
        end

        # Looks up a previously stored value.
        #
        # @param key [#to_sym] cache key
        # @return [Object, nil] the stored value, or nil when the key is absent
        def get(key)
          name = key.to_sym
          @data[name] if @data.key?(name)
        end

        # Reports whether a value has been stored under the key.
        #
        # @param key [#to_sym] cache key
        # @return [Boolean]
        def has?(key)
          @data.key?(key.to_sym)
        end
      end
    end
  end
end
@@ -0,0 +1,33 @@
1
+ require 'sastrawi/stemmer/filter/text_normalizer'
2
+
3
module Sastrawi
  module Stemmer
    # Wraps another stemmer and remembers each word's stem in a cache so a
    # repeated word is not handed to the wrapped stemmer again.
    class CachedStemmer
      # @return [Object] the cache; must respond to has?, get and set
      # @return [Object] the wrapped stemmer; must respond to stem
      attr_reader :cache, :delegated_stemmer

      # @param cache [#has?, #get, #set] storage for word => stem pairs
      # @param delegated_stemmer [#stem] stemmer used on cache misses
      def initialize(cache, delegated_stemmer)
        @cache = cache
        @delegated_stemmer = delegated_stemmer
      end

      # Stems every word of the text, consulting the cache first.
      #
      # The text is passed through TextNormalizer.normalize_text (defined
      # elsewhere in the gem) and then split on whitespace into words.
      #
      # @param text [String] input text
      # @return [String] the stems joined by single spaces
      def stem(text)
        normalized_text = Sastrawi::Stemmer::Filter::TextNormalizer.normalize_text(text)

        normalized_text.split(' ').map { |word| stem_word(word) }.join(' ')
      end

      private

      # Returns the cached stem for a word, computing and caching it on a miss.
      def stem_word(word)
        return @cache.get(word) if @cache.has?(word)

        result = @delegated_stemmer.stem(word)
        @cache.set(word, result)
        result
      end
    end
  end
end
@@ -0,0 +1,25 @@
1
+ ##
2
+ # Confix Stripping Rule Precedence Adjustment Specification
3
+ # Asian J. (2007) "Effective Techniques for Indonesian Text Retrieval" page 78-79
4
+ # http://researchbank.rmit.edu.au/eserv/rmit:6312/Asian.pdf
5
+
6
module Sastrawi
  module Stemmer
    module ConfixStripping
      # Specification that recognises values whose leading and trailing
      # letters match one of a fixed list of prefix/suffix shapes.
      class PrecedenceAdjustmentSpecification
        # @param value [String] the word to check
        # @return [Boolean] true when the value matches any of the
        #   "be...lah", "be...an", "me...i", "di...i", "pe...i" or "ter...i"
        #   patterns; false otherwise
        def satisfied_by?(value)
          regex_rules = [
            /^be(.*)lah$/, /^be(.*)an$/, /^me(.*)i$/,
            /^di(.*)i$/, /^pe(.*)i$/, /^ter(.*)i$/
          ]

          # any? stops at the first matching rule.
          regex_rules.any? { |rule| rule.match(value) }
        end
      end
    end
  end
end
+ end