sastrawi 0.1.0.pre → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
Files changed (92) hide show
  1. checksums.yaml +5 -5
  2. data/.gitignore +1 -1
  3. data/.travis.yml +7 -5
  4. data/CONTRIBUTING.md +22 -0
  5. data/Gemfile +0 -0
  6. data/LICENSE.txt +1 -1
  7. data/README.md +53 -19
  8. data/Rakefile +2 -2
  9. data/_config.yml +1 -0
  10. data/bin/sastrawi +24 -0
  11. data/data/{kata-dasar.txt → base-word.txt} +0 -0
  12. data/lib/sastrawi.rb +1 -9
  13. data/lib/sastrawi/dictionary/array_dictionary.rb +36 -2
  14. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +1 -1
  15. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +1 -1
  16. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +1 -1
  17. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +1 -1
  18. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +1 -1
  19. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +1 -1
  20. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +1 -1
  21. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +1 -1
  22. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +1 -1
  23. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +1 -1
  24. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +1 -1
  25. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +0 -0
  26. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +1 -1
  27. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +1 -1
  28. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +1 -1
  29. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +1 -1
  30. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +0 -0
  31. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +1 -1
  32. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +1 -1
  33. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +1 -1
  34. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +1 -1
  35. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +1 -1
  36. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +1 -1
  37. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +1 -1
  38. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +1 -1
  39. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +1 -1
  40. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +1 -1
  41. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +1 -1
  42. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +1 -1
  43. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +1 -1
  44. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +2 -2
  45. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +1 -1
  46. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +1 -1
  47. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +1 -1
  48. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +0 -0
  49. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +1 -1
  50. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +1 -1
  51. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +1 -1
  52. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +1 -1
  53. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +1 -1
  54. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +1 -1
  55. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +2 -2
  56. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +2 -2
  57. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +2 -2
  58. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +2 -2
  59. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +2 -2
  60. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +2 -2
  61. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +0 -0
  62. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +2 -2
  63. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +2 -2
  64. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +0 -0
  65. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +0 -0
  66. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +2 -2
  67. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +0 -0
  68. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +1 -1
  69. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +1 -1
  70. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +1 -1
  71. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +2 -2
  72. data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +4 -0
  73. data/lib/sastrawi/stemmer/cache/array_cache.rb +2 -2
  74. data/lib/sastrawi/stemmer/cached_stemmer.rb +1 -1
  75. data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +5 -0
  76. data/lib/sastrawi/stemmer/context/context.rb +28 -7
  77. data/lib/sastrawi/stemmer/context/removal.rb +1 -1
  78. data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +0 -0
  79. data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +2 -2
  80. data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +10 -1
  81. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +9 -1
  82. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +9 -1
  83. data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +9 -1
  84. data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +1 -1
  85. data/lib/sastrawi/stemmer/filter/text_normalizer.rb +0 -0
  86. data/lib/sastrawi/stemmer/stemmer.rb +31 -15
  87. data/lib/sastrawi/stemmer/stemmer_factory.rb +5 -1
  88. data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +5 -2
  89. data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +102 -130
  90. data/lib/sastrawi/version.rb +1 -1
  91. data/sastrawi.gemspec +6 -5
  92. metadata +22 -19
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'ny' << matches[0] << matches[1]
11
+ return "ny#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 's' << matches[0] << matches[1]
11
+ return "s#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'p' << matches[0] << matches[1]
11
+ return "p#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'r' << matches[0]
11
+ return "r#{matches[0]}"
12
12
  end
13
13
  end
14
14
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
 
11
11
  return if /^er(.*)$/.match(matches[2])
12
12
 
13
- return matches[0] << matches[1] << matches[2]
13
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
14
14
  end
15
15
  end
16
16
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1]
11
+ return "#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1]
11
+ return "#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
 
11
11
  return if /^er(.*)$/.match(matches[2])
12
12
 
13
- return matches[0] << matches[1] << matches[2]
13
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
14
14
  end
15
15
  end
16
16
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
 
11
11
  return if matches[0] == 'r'
12
12
 
13
- return matches[0] << matches[1] << 'er' << matches[2] << matches[3]
13
+ return "#{matches[0]}#{matches[1]}er#{matches[2]}#{matches[3]}"
14
14
  end
15
15
  end
16
16
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1]
11
+ return "#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'm' << matches[0] << matches[1]
11
+ return "m#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'p' << matches[0] << matches[1]
11
+ return "p#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1]
11
+ return "#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'n' << matches[0] << matches[1]
11
+ return "n#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 't' << matches[0] << matches[1]
11
+ return "t#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule29
5
5
  def disambiguate(word)
6
- contains = /^pen([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
6
+ contains = /^peng([bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1]
11
+ return "#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
 
11
11
  return if matches[0] == 'r'
12
12
 
13
- return matches[0] << matches[1] << 'er' << matches[2] << matches[3]
13
+ return "#{matches[0]}#{matches[1]}er#{matches[2]}#{matches[3]}"
14
14
  end
15
15
  end
16
16
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1]
11
+ return "#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'k' << matches[0] << matches[1]
11
+ return "k#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'ny' << matches[0] << matches[1]
11
+ return "ny#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 's' << matches[0] << matches[1]
11
+ return "s#{matches[0]}#{matches[1]}"
12
12
  end
13
13
  end
14
14
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
  if contains
11
11
  matches = contains.captures
12
12
 
13
- return matches[0] << matches[1]
13
+ return "#{matches[0]}#{matches[1]}"
14
14
  end
15
15
  end
16
16
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
 
11
11
  return if /^er(.*)$/.match(matches[1])
12
12
 
13
- return matches[0] << matches[1]
13
+ return "#{matches[0]}#{matches[1]}"
14
14
  end
15
15
  end
16
16
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule37a
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])(er[aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])(er[aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule37b
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])er([aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])er([aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule38a
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])(el[aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])(el[aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule38b
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])el([aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])el([aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule39a
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])(em[aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])(em[aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule39b
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])em([aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])em([aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule40a
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])(in[aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])(in[aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule40b
5
5
  def disambiguate(word)
6
- contains = /^([bcdfghjklmnpqstvwxyz])in([aiueo])(.*)$/.match(word)
6
+ contains = /^([bcdfghjklmnpqrstvwxyz])in([aiueo])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -3,12 +3,12 @@ module Sastrawi
3
3
  module Disambiguator
4
4
  class DisambiguatorPrefixRule5
5
5
  def disambiguate(word)
6
- contains = /^be([bcdfghjklmnpqrstvwxyz])(er[bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
6
+ contains = /^be([bcdfghjklmnpqstvwxyz])(er[bcdfghjklmnpqrstvwxyz])(.*)$/.match(word)
7
7
 
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return matches[0] << matches[1] << matches[2]
11
+ return "#{matches[0]}#{matches[1]}#{matches[2]}"
12
12
  end
13
13
  end
14
14
  end
@@ -8,7 +8,7 @@ module Sastrawi
8
8
  if contains
9
9
  matches = contains.captures
10
10
 
11
- return 'r' << matches[0]
11
+ return "r#{matches[0]}"
12
12
  end
13
13
  end
14
14
  end
@@ -10,7 +10,7 @@ module Sastrawi
10
10
 
11
11
  return if matches[0] == 'r'
12
12
 
13
- return matches[0] << 'er' << matches[1]
13
+ return "#{matches[0]}er#{matches[1]}"
14
14
  end
15
15
  end
16
16
  end