sastrawi 0.1.0.pre

Sign up to get free protection for your applications and to get access to all the features.
Files changed (89) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +50 -0
  3. data/.travis.yml +8 -0
  4. data/Gemfile +4 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +70 -0
  7. data/Rakefile +6 -0
  8. data/data/kata-dasar.txt +29932 -0
  9. data/lib/sastrawi/dictionary/array_dictionary.rb +33 -0
  10. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +17 -0
  11. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +17 -0
  12. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +17 -0
  13. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +17 -0
  14. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +17 -0
  15. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +17 -0
  16. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +17 -0
  17. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +17 -0
  18. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +17 -0
  19. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +17 -0
  20. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +17 -0
  21. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +17 -0
  22. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +17 -0
  23. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +17 -0
  24. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +17 -0
  25. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +17 -0
  26. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +17 -0
  27. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +17 -0
  28. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +19 -0
  29. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +17 -0
  30. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +17 -0
  31. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +17 -0
  32. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +19 -0
  33. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +19 -0
  34. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +17 -0
  35. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +17 -0
  36. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +17 -0
  37. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +17 -0
  38. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +17 -0
  39. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +17 -0
  40. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +17 -0
  41. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +19 -0
  42. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +17 -0
  43. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +17 -0
  44. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +17 -0
  45. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +17 -0
  46. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +17 -0
  47. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +19 -0
  48. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +19 -0
  49. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +17 -0
  50. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +17 -0
  51. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +17 -0
  52. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +17 -0
  53. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +17 -0
  54. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +17 -0
  55. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +17 -0
  56. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +17 -0
  57. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +11 -0
  58. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +17 -0
  59. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +17 -0
  60. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +17 -0
  61. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +17 -0
  62. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +17 -0
  63. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +17 -0
  64. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +17 -0
  65. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +19 -0
  66. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +19 -0
  67. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +19 -0
  68. data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +24 -0
  69. data/lib/sastrawi/stemmer/cache/array_cache.rb +25 -0
  70. data/lib/sastrawi/stemmer/cached_stemmer.rb +33 -0
  71. data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +20 -0
  72. data/lib/sastrawi/stemmer/context/context.rb +170 -0
  73. data/lib/sastrawi/stemmer/context/removal.rb +17 -0
  74. data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +17 -0
  75. data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +46 -0
  76. data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +28 -0
  77. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +26 -0
  78. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +26 -0
  79. data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +26 -0
  80. data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +157 -0
  81. data/lib/sastrawi/stemmer/filter/text_normalizer.rb +15 -0
  82. data/lib/sastrawi/stemmer/stemmer.rb +85 -0
  83. data/lib/sastrawi/stemmer/stemmer_factory.rb +45 -0
  84. data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +24 -0
  85. data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +152 -0
  86. data/lib/sastrawi/version.rb +3 -0
  87. data/lib/sastrawi.rb +12 -0
  88. data/sastrawi.gemspec +25 -0
  89. metadata +173 -0
@@ -0,0 +1,24 @@
module Sastrawi
  module StopWordRemover
    # Strips stop words out of a piece of text.
    #
    # The set of stop words is supplied by the caller as +dictionary+; this
    # class only asks it +include?(word)+ for each word of the input.
    class StopWordRemover
      # The stop-word collection consulted by #remove. It must respond to
      # +include?(word)+.
      attr_accessor :dictionary

      # dictionary - object that answers +include?(word)+ with a truthy value
      #              for every word that should be removed.
      def initialize(dictionary)
        @dictionary = dictionary
      end

      # Returns a copy of +text+ without the words found in the dictionary.
      #
      # text - String to filter. It is split on whitespace (String#split
      #        with ' ' ignores leading whitespace and collapses runs), so
      #        the result is always joined with single spaces.
      #
      # How a word is matched (case, punctuation) is entirely up to the
      # dictionary's +include?+; no normalisation is done here.
      def remove(text)
        # These are the words that survive filtering, i.e. the NON-stop words.
        kept_words = text.split(' ').reject { |word| @dictionary.include?(word) }

        kept_words.join(' ')
      end
    end
  end
end
@@ -0,0 +1,152 @@
1
+ require 'sastrawi/dictionary/array_dictionary'
2
+ require 'sastrawi/stop_word_remover/stop_word_remover'
3
+
module Sastrawi
  module StopWordRemover
    # Builds StopWordRemover instances backed by the built-in stop-word list.
    #
    # The methods are defined as instance methods, so the module can be mixed
    # in with +include+/+extend+. +extend self+ additionally exposes them on
    # the module itself, so callers can simply write
    # +StopWordRemoverFactory.create_stop_word_remover+.
    module StopWordRemoverFactory
      extend self

      # Returns a StopWordRemover whose dictionary is an ArrayDictionary
      # filled with the words from #get_stop_word.
      #
      # NOTE(review): StopWordRemover#remove calls +include?+ on its
      # dictionary; confirm that ArrayDictionary implements that method.
      def create_stop_word_remover
        dictionary = Sastrawi::Dictionary::ArrayDictionary.new(get_stop_word)

        Sastrawi::StopWordRemover::StopWordRemover.new(dictionary)
      end

      # Returns the built-in stop-word list as a new Array of Strings on
      # every call.
      #
      # NOTE(review): 'bagainamakah' looks like a misspelling of
      # 'bagaimanakah' (which is also present); it is kept as-is so the list
      # content is unchanged.
      def get_stop_word
        [
          'a', 'ada', 'adalah', 'adanya', 'adapun', 'agak', 'agaknya', 'agar',
          'akan', 'akankah', 'akhir', 'akhiri', 'akhirnya', 'aku', 'akulah', 'amat',
          'amatlah', 'anda', 'andalah', 'antar', 'antara', 'antaranya', 'apa',
          'apaan', 'apabila', 'apakah', 'apalagi', 'apatah', 'arti', 'artinya',
          'asal', 'asalkan', 'atas', 'atau', 'ataukah', 'ataupun', 'awal', 'awalnya',
          'b', 'bagai', 'bagaikan', 'bagaimana', 'bagaimanakah', 'bagaimanapun',
          'bagainamakah', 'bagi', 'bagian', 'bahkan', 'bahwa', 'bahwasannya',
          'bahwasanya', 'baik', 'baiklah', 'bakal', 'bakalan', 'balik', 'banyak',
          'bapak', 'baru', 'bawah', 'beberapa', 'begini', 'beginian', 'beginikah',
          'beginilah', 'begitu', 'begitukah', 'begitulah', 'begitupun', 'bekerja',
          'belakang', 'belakangan', 'belum', 'belumlah', 'benar', 'benarkah',
          'benarlah', 'berada', 'berakhir', 'berakhirlah', 'berakhirnya', 'berapa',
          'berapakah', 'berapalah', 'berapapun', 'berarti', 'berawal', 'berbagai',
          'berdatangan', 'beri', 'berikan', 'berikut', 'berikutnya', 'berjumlah',
          'berkali-kali', 'berkata', 'berkehendak', 'berkeinginan', 'berkenaan',
          'berlainan', 'berlalu', 'berlangsung', 'berlebihan', 'bermacam',
          'bermacam-macam', 'bermaksud', 'bermula', 'bersama', 'bersama-sama',
          'bersiap', 'bersiap-siap', 'bertanya', 'bertanya-tanya', 'berturut',
          'berturut-turut', 'bertutur', 'berujar', 'berupa', 'besar', 'betul',
          'betulkah', 'biasa', 'biasanya', 'bila', 'bilakah', 'bisa', 'bisakah',
          'boleh', 'bolehkah', 'bolehlah', 'buat', 'bukan', 'bukankah', 'bukanlah',
          'bukannya', 'bulan', 'bung', 'c', 'cara', 'caranya', 'cukup', 'cukupkah',
          'cukuplah', 'cuma', 'd', 'dahulu', 'dalam', 'dan', 'dapat', 'dari',
          'daripada', 'datang', 'dekat', 'demi', 'demikian', 'demikianlah', 'dengan',
          'depan', 'di', 'dia', 'diakhiri', 'diakhirinya', 'dialah', 'diantara',
          'diantaranya', 'diberi', 'diberikan', 'diberikannya', 'dibuat',
          'dibuatnya', 'didapat', 'didatangkan', 'digunakan', 'diibaratkan',
          'diibaratkannya', 'diingat', 'diingatkan', 'diinginkan', 'dijawab',
          'dijelaskan', 'dijelaskannya', 'dikarenakan', 'dikatakan', 'dikatakannya',
          'dikerjakan', 'diketahui', 'diketahuinya', 'dikira', 'dilakukan',
          'dilalui', 'dilihat', 'dimaksud', 'dimaksudkan', 'dimaksudkannya',
          'dimaksudnya', 'diminta', 'dimintai', 'dimisalkan', 'dimulai',
          'dimulailah', 'dimulainya', 'dimungkinkan', 'dini', 'dipastikan',
          'diperbuat', 'diperbuatnya', 'dipergunakan', 'diperkirakan',
          'diperlihatkan', 'diperlukan', 'diperlukannya', 'dipersoalkan',
          'dipertanyakan', 'dipunyai', 'diri', 'dirinya', 'disampaikan', 'disebut',
          'disebutkan', 'disebutkannya', 'disini', 'disinilah', 'ditambahkan',
          'ditandaskan', 'ditanya', 'ditanyai', 'ditanyakan', 'ditegaskan',
          'ditujukan', 'ditunjuk', 'ditunjuki', 'ditunjukkan', 'ditunjukkannya',
          'ditunjuknya', 'dituturkan', 'dituturkannya', 'diucapkan', 'diucapkannya',
          'diungkapkan', 'dong', 'dua', 'dulu', 'e', 'empat', 'enak', 'enggak',
          'enggaknya', 'entah', 'entahlah', 'f', 'g', 'guna', 'gunakan', 'h',
          'hadap', 'hai', 'hal', 'halo', 'hallo', 'hampir', 'hanya', 'hanyalah',
          'hari', 'harus', 'haruslah', 'harusnya', 'helo', 'hello', 'hendak',
          'hendaklah', 'hendaknya', 'hingga', 'i', 'ia', 'ialah', 'ibarat',
          'ibaratkan', 'ibaratnya', 'ibu', 'ikut', 'ingat', 'ingat-ingat', 'ingin',
          'inginkah', 'inginkan', 'ini', 'inikah', 'inilah', 'itu', 'itukah',
          'itulah', 'j', 'jadi', 'jadilah', 'jadinya', 'jangan', 'jangankan',
          'janganlah', 'jauh', 'jawab', 'jawaban', 'jawabnya', 'jelas', 'jelaskan',
          'jelaslah', 'jelasnya', 'jika', 'jikalau', 'juga', 'jumlah', 'jumlahnya',
          'justru', 'k', 'kadar', 'kala', 'kalau', 'kalaulah', 'kalaupun', 'kali',
          'kalian', 'kami', 'kamilah', 'kamu', 'kamulah', 'kan', 'kapan', 'kapankah',
          'kapanpun', 'karena', 'karenanya', 'kasus', 'kata', 'katakan',
          'katakanlah', 'katanya', 'ke', 'keadaan', 'kebetulan', 'kecil', 'kedua',
          'keduanya', 'keinginan', 'kelamaan', 'kelihatan', 'kelihatannya', 'kelima',
          'keluar', 'kembali', 'kemudian', 'kemungkinan', 'kemungkinannya', 'kena',
          'kenapa', 'kepada', 'kepadanya', 'kerja', 'kesampaian', 'keseluruhan',
          'keseluruhannya', 'keterlaluan', 'ketika', 'khusus', 'khususnya', 'kini',
          'kinilah', 'kira', 'kira-kira', 'kiranya', 'kita', 'kitalah', 'kok',
          'kurang', 'l', 'lagi', 'lagian', 'lah', 'lain', 'lainnya', 'laku', 'lalu',
          'lama', 'lamanya', 'langsung', 'lanjut', 'lanjutnya', 'lebih', 'lewat',
          'lihat', 'lima', 'luar', 'm', 'macam', 'maka', 'makanya', 'makin',
          'maksud', 'malah', 'malahan', 'mampu', 'mampukah', 'mana', 'manakala',
          'manalagi', 'masa', 'masalah', 'masalahnya', 'masih', 'masihkah', 'masing',
          'masing-masing', 'masuk', 'mata', 'mau', 'maupun', 'melainkan',
          'melakukan', 'melalui', 'melihat', 'melihatnya', 'memang', 'memastikan',
          'memberi', 'memberikan', 'membuat', 'memerlukan', 'memihak', 'meminta',
          'memintakan', 'memisalkan', 'memperbuat', 'mempergunakan', 'memperkirakan',
          'memperlihatkan', 'mempersiapkan', 'mempersoalkan', 'mempertanyakan',
          'mempunyai', 'memulai', 'memungkinkan', 'menaiki', 'menambahkan',
          'menandaskan', 'menanti', 'menanti-nanti', 'menantikan', 'menanya',
          'menanyai', 'menanyakan', 'mendapat', 'mendapatkan', 'mendatang',
          'mendatangi', 'mendatangkan', 'menegaskan', 'mengakhiri', 'mengapa',
          'mengatakan', 'mengatakannya', 'mengenai', 'mengerjakan', 'mengetahui',
          'menggunakan', 'menghendaki', 'mengibaratkan', 'mengibaratkannya',
          'mengingat', 'mengingatkan', 'menginginkan', 'mengira', 'mengucapkan',
          'mengucapkannya', 'mengungkapkan', 'menjadi', 'menjawab', 'menjelaskan',
          'menuju', 'menunjuk', 'menunjuki', 'menunjukkan', 'menunjuknya', 'menurut',
          'menuturkan', 'menyampaikan', 'menyangkut', 'menyatakan', 'menyebutkan',
          'menyeluruh', 'menyiapkan', 'merasa', 'mereka', 'merekalah', 'merupakan',
          'meski', 'meskipun', 'meyakini', 'meyakinkan', 'minta', 'mirip', 'misal',
          'misalkan', 'misalnya', 'mohon', 'mula', 'mulai', 'mulailah', 'mulanya',
          'mungkin', 'mungkinkah', 'n', 'nah', 'naik', 'namun', 'nanti', 'nantinya',
          'nya', 'nyaris', 'nyata', 'nyatanya', 'o', 'oleh', 'olehnya', 'orang', 'p',
          'pada', 'padahal', 'padanya', 'pak', 'paling', 'panjang', 'pantas', 'para',
          'pasti', 'pastilah', 'penting', 'pentingnya', 'per', 'percuma', 'perlu',
          'perlukah', 'perlunya', 'pernah', 'persoalan', 'pertama', 'pertama-tama',
          'pertanyaan', 'pertanyakan', 'pihak', 'pihaknya', 'pukul', 'pula', 'pun',
          'punya', 'q', 'r', 'rasa', 'rasanya', 'rupa', 'rupanya', 's', 'saat',
          'saatnya', 'saja', 'sajalah', 'salam', 'saling', 'sama', 'sama-sama',
          'sambil', 'sampai', 'sampai-sampai', 'sampaikan', 'sana', 'sangat',
          'sangatlah', 'sangkut', 'satu', 'saya', 'sayalah', 'se', 'sebab',
          'sebabnya', 'sebagai', 'sebagaimana', 'sebagainya', 'sebagian', 'sebaik',
          'sebaik-baiknya', 'sebaiknya', 'sebaliknya', 'sebanyak', 'sebegini',
          'sebegitu', 'sebelum', 'sebelumnya', 'sebenarnya', 'seberapa', 'sebesar',
          'sebetulnya', 'sebisanya', 'sebuah', 'sebut', 'sebutlah', 'sebutnya',
          'secara', 'secukupnya', 'sedang', 'sedangkan', 'sedemikian', 'sedikit',
          'sedikitnya', 'seenaknya', 'segala', 'segalanya', 'segera', 'seharusnya',
          'sehingga', 'seingat', 'sejak', 'sejauh', 'sejenak', 'sejumlah', 'sekadar',
          'sekadarnya', 'sekali', 'sekali-kali', 'sekalian', 'sekaligus',
          'sekalipun', 'sekarang', 'sekaranglah', 'sekecil', 'seketika', 'sekiranya',
          'sekitar', 'sekitarnya', 'sekurang-kurangnya', 'sekurangnya', 'sela',
          'selain', 'selaku', 'selalu', 'selama', 'selama-lamanya', 'selamanya',
          'selanjutnya', 'seluruh', 'seluruhnya', 'semacam', 'semakin', 'semampu',
          'semampunya', 'semasa', 'semasih', 'semata', 'semata-mata', 'semaunya',
          'sementara', 'semisal', 'semisalnya', 'sempat', 'semua', 'semuanya',
          'semula', 'sendiri', 'sendirian', 'sendirinya', 'seolah', 'seolah-olah',
          'seorang', 'sepanjang', 'sepantasnya', 'sepantasnyalah', 'seperlunya',
          'seperti', 'sepertinya', 'sepihak', 'sering', 'seringnya', 'serta',
          'serupa', 'sesaat', 'sesama', 'sesampai', 'sesegera', 'sesekali',
          'seseorang', 'sesuatu', 'sesuatunya', 'sesudah', 'sesudahnya', 'setelah',
          'setempat', 'setengah', 'seterusnya', 'setiap', 'setiba', 'setibanya',
          'setidak-tidaknya', 'setidaknya', 'setinggi', 'seusai', 'sewaktu', 'siap',
          'siapa', 'siapakah', 'siapapun', 'sini', 'sinilah', 'soal', 'soalnya',
          'suatu', 'sudah', 'sudahkah', 'sudahlah', 'supaya', 't', 'tadi', 'tadinya',
          'tahu', 'tak', 'tambah', 'tambahnya', 'tampak', 'tampaknya', 'tandas',
          'tandasnya', 'tanpa', 'tanya', 'tanyakan', 'tanyanya', 'tapi', 'tegas',
          'tegasnya', 'telah', 'tempat', 'tentang', 'tentu', 'tentulah', 'tentunya',
          'tepat', 'terakhir', 'terasa', 'terbanyak', 'terdahulu', 'terdapat',
          'terdiri', 'terhadap', 'terhadapnya', 'teringat', 'teringat-ingat',
          'terjadi', 'terjadilah', 'terjadinya', 'terkira', 'terlalu', 'terlebih',
          'terlihat', 'termasuk', 'ternyata', 'tersampaikan', 'tersebut',
          'tersebutlah', 'tertentu', 'tertuju', 'terus', 'terutama', 'tetap',
          'tetapi', 'tiap', 'tiba', 'tiba-tiba', 'tidak', 'tidakkah', 'tidaklah',
          'tiga', 'toh', 'tuju', 'tunjuk', 'turut', 'tutur', 'tuturnya', 'u', 'ucap',
          'ucapnya', 'ujar', 'ujarnya', 'umumnya', 'ungkap', 'ungkapnya', 'untuk',
          'usah', 'usai', 'v', 'w', 'waduh', 'wah', 'wahai', 'waktunya', 'walau',
          'walaupun', 'wong', 'x', 'y', 'ya', 'yaitu', 'yakin', 'yakni', 'yang', 'z'
        ]
      end
    end
  end
end
@@ -0,0 +1,3 @@
module Sastrawi
  # Version string of the gem. Frozen so that the shared constant cannot be
  # modified in place by code that reads it.
  VERSION = '0.1.0.pre'.freeze
end
data/lib/sastrawi.rb ADDED
@@ -0,0 +1,12 @@
1
+ require 'sastrawi/version'
2
+
3
+ require 'sastrawi/stemmer/stemmer_factory'
4
+
# Top-level namespace of the gem and home of the one-call stemming helper.
module Sastrawi
  class << self
    # Stems +sentence+ and returns whatever the stemmer's +stem+ returns.
    #
    # A new Stemmer::StemmerFactory, and a new stemmer from it, is created
    # on every call.
    def stem(sentence)
      Sastrawi::Stemmer::StemmerFactory.new.create_stemmer.stem(sentence)
    end
  end
end
data/sastrawi.gemspec ADDED
@@ -0,0 +1,25 @@
# coding: utf-8
# Gem specification for sastrawi. The gem's own lib directory is put on the
# load path first so the version constant can be required from the source tree.
lib_dir = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib_dir) unless $LOAD_PATH.include?(lib_dir)
require 'sastrawi/version'

Gem::Specification.new do |gem|
  gem.name          = 'sastrawi'
  gem.version       = Sastrawi::VERSION
  gem.authors       = ['Andrias Meisyal']
  gem.email         = ['andriasonline@gmail.com']

  gem.summary       = 'Ruby bindings for Sastrawi'
  gem.description   = 'A Ruby library which allows you to stem words in Bahasa Indonesia.'
  gem.homepage      = 'https://github.com/meisyal/sastrawi-ruby'
  gem.license       = 'MIT'

  # Package every git-tracked file except those under test/, spec/ or
  # features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = 'exe'
  # Executables are the base names of the packaged files that live in exe/.
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ['lib']

  gem.add_development_dependency 'bundler', '~> 1.12'
  gem.add_development_dependency 'rake', '~> 10.0'
  gem.add_development_dependency 'rspec', '~> 3.0'
end
metadata ADDED
@@ -0,0 +1,173 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sastrawi
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0.pre
5
+ platform: ruby
6
+ authors:
7
+ - Andrias Meisyal
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2017-02-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: bundler
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.12'
20
+ type: :development
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.12'
27
+ - !ruby/object:Gem::Dependency
28
+ name: rake
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '10.0'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '10.0'
41
+ - !ruby/object:Gem::Dependency
42
+ name: rspec
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '3.0'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '3.0'
55
+ description: A Ruby library which allows you to stem words in Bahasa Indonesia.
56
+ email:
57
+ - andriasonline@gmail.com
58
+ executables: []
59
+ extensions: []
60
+ extra_rdoc_files: []
61
+ files:
62
+ - ".gitignore"
63
+ - ".travis.yml"
64
+ - Gemfile
65
+ - LICENSE.txt
66
+ - README.md
67
+ - Rakefile
68
+ - data/kata-dasar.txt
69
+ - lib/sastrawi.rb
70
+ - lib/sastrawi/dictionary/array_dictionary.rb
71
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb
72
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb
73
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb
74
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb
75
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb
76
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb
77
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb
78
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb
79
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb
80
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb
81
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb
82
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb
83
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb
84
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb
85
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb
86
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb
87
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb
88
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb
89
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb
90
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb
91
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb
92
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb
93
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb
94
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb
95
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb
96
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb
97
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb
98
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb
99
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb
100
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb
101
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb
102
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb
103
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb
104
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb
105
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb
106
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb
107
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb
108
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb
109
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb
110
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb
111
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb
112
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb
113
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb
114
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb
115
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb
116
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb
117
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb
118
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb
119
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb
120
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb
121
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb
122
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb
123
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb
124
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb
125
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb
126
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb
127
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb
128
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb
129
+ - lib/sastrawi/morphology/invalid_affix_pair_specification.rb
130
+ - lib/sastrawi/stemmer/cache/array_cache.rb
131
+ - lib/sastrawi/stemmer/cached_stemmer.rb
132
+ - lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb
133
+ - lib/sastrawi/stemmer/context/context.rb
134
+ - lib/sastrawi/stemmer/context/removal.rb
135
+ - lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb
136
+ - lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb
137
+ - lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb
138
+ - lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb
139
+ - lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb
140
+ - lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb
141
+ - lib/sastrawi/stemmer/context/visitor/visitor_provider.rb
142
+ - lib/sastrawi/stemmer/filter/text_normalizer.rb
143
+ - lib/sastrawi/stemmer/stemmer.rb
144
+ - lib/sastrawi/stemmer/stemmer_factory.rb
145
+ - lib/sastrawi/stop_word_remover/stop_word_remover.rb
146
+ - lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb
147
+ - lib/sastrawi/version.rb
148
+ - sastrawi.gemspec
149
+ homepage: https://github.com/meisyal/sastrawi-ruby
150
+ licenses:
151
+ - MIT
152
+ metadata: {}
153
+ post_install_message:
154
+ rdoc_options: []
155
+ require_paths:
156
+ - lib
157
+ required_ruby_version: !ruby/object:Gem::Requirement
158
+ requirements:
159
+ - - ">="
160
+ - !ruby/object:Gem::Version
161
+ version: '0'
162
+ required_rubygems_version: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - ">"
165
+ - !ruby/object:Gem::Version
166
+ version: 1.3.1
167
+ requirements: []
168
+ rubyforge_project:
169
+ rubygems_version: 2.5.1
170
+ signing_key:
171
+ specification_version: 4
172
+ summary: Ruby bindings for Sastrawi
173
+ test_files: []