sastrawi 0.1.0.pre
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +50 -0
- data/.travis.yml +8 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +70 -0
- data/Rakefile +6 -0
- data/data/kata-dasar.txt +29932 -0
- data/lib/sastrawi/dictionary/array_dictionary.rb +33 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +11 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +17 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +19 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +19 -0
- data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +24 -0
- data/lib/sastrawi/stemmer/cache/array_cache.rb +25 -0
- data/lib/sastrawi/stemmer/cached_stemmer.rb +33 -0
- data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +20 -0
- data/lib/sastrawi/stemmer/context/context.rb +170 -0
- data/lib/sastrawi/stemmer/context/removal.rb +17 -0
- data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +17 -0
- data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +46 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +28 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +26 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +26 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +26 -0
- data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +157 -0
- data/lib/sastrawi/stemmer/filter/text_normalizer.rb +15 -0
- data/lib/sastrawi/stemmer/stemmer.rb +85 -0
- data/lib/sastrawi/stemmer/stemmer_factory.rb +45 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +24 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +152 -0
- data/lib/sastrawi/version.rb +3 -0
- data/lib/sastrawi.rb +12 -0
- data/sastrawi.gemspec +25 -0
- metadata +173 -0
@@ -0,0 +1,24 @@
|
|
1
|
+
module Sastrawi
  module StopWordRemover
    # Removes stop words from a piece of text.
    #
    # The dictionary is any object that answers +include?(word)+; each word
    # of the input for which it answers true is dropped.
    class StopWordRemover
      attr_accessor :dictionary

      # dictionary:: object queried with +include?+ for every word.
      def initialize(dictionary)
        @dictionary = dictionary
      end

      # Returns +text+ without the words found in the dictionary.
      #
      # The text is tokenised with <tt>split(' ')</tt> and the surviving
      # words are joined back together with single spaces.
      def remove(text)
        kept = text.split(' ').reject { |token| @dictionary.include?(token) }
        kept.join(' ')
      end
    end
  end
end
|
@@ -0,0 +1,152 @@
|
|
1
|
+
require 'sastrawi/dictionary/array_dictionary'
|
2
|
+
require 'sastrawi/stop_word_remover/stop_word_remover'
|
3
|
+
|
4
|
+
module Sastrawi
  module StopWordRemover
    # Builds StopWordRemover objects backed by the built-in stop word list.
    #
    # The methods can be called directly on the module, e.g.
    # <tt>StopWordRemoverFactory.create_stop_word_remover</tt>, or through an
    # object that includes the module.
    module StopWordRemoverFactory
      # A module cannot be instantiated, so without this line the methods
      # below were only reachable after mixing the module into another
      # object. +extend self+ (rather than +module_function+) keeps the
      # instance methods public for code that already includes the module.
      extend self

      # Returns a StopWordRemover whose dictionary is an ArrayDictionary
      # built from the list returned by #get_stop_word.
      def create_stop_word_remover
        dictionary = Sastrawi::Dictionary::ArrayDictionary.new(get_stop_word)
        Sastrawi::StopWordRemover::StopWordRemover.new(dictionary)
      end

      # Returns the built-in stop words as a new Array of Strings.
      #
      # A fresh array is built on every call, so callers may modify the
      # result without affecting later calls.
      def get_stop_word
        %w[
          a ada adalah adanya adapun agak agaknya agar
          akan akankah akhir akhiri akhirnya aku akulah amat
          amatlah anda andalah antar antara antaranya apa
          apaan apabila apakah apalagi apatah arti artinya
          asal asalkan atas atau ataukah ataupun awal awalnya
          b bagai bagaikan bagaimana bagaimanakah bagaimanapun
          bagainamakah bagi bagian bahkan bahwa bahwasannya
          bahwasanya baik baiklah bakal bakalan balik banyak
          bapak baru bawah beberapa begini beginian beginikah
          beginilah begitu begitukah begitulah begitupun bekerja
          belakang belakangan belum belumlah benar benarkah
          benarlah berada berakhir berakhirlah berakhirnya berapa
          berapakah berapalah berapapun berarti berawal berbagai
          berdatangan beri berikan berikut berikutnya berjumlah
          berkali-kali berkata berkehendak berkeinginan berkenaan
          berlainan berlalu berlangsung berlebihan bermacam
          bermacam-macam bermaksud bermula bersama bersama-sama
          bersiap bersiap-siap bertanya bertanya-tanya berturut
          berturut-turut bertutur berujar berupa besar betul
          betulkah biasa biasanya bila bilakah bisa bisakah
          boleh bolehkah bolehlah buat bukan bukankah bukanlah
          bukannya bulan bung c cara caranya cukup cukupkah
          cukuplah cuma d dahulu dalam dan dapat dari
          daripada datang dekat demi demikian demikianlah dengan
          depan di dia diakhiri diakhirinya dialah diantara
          diantaranya diberi diberikan diberikannya dibuat
          dibuatnya didapat didatangkan digunakan diibaratkan
          diibaratkannya diingat diingatkan diinginkan dijawab
          dijelaskan dijelaskannya dikarenakan dikatakan dikatakannya
          dikerjakan diketahui diketahuinya dikira dilakukan
          dilalui dilihat dimaksud dimaksudkan dimaksudkannya
          dimaksudnya diminta dimintai dimisalkan dimulai
          dimulailah dimulainya dimungkinkan dini dipastikan
          diperbuat diperbuatnya dipergunakan diperkirakan
          diperlihatkan diperlukan diperlukannya dipersoalkan
          dipertanyakan dipunyai diri dirinya disampaikan disebut
          disebutkan disebutkannya disini disinilah ditambahkan
          ditandaskan ditanya ditanyai ditanyakan ditegaskan
          ditujukan ditunjuk ditunjuki ditunjukkan ditunjukkannya
          ditunjuknya dituturkan dituturkannya diucapkan diucapkannya
          diungkapkan dong dua dulu e empat enak enggak
          enggaknya entah entahlah f g guna gunakan h
          hadap hai hal halo hallo hampir hanya hanyalah
          hari harus haruslah harusnya helo hello hendak
          hendaklah hendaknya hingga i ia ialah ibarat
          ibaratkan ibaratnya ibu ikut ingat ingat-ingat ingin
          inginkah inginkan ini inikah inilah itu itukah
          itulah j jadi jadilah jadinya jangan jangankan
          janganlah jauh jawab jawaban jawabnya jelas jelaskan
          jelaslah jelasnya jika jikalau juga jumlah jumlahnya
          justru k kadar kala kalau kalaulah kalaupun kali
          kalian kami kamilah kamu kamulah kan kapan kapankah
          kapanpun karena karenanya kasus kata katakan
          katakanlah katanya ke keadaan kebetulan kecil kedua
          keduanya keinginan kelamaan kelihatan kelihatannya kelima
          keluar kembali kemudian kemungkinan kemungkinannya kena
          kenapa kepada kepadanya kerja kesampaian keseluruhan
          keseluruhannya keterlaluan ketika khusus khususnya kini
          kinilah kira kira-kira kiranya kita kitalah kok
          kurang l lagi lagian lah lain lainnya laku lalu
          lama lamanya langsung lanjut lanjutnya lebih lewat
          lihat lima luar m macam maka makanya makin
          maksud malah malahan mampu mampukah mana manakala
          manalagi masa masalah masalahnya masih masihkah masing
          masing-masing masuk mata mau maupun melainkan
          melakukan melalui melihat melihatnya memang memastikan
          memberi memberikan membuat memerlukan memihak meminta
          memintakan memisalkan memperbuat mempergunakan memperkirakan
          memperlihatkan mempersiapkan mempersoalkan mempertanyakan
          mempunyai memulai memungkinkan menaiki menambahkan
          menandaskan menanti menanti-nanti menantikan menanya
          menanyai menanyakan mendapat mendapatkan mendatang
          mendatangi mendatangkan menegaskan mengakhiri mengapa
          mengatakan mengatakannya mengenai mengerjakan mengetahui
          menggunakan menghendaki mengibaratkan mengibaratkannya
          mengingat mengingatkan menginginkan mengira mengucapkan
          mengucapkannya mengungkapkan menjadi menjawab menjelaskan
          menuju menunjuk menunjuki menunjukkan menunjuknya menurut
          menuturkan menyampaikan menyangkut menyatakan menyebutkan
          menyeluruh menyiapkan merasa mereka merekalah merupakan
          meski meskipun meyakini meyakinkan minta mirip misal
          misalkan misalnya mohon mula mulai mulailah mulanya
          mungkin mungkinkah n nah naik namun nanti nantinya
          nya nyaris nyata nyatanya o oleh olehnya orang p
          pada padahal padanya pak paling panjang pantas para
          pasti pastilah penting pentingnya per percuma perlu
          perlukah perlunya pernah persoalan pertama pertama-tama
          pertanyaan pertanyakan pihak pihaknya pukul pula pun
          punya q r rasa rasanya rupa rupanya s saat
          saatnya saja sajalah salam saling sama sama-sama
          sambil sampai sampai-sampai sampaikan sana sangat
          sangatlah sangkut satu saya sayalah se sebab
          sebabnya sebagai sebagaimana sebagainya sebagian sebaik
          sebaik-baiknya sebaiknya sebaliknya sebanyak sebegini
          sebegitu sebelum sebelumnya sebenarnya seberapa sebesar
          sebetulnya sebisanya sebuah sebut sebutlah sebutnya
          secara secukupnya sedang sedangkan sedemikian sedikit
          sedikitnya seenaknya segala segalanya segera seharusnya
          sehingga seingat sejak sejauh sejenak sejumlah sekadar
          sekadarnya sekali sekali-kali sekalian sekaligus
          sekalipun sekarang sekaranglah sekecil seketika sekiranya
          sekitar sekitarnya sekurang-kurangnya sekurangnya sela
          selain selaku selalu selama selama-lamanya selamanya
          selanjutnya seluruh seluruhnya semacam semakin semampu
          semampunya semasa semasih semata semata-mata semaunya
          sementara semisal semisalnya sempat semua semuanya
          semula sendiri sendirian sendirinya seolah seolah-olah
          seorang sepanjang sepantasnya sepantasnyalah seperlunya
          seperti sepertinya sepihak sering seringnya serta
          serupa sesaat sesama sesampai sesegera sesekali
          seseorang sesuatu sesuatunya sesudah sesudahnya setelah
          setempat setengah seterusnya setiap setiba setibanya
          setidak-tidaknya setidaknya setinggi seusai sewaktu siap
          siapa siapakah siapapun sini sinilah soal soalnya
          suatu sudah sudahkah sudahlah supaya t tadi tadinya
          tahu tak tambah tambahnya tampak tampaknya tandas
          tandasnya tanpa tanya tanyakan tanyanya tapi tegas
          tegasnya telah tempat tentang tentu tentulah tentunya
          tepat terakhir terasa terbanyak terdahulu terdapat
          terdiri terhadap terhadapnya teringat teringat-ingat
          terjadi terjadilah terjadinya terkira terlalu terlebih
          terlihat termasuk ternyata tersampaikan tersebut
          tersebutlah tertentu tertuju terus terutama tetap
          tetapi tiap tiba tiba-tiba tidak tidakkah tidaklah
          tiga toh tuju tunjuk turut tutur tuturnya u ucap
          ucapnya ujar ujarnya umumnya ungkap ungkapnya untuk
          usah usai v w waduh wah wahai waktunya walau
          walaupun wong x y ya yaitu yakin yakni yang z
        ]
      end
    end
  end
end
|
data/lib/sastrawi.rb
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
require 'sastrawi/version'
|
2
|
+
|
3
|
+
require 'sastrawi/stemmer/stemmer_factory'
|
4
|
+
|
5
|
+
module Sastrawi
  # Stems +sentence+ and returns whatever the stemmer's +stem+ returns.
  #
  # A new StemmerFactory, and from it a new stemmer, is created on every
  # call.
  def self.stem(sentence)
    Sastrawi::Stemmer::StemmerFactory.new.create_stemmer.stem(sentence)
  end
end
|
data/sastrawi.gemspec
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
lib = File.expand_path('../lib', __FILE__)
|
3
|
+
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
|
+
require 'sastrawi/version'
|
5
|
+
|
6
|
+
# Gem specification for sastrawi.
Gem::Specification.new do |gem|
  gem.name          = "sastrawi"
  gem.version       = Sastrawi::VERSION
  gem.authors       = ["Andrias Meisyal"]
  gem.email         = ["andriasonline@gmail.com"]

  gem.summary       = "Ruby bindings for Sastrawi"
  gem.description   = "A Ruby library which allows you to stem words in Bahasa Indonesia."
  gem.homepage      = "https://github.com/meisyal/sastrawi-ruby"
  gem.license       = "MIT"

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files     = `git ls-files -z`.split("\x0")
  gem.files         = tracked_files.reject { |path| path.match(%r{^(test|spec|features)/}) }
  gem.bindir        = "exe"
  # Executables are the base names of the packaged files under exe/.
  gem.executables   = gem.files.grep(%r{^exe/}) { |path| File.basename(path) }
  gem.require_paths = ["lib"]

  gem.add_development_dependency "bundler", "~> 1.12"
  gem.add_development_dependency "rake", "~> 10.0"
  gem.add_development_dependency "rspec", "~> 3.0"
end
|
metadata
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sastrawi
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0.pre
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Andrias Meisyal
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2017-02-13 00:00:00.000000000 Z
|
12
|
+
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: bundler
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.12'
|
20
|
+
type: :development
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.12'
|
27
|
+
- !ruby/object:Gem::Dependency
|
28
|
+
name: rake
|
29
|
+
requirement: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - "~>"
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: '10.0'
|
34
|
+
type: :development
|
35
|
+
prerelease: false
|
36
|
+
version_requirements: !ruby/object:Gem::Requirement
|
37
|
+
requirements:
|
38
|
+
- - "~>"
|
39
|
+
- !ruby/object:Gem::Version
|
40
|
+
version: '10.0'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: rspec
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: '3.0'
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: '3.0'
|
55
|
+
description: A Ruby library which allows you to stem words in Bahasa Indonesia.
|
56
|
+
email:
|
57
|
+
- andriasonline@gmail.com
|
58
|
+
executables: []
|
59
|
+
extensions: []
|
60
|
+
extra_rdoc_files: []
|
61
|
+
files:
|
62
|
+
- ".gitignore"
|
63
|
+
- ".travis.yml"
|
64
|
+
- Gemfile
|
65
|
+
- LICENSE.txt
|
66
|
+
- README.md
|
67
|
+
- Rakefile
|
68
|
+
- data/kata-dasar.txt
|
69
|
+
- lib/sastrawi.rb
|
70
|
+
- lib/sastrawi/dictionary/array_dictionary.rb
|
71
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb
|
72
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb
|
73
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb
|
74
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb
|
75
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb
|
76
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb
|
77
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb
|
78
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb
|
79
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb
|
80
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb
|
81
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb
|
82
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb
|
83
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb
|
84
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb
|
85
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb
|
86
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb
|
87
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb
|
88
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb
|
89
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb
|
90
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb
|
91
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb
|
92
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb
|
93
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb
|
94
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb
|
95
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb
|
96
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb
|
97
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb
|
98
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb
|
99
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb
|
100
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb
|
101
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb
|
102
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb
|
103
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb
|
104
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb
|
105
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb
|
106
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb
|
107
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb
|
108
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb
|
109
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb
|
110
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb
|
111
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb
|
112
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb
|
113
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb
|
114
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb
|
115
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb
|
116
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb
|
117
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb
|
118
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb
|
119
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb
|
120
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb
|
121
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb
|
122
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb
|
123
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb
|
124
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb
|
125
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb
|
126
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb
|
127
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb
|
128
|
+
- lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb
|
129
|
+
- lib/sastrawi/morphology/invalid_affix_pair_specification.rb
|
130
|
+
- lib/sastrawi/stemmer/cache/array_cache.rb
|
131
|
+
- lib/sastrawi/stemmer/cached_stemmer.rb
|
132
|
+
- lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb
|
133
|
+
- lib/sastrawi/stemmer/context/context.rb
|
134
|
+
- lib/sastrawi/stemmer/context/removal.rb
|
135
|
+
- lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb
|
136
|
+
- lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb
|
137
|
+
- lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb
|
138
|
+
- lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb
|
139
|
+
- lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb
|
140
|
+
- lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb
|
141
|
+
- lib/sastrawi/stemmer/context/visitor/visitor_provider.rb
|
142
|
+
- lib/sastrawi/stemmer/filter/text_normalizer.rb
|
143
|
+
- lib/sastrawi/stemmer/stemmer.rb
|
144
|
+
- lib/sastrawi/stemmer/stemmer_factory.rb
|
145
|
+
- lib/sastrawi/stop_word_remover/stop_word_remover.rb
|
146
|
+
- lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb
|
147
|
+
- lib/sastrawi/version.rb
|
148
|
+
- sastrawi.gemspec
|
149
|
+
homepage: https://github.com/meisyal/sastrawi-ruby
|
150
|
+
licenses:
|
151
|
+
- MIT
|
152
|
+
metadata: {}
|
153
|
+
post_install_message:
|
154
|
+
rdoc_options: []
|
155
|
+
require_paths:
|
156
|
+
- lib
|
157
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
158
|
+
requirements:
|
159
|
+
- - ">="
|
160
|
+
- !ruby/object:Gem::Version
|
161
|
+
version: '0'
|
162
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - ">"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.3.1
|
167
|
+
requirements: []
|
168
|
+
rubyforge_project:
|
169
|
+
rubygems_version: 2.5.1
|
170
|
+
signing_key:
|
171
|
+
specification_version: 4
|
172
|
+
summary: Ruby bindings for Sastrawi
|
173
|
+
test_files: []
|