sastrawi-ruby 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (93) hide show
  1. checksums.yaml +7 -0
  2. data/.github/workflows/ci.yml +23 -0
  3. data/.gitignore +51 -0
  4. data/.travis.yml +10 -0
  5. data/CONTRIBUTING.md +22 -0
  6. data/Gemfile +4 -0
  7. data/LICENSE.txt +21 -0
  8. data/README.md +104 -0
  9. data/Rakefile +6 -0
  10. data/_config.yml +1 -0
  11. data/bin/sastrawi +24 -0
  12. data/data/base-word.txt +29933 -0
  13. data/lib/sastrawi/dictionary/array_dictionary.rb +67 -0
  14. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +17 -0
  15. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +17 -0
  16. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +17 -0
  17. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +17 -0
  18. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +17 -0
  19. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +17 -0
  20. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +17 -0
  21. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +17 -0
  22. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +17 -0
  23. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +17 -0
  24. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +17 -0
  25. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +17 -0
  26. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +17 -0
  27. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +17 -0
  28. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +17 -0
  29. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +17 -0
  30. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +17 -0
  31. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +17 -0
  32. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +19 -0
  33. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +17 -0
  34. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +17 -0
  35. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +17 -0
  36. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +19 -0
  37. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +19 -0
  38. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +17 -0
  39. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +17 -0
  40. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +17 -0
  41. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +17 -0
  42. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +17 -0
  43. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +17 -0
  44. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +17 -0
  45. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +19 -0
  46. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +17 -0
  47. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +17 -0
  48. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +17 -0
  49. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +17 -0
  50. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +17 -0
  51. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +19 -0
  52. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +19 -0
  53. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +17 -0
  54. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +17 -0
  55. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +17 -0
  56. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +17 -0
  57. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +17 -0
  58. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +17 -0
  59. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +17 -0
  60. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +17 -0
  61. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +11 -0
  62. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +17 -0
  63. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +17 -0
  64. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +17 -0
  65. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +17 -0
  66. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +17 -0
  67. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +17 -0
  68. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +17 -0
  69. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +19 -0
  70. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +19 -0
  71. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +19 -0
  72. data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +28 -0
  73. data/lib/sastrawi/stemmer/cache/array_cache.rb +25 -0
  74. data/lib/sastrawi/stemmer/cached_stemmer.rb +33 -0
  75. data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +25 -0
  76. data/lib/sastrawi/stemmer/context/context.rb +217 -0
  77. data/lib/sastrawi/stemmer/context/removal.rb +17 -0
  78. data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +17 -0
  79. data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +54 -0
  80. data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +37 -0
  81. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +34 -0
  82. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +34 -0
  83. data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +34 -0
  84. data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +157 -0
  85. data/lib/sastrawi/stemmer/filter/text_normalizer.rb +15 -0
  86. data/lib/sastrawi/stemmer/stemmer.rb +101 -0
  87. data/lib/sastrawi/stemmer/stemmer_factory.rb +49 -0
  88. data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +27 -0
  89. data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +124 -0
  90. data/lib/sastrawi/version.rb +5 -0
  91. data/lib/sastrawi.rb +4 -0
  92. data/sastrawi.gemspec +34 -0
  93. metadata +179 -0
@@ -0,0 +1,101 @@
1
+ require 'sastrawi/stemmer/context/context'
2
+
3
+ require 'sastrawi/stemmer/context/visitor/visitor_provider'
4
+
5
+ require 'sastrawi/stemmer/filter/text_normalizer'
6
+
7
+ ##
8
+ # Indonesian Stemmer
9
+ # Nazief & Adriani, CS Stemmer, ECS Stemmer, Improved ECS
10
+
11
module Sastrawi
  module Stemmer
    ##
    # Stems Indonesian text. Hyphenated words are routed through the plural
    # handling; every other word goes straight through a stemming Context.

    class Stemmer
      attr_reader :dictionary, :visitor_provider

      ##
      # Store the dictionary and create the visitor provider that is handed to
      # every Context built by this stemmer.

      def initialize(dictionary)
        @dictionary = dictionary
        @visitor_provider = Sastrawi::Stemmer::Context::Visitor::VisitorProvider.new
      end

      ##
      # Normalize the text, stem each whitespace-separated word and return the
      # stems joined by single spaces.

      def stem(text)
        normalized = Sastrawi::Stemmer::Filter::TextNormalizer.normalize_text(text)

        normalized.split(' ').map { |token| stem_word(token) }.join(' ')
      end

      ##
      # Stem one word, choosing the plural or singular path.

      def stem_word(word)
        return stem_plural_word(word) if plural?(word)

        stem_singular_word(word)
      end

      ##
      # True when the word contains a hyphen, ignoring a hyphen that only
      # attaches a trailing ku/mu/nya/lah/kah/tah/pun.

      def plural?(word)
        particle_match = /^(.*)-(ku|mu|nya|lah|kah|tah|pun)$/.match(word)
        # When a trailing particle is present, only the part before it decides.
        candidate = particle_match ? particle_match[1] : word

        candidate.include?('-')
      end

      ##
      # Stem a hyphenated word to its base form
      # Asian J. (2007) "Effective Techniques for Indonesian Text Retrieval"
      # page 76-77
      #
      # Returns the shared root when both halves stem to the same word,
      # otherwise returns the word unchanged.

      def stem_plural_word(word)
        # The first group is greedy, so the split happens at the last hyphen.
        halves = /^(.*)-(.*)$/.match(word)
        return word if halves.nil?

        head = halves[1]
        tail = halves[2]

        if %w[ku mu nya lah kah tah pun].include?(tail)
          # The last segment is a particle: split the remainder once more and
          # keep the particle attached to the second half.
          inner = /^(.*)-(.*)$/.match(head)
          if inner
            head = inner[1]
            tail = "#{inner[2]}-#{tail}"
          end
        end

        head_root = stem_singular_word(head)
        tail_root = stem_singular_word(tail)

        # Second half is not a dictionary word and stemming left it untouched:
        # retry with "me" prepended.
        if !@dictionary.contains?(tail) && tail_root == tail
          tail_root = stem_singular_word("me#{tail}")
        end

        head_root == tail_root ? head_root : word
      end

      ##
      # Run a single word through a stemming Context and return its result.

      def stem_singular_word(word)
        Sastrawi::Stemmer::Context::Context
          .new(word, @dictionary, @visitor_provider)
          .tap(&:execute)
          .result
      end
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'sastrawi/dictionary/array_dictionary'
2
+
3
+ require 'sastrawi/stemmer/cached_stemmer'
4
+ require 'sastrawi/stemmer/stemmer'
5
+
6
+ require 'sastrawi/stemmer/cache/array_cache'
7
+
8
+ ##
9
+ # Stemmer factory helps create a pre-configured stemmer
10
+
11
module Sastrawi
  module Stemmer
    class StemmerFactory
      ##
      # Build a Stemmer on top of the default dictionary and return it wrapped
      # in a CachedStemmer backed by a new ArrayCache.

      def create_stemmer(is_dev = false)
        base_stemmer = Sastrawi::Stemmer::Stemmer.new(create_default_dictionary(is_dev))

        Sastrawi::Stemmer::CachedStemmer.new(Sastrawi::Stemmer::Cache::ArrayCache.new, base_stemmer)
      end

      ##
      # Wrap the default word list in an ArrayDictionary.

      def create_default_dictionary(is_dev = false)
        Sastrawi::Dictionary::ArrayDictionary.new(get_words(is_dev))
      end

      ##
      # Return the default word list. The +is_dev+ flag is accepted but not
      # consulted here; the words always come from the bundled file.

      def get_words(is_dev = false)
        get_words_from_file
      end

      ##
      # Read data/base-word.txt, located four levels above this file, and
      # return its lines with the trailing line terminator removed.

      def get_words_from_file
        word_list_path = File.join(File.expand_path('../../../..', __FILE__), 'data/base-word.txt')

        File.foreach(word_list_path).map(&:chomp)
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Sastrawi
  module StopWordRemover
    ##
    # Filters out of a text every word that the given dictionary contains.

    class StopWordRemover
      attr_reader :dictionary

      ##
      # +dictionary+ must respond to +contains?(word)+.

      def initialize(dictionary)
        @dictionary = dictionary
      end

      ##
      # Split the text on whitespace, drop each word the dictionary contains
      # and return the remaining words joined by single spaces.

      def remove(text)
        text.split(' ').reject { |token| @dictionary.contains?(token) }.join(' ')
      end
    end
  end
end
@@ -0,0 +1,124 @@
1
+ require 'sastrawi/dictionary/array_dictionary'
2
+
3
+ require 'sastrawi/stop_word_remover/stop_word_remover'
4
+
5
module Sastrawi
  module StopWordRemover
    class StopWordRemoverFactory
      ##
      # Build a StopWordRemover whose dictionary is an ArrayDictionary holding
      # the built-in stop word list.

      def create_stop_word_remover
        stop_word_dictionary = Sastrawi::Dictionary::ArrayDictionary.new(get_stop_word)

        Sastrawi::StopWordRemover::StopWordRemover.new(stop_word_dictionary)
      end

      ##
      # Return the built-in stop word list as an array of strings.

      def get_stop_word
        %w[a ada adalah adanya adapun agak agaknya agar akan
           akankah akhir akhiri akhirnya aku akulah amat amatlah anda andalah
           antar antara antaranya apa apaan apabila apakah apalagi apatah arti
           artinya asal asalkan atas atau ataukah ataupun awal awalnya b bagai
           bagaikan bagaimana bagaimanakah bagaimanapun bagainamakah bagi bagian
           bahkan bahwa bahwasannya bahwasanya baik baiklah bakal bakalan balik
           banyak bapak baru bawah beberapa begini beginian beginikah beginilah
           begitu begitukah begitulah begitupun bekerja belakang belakangan
           belum belumlah benar benarkah benarlah berada berakhir berakhirlah
           berakhirnya berapa berapakah berapalah berapapun berarti berawal
           berbagai berdatangan beri berikan berikut berikutnya berjumlah
           berkali-kali berkata berkehendak berkeinginan berkenaan berlainan
           berlalu berlangsung berlebihan bermacam bermacam-macam bermaksud
           bermula bersama bersama-sama bersiap bersiap-siap bertanya
           bertanya-tanya berturut berturut-turut bertutur berujar berupa besar
           betul betulkah biasa biasanya bila bilakah bisa bisakah boleh bolehkah
           bolehlah buat bukan bukankah bukanlah bukannya bulan bung c cara
           caranya cukup cukupkah cukuplah cuma d dahulu dalam dan dapat dari
           daripada datang dekat demi demikian demikianlah dengan depan di dia
           diakhiri diakhirinya dialah diantara diantaranya diberi diberikan
           diberikannya dibuat dibuatnya didapat didatangkan digunakan
           diibaratkan diibaratkannya diingat diingatkan diinginkan dijawab
           dijelaskan dijelaskannya dikarenakan dikatakan dikatakannya dikerjakan
           diketahui diketahuinya dikira dilakukan dilalui dilihat dimaksud
           dimaksudkan dimaksudkannya dimaksudnya diminta dimintai dimisalkan
           dimulai dimulailah dimulainya dimungkinkan dini dipastikan diperbuat
           diperbuatnya dipergunakan diperkirakan diperlihatkan diperlukan
           diperlukannya dipersoalkan dipertanyakan dipunyai diri dirinya
           disampaikan disebut disebutkan disebutkannya disini disinilah
           ditambahkan ditandaskan ditanya ditanyai ditanyakan ditegaskan
           ditujukan ditunjuk ditunjuki ditunjukkan ditunjukkannya ditunjuknya
           dituturkan dituturkannya diucapkan diucapkannya diungkapkan dong dua
           dulu e empat enak enggak enggaknya entah entahlah f g guna gunakan h
           hadap hai hal halo hallo hampir hanya hanyalah hari harus haruslah
           harusnya helo hello hendak hendaklah hendaknya hingga i ia ialah
           ibarat ibaratkan ibaratnya ibu ikut ingat ingat-ingat ingin inginkah
           inginkan ini inikah inilah itu itukah itulah j jadi jadilah jadinya
           jangan jangankan janganlah jauh jawab jawaban jawabnya jelas jelaskan
           jelaslah jelasnya jika jikalau juga jumlah jumlahnya justru k kadar
           kala kalau kalaulah kalaupun kali kalian kami kamilah kamu kamulah kan
           kapan kapankah kapanpun karena karenanya kasus kata katakan katakanlah
           katanya ke keadaan kebetulan kecil kedua keduanya keinginan kelamaan
           kelihatan kelihatannya kelima keluar kembali kemudian kemungkinan
           kemungkinannya kena kenapa kepada kepadanya kerja kesampaian
           keseluruhan keseluruhannya keterlaluan ketika khusus khususnya kini
           kinilah kira kira-kira kiranya kita kitalah kok kurang l lagi lagian
           lah lain lainnya laku lalu lama lamanya langsung lanjut lanjutnya
           lebih lewat lihat lima luar m macam maka makanya makin maksud malah
           malahan mampu mampukah mana manakala manalagi masa masalah masalahnya
           masih masihkah masing masing-masing masuk mata mau maupun melainkan
           melakukan melalui melihat melihatnya memang memastikan memberi
           memberikan membuat memerlukan memihak meminta memintakan memisalkan
           memperbuat mempergunakan memperkirakan memperlihatkan mempersiapkan
           mempersoalkan mempertanyakan mempunyai memulai memungkinkan menaiki
           menambahkan menandaskan menanti menanti-nanti menantikan menanya
           menanyai menanyakan mendapat mendapatkan mendatang mendatangi
           mendatangkan menegaskan mengakhiri mengapa mengatakan mengatakannya
           mengenai mengerjakan mengetahui menggunakan menghendaki mengibaratkan
           mengibaratkannya mengingat mengingatkan menginginkan mengira
           mengucapkan mengucapkannya mengungkapkan menjadi menjawab menjelaskan
           menuju menunjuk menunjuki menunjukkan menunjuknya menurut menuturkan
           menyampaikan menyangkut menyatakan menyebutkan menyeluruh menyiapkan
           merasa mereka merekalah merupakan meski meskipun meyakini meyakinkan
           minta mirip misal misalkan misalnya mohon mula mulai mulailah mulanya
           mungkin mungkinkah n nah naik namun nanti nantinya nya nyaris nyata
           nyatanya o oleh olehnya orang p pada padahal padanya pak paling
           panjang pantas para pasti pastilah penting pentingnya per percuma
           perlu perlukah perlunya pernah persoalan pertama pertama-tama
           pertanyaan pertanyakan pihak pihaknya pukul pula pun punya q r rasa
           rasanya rupa rupanya s saat saatnya saja sajalah salam saling sama
           sama-sama sambil sampai sampai-sampai sampaikan sana sangat sangatlah
           sangkut satu saya sayalah se sebab sebabnya sebagai sebagaimana
           sebagainya sebagian sebaik sebaik-baiknya sebaiknya sebaliknya
           sebanyak sebegini sebegitu sebelum sebelumnya sebenarnya seberapa
           sebesar sebetulnya sebisanya sebuah sebut sebutlah sebutnya secara
           secukupnya sedang sedangkan sedemikian sedikit sedikitnya seenaknya
           segala segalanya segera seharusnya sehingga seingat sejak sejauh
           sejenak sejumlah sekadar sekadarnya sekali sekali-kali sekalian
           sekaligus sekalipun sekarang sekaranglah sekecil seketika sekiranya
           sekitar sekitarnya sekurang-kurangnya sekurangnya sela selain selaku
           selalu selama selama-lamanya selamanya selanjutnya seluruh seluruhnya
           semacam semakin semampu semampunya semasa semasih semata semata-mata
           semaunya sementara semisal semisalnya sempat semua semuanya semula
           sendiri sendirian sendirinya seolah seolah-olah seorang sepanjang
           sepantasnya sepantasnyalah seperlunya seperti sepertinya sepihak
           sering seringnya serta serupa sesaat sesama sesampai sesegera sesekali
           seseorang sesuatu sesuatunya sesudah sesudahnya setelah setempat
           setengah seterusnya setiap setiba setibanya setidak-tidaknya
           setidaknya setinggi seusai sewaktu siap siapa siapakah siapapun sini
           sinilah soal soalnya suatu sudah sudahkah sudahlah supaya t tadi
           tadinya tahu tak tambah tambahnya tampak tampaknya tandas tandasnya
           tanpa tanya tanyakan tanyanya tapi tegas tegasnya telah tempat tentang
           tentu tentulah tentunya tepat terakhir terasa terbanyak terdahulu
           terdapat terdiri terhadap terhadapnya teringat teringat-ingat terjadi
           terjadilah terjadinya terkira terlalu terlebih terlihat termasuk
           ternyata tersampaikan tersebut tersebutlah tertentu tertuju terus
           terutama tetap tetapi tiap tiba tiba-tiba tidak tidakkah tidaklah tiga
           toh tuju tunjuk turut tutur tuturnya u ucap ucapnya ujar ujarnya
           umumnya ungkap ungkapnya untuk usah usai v w waduh wah wahai waktunya
           walau walaupun wong x y ya yaitu yakin yakni yang z]
      end
    end
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
# Top-level namespace of the gem.
module Sastrawi
  # Gem version string; sastrawi.gemspec assigns it to spec.version.
  VERSION = "0.2.0"
end
data/lib/sastrawi.rb ADDED
@@ -0,0 +1,4 @@
1
+ require 'sastrawi/version'
2
+
3
+ require 'sastrawi/stemmer/stemmer_factory'
4
+ require 'sastrawi/stop_word_remover/stop_word_remover_factory'
data/sastrawi.gemspec ADDED
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "lib/sastrawi/version"
4
+
5
# Gem specification for sastrawi-ruby.
Gem::Specification.new do |gem|
  # Identity and ownership
  gem.name = "sastrawi-ruby"
  gem.version = Sastrawi::VERSION
  gem.authors = ["Johannes Dwi Cahyo"]
  gem.email = ["csk.rage@gmail.com"]
  gem.license = "MIT"
  gem.homepage = "https://github.com/johannesdwicahyo/sastrawi-ruby"
  gem.required_ruby_version = ">= 3.0.0"

  # Human-readable description
  gem.summary = "Indonesian language stemmer for Ruby"
  gem.description = "A maintained fork of the sastrawi gem. Stems words in Bahasa Indonesia " \
                    "using the Nazief & Adriani algorithm with Enhanced Confix Stripping. " \
                    "Based on the original work by Andrias Meisyal (sastrawi gem) and the " \
                    "PHP Sastrawi project (github.com/sastrawi/sastrawi)."

  gem.metadata = {
    "source_code_uri" => "https://github.com/johannesdwicahyo/sastrawi-ruby",
    "changelog_uri" => "https://github.com/johannesdwicahyo/sastrawi-ruby/blob/master/README.md",
    "upstream_uri" => "https://github.com/meisyal/sastrawi-ruby"
  }

  # Package every git-tracked file except those under test/, spec/ or features/.
  tracked_files = `git ls-files -z`.split("\x0")
  gem.files = tracked_files.grep_v(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  # Every packaged file under bin/ is exposed as an executable by its basename.
  gem.bindir = "bin"
  gem.executables = gem.files.grep(%r{^bin/}).map { |path| File.basename(path) }

  # Development-only dependencies, declared in the original order.
  [
    ["bundler", ">= 2.0"],
    ["rake", "~> 13.0"],
    ["rspec", "~> 3.10"]
  ].each do |dependency_name, requirement|
    gem.add_development_dependency dependency_name, requirement
  end
end
metadata ADDED
@@ -0,0 +1,179 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sastrawi-ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.0
5
+ platform: ruby
6
+ authors:
7
+ - Johannes Dwi Cahyo
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies:
12
+ - !ruby/object:Gem::Dependency
13
+ name: bundler
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '2.0'
19
+ type: :development
20
+ prerelease: false
21
+ version_requirements: !ruby/object:Gem::Requirement
22
+ requirements:
23
+ - - ">="
24
+ - !ruby/object:Gem::Version
25
+ version: '2.0'
26
+ - !ruby/object:Gem::Dependency
27
+ name: rake
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - "~>"
31
+ - !ruby/object:Gem::Version
32
+ version: '13.0'
33
+ type: :development
34
+ prerelease: false
35
+ version_requirements: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - "~>"
38
+ - !ruby/object:Gem::Version
39
+ version: '13.0'
40
+ - !ruby/object:Gem::Dependency
41
+ name: rspec
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - "~>"
45
+ - !ruby/object:Gem::Version
46
+ version: '3.10'
47
+ type: :development
48
+ prerelease: false
49
+ version_requirements: !ruby/object:Gem::Requirement
50
+ requirements:
51
+ - - "~>"
52
+ - !ruby/object:Gem::Version
53
+ version: '3.10'
54
+ description: A maintained fork of the sastrawi gem. Stems words in Bahasa Indonesia
55
+ using the Nazief & Adriani algorithm with Enhanced Confix Stripping. Based on the
56
+ original work by Andrias Meisyal (sastrawi gem) and the PHP Sastrawi project (github.com/sastrawi/sastrawi).
57
+ email:
58
+ - csk.rage@gmail.com
59
+ executables:
60
+ - sastrawi
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - ".github/workflows/ci.yml"
65
+ - ".gitignore"
66
+ - ".travis.yml"
67
+ - CONTRIBUTING.md
68
+ - Gemfile
69
+ - LICENSE.txt
70
+ - README.md
71
+ - Rakefile
72
+ - _config.yml
73
+ - bin/sastrawi
74
+ - data/base-word.txt
75
+ - lib/sastrawi.rb
76
+ - lib/sastrawi/dictionary/array_dictionary.rb
77
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb
78
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb
79
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb
80
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb
81
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb
82
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb
83
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb
84
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb
85
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb
86
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb
87
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb
88
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb
89
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb
90
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb
91
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb
92
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb
93
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb
94
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb
95
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb
96
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb
97
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb
98
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb
99
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb
100
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb
101
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb
102
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb
103
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb
104
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb
105
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb
106
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb
107
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb
108
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb
109
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb
110
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb
111
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb
112
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb
113
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb
114
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb
115
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb
116
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb
117
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb
118
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb
119
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb
120
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb
121
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb
122
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb
123
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb
124
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb
125
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb
126
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb
127
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb
128
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb
129
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb
130
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb
131
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb
132
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb
133
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb
134
+ - lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb
135
+ - lib/sastrawi/morphology/invalid_affix_pair_specification.rb
136
+ - lib/sastrawi/stemmer/cache/array_cache.rb
137
+ - lib/sastrawi/stemmer/cached_stemmer.rb
138
+ - lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb
139
+ - lib/sastrawi/stemmer/context/context.rb
140
+ - lib/sastrawi/stemmer/context/removal.rb
141
+ - lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb
142
+ - lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb
143
+ - lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb
144
+ - lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb
145
+ - lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb
146
+ - lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb
147
+ - lib/sastrawi/stemmer/context/visitor/visitor_provider.rb
148
+ - lib/sastrawi/stemmer/filter/text_normalizer.rb
149
+ - lib/sastrawi/stemmer/stemmer.rb
150
+ - lib/sastrawi/stemmer/stemmer_factory.rb
151
+ - lib/sastrawi/stop_word_remover/stop_word_remover.rb
152
+ - lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb
153
+ - lib/sastrawi/version.rb
154
+ - sastrawi.gemspec
155
+ homepage: https://github.com/johannesdwicahyo/sastrawi-ruby
156
+ licenses:
157
+ - MIT
158
+ metadata:
159
+ source_code_uri: https://github.com/johannesdwicahyo/sastrawi-ruby
160
+ changelog_uri: https://github.com/johannesdwicahyo/sastrawi-ruby/blob/master/README.md
161
+ upstream_uri: https://github.com/meisyal/sastrawi-ruby
162
+ rdoc_options: []
163
+ require_paths:
164
+ - lib
165
+ required_ruby_version: !ruby/object:Gem::Requirement
166
+ requirements:
167
+ - - ">="
168
+ - !ruby/object:Gem::Version
169
+ version: 3.0.0
170
+ required_rubygems_version: !ruby/object:Gem::Requirement
171
+ requirements:
172
+ - - ">="
173
+ - !ruby/object:Gem::Version
174
+ version: '0'
175
+ requirements: []
176
+ rubygems_version: 3.6.9
177
+ specification_version: 4
178
+ summary: Indonesian language stemmer for Ruby
179
+ test_files: []