sastrawi 0.1.0.pre → 0.1.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/.gitignore +1 -1
- data/.travis.yml +7 -5
- data/CONTRIBUTING.md +22 -0
- data/Gemfile +0 -0
- data/LICENSE.txt +1 -1
- data/README.md +53 -19
- data/Rakefile +2 -2
- data/_config.yml +1 -0
- data/bin/sastrawi +24 -0
- data/data/{kata-dasar.txt → base-word.txt} +0 -0
- data/lib/sastrawi.rb +1 -9
- data/lib/sastrawi/dictionary/array_dictionary.rb +36 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +2 -2
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +0 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +1 -1
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +2 -2
- data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +4 -0
- data/lib/sastrawi/stemmer/cache/array_cache.rb +2 -2
- data/lib/sastrawi/stemmer/cached_stemmer.rb +1 -1
- data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +5 -0
- data/lib/sastrawi/stemmer/context/context.rb +28 -7
- data/lib/sastrawi/stemmer/context/removal.rb +1 -1
- data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +0 -0
- data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +2 -2
- data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +10 -1
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +9 -1
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +9 -1
- data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +9 -1
- data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +1 -1
- data/lib/sastrawi/stemmer/filter/text_normalizer.rb +0 -0
- data/lib/sastrawi/stemmer/stemmer.rb +31 -15
- data/lib/sastrawi/stemmer/stemmer_factory.rb +5 -1
- data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +5 -2
- data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +102 -130
- data/lib/sastrawi/version.rb +1 -1
- data/sastrawi.gemspec +6 -5
- metadata +22 -19
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 76f20957cffd660eeb6afefa58352d16e55e8f259f22fe31e65a800cea5372e2
|
4
|
+
data.tar.gz: 47a45adb1dd5aec42614ffaf5ab716ca591a3747222a05c61d8f26df8676a18b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: '0822820839bf44e8605d11b430551e55b93094704b7b3b82098510a2809df3984864388d73dfc4bed07d0989591fb4772f3e25c47192a257d02de23361b71a01'
|
7
|
+
data.tar.gz: a5980e90c5de75fb3d1d7c2137148ee4f93710e0d02a4341910b33156638df93b4cd0ee55f722abee072a52420550f918c813bda8bccdbda541994da6e5d358b
|
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
@@ -1,8 +1,10 @@
|
|
1
1
|
sudo: false
|
2
2
|
language: ruby
|
3
3
|
rvm:
|
4
|
-
-
|
5
|
-
- 2.
|
6
|
-
- 2.3
|
7
|
-
|
8
|
-
|
4
|
+
- 2.3.8
|
5
|
+
- 2.4.5
|
6
|
+
- 2.5.3
|
7
|
+
before_install: gem install bundler -v 2.2.14 --no-document
|
8
|
+
notifications:
|
9
|
+
email:
|
10
|
+
on_success: never
|
data/CONTRIBUTING.md
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
# Contributing to sastrawi-ruby
|
2
|
+
|
3
|
+
If you find a bug, please report it to the [issue tracker][issue]. You can also
|
4
|
+
contribute by writing code.
|
5
|
+
|
6
|
+
## How to contribute
|
7
|
+
|
8
|
+
There are some steps you must follow:
|
9
|
+
|
10
|
+
1. Fork this repository and clone it to your local environment
|
11
|
+
2. Create a named branch that contains your change
|
12
|
+
3. Install the development dependencies by running `bundle install`
|
13
|
+
4. Code
|
14
|
+
5. Add or adjust unit tests and make sure everything passes by running `bundle
|
15
|
+
exec rake`
|
16
|
+
6. Push your branch to GitHub
|
17
|
+
7. Send a pull request for your branch
|
18
|
+
|
19
|
+
Use the `dev` branch as the target of your pull request. Both issue and pull
|
20
|
+
request details must be written in English.
|
21
|
+
|
22
|
+
[issue]: https://github.com/meisyal/sastrawi-ruby/issues
|
data/Gemfile
CHANGED
File without changes
|
data/LICENSE.txt
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
The MIT License (MIT)
|
2
2
|
|
3
|
-
Copyright (c) 2016-
|
3
|
+
Copyright (c) 2016-2021 Andrias Meisyal
|
4
4
|
|
5
5
|
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
6
|
of this software and associated documentation files (the "Software"), to deal
|
data/README.md
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
# Sastrawi Bindings for Ruby [![Build Status](https://travis-ci.org/meisyal/sastrawi-ruby.svg?branch=master)](https://travis-ci.org/meisyal/sastrawi-ruby)
|
1
|
+
# Sastrawi Bindings for Ruby [![Build Status](https://travis-ci.org/meisyal/sastrawi-ruby.svg?branch=master)](https://travis-ci.org/meisyal/sastrawi-ruby) [![Gem Version](https://badge.fury.io/rb/sastrawi.svg)](https://badge.fury.io/rb/sastrawi)
|
2
2
|
|
3
3
|
sastrawi-ruby is Ruby bindings for [Sastrawi][sastrawi], a library which allows you
|
4
4
|
to stem words in Bahasa Indonesia. The original implementation of Sastrawi was
|
@@ -6,12 +6,18 @@ written in PHP and this library is written in Ruby language.
|
|
6
6
|
|
7
7
|
Taken from [Wikipedia][stemmingwiki], stemming is the process of reducing
|
8
8
|
inflected (or sometimes derived) words to their word stem, base or root form.
|
9
|
-
For instance, "menahan" has "tahan" as its base form.
|
9
|
+
For instance, "menahan" has "tahan" as its base form. If you want to know how
|
10
|
+
stemming works, please read this [page][howstemmingworks] (in Bahasa Indonesia)
|
11
|
+
for further details.
|
12
|
+
|
13
|
+
## Demo
|
14
|
+
|
15
|
+
The demo version of sastrawi-ruby can be accessed [here][demo].
|
10
16
|
|
11
17
|
## Documentation
|
12
18
|
|
13
|
-
Documentation for this library is
|
14
|
-
check [sastrawi-ruby GitHub Wiki][
|
19
|
+
Documentation for this library is available [here][documentation]. You can
|
20
|
+
also check the [sastrawi-ruby GitHub Wiki][wiki], which contains a TODO list.
|
15
21
|
|
16
22
|
## Installation
|
17
23
|
|
@@ -22,38 +28,61 @@ Ruby bindings for Sastrawi, add this line to your application's Gemfile:
|
|
22
28
|
|
23
29
|
and then execute:
|
24
30
|
|
25
|
-
bundle install
|
31
|
+
$ bundle install
|
26
32
|
|
27
33
|
or you can install directly:
|
28
34
|
|
29
|
-
gem install sastrawi
|
35
|
+
$ gem install sastrawi
|
30
36
|
|
31
|
-
Note that, this library requires Ruby. Ruby
|
32
|
-
on your system. I would recommend to
|
37
|
+
Note that this library requires Ruby. The Ruby 2.3 series or above should be
|
38
|
+
installed on your system. I recommend using a stable version.
|
33
39
|
|
34
40
|
## Usage
|
35
41
|
|
36
|
-
|
37
|
-
can't add or remove any base form. This feature will be implemented for next
|
38
|
-
release.
|
42
|
+
This library supports stemming words with provided base forms.
|
39
43
|
|
40
44
|
```ruby
|
41
45
|
require 'sastrawi'
|
42
46
|
|
47
|
+
# create stemmer
|
48
|
+
stemmer_factory = Sastrawi::Stemmer::StemmerFactory.new
|
49
|
+
stemmer = stemmer_factory.create_stemmer
|
50
|
+
|
43
51
|
# prepare a sentence or words to be stemmed and call the stem API
|
44
52
|
sentence = 'Perekonomian Indonesia sedang dalam pertumbuhan yang membanggakan.'
|
45
|
-
stemming_result =
|
53
|
+
stemming_result = stemmer.stem(sentence)
|
46
54
|
|
47
|
-
# the stemming result should be "ekonomi indonesia sedang dalam tumbuh yang
|
48
|
-
bangga"
|
55
|
+
# the stemming result should be "ekonomi indonesia sedang dalam tumbuh yang bangga"
|
49
56
|
puts stemming_result
|
50
57
|
```
|
51
58
|
|
59
|
+
Besides that, you can add or remove any base form.
|
60
|
+
|
61
|
+
```ruby
|
62
|
+
require 'sastrawi'
|
63
|
+
|
64
|
+
# create stemmer
|
65
|
+
stemmer_factory = Sastrawi::Stemmer::StemmerFactory.new
|
66
|
+
|
67
|
+
# create default dictionary and add a text file that contains words into it
|
68
|
+
dictionary = stemmer_factory.create_default_dictionary
|
69
|
+
dictionary.add_words_from_text_file('my-dictionary.txt')
|
70
|
+
|
71
|
+
# add or remove words
|
72
|
+
dictionary.add('internet')
|
73
|
+
dictionary.remove('desa')
|
74
|
+
|
75
|
+
# stem a word, "internetan", for example
|
76
|
+
stemmer = Sastrawi::Stemmer::Stemmer.new(dictionary)
|
77
|
+
|
78
|
+
# the stemming result should be "internet"
|
79
|
+
puts stemmer.stem('internetan')
|
80
|
+
```
|
81
|
+
|
52
82
|
## Contributing
|
53
83
|
|
54
|
-
Contributions are welcome.
|
55
|
-
|
56
|
-
Both issue and pull request details should be written in English.
|
84
|
+
Contributions are welcome. Please read the [CONTRIBUTING][contributing]
|
85
|
+
guidelines.
|
57
86
|
|
58
87
|
## License
|
59
88
|
|
@@ -62,9 +91,14 @@ This library is released under the terms of MIT License. See the
|
|
62
91
|
words from [Kateglo][kateglo] and it is licensed under a [Creative Commons
|
63
92
|
Attribution-NonCommercial-ShareAlike 3.0 Unported License][kateglolicense].
|
64
93
|
|
94
|
+
|
65
95
|
[sastrawi]: https://github.com/sastrawi/sastrawi
|
66
96
|
[stemmingwiki]: https://en.wikipedia.org/wiki/Stemming
|
67
|
-
[
|
97
|
+
[howstemmingworks]: https://github.com/sastrawi/sastrawi/wiki/Stemming-Bahasa-Indonesia
|
98
|
+
[demo]: https://sastrawi-ruby-demo.herokuapp.com
|
99
|
+
[documentation]: https://www.rubydoc.info/gems/sastrawi/
|
100
|
+
[contributing]: https://github.com/meisyal/sastrawi-ruby/blob/master/CONTRIBUTING.md
|
68
101
|
[license]: https://github.com/meisyal/sastrawi-ruby/blob/master/LICENSE.txt
|
69
|
-
[kateglo]:
|
102
|
+
[kateglo]: https://kateglo.com
|
70
103
|
[kateglolicense]: https://creativecommons.org/licenses/by-nc-sa/3.0/
|
104
|
+
[wiki]: https://github.com/meisyal/sastrawi-ruby/wiki
|
data/Rakefile
CHANGED
data/_config.yml
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
theme: jekyll-theme-cayman
|
data/bin/sastrawi
ADDED
@@ -0,0 +1,24 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'sastrawi'
|
4
|
+
|
5
|
+
# usage: $ sastrawi word/sentence
|
6
|
+
if ARGV.empty?
|
7
|
+
puts '--------------- sastrawi: ERROR ---------------'
|
8
|
+
puts 'Usage: Please specify a word or sentence to be stemmed.'
|
9
|
+
puts ' sastrawi word/sentence'
|
10
|
+
puts 'Example:'
|
11
|
+
puts ' sastrawi mengundang'
|
12
|
+
puts ' sastrawi Perekonomian Indonesia sedang dalam pertumbuhan yang membanggakan.'
|
13
|
+
else
|
14
|
+
stemmer_factory = Sastrawi::Stemmer::StemmerFactory.new
|
15
|
+
stemmer = stemmer_factory.create_stemmer
|
16
|
+
|
17
|
+
words = []
|
18
|
+
|
19
|
+
ARGV.each { |arg| words << arg }
|
20
|
+
|
21
|
+
sentence = words.join(' ')
|
22
|
+
|
23
|
+
puts stemmer.stem(sentence)
|
24
|
+
end
|
File without changes
|
data/lib/sastrawi.rb
CHANGED
@@ -1,12 +1,4 @@
|
|
1
1
|
require 'sastrawi/version'
|
2
2
|
|
3
3
|
require 'sastrawi/stemmer/stemmer_factory'
|
4
|
-
|
5
|
-
module Sastrawi
|
6
|
-
def self.stem(sentence)
|
7
|
-
stemmer_factory = Sastrawi::Stemmer::StemmerFactory.new
|
8
|
-
stemmer = stemmer_factory.create_stemmer
|
9
|
-
|
10
|
-
stemmer.stem(sentence)
|
11
|
-
end
|
12
|
-
end
|
4
|
+
require 'sastrawi/stop_word_remover/stop_word_remover_factory'
|
@@ -1,7 +1,7 @@
|
|
1
1
|
module Sastrawi
|
2
2
|
module Dictionary
|
3
3
|
class ArrayDictionary
|
4
|
-
|
4
|
+
attr_reader :words
|
5
5
|
|
6
6
|
def initialize(words = [])
|
7
7
|
@words = []
|
@@ -9,25 +9,59 @@ module Sastrawi
|
|
9
9
|
add_words(words)
|
10
10
|
end
|
11
11
|
|
12
|
+
##
|
13
|
+
# Check whether a word is contained in the dictionary
|
14
|
+
|
12
15
|
def contains?(word)
|
13
16
|
@words.include?(word)
|
14
17
|
end
|
15
18
|
|
19
|
+
##
|
20
|
+
# Count how many words are in the dictionary
|
21
|
+
|
16
22
|
def count
|
17
23
|
@words.length
|
18
24
|
end
|
19
25
|
|
26
|
+
##
|
27
|
+
# Add multiple words to the dictionary
|
28
|
+
|
20
29
|
def add_words(new_words)
|
21
30
|
new_words.each do |word|
|
22
31
|
add(word)
|
23
32
|
end
|
24
33
|
end
|
25
34
|
|
35
|
+
##
|
36
|
+
# Add a word to the dictionary
|
37
|
+
|
26
38
|
def add(word)
|
27
|
-
return if word == ''
|
39
|
+
return if word.nil? || word.strip == ''
|
28
40
|
|
29
41
|
@words.push(word)
|
30
42
|
end
|
43
|
+
|
44
|
+
##
|
45
|
+
# Add words from a text file to the dictionary
|
46
|
+
|
47
|
+
def add_words_from_text_file(file_path)
|
48
|
+
words = []
|
49
|
+
|
50
|
+
File.open(file_path, 'r') do |file|
|
51
|
+
file.each do |line|
|
52
|
+
words.push(line.chomp)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
add_words(words)
|
57
|
+
end
|
58
|
+
|
59
|
+
##
|
60
|
+
# Remove a word from the dictionary
|
61
|
+
|
62
|
+
def remove(word)
|
63
|
+
@words.delete(word)
|
64
|
+
end
|
31
65
|
end
|
32
66
|
end
|
33
67
|
end
|
File without changes
|