sastrawi-ruby 0.2.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/MILESTONES.md +12 -0
- data/data/base-word.txt +17 -1
- data/data/stop-words.txt +842 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +2 -0
- data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +2 -0
- data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +2 -0
- data/lib/sastrawi/stemmer/cached_stemmer.rb +2 -0
- data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +2 -0
- data/lib/sastrawi/stemmer/context/context.rb +2 -0
- data/lib/sastrawi/stemmer/context/removal.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +2 -0
- data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +2 -0
- data/lib/sastrawi/stemmer/stemmer.rb +8 -0
- data/lib/sastrawi/stemmer/stemmer_factory.rb +2 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +2 -0
- data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +19 -107
- data/lib/sastrawi/version.rb +1 -1
- data/lib/sastrawi.rb +2 -0
- metadata +3 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1a4a6852ccea2d8774edd6d7f9d20005cb16a7f885fcbefeecc56c3be0165ba0
|
|
4
|
+
data.tar.gz: 345763610036ee7fddd02eaf7d6cd8db383561635ffa9e439ed735576618ef7c
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: a2f339b1f63034743b959ab50ef543862c8baeb2ca3b1592b1eb17453a0dd946f75a3a442732a86657d8a2348622562673fd1350600420c129f99c8e534c6378
|
|
7
|
+
data.tar.gz: 632c7532bf85ece328ffc435779c7719172184c2d05a16f1bb6d6a9b5c940b04a744b6dc23462ff1f62113c5f8b1fbda5604ec7caf11929d41663a1a1ea1b2b9
|
data/.gitignore
CHANGED
data/MILESTONES.md
ADDED
data/data/base-word.txt
CHANGED
|
@@ -3653,6 +3653,7 @@ bramacorah
|
|
|
3653
3653
|
brambang
|
|
3654
3654
|
brana
|
|
3655
3655
|
brander
|
|
3656
|
+
branding
|
|
3656
3657
|
brankar
|
|
3657
3658
|
brankas
|
|
3658
3659
|
branwir
|
|
@@ -5043,6 +5044,7 @@ darat
|
|
|
5043
5044
|
darau
|
|
5044
5045
|
dargah
|
|
5045
5046
|
dari
|
|
5047
|
+
daring
|
|
5046
5048
|
daripada
|
|
5047
5049
|
darji
|
|
5048
5050
|
darma
|
|
@@ -5822,6 +5824,7 @@ diri
|
|
|
5822
5824
|
dirigen
|
|
5823
5825
|
diris
|
|
5824
5826
|
dirus
|
|
5827
|
+
disabilitas
|
|
5825
5828
|
disagio
|
|
5826
5829
|
disain
|
|
5827
5830
|
disainer
|
|
@@ -10189,6 +10192,7 @@ infleksibel
|
|
|
10189
10192
|
infleksif
|
|
10190
10193
|
infloresen
|
|
10191
10194
|
infloresens
|
|
10195
|
+
influencer
|
|
10192
10196
|
influensa
|
|
10193
10197
|
influenza
|
|
10194
10198
|
info
|
|
@@ -10238,6 +10242,7 @@ inkaso
|
|
|
10238
10242
|
inklaring
|
|
10239
10243
|
inklinasi
|
|
10240
10244
|
inklinometer
|
|
10245
|
+
inklusi
|
|
10241
10246
|
inklusif
|
|
10242
10247
|
inkognito
|
|
10243
10248
|
inkompabilitas
|
|
@@ -10381,6 +10386,7 @@ internalisasi
|
|
|
10381
10386
|
internasional
|
|
10382
10387
|
internasionalisasi
|
|
10383
10388
|
internat
|
|
10389
|
+
internet
|
|
10384
10390
|
internir
|
|
10385
10391
|
internis
|
|
10386
10392
|
internuntius
|
|
@@ -16157,6 +16163,7 @@ litah
|
|
|
16157
16163
|
litak
|
|
16158
16164
|
litani
|
|
16159
16165
|
liter
|
|
16166
|
+
literasi
|
|
16160
16167
|
literator
|
|
16161
16168
|
literer
|
|
16162
16169
|
litium
|
|
@@ -16462,6 +16469,7 @@ lupus
|
|
|
16462
16469
|
luput
|
|
16463
16470
|
lurah
|
|
16464
16471
|
lurik
|
|
16472
|
+
luring
|
|
16465
16473
|
luru
|
|
16466
16474
|
lurub
|
|
16467
16475
|
luruh
|
|
@@ -17043,6 +17051,7 @@ markado
|
|
|
17043
17051
|
markah
|
|
17044
17052
|
markas
|
|
17045
17053
|
markasit
|
|
17054
|
+
marketing
|
|
17046
17055
|
marketri
|
|
17047
17056
|
markis
|
|
17048
17057
|
markisa
|
|
@@ -18160,7 +18169,6 @@ muamalat
|
|
|
18160
18169
|
muanas
|
|
18161
18170
|
muara
|
|
18162
18171
|
muarikh
|
|
18163
|
-
muas
|
|
18164
18172
|
muasasah
|
|
18165
18173
|
muasir
|
|
18166
18174
|
muat
|
|
@@ -19330,6 +19338,7 @@ ongol-ongol
|
|
|
19330
19338
|
oniks
|
|
19331
19339
|
onkogen
|
|
19332
19340
|
onkologi
|
|
19341
|
+
online
|
|
19333
19342
|
onomasiologi
|
|
19334
19343
|
onomastika
|
|
19335
19344
|
onomatologi
|
|
@@ -21416,6 +21425,7 @@ poces
|
|
|
21416
21425
|
poci
|
|
21417
21426
|
pocok
|
|
21418
21427
|
pocong
|
|
21428
|
+
podcast
|
|
21419
21429
|
podemporem
|
|
21420
21430
|
podikal
|
|
21421
21431
|
podium
|
|
@@ -25873,6 +25883,7 @@ slintat-slintut
|
|
|
25873
25883
|
slip
|
|
25874
25884
|
slof
|
|
25875
25885
|
slogan
|
|
25886
|
+
smartphone
|
|
25876
25887
|
smes
|
|
25877
25888
|
smokel
|
|
25878
25889
|
snob
|
|
@@ -26182,6 +26193,7 @@ stapler
|
|
|
26182
26193
|
staples
|
|
26183
26194
|
start
|
|
26184
26195
|
starter
|
|
26196
|
+
startup
|
|
26185
26197
|
stasi
|
|
26186
26198
|
stasioner
|
|
26187
26199
|
stasis
|
|
@@ -26288,6 +26300,7 @@ stratopouse
|
|
|
26288
26300
|
stratosfer
|
|
26289
26301
|
stratum
|
|
26290
26302
|
stratus
|
|
26303
|
+
streaming
|
|
26291
26304
|
streng
|
|
26292
26305
|
streptokokus
|
|
26293
26306
|
streptomisin
|
|
@@ -26311,6 +26324,7 @@ studen
|
|
|
26311
26324
|
studi
|
|
26312
26325
|
studio
|
|
26313
26326
|
stuko
|
|
26327
|
+
stunting
|
|
26314
26328
|
stupa
|
|
26315
26329
|
sua
|
|
26316
26330
|
suah
|
|
@@ -29576,6 +29590,7 @@ wayang
|
|
|
29576
29590
|
wayuh
|
|
29577
29591
|
wazari
|
|
29578
29592
|
wazir
|
|
29593
|
+
webinar
|
|
29579
29594
|
weda
|
|
29580
29595
|
wedam
|
|
29581
29596
|
wedana
|
|
@@ -29668,6 +29683,7 @@ wirang
|
|
|
29668
29683
|
wiraniaga
|
|
29669
29684
|
wirasuara
|
|
29670
29685
|
wiraswasta
|
|
29686
|
+
wirausaha
|
|
29671
29687
|
wirid
|
|
29672
29688
|
wiru
|
|
29673
29689
|
wisa
|