sastrawi-ruby 0.2.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (82)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/MILESTONES.md +12 -0
  4. data/data/base-word.txt +17 -1
  5. data/data/stop-words.txt +842 -0
  6. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule10.rb +2 -0
  7. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule11.rb +2 -0
  8. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule12.rb +2 -0
  9. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13a.rb +2 -0
  10. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule13b.rb +2 -0
  11. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule14.rb +2 -0
  12. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15a.rb +2 -0
  13. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule15b.rb +2 -0
  14. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule16.rb +2 -0
  15. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17a.rb +2 -0
  16. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17b.rb +2 -0
  17. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17c.rb +2 -0
  18. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule17d.rb +2 -0
  19. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18a.rb +2 -0
  20. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule18b.rb +2 -0
  21. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule19.rb +2 -0
  22. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1a.rb +2 -0
  23. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule1b.rb +2 -0
  24. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule2.rb +2 -0
  25. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule20.rb +2 -0
  26. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21a.rb +2 -0
  27. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule21b.rb +2 -0
  28. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule23.rb +2 -0
  29. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule24.rb +2 -0
  30. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule25.rb +2 -0
  31. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26a.rb +2 -0
  32. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule26b.rb +2 -0
  33. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule27.rb +2 -0
  34. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28a.rb +2 -0
  35. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule28b.rb +2 -0
  36. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule29.rb +2 -0
  37. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule3.rb +2 -0
  38. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30a.rb +2 -0
  39. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30b.rb +2 -0
  40. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule30c.rb +2 -0
  41. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31a.rb +2 -0
  42. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule31b.rb +2 -0
  43. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule32.rb +2 -0
  44. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule34.rb +2 -0
  45. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule35.rb +2 -0
  46. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule36.rb +2 -0
  47. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37a.rb +2 -0
  48. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule37b.rb +2 -0
  49. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38a.rb +2 -0
  50. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule38b.rb +2 -0
  51. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39a.rb +2 -0
  52. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule39b.rb +2 -0
  53. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule4.rb +2 -0
  54. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40a.rb +2 -0
  55. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule40b.rb +2 -0
  56. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule41.rb +2 -0
  57. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule42.rb +2 -0
  58. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule5.rb +2 -0
  59. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6a.rb +2 -0
  60. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule6b.rb +2 -0
  61. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule7.rb +2 -0
  62. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule8.rb +2 -0
  63. data/lib/sastrawi/morphology/disambiguator/disambiguator_prefix_rule9.rb +2 -0
  64. data/lib/sastrawi/morphology/invalid_affix_pair_specification.rb +2 -0
  65. data/lib/sastrawi/stemmer/cached_stemmer.rb +2 -0
  66. data/lib/sastrawi/stemmer/confix_stripping/precedence_adjustment_specification.rb +2 -0
  67. data/lib/sastrawi/stemmer/context/context.rb +2 -0
  68. data/lib/sastrawi/stemmer/context/removal.rb +2 -0
  69. data/lib/sastrawi/stemmer/context/visitor/dont_stem_short_word.rb +2 -0
  70. data/lib/sastrawi/stemmer/context/visitor/prefix_disambiguator.rb +2 -0
  71. data/lib/sastrawi/stemmer/context/visitor/remove_derivational_suffix.rb +2 -0
  72. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_particle.rb +2 -0
  73. data/lib/sastrawi/stemmer/context/visitor/remove_inflectional_possessive_pronoun.rb +2 -0
  74. data/lib/sastrawi/stemmer/context/visitor/remove_plain_prefix.rb +2 -0
  75. data/lib/sastrawi/stemmer/context/visitor/visitor_provider.rb +2 -0
  76. data/lib/sastrawi/stemmer/stemmer.rb +8 -0
  77. data/lib/sastrawi/stemmer/stemmer_factory.rb +2 -0
  78. data/lib/sastrawi/stop_word_remover/stop_word_remover.rb +2 -0
  79. data/lib/sastrawi/stop_word_remover/stop_word_remover_factory.rb +19 -107
  80. data/lib/sastrawi/version.rb +1 -1
  81. data/lib/sastrawi.rb +2 -0
  82. metadata +3 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b33ab69d9a6a019e376bb620152f76de882282d279fe141b93a0086b03165ac5
4
- data.tar.gz: 6fbe94a6ee7443ce97c5a83042a8bd3bccf473a1b9bf578da9b6a3ad04250eca
3
+ metadata.gz: 1a4a6852ccea2d8774edd6d7f9d20005cb16a7f885fcbefeecc56c3be0165ba0
4
+ data.tar.gz: 345763610036ee7fddd02eaf7d6cd8db383561635ffa9e439ed735576618ef7c
5
5
  SHA512:
6
- metadata.gz: c58d1702bb5ec1d2fa5a964c8aa15420e32946cb31a0c3227a8d8f27af737d9720ae1fd77b1a9d117d08fe969cad270f2a5e5ee7d54e3903a2a39c7f54ef793d
7
- data.tar.gz: 5db74112b60b6c74fde9e1ad94488a998aa762f606b71a395ff99521ecea4db1e341e1bf1d6bafe8cf13a4d21ce55f7740085ad8f6546daedfdb3d97f2647e76
6
+ metadata.gz: a2f339b1f63034743b959ab50ef543862c8baeb2ca3b1592b1eb17453a0dd946f75a3a442732a86657d8a2348622562673fd1350600420c129f99c8e534c6378
7
+ data.tar.gz: 632c7532bf85ece328ffc435779c7719172184c2d05a16f1bb6d6a9b5c940b04a744b6dc23462ff1f62113c5f8b1fbda5604ec7caf11929d41663a1a1ea1b2b9
data/.gitignore CHANGED
@@ -49,3 +49,4 @@ Gemfile.lock
49
49
  # unless supporting rvm < 1.11.0 or doing something fancy, ignore this:
50
50
  .rvmrc
51
51
  CLAUDE.md
52
+ .claude/
data/MILESTONES.md ADDED
@@ -0,0 +1,12 @@
1
+ # Milestones
2
+
3
+ ## v0.2.1 (2026-03-10)
4
+
5
+ ### Changes
6
+ - Set-based dictionary
7
+ - array mutation fix
8
+ - thread-safe cache with eviction
9
+ - nil input guards
10
+
11
+ ## v0.2.0 (Initial release)
12
+ - Initial release
data/data/base-word.txt CHANGED
@@ -3653,6 +3653,7 @@ bramacorah
3653
3653
  brambang
3654
3654
  brana
3655
3655
  brander
3656
+ branding
3656
3657
  brankar
3657
3658
  brankas
3658
3659
  branwir
@@ -5043,6 +5044,7 @@ darat
5043
5044
  darau
5044
5045
  dargah
5045
5046
  dari
5047
+ daring
5046
5048
  daripada
5047
5049
  darji
5048
5050
  darma
@@ -5822,6 +5824,7 @@ diri
5822
5824
  dirigen
5823
5825
  diris
5824
5826
  dirus
5827
+ disabilitas
5825
5828
  disagio
5826
5829
  disain
5827
5830
  disainer
@@ -10189,6 +10192,7 @@ infleksibel
10189
10192
  infleksif
10190
10193
  infloresen
10191
10194
  infloresens
10195
+ influencer
10192
10196
  influensa
10193
10197
  influenza
10194
10198
  info
@@ -10238,6 +10242,7 @@ inkaso
10238
10242
  inklaring
10239
10243
  inklinasi
10240
10244
  inklinometer
10245
+ inklusi
10241
10246
  inklusif
10242
10247
  inkognito
10243
10248
  inkompabilitas
@@ -10381,6 +10386,7 @@ internalisasi
10381
10386
  internasional
10382
10387
  internasionalisasi
10383
10388
  internat
10389
+ internet
10384
10390
  internir
10385
10391
  internis
10386
10392
  internuntius
@@ -16157,6 +16163,7 @@ litah
16157
16163
  litak
16158
16164
  litani
16159
16165
  liter
16166
+ literasi
16160
16167
  literator
16161
16168
  literer
16162
16169
  litium
@@ -16462,6 +16469,7 @@ lupus
16462
16469
  luput
16463
16470
  lurah
16464
16471
  lurik
16472
+ luring
16465
16473
  luru
16466
16474
  lurub
16467
16475
  luruh
@@ -17043,6 +17051,7 @@ markado
17043
17051
  markah
17044
17052
  markas
17045
17053
  markasit
17054
+ marketing
17046
17055
  marketri
17047
17056
  markis
17048
17057
  markisa
@@ -18160,7 +18169,6 @@ muamalat
18160
18169
  muanas
18161
18170
  muara
18162
18171
  muarikh
18163
- muas
18164
18172
  muasasah
18165
18173
  muasir
18166
18174
  muat
@@ -19330,6 +19338,7 @@ ongol-ongol
19330
19338
  oniks
19331
19339
  onkogen
19332
19340
  onkologi
19341
+ online
19333
19342
  onomasiologi
19334
19343
  onomastika
19335
19344
  onomatologi
@@ -21416,6 +21425,7 @@ poces
21416
21425
  poci
21417
21426
  pocok
21418
21427
  pocong
21428
+ podcast
21419
21429
  podemporem
21420
21430
  podikal
21421
21431
  podium
@@ -25873,6 +25883,7 @@ slintat-slintut
25873
25883
  slip
25874
25884
  slof
25875
25885
  slogan
25886
+ smartphone
25876
25887
  smes
25877
25888
  smokel
25878
25889
  snob
@@ -26182,6 +26193,7 @@ stapler
26182
26193
  staples
26183
26194
  start
26184
26195
  starter
26196
+ startup
26185
26197
  stasi
26186
26198
  stasioner
26187
26199
  stasis
@@ -26288,6 +26300,7 @@ stratopouse
26288
26300
  stratosfer
26289
26301
  stratum
26290
26302
  stratus
26303
+ streaming
26291
26304
  streng
26292
26305
  streptokokus
26293
26306
  streptomisin
@@ -26311,6 +26324,7 @@ studen
26311
26324
  studi
26312
26325
  studio
26313
26326
  stuko
26327
+ stunting
26314
26328
  stupa
26315
26329
  sua
26316
26330
  suah
@@ -29576,6 +29590,7 @@ wayang
29576
29590
  wayuh
29577
29591
  wazari
29578
29592
  wazir
29593
+ webinar
29579
29594
  weda
29580
29595
  wedam
29581
29596
  wedana
@@ -29668,6 +29683,7 @@ wirang
29668
29683
  wiraniaga
29669
29684
  wirasuara
29670
29685
  wiraswasta
29686
+ wirausaha
29671
29687
  wirid
29672
29688
  wiru
29673
29689
  wisa