sastrawi-ruby 0.2.0 → 0.2.1

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 293b0a6274835ba56e5df1e6c0a96b725e026ef703af178b7f791c3763b723c3
4
- data.tar.gz: 2b74b53603072b2e4171abfc8117308dae795a78b2e0071fd73a7961e732808b
3
+ metadata.gz: b33ab69d9a6a019e376bb620152f76de882282d279fe141b93a0086b03165ac5
4
+ data.tar.gz: 6fbe94a6ee7443ce97c5a83042a8bd3bccf473a1b9bf578da9b6a3ad04250eca
5
5
  SHA512:
6
- metadata.gz: dc74bcdbcb213b19c26e0dd6480077e69ae8dec455911b5004af662547fe0e7bd5af72943615025025d244b9f7532e1f6b062bf213b99ae8f0e96b7f8c8db73b
7
- data.tar.gz: eb5807126530f67a26935fd85130206ee08da1a4817d0e1d1f7151b011dc6533f6ef16b4a41673a5c2af5ec85a776d0be84e77ee024da15a730d7b090786dab9
6
+ metadata.gz: c58d1702bb5ec1d2fa5a964c8aa15420e32946cb31a0c3227a8d8f27af737d9720ae1fd77b1a9d117d08fe969cad270f2a5e5ee7d54e3903a2a39c7f54ef793d
7
+ data.tar.gz: 5db74112b60b6c74fde9e1ad94488a998aa762f606b71a395ff99521ecea4db1e341e1bf1d6bafe8cf13a4d21ce55f7740085ad8f6546daedfdb3d97f2647e76
@@ -1,14 +1,23 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+
1
5
  module Sastrawi
2
6
  module Dictionary
3
7
  class ArrayDictionary
4
- attr_reader :words
5
-
6
8
  def initialize(words = [])
7
- @words = []
9
+ @words = Set.new
8
10
 
9
11
  add_words(words)
10
12
  end
11
13
 
14
+ ##
15
+ # Return the words as an Array (for backward compatibility)
16
+
17
+ def words
18
+ @words.to_a
19
+ end
20
+
12
21
  ##
13
22
  # Check whether a word is contained in the dictionary
14
23
 
@@ -20,7 +29,7 @@ module Sastrawi
20
29
  # Count how many words in the dictionary
21
30
 
22
31
  def count
23
- @words.length
32
+ @words.size
24
33
  end
25
34
 
26
35
  ##
@@ -36,9 +45,13 @@ module Sastrawi
36
45
  # Add a word to the dictionary
37
46
 
38
47
  def add(word)
39
- return if word.nil? || word.strip == ''
48
+ unless word.is_a?(String)
49
+ raise ArgumentError, "dictionary entries must be strings, got #{word.class}"
50
+ end
51
+
52
+ return if word.strip == ''
40
53
 
41
- @words.push(word)
54
+ @words.add(word)
42
55
  end
43
56
 
44
57
  ##
@@ -1,23 +1,58 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Sastrawi
2
4
  module Stemmer
3
5
  module Cache
4
6
  class ArrayCache
5
- attr_reader :data
7
+ DEFAULT_MAX_SIZE = 10_000
8
+
9
+ attr_reader :max_size
6
10
 
7
- def initialize
11
+ def initialize(max_size: DEFAULT_MAX_SIZE)
8
12
  @data = {}
13
+ @mutex = Mutex.new
14
+ @max_size = max_size
15
+ end
16
+
17
+ def data
18
+ @mutex.synchronize { @data.dup }
9
19
  end
10
20
 
11
21
  def set(key, value)
12
- @data[key.to_sym] = value
22
+ @mutex.synchronize do
23
+ evict_if_full
24
+ @data[key.to_sym] = value
25
+ end
13
26
  end
14
27
 
15
28
  def get(key)
16
- @data[key.to_sym] if @data.key?(key.to_sym)
29
+ @mutex.synchronize do
30
+ @data[key.to_sym] if @data.key?(key.to_sym)
31
+ end
17
32
  end
18
33
 
19
34
  def has?(key)
20
- @data.key?(key.to_sym)
35
+ @mutex.synchronize do
36
+ @data.key?(key.to_sym)
37
+ end
38
+ end
39
+
40
+ def size
41
+ @mutex.synchronize { @data.size }
42
+ end
43
+
44
+ def clear!
45
+ @mutex.synchronize { @data.clear }
46
+ end
47
+
48
+ private
49
+
50
+ def evict_if_full
51
+ return if @data.size < @max_size
52
+
53
+ # Remove the oldest entry (first inserted key)
54
+ oldest_key = @data.keys.first
55
+ @data.delete(oldest_key)
21
56
  end
22
57
  end
23
58
  end
@@ -10,9 +10,15 @@ module Sastrawi
10
10
  @delegated_stemmer = delegated_stemmer
11
11
  end
12
12
 
13
+ def clear_cache!
14
+ @cache.clear!
15
+ end
16
+
13
17
  def stem(text)
14
18
  normalized_text = Sastrawi::Stemmer::Filter::TextNormalizer.normalize_text(text)
15
19
 
20
+ return "" if normalized_text.empty?
21
+
16
22
  words = normalized_text.split(' ')
17
23
  stems = []
18
24
 
@@ -205,11 +205,7 @@ module Sastrawi
205
205
  end
206
206
  end
207
207
 
208
- @removals.each do |removal|
209
- if removal.affix_type == 'DP'
210
- @removals.delete(removal)
211
- end
212
- end
208
+ @removals = @removals.reject { |removal| removal.affix_type == 'DP' }
213
209
  end
214
210
  end
215
211
  end
@@ -1,8 +1,18 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Sastrawi
2
4
  module Stemmer
3
5
  module Filter
4
6
  class TextNormalizer
5
7
  def self.normalize_text(text)
8
+ return "" if text.nil?
9
+
10
+ unless text.is_a?(String)
11
+ raise ArgumentError, "expected a String, got #{text.class}"
12
+ end
13
+
14
+ return "" if text.empty?
15
+
6
16
  lowercase_text = text.downcase
7
17
  replaced_text = lowercase_text.gsub(/[^a-z0-9 -]/im, ' ')
8
18
  replaced_text = replaced_text.gsub(/( +)/im, ' ')
@@ -24,6 +24,8 @@ module Sastrawi
24
24
  def stem(text)
25
25
  normalized_text = Sastrawi::Stemmer::Filter::TextNormalizer.normalize_text(text)
26
26
 
27
+ return "" if normalized_text.empty?
28
+
27
29
  words = normalized_text.split(' ')
28
30
  stems = []
29
31
 
@@ -11,6 +11,8 @@ module Sastrawi
11
11
  # Remove stop words
12
12
 
13
13
  def remove(text)
14
+ return "" if text.nil? || text.empty?
15
+
14
16
  words = text.split(' ')
15
17
  stop_words = []
16
18
 
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Sastrawi
4
- VERSION = "0.2.0"
4
+ VERSION = "0.2.1"
5
5
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sastrawi-ruby
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Johannes Dwi Cahyo