ar-stemmer 0.1.1 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 029d013e8fd23e2964db1941d9f00a0c61eb12b2
4
- data.tar.gz: 81cda1f2db208084d8a527f6155f0c5e799fce02
3
+ metadata.gz: 2dc21e273c406dd6d38f7517a7a560e38635a5bf
4
+ data.tar.gz: 26fc0d3b1055d9c72d50a14dafec6b73f0df0c2f
5
5
  SHA512:
6
- metadata.gz: 7bb90e9a007c4dc50bb367e871cfbd5e8871c47c8df1594f144a4d9d5ed07a58ab710d003e987b94d20eb654e4bdd9c9b84c924a0c1a32e43dabcdba3599caf9
7
- data.tar.gz: 8f8b5171731c0f0fd0e59cd9be517544cd98c6a95906c40917fc5ed6dc70425f6661cb7d7d0777a0b4375498c5bd5642a6a1e2b9d6b28adcfbdc11d2ba92c834
6
+ metadata.gz: b5a86a69fd409515cfb0e6ef19efee86807506edad369966656062715542004e4cab9526d95211f570d15538de7eab617ea70f451522916baaa3209adac18f06
7
+ data.tar.gz: cda405430462c6591d48bb026237a5d1872f100633c53b024933213b20ffd90c02d40fd7f2ffa9c64e8f67f21c79ce8044060c3441fe194a2894869be3904f42
data/README.md CHANGED
@@ -30,6 +30,12 @@ ArStemmer.stem("الدونات")
30
30
  "دونات"
31
31
  ```
32
32
 
33
+ To turn off specific stemming rules, pass their names to the `disable` option.
34
+
35
+ ```
36
+ ArStemmer.stem(word, disable: [:yeh_noon, :waw_noon])
37
+ ```
38
+
33
39
  ## License
34
40
 
35
41
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "ar-stemmer"
7
- spec.version = "0.1.1"
7
+ spec.version = "0.2.0"
8
8
  spec.authors = ["Tomoya Hirano"]
9
9
  spec.email = ["hiranotomoya@gmail.com"]
10
10
 
@@ -15,37 +15,38 @@ class ArStemmer
15
15
  WAW = "\u0648"
16
16
  YEH = "\u064A"
17
17
 
18
- PREFIXES = [
19
- ALEF + LAM,
20
- WAW + ALEF + LAM,
21
- BEH + ALEF + LAM,
22
- KAF + ALEF + LAM,
23
- FEH + ALEF + LAM,
24
- LAM + LAM,
25
- WAW
26
- ]
18
+ PREFIXES = {
19
+ alef_lam: ALEF + LAM,
20
+ waw_alef_lam: WAW + ALEF + LAM,
21
+ beh_alef_lam: BEH + ALEF + LAM,
22
+ kaf_alef_lam: KAF + ALEF + LAM,
23
+ feh_alef_lam: FEH + ALEF + LAM,
24
+ lam_lam: LAM + LAM,
25
+ waw: WAW
26
+ }
27
27
 
28
- SUFFIXES = [
29
- HEH + ALEF,
30
- ALEF + NOON,
31
- ALEF + TEH,
32
- WAW + NOON,
33
- YEH + NOON,
34
- YEH + HEH,
35
- YEH + TEH_MARBUTA,
36
- HEH,
37
- TEH_MARBUTA,
38
- YEH
39
- ]
28
+ SUFFIXES = {
29
+ heh_alef: HEH + ALEF,
30
+ alef_noon: ALEF + NOON,
31
+ alef_teh: ALEF + TEH,
32
+ waw_noon: WAW + NOON,
33
+ yeh_noon: YEH + NOON,
34
+ yeh_heh: YEH + HEH,
35
+ yeh_teh_marbuta: YEH + TEH_MARBUTA,
36
+ heh: HEH,
37
+ teh_marbuta: TEH_MARBUTA,
38
+ yeh: YEH
39
+ }
40
40
 
41
- def self.stem(word)
42
- new(word).stem
41
+ def self.stem(word, options = {})
42
+ new(word, options).stem
43
43
  end
44
44
 
45
- attr_reader :word
45
+ attr_reader :word, :disabled
46
46
 
47
- def initialize(word)
47
+ def initialize(word, options = {})
48
48
  @word = word.dup
49
+ @disabled = options[:disable] || []
49
50
  end
50
51
 
51
52
  def stem
@@ -54,33 +55,39 @@ class ArStemmer
54
55
  word
55
56
  end
56
57
 
57
- def stem_prefix
58
- PREFIXES.each do |prefix|
59
- @word = word[prefix.length .. -1] if starts_with_check_length(word, prefix)
58
+ private
59
+
60
+ def rules(rule_set)
61
+ rule_set.reject {|k, v| disabled.include?(k) }.values
60
62
  end
61
- end
62
63
 
63
- def stem_suffix
64
- SUFFIXES.each do |suffix|
65
- @word = word[0 .. -(suffix.length + 1)] if ends_with_check_length(word, suffix)
64
+ def stem_prefix
65
+ rules(PREFIXES).each do |prefix|
66
+ @word = word[prefix.length .. -1] if starts_with_check_length(word, prefix)
67
+ end
66
68
  end
67
- end
68
69
 
69
- def starts_with_check_length(word, prefix)
70
- if prefix.length == 1 && word.length < 4 # wa- prefix requires at least 3 characters
71
- false
72
- elsif word.length < prefix.length + 2
73
- false
74
- else
75
- word.start_with?(prefix)
70
+ def stem_suffix
71
+ rules(SUFFIXES).each do |suffix|
72
+ @word = word[0 .. -(suffix.length + 1)] if ends_with_check_length(word, suffix)
73
+ end
76
74
  end
77
- end
78
75
 
79
- def ends_with_check_length(word, suffix)
80
- if word.length < suffix.length + 2
81
- false
82
- else
83
- word.end_with?(suffix)
76
+ def starts_with_check_length(word, prefix)
77
+ if prefix.length == 1 && word.length < 4 # wa- prefix requires at least 3 characters
78
+ false
79
+ elsif word.length < prefix.length + 2
80
+ false
81
+ else
82
+ word.start_with?(prefix)
83
+ end
84
+ end
85
+
86
+ def ends_with_check_length(word, suffix)
87
+ if word.length < suffix.length + 2
88
+ false
89
+ else
90
+ word.end_with?(suffix)
91
+ end
84
92
  end
85
- end
86
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ar-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomoya Hirano
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-08 00:00:00.000000000 Z
11
+ date: 2016-02-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler