ar-stemmer 0.1.1 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 029d013e8fd23e2964db1941d9f00a0c61eb12b2
4
- data.tar.gz: 81cda1f2db208084d8a527f6155f0c5e799fce02
3
+ metadata.gz: 2dc21e273c406dd6d38f7517a7a560e38635a5bf
4
+ data.tar.gz: 26fc0d3b1055d9c72d50a14dafec6b73f0df0c2f
5
5
  SHA512:
6
- metadata.gz: 7bb90e9a007c4dc50bb367e871cfbd5e8871c47c8df1594f144a4d9d5ed07a58ab710d003e987b94d20eb654e4bdd9c9b84c924a0c1a32e43dabcdba3599caf9
7
- data.tar.gz: 8f8b5171731c0f0fd0e59cd9be517544cd98c6a95906c40917fc5ed6dc70425f6661cb7d7d0777a0b4375498c5bd5642a6a1e2b9d6b28adcfbdc11d2ba92c834
6
+ metadata.gz: b5a86a69fd409515cfb0e6ef19efee86807506edad369966656062715542004e4cab9526d95211f570d15538de7eab617ea70f451522916baaa3209adac18f06
7
+ data.tar.gz: cda405430462c6591d48bb026237a5d1872f100633c53b024933213b20ffd90c02d40fd7f2ffa9c64e8f67f21c79ce8044060c3441fe194a2894869be3904f42
data/README.md CHANGED
@@ -30,6 +30,12 @@ ArStemmer.stem("الدونات")
30
30
  "دونات"
31
31
  ```
32
32
 
33
+ If you want to turn off specific rules for your own purposes, pass their names to the `disable` option.
34
+
35
+ ```
36
+ ArStemmer.stem(word, disable: [:yeh_noon, :waw_noon])
37
+ ```
38
+
33
39
  ## License
34
40
 
35
41
  The gem is available as open source under the terms of the [MIT License](http://opensource.org/licenses/MIT).
@@ -4,7 +4,7 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "ar-stemmer"
7
- spec.version = "0.1.1"
7
+ spec.version = "0.2.0"
8
8
  spec.authors = ["Tomoya Hirano"]
9
9
  spec.email = ["hiranotomoya@gmail.com"]
10
10
 
@@ -15,37 +15,38 @@ class ArStemmer
15
15
  WAW = "\u0648"
16
16
  YEH = "\u064A"
17
17
 
18
- PREFIXES = [
19
- ALEF + LAM,
20
- WAW + ALEF + LAM,
21
- BEH + ALEF + LAM,
22
- KAF + ALEF + LAM,
23
- FEH + ALEF + LAM,
24
- LAM + LAM,
25
- WAW
26
- ]
18
+ PREFIXES = {
19
+ alef_lam: ALEF + LAM,
20
+ waw_alef_lam: WAW + ALEF + LAM,
21
+ beh_alef_lam: BEH + ALEF + LAM,
22
+ kaf_alef_lam: KAF + ALEF + LAM,
23
+ feh_alef_lam: FEH + ALEF + LAM,
24
+ lam_lam: LAM + LAM,
25
+ waw: WAW
26
+ }
27
27
 
28
- SUFFIXES = [
29
- HEH + ALEF,
30
- ALEF + NOON,
31
- ALEF + TEH,
32
- WAW + NOON,
33
- YEH + NOON,
34
- YEH + HEH,
35
- YEH + TEH_MARBUTA,
36
- HEH,
37
- TEH_MARBUTA,
38
- YEH
39
- ]
28
+ SUFFIXES = {
29
+ heh_alef: HEH + ALEF,
30
+ alef_noon: ALEF + NOON,
31
+ alef_teh: ALEF + TEH,
32
+ waw_noon: WAW + NOON,
33
+ yeh_noon: YEH + NOON,
34
+ yeh_heh: YEH + HEH,
35
+ yeh_teh_marbuta: YEH + TEH_MARBUTA,
36
+ heh: HEH,
37
+ teh_marbuta: TEH_MARBUTA,
38
+ yeh: YEH
39
+ }
40
40
 
41
- def self.stem(word)
42
- new(word).stem
41
+ def self.stem(word, options = {})
42
+ new(word, options).stem
43
43
  end
44
44
 
45
- attr_reader :word
45
+ attr_reader :word, :disabled
46
46
 
47
- def initialize(word)
47
+ def initialize(word, options = {})
48
48
  @word = word.dup
49
+ @disabled = options[:disable] || []
49
50
  end
50
51
 
51
52
  def stem
@@ -54,33 +55,39 @@ class ArStemmer
54
55
  word
55
56
  end
56
57
 
57
- def stem_prefix
58
- PREFIXES.each do |prefix|
59
- @word = word[prefix.length .. -1] if starts_with_check_length(word, prefix)
58
+ private
59
+
60
+ def rules(rule_set)
61
+ rule_set.reject {|k, v| disabled.include?(k) }.values
60
62
  end
61
- end
62
63
 
63
- def stem_suffix
64
- SUFFIXES.each do |suffix|
65
- @word = word[0 .. -(suffix.length + 1)] if ends_with_check_length(word, suffix)
64
+ def stem_prefix
65
+ rules(PREFIXES).each do |prefix|
66
+ @word = word[prefix.length .. -1] if starts_with_check_length(word, prefix)
67
+ end
66
68
  end
67
- end
68
69
 
69
- def starts_with_check_length(word, prefix)
70
- if prefix.length == 1 && word.length < 4 # wa- prefix requires at least 3 characters
71
- false
72
- elsif word.length < prefix.length + 2
73
- false
74
- else
75
- word.start_with?(prefix)
70
+ def stem_suffix
71
+ rules(SUFFIXES).each do |suffix|
72
+ @word = word[0 .. -(suffix.length + 1)] if ends_with_check_length(word, suffix)
73
+ end
76
74
  end
77
- end
78
75
 
79
- def ends_with_check_length(word, suffix)
80
- if word.length < suffix.length + 2
81
- false
82
- else
83
- word.end_with?(suffix)
76
+ def starts_with_check_length(word, prefix)
77
+ if prefix.length == 1 && word.length < 4 # wa- prefix requires at least 3 characters
78
+ false
79
+ elsif word.length < prefix.length + 2
80
+ false
81
+ else
82
+ word.start_with?(prefix)
83
+ end
84
+ end
85
+
86
+ def ends_with_check_length(word, suffix)
87
+ if word.length < suffix.length + 2
88
+ false
89
+ else
90
+ word.end_with?(suffix)
91
+ end
84
92
  end
85
- end
86
93
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ar-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomoya Hirano
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-02-08 00:00:00.000000000 Z
11
+ date: 2016-02-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler