arabic_normalizer 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +3 -3
- data/lib/arabic_normalizer.rb +11 -13
- data/lib/arabic_normalizer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 595779d3887e6b4c0785c2ccd35e2d428809d118
|
4
|
+
data.tar.gz: 08b6b15c78cc24ff59d41c0bb9015b81e04901a5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2db268cf7d49a480126bbe1e56d8c517c98fbe22b38270511537f43fb40be1eda15e1b9bc27db3a7c4f19a93c83d64b6d18297adb1b0759b17a95d87b13d6e14
|
7
|
+
data.tar.gz: c31c17c2e523ab80d53cbbfc41135875a752e030bb63f7e6db513696127b5febd3d9b6eb6cfe2730f5cd2c145d09bc6d9184d2dd6ef58daaea2346a4588c94c4
|
data/README.md
CHANGED
@@ -7,7 +7,7 @@ ArabicNormalizer is pure Ruby port of Arabic Normalizer from Lucene.
|
|
7
7
|
Add this line to your application's Gemfile:
|
8
8
|
|
9
9
|
```ruby
|
10
|
-
gem '
|
10
|
+
gem 'arabic_normalizer'
|
11
11
|
```
|
12
12
|
|
13
13
|
And then execute:
|
@@ -16,11 +16,11 @@ And then execute:
|
|
16
16
|
|
17
17
|
Or install it yourself as:
|
18
18
|
|
19
|
-
$ gem install
|
19
|
+
$ gem install arabic_normalizer
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
```
|
23
|
-
require '
|
23
|
+
require 'arabic_normalizer'
|
24
24
|
|
25
25
|
ArabicNormalizer::normalize("مكتبٌ")
|
26
26
|
=> "مكتب"
|
data/lib/arabic_normalizer.rb
CHANGED
@@ -27,19 +27,17 @@ module ArabicNormalizer
|
|
27
27
|
SHADDA = "\u0651"
|
28
28
|
SUKUN = "\u0652"
|
29
29
|
|
30
|
+
NORMALIZATION_RULES = {
|
31
|
+
ALEF_MADDA => ALEF, ALEF_HAMZA_ABOVE => ALEF, ALEF_HAMZA_BELOW => ALEF,
|
32
|
+
YEH_HAMZA => YEH, DOTLESS_YEH => YEH,
|
33
|
+
TEH_MARBOUTA => HEH,
|
34
|
+
WAW_HAMZA => WAW,
|
35
|
+
TATWEEL => '', FATHATAN => '', DAMMATAN => '', KASRATAN => '', FATHA => '', DAMMA => '', KASRA => '', SHADDA => '', SUKUN => ''
|
36
|
+
}
|
37
|
+
|
38
|
+
NORMALIZATION_REGEX = /[#{NORMALIZATION_RULES.keys.join}]/.freeze
|
39
|
+
|
30
40
|
def self.normalize(string)
|
31
|
-
string.gsub(/[
|
32
|
-
#{ALEF_MADDA}#{ALEF_HAMZA_ABOVE}#{ALEF_HAMZA_BELOW}
|
33
|
-
#{YEH_HAMZA}#{DOTLESS_YEH}
|
34
|
-
#{TEH_MARBOUTA}
|
35
|
-
#{WAW_HAMZA}
|
36
|
-
#{TATWEEL}#{FATHATAN}#{DAMMATAN}#{KASRATAN}#{FATHA}#{DAMMA}#{KASRA}#{SHADDA}#{SUKUN}
|
37
|
-
]/x,
|
38
|
-
ALEF_MADDA => ALEF, ALEF_HAMZA_ABOVE => ALEF, ALEF_HAMZA_BELOW => ALEF,
|
39
|
-
YEH_HAMZA => YEH, DOTLESS_YEH => YEH,
|
40
|
-
TEH_MARBOUTA => HEH,
|
41
|
-
WAW_HAMZA => WAW,
|
42
|
-
TATWEEL => '', FATHATAN => '', DAMMATAN => '', KASRATAN => '', FATHA => '', DAMMA => '', KASRA => '', SHADDA => '', SUKUN => ''
|
43
|
-
)
|
41
|
+
string.gsub(NORMALIZATION_REGEX, NORMALIZATION_RULES)
|
44
42
|
end
|
45
43
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: arabic_normalizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.0
|
4
|
+
version: 0.1.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jean Debs
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-03-
|
11
|
+
date: 2016-03-29 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|