arabic_normalizer 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 084a93775f2ff33d26782e6ff7a0ac71faae8359
4
- data.tar.gz: db45f2e595f9965a86e26b856b6623a27c36acac
3
+ metadata.gz: 595779d3887e6b4c0785c2ccd35e2d428809d118
4
+ data.tar.gz: 08b6b15c78cc24ff59d41c0bb9015b81e04901a5
5
5
  SHA512:
6
- metadata.gz: 7202d85d202569b2de4034af2002b4952535853cb33605d95dfb20452dbf9acecab851e2ddaa078d3fc78ffb8e8ac358811f228733e3e31720c056097dfaadda
7
- data.tar.gz: 0ea11e87913ab2aba14eb05396a90e314a3549518bf8749bc4a829ec1c5873c07ead1c3c9a63724d746897a167fdf854196a53148b5f45007e0f8debbe00852e
6
+ metadata.gz: 2db268cf7d49a480126bbe1e56d8c517c98fbe22b38270511537f43fb40be1eda15e1b9bc27db3a7c4f19a93c83d64b6d18297adb1b0759b17a95d87b13d6e14
7
+ data.tar.gz: c31c17c2e523ab80d53cbbfc41135875a752e030bb63f7e6db513696127b5febd3d9b6eb6cfe2730f5cd2c145d09bc6d9184d2dd6ef58daaea2346a4588c94c4
data/README.md CHANGED
@@ -7,7 +7,7 @@ ArabicNormalizer is a pure Ruby port of the Arabic Normalizer from Lucene.
7
7
  Add this line to your application's Gemfile:
8
8
 
9
9
  ```ruby
10
- gem 'arabicNormalizer'
10
+ gem 'arabic_normalizer'
11
11
  ```
12
12
 
13
13
  And then execute:
@@ -16,11 +16,11 @@ And then execute:
16
16
 
17
17
  Or install it yourself as:
18
18
 
19
- $ gem install arabicNormalizer
19
+ $ gem install arabic_normalizer
20
20
 
21
21
  ## Usage
22
22
  ```
23
- require 'arabicNormalizer'
23
+ require 'arabic_normalizer'
24
24
 
25
25
  ArabicNormalizer::normalize("مكتبٌ")
26
26
  => "مكتب"
@@ -27,19 +27,17 @@ module ArabicNormalizer
27
27
  SHADDA = "\u0651"
28
28
  SUKUN = "\u0652"
29
29
 
30
+ NORMALIZATION_RULES = {
31
+ ALEF_MADDA => ALEF, ALEF_HAMZA_ABOVE => ALEF, ALEF_HAMZA_BELOW => ALEF,
32
+ YEH_HAMZA => YEH, DOTLESS_YEH => YEH,
33
+ TEH_MARBOUTA => HEH,
34
+ WAW_HAMZA => WAW,
35
+ TATWEEL => '', FATHATAN => '', DAMMATAN => '', KASRATAN => '', FATHA => '', DAMMA => '', KASRA => '', SHADDA => '', SUKUN => ''
36
+ }
37
+
38
+ NORMALIZATION_REGEX = /[#{NORMALIZATION_RULES.keys.join}]/.freeze
39
+
30
40
  def self.normalize(string)
31
- string.gsub(/[
32
- #{ALEF_MADDA}#{ALEF_HAMZA_ABOVE}#{ALEF_HAMZA_BELOW}
33
- #{YEH_HAMZA}#{DOTLESS_YEH}
34
- #{TEH_MARBOUTA}
35
- #{WAW_HAMZA}
36
- #{TATWEEL}#{FATHATAN}#{DAMMATAN}#{KASRATAN}#{FATHA}#{DAMMA}#{KASRA}#{SHADDA}#{SUKUN}
37
- ]/x,
38
- ALEF_MADDA => ALEF, ALEF_HAMZA_ABOVE => ALEF, ALEF_HAMZA_BELOW => ALEF,
39
- YEH_HAMZA => YEH, DOTLESS_YEH => YEH,
40
- TEH_MARBOUTA => HEH,
41
- WAW_HAMZA => WAW,
42
- TATWEEL => '', FATHATAN => '', DAMMATAN => '', KASRATAN => '', FATHA => '', DAMMA => '', KASRA => '', SHADDA => '', SUKUN => ''
43
- )
41
+ string.gsub(NORMALIZATION_REGEX, NORMALIZATION_RULES)
44
42
  end
45
43
  end
@@ -1,3 +1,3 @@
1
1
  module ArabicNormalizer
2
- VERSION = "0.1.0"
2
+ VERSION = "0.1.1"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arabic_normalizer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jean Debs
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-03-22 00:00:00.000000000 Z
11
+ date: 2016-03-29 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler