ar-stemmer 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Changelog.md +5 -0
- data/README.md +2 -2
- data/ar-stemmer.gemspec +3 -3
- data/lib/ar_stemmer.rb +15 -14
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13dc9de75b953d2c16e3ab0d0f4d7e3165ccc5dc
|
4
|
+
data.tar.gz: ebebffcab3e84f2cc1c01cfd13d6b0f014877731
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e7e3d0cff23231ec2fa78bcbbc1cdcaeeeca668320b194acba50ca35b300e37f07d92540caf74eee480f89a46f6ea1ff3d980adf26ccbd6b247a13c7492925d1
|
7
|
+
data.tar.gz: a5dd90d9b63df99d3dcf30da6e2c5854bc712f9df6898d7f47581d2387f061b0c39b011f444699f6902f8497529897cdda3a7266c269aa37da6302984c2726f3
|
data/Changelog.md
ADDED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Build Status](https://travis-ci.org/tomoya55/ar-stemmer.svg?branch=master)](https://travis-ci.org/tomoya55/ar-stemmer)
|
4
4
|
|
5
|
-
Ar-Stemmer is
|
5
|
+
Ar-Stemmer is a Ruby port of the Arabic Stemmer from Lucene, with some extensions.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -37,7 +37,7 @@ ArStemmer.stem(word, only: [:alef_lam, :waw_alef_lam])
|
|
37
37
|
ArStemmer.stem(word, except: [:yeh_noon, :waw_noon])
|
38
38
|
```
|
39
39
|
|
40
|
-
You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-
|
40
|
+
You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-L40).
|
41
41
|
|
42
42
|
## License
|
43
43
|
|
data/ar-stemmer.gemspec
CHANGED
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "ar-stemmer"
|
7
|
-
spec.version = "0.
|
7
|
+
spec.version = "0.4.0"
|
8
8
|
spec.authors = ["Tomoya Hirano"]
|
9
9
|
spec.email = ["hiranotomoya@gmail.com"]
|
10
10
|
|
11
|
-
spec.summary = "Ar-Stemmer is
|
12
|
-
spec.description = "Ar-Stemmer is
|
11
|
+
spec.summary = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
|
12
|
+
spec.description = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
|
13
13
|
spec.homepage = "https://github.com/tomoya55/ar-stemmer"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
data/lib/ar_stemmer.rb
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
# ArStemmer is a
|
1
|
+
# ArStemmer is a Ruby port of Lucene's ArabicStemmer class, with extensions.
|
2
2
|
#
|
3
3
|
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
|
4
4
|
class ArStemmer
|
5
5
|
|
6
|
-
ALEF = "\u0627"
|
7
|
-
BEH = "\u0628"
|
8
|
-
TEH_MARBUTA = "\u0629"
|
9
|
-
TEH = "\u062A"
|
10
|
-
FEH = "\u0641"
|
11
|
-
KAF = "\u0643"
|
12
|
-
LAM = "\u0644"
|
13
|
-
NOON = "\u0646"
|
14
|
-
HEH = "\u0647"
|
15
|
-
WAW = "\u0648"
|
16
|
-
YEH = "\u064A"
|
6
|
+
ALEF = "\u0627" # --> ا
|
7
|
+
BEH = "\u0628" # --> ب
|
8
|
+
TEH_MARBUTA = "\u0629" # --> ة
|
9
|
+
TEH = "\u062A" # --> ت
|
10
|
+
FEH = "\u0641" # --> ف
|
11
|
+
KAF = "\u0643" # --> ك
|
12
|
+
LAM = "\u0644" # --> ل
|
13
|
+
NOON = "\u0646" # --> ن
|
14
|
+
HEH = "\u0647" # --> ه
|
15
|
+
WAW = "\u0648" # --> و
|
16
|
+
YEH = "\u064A" # --> ي
|
17
17
|
|
18
18
|
PREFIXES = {
|
19
19
|
alef_lam: ALEF + LAM,
|
@@ -22,7 +22,8 @@ class ArStemmer
|
|
22
22
|
kaf_alef_lam: KAF + ALEF + LAM,
|
23
23
|
feh_alef_lam: FEH + ALEF + LAM,
|
24
24
|
lam_lam: LAM + LAM,
|
25
|
-
waw: WAW
|
25
|
+
waw: WAW,
|
26
|
+
beh: BEH
|
26
27
|
}
|
27
28
|
|
28
29
|
SUFFIXES = {
|
@@ -84,7 +85,7 @@ class ArStemmer
|
|
84
85
|
end
|
85
86
|
|
86
87
|
def starts_with_check_length(word, prefix)
|
87
|
-
if prefix.length == 1 && word.length <
|
88
|
+
if prefix.length == 1 && word.length < 3 # single-letter prefixes ('waw', 'beh') require a word of at least 3 characters
|
88
89
|
false
|
89
90
|
elsif word.length < prefix.length + 2
|
90
91
|
false
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ar-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomoya Hirano
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
description: Ar-Stemmer is
|
69
|
+
description: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
|
70
70
|
email:
|
71
71
|
- hiranotomoya@gmail.com
|
72
72
|
executables: []
|
@@ -76,6 +76,7 @@ files:
|
|
76
76
|
- ".gitignore"
|
77
77
|
- ".travis.yml"
|
78
78
|
- CODE_OF_CONDUCT.md
|
79
|
+
- Changelog.md
|
79
80
|
- Gemfile
|
80
81
|
- LICENSE.txt
|
81
82
|
- README.md
|
@@ -106,5 +107,5 @@ rubyforge_project:
|
|
106
107
|
rubygems_version: 2.4.5
|
107
108
|
signing_key:
|
108
109
|
specification_version: 4
|
109
|
-
summary: Ar-Stemmer is
|
110
|
+
summary: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
|
110
111
|
test_files: []
|