ar-stemmer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Changelog.md +5 -0
- data/README.md +2 -2
- data/ar-stemmer.gemspec +3 -3
- data/lib/ar_stemmer.rb +15 -14
- metadata +5 -4
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 13dc9de75b953d2c16e3ab0d0f4d7e3165ccc5dc
|
4
|
+
data.tar.gz: ebebffcab3e84f2cc1c01cfd13d6b0f014877731
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e7e3d0cff23231ec2fa78bcbbc1cdcaeeeca668320b194acba50ca35b300e37f07d92540caf74eee480f89a46f6ea1ff3d980adf26ccbd6b247a13c7492925d1
|
7
|
+
data.tar.gz: a5dd90d9b63df99d3dcf30da6e2c5854bc712f9df6898d7f47581d2387f061b0c39b011f444699f6902f8497529897cdda3a7266c269aa37da6302984c2726f3
|
data/Changelog.md
ADDED
data/README.md
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
|
3
3
|
[Build Status](https://travis-ci.org/tomoya55/ar-stemmer)
|
4
4
|
|
5
|
-
Ar-Stemmer is
|
5
|
+
Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with some extensions.
|
6
6
|
|
7
7
|
## Installation
|
8
8
|
|
@@ -37,7 +37,7 @@ ArStemmer.stem(word, only: [:alef_lam, :waw_alef_lam])
|
|
37
37
|
ArStemmer.stem(word, except: [:yeh_noon, :waw_noon])
|
38
38
|
```
|
39
39
|
|
40
|
-
You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-
|
40
|
+
You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-L40).
|
41
41
|
|
42
42
|
## License
|
43
43
|
|
data/ar-stemmer.gemspec
CHANGED
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
|
4
4
|
|
5
5
|
Gem::Specification.new do |spec|
|
6
6
|
spec.name = "ar-stemmer"
|
7
|
-
spec.version = "0.3.0"
|
7
|
+
spec.version = "0.4.0"
|
8
8
|
spec.authors = ["Tomoya Hirano"]
|
9
9
|
spec.email = ["hiranotomoya@gmail.com"]
|
10
10
|
|
11
|
-
spec.summary = "Ar-Stemmer is
|
12
|
-
spec.description = "Ar-Stemmer is
|
11
|
+
spec.summary = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
|
12
|
+
spec.description = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
|
13
13
|
spec.homepage = "https://github.com/tomoya55/ar-stemmer"
|
14
14
|
spec.license = "MIT"
|
15
15
|
|
data/lib/ar_stemmer.rb
CHANGED
@@ -1,19 +1,19 @@
|
|
1
|
-
# ArStemmer is a
|
1
|
+
# ArStemmer is a Ruby port of Lucene's ArabicStemmer class with extensions
|
2
2
|
#
|
3
3
|
# https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
|
4
4
|
class ArStemmer
|
5
5
|
|
6
|
-
ALEF = "\u0627"
|
7
|
-
BEH = "\u0628"
|
8
|
-
TEH_MARBUTA = "\u0629"
|
9
|
-
TEH = "\u062A"
|
10
|
-
FEH = "\u0641"
|
11
|
-
KAF = "\u0643"
|
12
|
-
LAM = "\u0644"
|
13
|
-
NOON = "\u0646"
|
14
|
-
HEH = "\u0647"
|
15
|
-
WAW = "\u0648"
|
16
|
-
YEH = "\u064A"
|
6
|
+
ALEF = "\u0627" # --> ا
|
7
|
+
BEH = "\u0628" # --> ب
|
8
|
+
TEH_MARBUTA = "\u0629" # --> ة
|
9
|
+
TEH = "\u062A" # --> ت
|
10
|
+
FEH = "\u0641" # --> ف
|
11
|
+
KAF = "\u0643" # --> ك
|
12
|
+
LAM = "\u0644" # --> ل
|
13
|
+
NOON = "\u0646" # --> ن
|
14
|
+
HEH = "\u0647" # --> ه
|
15
|
+
WAW = "\u0648" # --> و
|
16
|
+
YEH = "\u064A" # --> ي
|
17
17
|
|
18
18
|
PREFIXES = {
|
19
19
|
alef_lam: ALEF + LAM,
|
@@ -22,7 +22,8 @@ class ArStemmer
|
|
22
22
|
kaf_alef_lam: KAF + ALEF + LAM,
|
23
23
|
feh_alef_lam: FEH + ALEF + LAM,
|
24
24
|
lam_lam: LAM + LAM,
|
25
|
-
waw: WAW
|
25
|
+
waw: WAW,
|
26
|
+
beh: BEH
|
26
27
|
}
|
27
28
|
|
28
29
|
SUFFIXES = {
|
@@ -84,7 +85,7 @@ class ArStemmer
|
|
84
85
|
end
|
85
86
|
|
86
87
|
def starts_with_check_length(word, prefix)
|
87
|
-
if prefix.length == 1 && word.length <
|
88
|
+
if prefix.length == 1 && word.length < 3 # the single-letter 'waw' and 'beh' prefixes require a word of at least 3 characters
|
88
89
|
false
|
89
90
|
elsif word.length < prefix.length + 2
|
90
91
|
false
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: ar-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Tomoya Hirano
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2016-
|
11
|
+
date: 2016-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
- - ">="
|
67
67
|
- !ruby/object:Gem::Version
|
68
68
|
version: '0'
|
69
|
-
description: Ar-Stemmer is
|
69
|
+
description: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
|
70
70
|
email:
|
71
71
|
- hiranotomoya@gmail.com
|
72
72
|
executables: []
|
@@ -76,6 +76,7 @@ files:
|
|
76
76
|
- ".gitignore"
|
77
77
|
- ".travis.yml"
|
78
78
|
- CODE_OF_CONDUCT.md
|
79
|
+
- Changelog.md
|
79
80
|
- Gemfile
|
80
81
|
- LICENSE.txt
|
81
82
|
- README.md
|
@@ -106,5 +107,5 @@ rubyforge_project:
|
|
106
107
|
rubygems_version: 2.4.5
|
107
108
|
signing_key:
|
108
109
|
specification_version: 4
|
109
|
-
summary: Ar-Stemmer is
|
110
|
+
summary: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
|
110
111
|
test_files: []
|