ar-stemmer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ceb4076e652815af29f052fdee80703387ec221
4
- data.tar.gz: a41c51a64dcb5e3c5aa8c907b107fdcae224a7c8
3
+ metadata.gz: 13dc9de75b953d2c16e3ab0d0f4d7e3165ccc5dc
4
+ data.tar.gz: ebebffcab3e84f2cc1c01cfd13d6b0f014877731
5
5
  SHA512:
6
- metadata.gz: 8d41ccb2cabfb1eb171228f9a282edc89f4c1cd935114623e202b79b0ef364e0858b6598afe6c81a5830e732151931b7ef7c2b6be14639a9eedcfa76ad045610
7
- data.tar.gz: b96df525795e8d4d7e325f19aa2bb27029f9c5a410bad9b552e8208a86532978be2c419ae9bab6ab6dd6ce4272410a25281dfb7764cccd55843ec92c2c834bd8
6
+ metadata.gz: e7e3d0cff23231ec2fa78bcbbc1cdcaeeeca668320b194acba50ca35b300e37f07d92540caf74eee480f89a46f6ea1ff3d980adf26ccbd6b247a13c7492925d1
7
+ data.tar.gz: a5dd90d9b63df99d3dcf30da6e2c5854bc712f9df6898d7f47581d2387f061b0c39b011f444699f6902f8497529897cdda3a7266c269aa37da6302984c2726f3
@@ -0,0 +1,5 @@
1
+ ## 0.4.0
2
+
3
+ * Add "BEH" prefix rule
4
+
5
+ _@mshaka_
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/tomoya55/ar-stemmer.svg?branch=master)](https://travis-ci.org/tomoya55/ar-stemmer)
4
4
 
5
- Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene.
5
+ Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with some extensions.
6
6
 
7
7
  ## Installation
8
8
 
@@ -37,7 +37,7 @@ ArStemmer.stem(word, only: [:alef_lam, :waw_alef_lam])
37
37
  ArStemmer.stem(word, except: [:yeh_noon, :waw_noon])
38
38
  ```
39
39
 
40
- You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-L39).
40
+ You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-L40).
41
41
 
42
42
  ## License
43
43
 
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "ar-stemmer"
7
- spec.version = "0.3.0"
7
+ spec.version = "0.4.0"
8
8
  spec.authors = ["Tomoya Hirano"]
9
9
  spec.email = ["hiranotomoya@gmail.com"]
10
10
 
11
- spec.summary = "Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene."
12
- spec.description = "Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene."
11
+ spec.summary = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
12
+ spec.description = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
13
13
  spec.homepage = "https://github.com/tomoya55/ar-stemmer"
14
14
  spec.license = "MIT"
15
15
 
@@ -1,19 +1,19 @@
1
- # ArStemmer is a pure ruby port of Lucene's ArabicStemmer class
1
+ # ArStemmer is a ruby port of Lucene's ArabicStemmer class with extensions
2
2
  #
3
3
  # https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
4
4
  class ArStemmer
5
5
 
6
- ALEF = "\u0627"
7
- BEH = "\u0628"
8
- TEH_MARBUTA = "\u0629"
9
- TEH = "\u062A"
10
- FEH = "\u0641"
11
- KAF = "\u0643"
12
- LAM = "\u0644"
13
- NOON = "\u0646"
14
- HEH = "\u0647"
15
- WAW = "\u0648"
16
- YEH = "\u064A"
6
+ ALEF = "\u0627" # --> ا
7
+ BEH = "\u0628" # --> ب
8
+ TEH_MARBUTA = "\u0629" # --> ة
9
+ TEH = "\u062A" # --> ت
10
+ FEH = "\u0641" # --> ف
11
+ KAF = "\u0643" # --> ك
12
+ LAM = "\u0644" # --> ل
13
+ NOON = "\u0646" # --> ن
14
+ HEH = "\u0647" # --> ه
15
+ WAW = "\u0648" # --> و
16
+ YEH = "\u064A" # --> ي
17
17
 
18
18
  PREFIXES = {
19
19
  alef_lam: ALEF + LAM,
@@ -22,7 +22,8 @@ class ArStemmer
22
22
  kaf_alef_lam: KAF + ALEF + LAM,
23
23
  feh_alef_lam: FEH + ALEF + LAM,
24
24
  lam_lam: LAM + LAM,
25
- waw: WAW
25
+ waw: WAW,
26
+ beh: BEH
26
27
  }
27
28
 
28
29
  SUFFIXES = {
@@ -84,7 +85,7 @@ class ArStemmer
84
85
  end
85
86
 
86
87
  def starts_with_check_length(word, prefix)
87
- if prefix.length == 1 && word.length < 4 # wa- prefix requires at least 3 characters
88
+ if prefix.length == 1 && word.length < 3 # single-letter prefixes ('waw', 'beh') require a word of at least 3 characters
88
89
  false
89
90
  elsif word.length < prefix.length + 2
90
91
  false
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ar-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomoya Hirano
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-03-04 00:00:00.000000000 Z
11
+ date: 2016-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description: Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene.
69
+ description: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
70
70
  email:
71
71
  - hiranotomoya@gmail.com
72
72
  executables: []
@@ -76,6 +76,7 @@ files:
76
76
  - ".gitignore"
77
77
  - ".travis.yml"
78
78
  - CODE_OF_CONDUCT.md
79
+ - Changelog.md
79
80
  - Gemfile
80
81
  - LICENSE.txt
81
82
  - README.md
@@ -106,5 +107,5 @@ rubyforge_project:
106
107
  rubygems_version: 2.4.5
107
108
  signing_key:
108
109
  specification_version: 4
109
- summary: Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene.
110
+ summary: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
110
111
  test_files: []