ar-stemmer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 6ceb4076e652815af29f052fdee80703387ec221
4
- data.tar.gz: a41c51a64dcb5e3c5aa8c907b107fdcae224a7c8
3
+ metadata.gz: 13dc9de75b953d2c16e3ab0d0f4d7e3165ccc5dc
4
+ data.tar.gz: ebebffcab3e84f2cc1c01cfd13d6b0f014877731
5
5
  SHA512:
6
- metadata.gz: 8d41ccb2cabfb1eb171228f9a282edc89f4c1cd935114623e202b79b0ef364e0858b6598afe6c81a5830e732151931b7ef7c2b6be14639a9eedcfa76ad045610
7
- data.tar.gz: b96df525795e8d4d7e325f19aa2bb27029f9c5a410bad9b552e8208a86532978be2c419ae9bab6ab6dd6ce4272410a25281dfb7764cccd55843ec92c2c834bd8
6
+ metadata.gz: e7e3d0cff23231ec2fa78bcbbc1cdcaeeeca668320b194acba50ca35b300e37f07d92540caf74eee480f89a46f6ea1ff3d980adf26ccbd6b247a13c7492925d1
7
+ data.tar.gz: a5dd90d9b63df99d3dcf30da6e2c5854bc712f9df6898d7f47581d2387f061b0c39b011f444699f6902f8497529897cdda3a7266c269aa37da6302984c2726f3
@@ -0,0 +1,5 @@
1
+ ## 0.4.0
2
+
3
+ * Add "BEH" prefix rule
4
+
5
+ _@mshaka_
data/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  [![Build Status](https://travis-ci.org/tomoya55/ar-stemmer.svg?branch=master)](https://travis-ci.org/tomoya55/ar-stemmer)
4
4
 
5
- Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene.
5
+ Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with some extensions.
6
6
 
7
7
  ## Installation
8
8
 
@@ -37,7 +37,7 @@ ArStemmer.stem(word, only: [:alef_lam, :waw_alef_lam])
37
37
  ArStemmer.stem(word, except: [:yeh_noon, :waw_noon])
38
38
  ```
39
39
 
40
- You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-L39).
40
+ You can find the rule names in [the source code](https://github.com/tomoya55/ar-stemmer/blob/master/lib/ar_stemmer.rb#L18-L40).
41
41
 
42
42
  ## License
43
43
 
@@ -4,12 +4,12 @@ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
 
5
5
  Gem::Specification.new do |spec|
6
6
  spec.name = "ar-stemmer"
7
- spec.version = "0.3.0"
7
+ spec.version = "0.4.0"
8
8
  spec.authors = ["Tomoya Hirano"]
9
9
  spec.email = ["hiranotomoya@gmail.com"]
10
10
 
11
- spec.summary = "Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene."
12
- spec.description = "Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene."
11
+ spec.summary = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
12
+ spec.description = "Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions."
13
13
  spec.homepage = "https://github.com/tomoya55/ar-stemmer"
14
14
  spec.license = "MIT"
15
15
 
@@ -1,19 +1,19 @@
1
- # ArStemmer is a pure ruby port of Lucene's ArabicStemmer class
1
+ # ArStemmer is a ruby port of Lucene's ArabicStemmer class with extensions
2
2
  #
3
3
  # https://github.com/apache/lucene-solr/blob/master/lucene/analysis/common/src/java/org/apache/lucene/analysis/ar/ArabicStemmer.java
4
4
  class ArStemmer
5
5
 
6
- ALEF = "\u0627"
7
- BEH = "\u0628"
8
- TEH_MARBUTA = "\u0629"
9
- TEH = "\u062A"
10
- FEH = "\u0641"
11
- KAF = "\u0643"
12
- LAM = "\u0644"
13
- NOON = "\u0646"
14
- HEH = "\u0647"
15
- WAW = "\u0648"
16
- YEH = "\u064A"
6
+ ALEF = "\u0627" # --> ا
7
+ BEH = "\u0628" # --> ب
8
+ TEH_MARBUTA = "\u0629" # --> ة
9
+ TEH = "\u062A" # --> ت
10
+ FEH = "\u0641" # --> ف
11
+ KAF = "\u0643" # --> ك
12
+ LAM = "\u0644" # --> ل
13
+ NOON = "\u0646" # --> ن
14
+ HEH = "\u0647" # --> ه
15
+ WAW = "\u0648" # --> و
16
+ YEH = "\u064A" # --> ي
17
17
 
18
18
  PREFIXES = {
19
19
  alef_lam: ALEF + LAM,
@@ -22,7 +22,8 @@ class ArStemmer
22
22
  kaf_alef_lam: KAF + ALEF + LAM,
23
23
  feh_alef_lam: FEH + ALEF + LAM,
24
24
  lam_lam: LAM + LAM,
25
- waw: WAW
25
+ waw: WAW,
26
+ beh: BEH
26
27
  }
27
28
 
28
29
  SUFFIXES = {
@@ -84,7 +85,7 @@ class ArStemmer
84
85
  end
85
86
 
86
87
  def starts_with_check_length(word, prefix)
87
- if prefix.length == 1 && word.length < 4 # wa- prefix requires at least 3 characters
88
+ if prefix.length == 1 && word.length < 3 # single-letter prefixes ('waw', 'beh') require a word of at least 3 characters
88
89
  false
89
90
  elsif word.length < prefix.length + 2
90
91
  false
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: ar-stemmer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Tomoya Hirano
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-03-04 00:00:00.000000000 Z
11
+ date: 2016-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -66,7 +66,7 @@ dependencies:
66
66
  - - ">="
67
67
  - !ruby/object:Gem::Version
68
68
  version: '0'
69
- description: Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene.
69
+ description: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
70
70
  email:
71
71
  - hiranotomoya@gmail.com
72
72
  executables: []
@@ -76,6 +76,7 @@ files:
76
76
  - ".gitignore"
77
77
  - ".travis.yml"
78
78
  - CODE_OF_CONDUCT.md
79
+ - Changelog.md
79
80
  - Gemfile
80
81
  - LICENSE.txt
81
82
  - README.md
@@ -106,5 +107,5 @@ rubyforge_project:
106
107
  rubygems_version: 2.4.5
107
108
  signing_key:
108
109
  specification_version: 4
109
- summary: Ar-Stemmer is pure Ruby port of Arabic Stemmer from Lucene.
110
+ summary: Ar-Stemmer is a Ruby port of Arabic Stemmer from Lucene with extensions.
110
111
  test_files: []