uea-stemmer 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -4
- data/VERSION +1 -1
- data/lib/uea-stemmer.rb +2 -0
- data/test/uea_stemmer_test.rb +19 -0
- data/uea-stemmer.gemspec +7 -6
- metadata +17 -5
data/README.rdoc
CHANGED
@@ -14,10 +14,9 @@ This is a port to Ruby from the port to Java from the original Perl script by Ma
|
|
14
14
|
|
15
15
|
== Installation
|
16
16
|
|
17
|
-
Install the gem
|
17
|
+
Install the gem:
|
18
18
|
|
19
|
-
gem
|
20
|
-
sudo gem install ealdent-uea-stemmer
|
19
|
+
gem install uea-stemmer
|
21
20
|
|
22
21
|
Install the gem from source:
|
23
22
|
|
@@ -25,6 +24,8 @@ Install the gem from source:
|
|
25
24
|
cd uea-stemmer
|
26
25
|
rake install
|
27
26
|
|
27
|
+
Depending on your setup, you may need to use +sudo+ for either of these methods.
|
28
|
+
|
28
29
|
== Example Usage
|
29
30
|
|
30
31
|
Typical usage:
|
@@ -40,7 +41,7 @@ Typical usage:
|
|
40
41
|
'bodies'.stem # body
|
41
42
|
'ordained'.stem # ordain
|
42
43
|
|
43
|
-
You can also extract the stemmed word along with the rule by using the
|
44
|
+
You can also extract the stemmed word along with the rule by using the +stem_with_rule+ method.
|
44
45
|
|
45
46
|
stem = stemmer.stem_with_rule('invited') # Word('invite', Rule #22.3)
|
46
47
|
puts stem.rule # rule #22.3 (remove -d when the word ends in -ited)
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.10.0
|
1
|
+
0.10.1
|
data/lib/uea-stemmer.rb
CHANGED
@@ -326,6 +326,8 @@ class UEAStemmer
|
|
326
326
|
@rules << EndingRule.new('oded', 1, 61.1)
|
327
327
|
@rules << EndingRule.new('ated', 1, 61)
|
328
328
|
@rules << CustomRule.new(/.*\w\weds?$/, 2, 62)
|
329
|
+
@rules << EndingRule.new('des', 1, 63.10) # Fix for words like grades, escapades, abodes
|
330
|
+
@rules << EndingRule.new('res', 1, 63.9) # Fix for words like fires, acres, wires, cares
|
329
331
|
@rules << EndingRule.new('pes', 1, 63.8)
|
330
332
|
@rules << EndingRule.new('mes', 1, 63.7)
|
331
333
|
@rules << EndingRule.new('ones', 1, 63.6)
|
data/test/uea_stemmer_test.rb
CHANGED
@@ -11,6 +11,11 @@ class UeaStemmerTest < Test::Unit::TestCase
|
|
11
11
|
assert @stemmer.max_acronym_length == 'CAVASSOO'.size
|
12
12
|
end
|
13
13
|
|
14
|
+
should "allow setting options" do
|
15
|
+
@stemmer.options[:test] = true
|
16
|
+
assert @stemmer.options[:test]
|
17
|
+
end
|
18
|
+
|
14
19
|
context "stem method" do
|
15
20
|
should "stem words as Strings" do
|
16
21
|
assert @stemmer.stem('word').is_a?(String)
|
@@ -60,6 +65,20 @@ class UeaStemmerTest < Test::Unit::TestCase
|
|
60
65
|
assert_equal @stemmer.stem('smokes'), 'smoke'
|
61
66
|
assert_equal @stemmer.stem('does'), 'do'
|
62
67
|
end
|
68
|
+
|
69
|
+
should "stem various words with -des suffix" do
|
70
|
+
assert_equal @stemmer.stem('abodes'), 'abode'
|
71
|
+
assert_equal @stemmer.stem('escapades'), 'escapade'
|
72
|
+
assert_equal @stemmer.stem('crusades'), 'crusade'
|
73
|
+
assert_equal @stemmer.stem('grades'), 'grade'
|
74
|
+
end
|
75
|
+
|
76
|
+
should "stem various words with -res suffix" do
|
77
|
+
assert_equal @stemmer.stem('wires'), 'wire'
|
78
|
+
assert_equal @stemmer.stem('acres'), 'acre'
|
79
|
+
assert_equal @stemmer.stem('fires'), 'fire'
|
80
|
+
assert_equal @stemmer.stem('cares'), 'care'
|
81
|
+
end
|
63
82
|
|
64
83
|
should "stem acronyms when pluralized otherwise they should be left alone" do
|
65
84
|
assert_equal @stemmer.stem('USA'), 'USA'
|
data/uea-stemmer.gemspec
CHANGED
@@ -1,15 +1,15 @@
|
|
1
1
|
# Generated by jeweler
|
2
|
-
# DO NOT EDIT THIS FILE
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{uea-stemmer}
|
8
|
-
s.version = "0.10.0"
|
8
|
+
s.version = "0.10.1"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Marie-Claire Jenkins", "Dan J. Smith", "Richard Churchill", "Jason Adams"]
|
12
|
-
s.date = %q{
|
12
|
+
s.date = %q{2011-01-19}
|
13
13
|
s.description = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
|
14
14
|
s.email = %q{jasonmadams@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -34,7 +34,7 @@ Gem::Specification.new do |s|
|
|
34
34
|
s.homepage = %q{http://github.com/ealdent/uea-stemmer}
|
35
35
|
s.rdoc_options = ["--charset=UTF-8"]
|
36
36
|
s.require_paths = ["lib"]
|
37
|
-
s.rubygems_version = %q{1.3.
|
37
|
+
s.rubygems_version = %q{1.3.7}
|
38
38
|
s.summary = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
|
39
39
|
s.test_files = [
|
40
40
|
"test/test_helper.rb",
|
@@ -45,9 +45,10 @@ Gem::Specification.new do |s|
|
|
45
45
|
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
46
46
|
s.specification_version = 3
|
47
47
|
|
48
|
-
if Gem::Version.new(Gem::
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
49
|
else
|
50
50
|
end
|
51
51
|
else
|
52
52
|
end
|
53
53
|
end
|
54
|
+
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: uea-stemmer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 53
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 10
|
9
|
+
- 1
|
10
|
+
version: 0.10.1
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Marie-Claire Jenkins
|
@@ -12,7 +18,7 @@ autorequire:
|
|
12
18
|
bindir: bin
|
13
19
|
cert_chain: []
|
14
20
|
|
15
|
-
date:
|
21
|
+
date: 2011-01-19 00:00:00 -05:00
|
16
22
|
default_executable:
|
17
23
|
dependencies: []
|
18
24
|
|
@@ -49,21 +55,27 @@ rdoc_options:
|
|
49
55
|
require_paths:
|
50
56
|
- lib
|
51
57
|
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
+
none: false
|
52
59
|
requirements:
|
53
60
|
- - ">="
|
54
61
|
- !ruby/object:Gem::Version
|
62
|
+
hash: 3
|
63
|
+
segments:
|
64
|
+
- 0
|
55
65
|
version: "0"
|
56
|
-
version:
|
57
66
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
+
none: false
|
58
68
|
requirements:
|
59
69
|
- - ">="
|
60
70
|
- !ruby/object:Gem::Version
|
71
|
+
hash: 3
|
72
|
+
segments:
|
73
|
+
- 0
|
61
74
|
version: "0"
|
62
|
-
version:
|
63
75
|
requirements: []
|
64
76
|
|
65
77
|
rubyforge_project:
|
66
|
-
rubygems_version: 1.3.
|
78
|
+
rubygems_version: 1.3.7
|
67
79
|
signing_key:
|
68
80
|
specification_version: 3
|
69
81
|
summary: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.
|