uea-stemmer 0.10.1 → 0.10.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.ruby-gemset +1 -0
- data/.ruby-version +1 -0
- data/Gemfile +5 -0
- data/Gemfile.lock +13 -0
- data/README.rdoc +2 -2
- data/Rakefile +0 -19
- data/VERSION +1 -1
- data/lib/uea-stemmer.rb +2 -1
- data/test/uea_stemmer_test.rb +10 -2
- data/uea-stemmer.gemspec +35 -29
- metadata +43 -49
- data/.gitignore +0 -6
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 2cf61c35c259651c4eddd37baa65bacb16ecace5
|
4
|
+
data.tar.gz: d896151f060bf289abf012d2ff64ff24048f6f05
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: f873e292c3c2d30f40a05a1f1c1bf17222770519869aeac302719c7e94704f7c723e33b29133dc77489e2fdc10f4bffd56544af5a0445ad1a4e1ddca0f469d55
|
7
|
+
data.tar.gz: b4c4d0e05e480bc7f2b5751539a9a9f0986d6b556353089ca8ff7f91523826edfdee96f3bec44506a2dd3091f7d0008fd053bd97a9da4ada59acac690983f9ac
|
data/.ruby-gemset
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
uea-stemmer
|
data/.ruby-version
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
ruby-2.3.0
|
data/Gemfile
ADDED
data/Gemfile.lock
ADDED
data/README.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= uea-stemmer
|
2
2
|
|
3
|
-
Similar to other stemmers, UEA-Lite[http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming] operates on a set of rules which are used as steps. There are two groups of rules: the first to clean the tokens, and the second to alter suffixes.
|
3
|
+
Similar to other stemmers, UEA-Lite[https://web.archive.org/web/20120728132949/http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming] operates on a set of rules which are used as steps. There are two groups of rules: the first to clean the tokens, and the second to alter suffixes.
|
4
4
|
|
5
5
|
The first group of rules first avoids a small list of six frequent problem words. An improvement to the stemmer would be to expand this list by adding other problem words which the second rule set cannot deal with. Second, possessive apostrophes are removed and contractions are expanded. All hyphens are removed and tokens containing digits are left untouched. Strings which are all upper case and digits are left untouched unless there is a lower case terminal 's' (i.e. transforming plural forms of acronyms to singular forms).
|
6
6
|
|
@@ -63,7 +63,7 @@ You can also extract the stemmed word along with the rule by using the +stem_wit
|
|
63
63
|
|
64
64
|
== Relevant Web Pages
|
65
65
|
|
66
|
-
* http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming
|
66
|
+
* https://web.archive.org/web/20120728132949/http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming
|
67
67
|
* Stemming[http://en.wikipedia.org/wiki/Stemming]
|
68
68
|
|
69
69
|
== Copyright
|
data/Rakefile
CHANGED
@@ -37,23 +37,4 @@ rescue LoadError
|
|
37
37
|
end
|
38
38
|
end
|
39
39
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
40
|
task :default => :test
|
44
|
-
|
45
|
-
require 'rake/rdoctask'
|
46
|
-
Rake::RDocTask.new do |rdoc|
|
47
|
-
if File.exist?('VERSION.yml')
|
48
|
-
config = YAML.load(File.read('VERSION.yml'))
|
49
|
-
version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
|
50
|
-
else
|
51
|
-
version = ""
|
52
|
-
end
|
53
|
-
|
54
|
-
rdoc.rdoc_dir = 'rdoc'
|
55
|
-
rdoc.title = "uea-stemmer #{version}"
|
56
|
-
rdoc.rdoc_files.include('README*')
|
57
|
-
rdoc.rdoc_files.include('lib/**/*.rb')
|
58
|
-
end
|
59
|
-
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.10.1
|
1
|
+
0.10.2
|
data/lib/uea-stemmer.rb
CHANGED
@@ -109,6 +109,7 @@ class UEAStemmer
|
|
109
109
|
@rules << Rule.new(/^[A-Z]+s$/, 1, 91.1)
|
110
110
|
@rules << Rule.new(/^[A-Z]+$/, 0, 91)
|
111
111
|
@rules << Rule.new(/^((.*[A-Z].*[A-Z])|([A-Z]{1})).*$/, 0, 92)
|
112
|
+
@rules << Rule.new(/^[a-z]{1}(|[rl])ing$/i, 0, 97)
|
112
113
|
|
113
114
|
@rules << EndingRule.new('aceous', 6, 1)
|
114
115
|
@rules << EndingRule.new('ces', 1, 2)
|
@@ -357,4 +358,4 @@ end
|
|
357
358
|
|
358
359
|
class DefaultUEAStemmer < UEAStemmer
|
359
360
|
include Singleton
|
360
|
-
end
|
361
|
+
end
|
data/test/uea_stemmer_test.rb
CHANGED
@@ -53,6 +53,14 @@ class UeaStemmerTest < Test::Unit::TestCase
|
|
53
53
|
assert_equal @stemmer.stem('charring'), 'char'
|
54
54
|
end
|
55
55
|
|
56
|
+
should "not stem false progressive verbs such as 'sing'" do
|
57
|
+
assert_equal @stemmer.stem('ring'), 'ring'
|
58
|
+
assert_equal @stemmer.stem('sing'), 'sing'
|
59
|
+
assert_equal @stemmer.stem('ring'), 'ring'
|
60
|
+
assert_equal @stemmer.stem('bring'), 'bring'
|
61
|
+
assert_equal @stemmer.stem('fling'), 'fling'
|
62
|
+
end
|
63
|
+
|
56
64
|
should "stem various plural nouns and 3rd-pres verbs without the -s/-es" do
|
57
65
|
assert_equal @stemmer.stem('changes'), 'change'
|
58
66
|
assert_equal @stemmer.stem('deaths'), 'death'
|
@@ -65,14 +73,14 @@ class UeaStemmerTest < Test::Unit::TestCase
|
|
65
73
|
assert_equal @stemmer.stem('smokes'), 'smoke'
|
66
74
|
assert_equal @stemmer.stem('does'), 'do'
|
67
75
|
end
|
68
|
-
|
76
|
+
|
69
77
|
should "stem various words with -des suffix" do
|
70
78
|
assert_equal @stemmer.stem('abodes'), 'abode'
|
71
79
|
assert_equal @stemmer.stem('escapades'), 'escapade'
|
72
80
|
assert_equal @stemmer.stem('crusades'), 'crusade'
|
73
81
|
assert_equal @stemmer.stem('grades'), 'grade'
|
74
82
|
end
|
75
|
-
|
83
|
+
|
76
84
|
should "stem various words with -res suffix" do
|
77
85
|
assert_equal @stemmer.stem('wires'), 'wire'
|
78
86
|
assert_equal @stemmer.stem('acres'), 'acre'
|
data/uea-stemmer.gemspec
CHANGED
@@ -1,54 +1,60 @@
|
|
1
1
|
# Generated by jeweler
|
2
2
|
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
-
# Instead, edit Jeweler::Tasks in Rakefile, and run
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
|
+
# stub: uea-stemmer 0.10.2 ruby lib
|
5
6
|
|
6
7
|
Gem::Specification.new do |s|
|
7
|
-
s.name = %q{uea-stemmer}
|
8
|
-
s.version = "0.10.1"
|
8
|
+
s.name = "uea-stemmer"
|
9
|
+
s.version = "0.10.2"
|
9
10
|
|
10
11
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
12
|
+
s.require_paths = ["lib"]
|
11
13
|
s.authors = ["Marie-Claire Jenkins", "Dan J. Smith", "Richard Churchill", "Jason Adams"]
|
12
|
-
s.date =
|
13
|
-
s.description =
|
14
|
-
s.email =
|
14
|
+
s.date = "2017-08-14"
|
15
|
+
s.description = "Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing."
|
16
|
+
s.email = "jasonmadams@gmail.com"
|
15
17
|
s.extra_rdoc_files = [
|
16
18
|
"LICENSE",
|
17
|
-
|
19
|
+
"README.rdoc"
|
18
20
|
]
|
19
21
|
s.files = [
|
20
22
|
".document",
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
"lib/uea-stemmer/word.rb",
|
30
|
-
"test/test_helper.rb",
|
31
|
-
"test/uea_stemmer_test.rb",
|
32
|
-
"uea-stemmer.gemspec"
|
33
|
-
]
|
34
|
-
s.homepage = %q{http://github.com/ealdent/uea-stemmer}
|
35
|
-
s.rdoc_options = ["--charset=UTF-8"]
|
36
|
-
s.require_paths = ["lib"]
|
37
|
-
s.rubygems_version = %q{1.3.7}
|
38
|
-
s.summary = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
|
39
|
-
s.test_files = [
|
23
|
+
"LICENSE",
|
24
|
+
"README.rdoc",
|
25
|
+
"Rakefile",
|
26
|
+
"VERSION",
|
27
|
+
"lib/uea-stemmer.rb",
|
28
|
+
"lib/uea-stemmer/rule.rb",
|
29
|
+
"lib/uea-stemmer/string_helpers.rb",
|
30
|
+
"lib/uea-stemmer/word.rb",
|
40
31
|
"test/test_helper.rb",
|
41
|
-
|
32
|
+
"test/uea_stemmer_test.rb",
|
33
|
+
"uea-stemmer.gemspec"
|
42
34
|
]
|
35
|
+
s.homepage = "http://github.com/ealdent/uea-stemmer"
|
36
|
+
s.rubygems_version = "2.4.8"
|
37
|
+
s.summary = "Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing."
|
43
38
|
|
44
39
|
if s.respond_to? :specification_version then
|
45
|
-
|
46
|
-
s.specification_version = 3
|
40
|
+
s.specification_version = 4
|
47
41
|
|
48
42
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
43
|
+
s.add_runtime_dependency(%q<uea-stemmer>, [">= 0"])
|
44
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
45
|
+
s.add_development_dependency(%q<test-unit>, [">= 0"])
|
46
|
+
s.add_development_dependency(%q<shoulda>, [">= 0"])
|
49
47
|
else
|
48
|
+
s.add_dependency(%q<uea-stemmer>, [">= 0"])
|
49
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
50
|
+
s.add_dependency(%q<test-unit>, [">= 0"])
|
51
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
50
52
|
end
|
51
53
|
else
|
54
|
+
s.add_dependency(%q<uea-stemmer>, [">= 0"])
|
55
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
56
|
+
s.add_dependency(%q<test-unit>, [">= 0"])
|
57
|
+
s.add_dependency(%q<shoulda>, [">= 0"])
|
52
58
|
end
|
53
59
|
end
|
54
60
|
|
metadata
CHANGED
@@ -1,15 +1,9 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: uea-stemmer
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
prerelease: false
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 10
|
9
|
-
- 1
|
10
|
-
version: 0.10.1
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.10.2
|
11
5
|
platform: ruby
|
12
|
-
authors:
|
6
|
+
authors:
|
13
7
|
- Marie-Claire Jenkins
|
14
8
|
- Dan J. Smith
|
15
9
|
- Richard Churchill
|
@@ -17,23 +11,36 @@ authors:
|
|
17
11
|
autorequire:
|
18
12
|
bindir: bin
|
19
13
|
cert_chain: []
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
14
|
+
date: 2017-08-14 00:00:00.000000000 Z
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: awesome_print
|
18
|
+
requirement: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">="
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: '0'
|
23
|
+
type: :development
|
24
|
+
prerelease: false
|
25
|
+
version_requirements: !ruby/object:Gem::Requirement
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
description: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and
|
31
|
+
indexing.
|
26
32
|
email: jasonmadams@gmail.com
|
27
33
|
executables: []
|
28
|
-
|
29
34
|
extensions: []
|
30
|
-
|
31
|
-
extra_rdoc_files:
|
35
|
+
extra_rdoc_files:
|
32
36
|
- LICENSE
|
33
37
|
- README.rdoc
|
34
|
-
files:
|
35
|
-
- .document
|
36
|
-
- .gitignore
|
38
|
+
files:
|
39
|
+
- ".document"
|
40
|
+
- ".ruby-gemset"
|
41
|
+
- ".ruby-version"
|
42
|
+
- Gemfile
|
43
|
+
- Gemfile.lock
|
37
44
|
- LICENSE
|
38
45
|
- README.rdoc
|
39
46
|
- Rakefile
|
@@ -45,40 +52,27 @@ files:
|
|
45
52
|
- test/test_helper.rb
|
46
53
|
- test/uea_stemmer_test.rb
|
47
54
|
- uea-stemmer.gemspec
|
48
|
-
has_rdoc: true
|
49
55
|
homepage: http://github.com/ealdent/uea-stemmer
|
50
56
|
licenses: []
|
51
|
-
|
57
|
+
metadata: {}
|
52
58
|
post_install_message:
|
53
|
-
rdoc_options:
|
54
|
-
|
55
|
-
require_paths:
|
59
|
+
rdoc_options: []
|
60
|
+
require_paths:
|
56
61
|
- lib
|
57
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
58
|
-
|
59
|
-
requirements:
|
62
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
63
|
+
requirements:
|
60
64
|
- - ">="
|
61
|
-
- !ruby/object:Gem::Version
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
version: "0"
|
66
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
67
|
-
none: false
|
68
|
-
requirements:
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: '0'
|
67
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
68
|
+
requirements:
|
69
69
|
- - ">="
|
70
|
-
- !ruby/object:Gem::Version
|
71
|
-
|
72
|
-
segments:
|
73
|
-
- 0
|
74
|
-
version: "0"
|
70
|
+
- !ruby/object:Gem::Version
|
71
|
+
version: '0'
|
75
72
|
requirements: []
|
76
|
-
|
77
73
|
rubyforge_project:
|
78
|
-
rubygems_version: 1.3.7
|
74
|
+
rubygems_version: 2.4.8
|
79
75
|
signing_key:
|
80
|
-
specification_version: 3
|
76
|
+
specification_version: 4
|
81
77
|
summary: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.
|
82
|
-
test_files:
|
83
|
-
- test/test_helper.rb
|
84
|
-
- test/uea_stemmer_test.rb
|
78
|
+
test_files: []
|