uea-stemmer 0.10.1 → 0.10.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 2cf61c35c259651c4eddd37baa65bacb16ecace5
4
+ data.tar.gz: d896151f060bf289abf012d2ff64ff24048f6f05
5
+ SHA512:
6
+ metadata.gz: f873e292c3c2d30f40a05a1f1c1bf17222770519869aeac302719c7e94704f7c723e33b29133dc77489e2fdc10f4bffd56544af5a0445ad1a4e1ddca0f469d55
7
+ data.tar.gz: b4c4d0e05e480bc7f2b5751539a9a9f0986d6b556353089ca8ff7f91523826edfdee96f3bec44506a2dd3091f7d0008fd053bd97a9da4ada59acac690983f9ac
@@ -0,0 +1 @@
1
+ uea-stemmer
@@ -0,0 +1 @@
1
+ ruby-2.3.0
data/Gemfile ADDED
@@ -0,0 +1,5 @@
1
+ source 'https://rubygems.org'
2
+
3
+ group :development do
4
+ gem 'awesome_print'
5
+ end
@@ -0,0 +1,13 @@
1
+ GEM
2
+ remote: https://rubygems.org/
3
+ specs:
4
+ awesome_print (1.8.0)
5
+
6
+ PLATFORMS
7
+ ruby
8
+
9
+ DEPENDENCIES
10
+ awesome_print
11
+
12
+ BUNDLED WITH
13
+ 1.15.3
@@ -1,6 +1,6 @@
1
1
  = uea-stemmer
2
2
 
3
- Similar to other stemmers, UEA-Lite[http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming] operates on a set of rules which are used as steps. There are two groups of rules: the first to clean the tokens, and the second to alter suffixes.
3
+ Similar to other stemmers, UEA-Lite[https://web.archive.org/web/20120728132949/http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming] operates on a set of rules which are used as steps. There are two groups of rules: the first to clean the tokens, and the second to alter suffixes.
4
4
 
5
5
  The first group of rules first avoids a small list of six frequent problem words. An improvement to the stemmer would be to expand this list by adding other problem words which the second rule set cannot deal with. Second, possessive apostrophes are removed and contractions are expanded. All hyphens are removed and tokens containing digits are left untouched. Strings which are all upper case and digits are left untouched unless there is a lower case terminal 's' (i.e. transforming plural forms of acronyms to singular forms).
6
6
 
@@ -63,7 +63,7 @@ You can also extract the stemmed word along with the rule by using the +stem_wit
63
63
 
64
64
  == Relevant Web Pages
65
65
 
66
- * http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming
66
+ * https://web.archive.org/web/20120728132949/http://www.uea.ac.uk/cmp/research/graphicsvisionspeech/speech/WordStemming
67
67
  * Stemming[http://en.wikipedia.org/wiki/Stemming]
68
68
 
69
69
  == Copyright
data/Rakefile CHANGED
@@ -37,23 +37,4 @@ rescue LoadError
37
37
  end
38
38
  end
39
39
 
40
-
41
-
42
-
43
40
  task :default => :test
44
-
45
- require 'rake/rdoctask'
46
- Rake::RDocTask.new do |rdoc|
47
- if File.exist?('VERSION.yml')
48
- config = YAML.load(File.read('VERSION.yml'))
49
- version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
50
- else
51
- version = ""
52
- end
53
-
54
- rdoc.rdoc_dir = 'rdoc'
55
- rdoc.title = "uea-stemmer #{version}"
56
- rdoc.rdoc_files.include('README*')
57
- rdoc.rdoc_files.include('lib/**/*.rb')
58
- end
59
-
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.10.1
1
+ 0.10.2
@@ -109,6 +109,7 @@ class UEAStemmer
109
109
  @rules << Rule.new(/^[A-Z]+s$/, 1, 91.1)
110
110
  @rules << Rule.new(/^[A-Z]+$/, 0, 91)
111
111
  @rules << Rule.new(/^((.*[A-Z].*[A-Z])|([A-Z]{1})).*$/, 0, 92)
112
+ @rules << Rule.new(/^[a-z]{1}(|[rl])ing$/i, 0, 97)
112
113
 
113
114
  @rules << EndingRule.new('aceous', 6, 1)
114
115
  @rules << EndingRule.new('ces', 1, 2)
@@ -357,4 +358,4 @@ end
357
358
 
358
359
  class DefaultUEAStemmer < UEAStemmer
359
360
  include Singleton
360
- end
361
+ end
@@ -53,6 +53,14 @@ class UeaStemmerTest < Test::Unit::TestCase
53
53
  assert_equal @stemmer.stem('charring'), 'char'
54
54
  end
55
55
 
56
+ should "not stem false progressive verbs such as 'sing'" do
57
+ assert_equal @stemmer.stem('ring'), 'ring'
58
+ assert_equal @stemmer.stem('sing'), 'sing'
59
+ assert_equal @stemmer.stem('ring'), 'ring'
60
+ assert_equal @stemmer.stem('bring'), 'bring'
61
+ assert_equal @stemmer.stem('fling'), 'fling'
62
+ end
63
+
56
64
  should "stem various plural nouns and 3rd-pres verbs without the -s/-es" do
57
65
  assert_equal @stemmer.stem('changes'), 'change'
58
66
  assert_equal @stemmer.stem('deaths'), 'death'
@@ -65,14 +73,14 @@ class UeaStemmerTest < Test::Unit::TestCase
65
73
  assert_equal @stemmer.stem('smokes'), 'smoke'
66
74
  assert_equal @stemmer.stem('does'), 'do'
67
75
  end
68
-
76
+
69
77
  should "stem various words with -des suffix" do
70
78
  assert_equal @stemmer.stem('abodes'), 'abode'
71
79
  assert_equal @stemmer.stem('escapades'), 'escapade'
72
80
  assert_equal @stemmer.stem('crusades'), 'crusade'
73
81
  assert_equal @stemmer.stem('grades'), 'grade'
74
82
  end
75
-
83
+
76
84
  should "stem various words with -res suffix" do
77
85
  assert_equal @stemmer.stem('wires'), 'wire'
78
86
  assert_equal @stemmer.stem('acres'), 'acre'
@@ -1,54 +1,60 @@
1
1
  # Generated by jeweler
2
2
  # DO NOT EDIT THIS FILE DIRECTLY
3
- # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run 'rake gemspec'
4
4
  # -*- encoding: utf-8 -*-
5
+ # stub: uea-stemmer 0.10.2 ruby lib
5
6
 
6
7
  Gem::Specification.new do |s|
7
- s.name = %q{uea-stemmer}
8
- s.version = "0.10.1"
8
+ s.name = "uea-stemmer"
9
+ s.version = "0.10.2"
9
10
 
10
11
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
12
+ s.require_paths = ["lib"]
11
13
  s.authors = ["Marie-Claire Jenkins", "Dan J. Smith", "Richard Churchill", "Jason Adams"]
12
- s.date = %q{2011-01-19}
13
- s.description = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
14
- s.email = %q{jasonmadams@gmail.com}
14
+ s.date = "2017-08-14"
15
+ s.description = "Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing."
16
+ s.email = "jasonmadams@gmail.com"
15
17
  s.extra_rdoc_files = [
16
18
  "LICENSE",
17
- "README.rdoc"
19
+ "README.rdoc"
18
20
  ]
19
21
  s.files = [
20
22
  ".document",
21
- ".gitignore",
22
- "LICENSE",
23
- "README.rdoc",
24
- "Rakefile",
25
- "VERSION",
26
- "lib/uea-stemmer.rb",
27
- "lib/uea-stemmer/rule.rb",
28
- "lib/uea-stemmer/string_helpers.rb",
29
- "lib/uea-stemmer/word.rb",
30
- "test/test_helper.rb",
31
- "test/uea_stemmer_test.rb",
32
- "uea-stemmer.gemspec"
33
- ]
34
- s.homepage = %q{http://github.com/ealdent/uea-stemmer}
35
- s.rdoc_options = ["--charset=UTF-8"]
36
- s.require_paths = ["lib"]
37
- s.rubygems_version = %q{1.3.7}
38
- s.summary = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
39
- s.test_files = [
23
+ "LICENSE",
24
+ "README.rdoc",
25
+ "Rakefile",
26
+ "VERSION",
27
+ "lib/uea-stemmer.rb",
28
+ "lib/uea-stemmer/rule.rb",
29
+ "lib/uea-stemmer/string_helpers.rb",
30
+ "lib/uea-stemmer/word.rb",
40
31
  "test/test_helper.rb",
41
- "test/uea_stemmer_test.rb"
32
+ "test/uea_stemmer_test.rb",
33
+ "uea-stemmer.gemspec"
42
34
  ]
35
+ s.homepage = "http://github.com/ealdent/uea-stemmer"
36
+ s.rubygems_version = "2.4.8"
37
+ s.summary = "Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing."
43
38
 
44
39
  if s.respond_to? :specification_version then
45
- current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
- s.specification_version = 3
40
+ s.specification_version = 4
47
41
 
48
42
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
43
+ s.add_runtime_dependency(%q<uea-stemmer>, [">= 0"])
44
+ s.add_development_dependency(%q<jeweler>, [">= 0"])
45
+ s.add_development_dependency(%q<test-unit>, [">= 0"])
46
+ s.add_development_dependency(%q<shoulda>, [">= 0"])
49
47
  else
48
+ s.add_dependency(%q<uea-stemmer>, [">= 0"])
49
+ s.add_dependency(%q<jeweler>, [">= 0"])
50
+ s.add_dependency(%q<test-unit>, [">= 0"])
51
+ s.add_dependency(%q<shoulda>, [">= 0"])
50
52
  end
51
53
  else
54
+ s.add_dependency(%q<uea-stemmer>, [">= 0"])
55
+ s.add_dependency(%q<jeweler>, [">= 0"])
56
+ s.add_dependency(%q<test-unit>, [">= 0"])
57
+ s.add_dependency(%q<shoulda>, [">= 0"])
52
58
  end
53
59
  end
54
60
 
metadata CHANGED
@@ -1,15 +1,9 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: uea-stemmer
3
- version: !ruby/object:Gem::Version
4
- hash: 53
5
- prerelease: false
6
- segments:
7
- - 0
8
- - 10
9
- - 1
10
- version: 0.10.1
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.10.2
11
5
  platform: ruby
12
- authors:
6
+ authors:
13
7
  - Marie-Claire Jenkins
14
8
  - Dan J. Smith
15
9
  - Richard Churchill
@@ -17,23 +11,36 @@ authors:
17
11
  autorequire:
18
12
  bindir: bin
19
13
  cert_chain: []
20
-
21
- date: 2011-01-19 00:00:00 -05:00
22
- default_executable:
23
- dependencies: []
24
-
25
- description: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.
14
+ date: 2017-08-14 00:00:00.000000000 Z
15
+ dependencies:
16
+ - !ruby/object:Gem::Dependency
17
+ name: awesome_print
18
+ requirement: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">="
21
+ - !ruby/object:Gem::Version
22
+ version: '0'
23
+ type: :development
24
+ prerelease: false
25
+ version_requirements: !ruby/object:Gem::Requirement
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ version: '0'
30
+ description: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and
31
+ indexing.
26
32
  email: jasonmadams@gmail.com
27
33
  executables: []
28
-
29
34
  extensions: []
30
-
31
- extra_rdoc_files:
35
+ extra_rdoc_files:
32
36
  - LICENSE
33
37
  - README.rdoc
34
- files:
35
- - .document
36
- - .gitignore
38
+ files:
39
+ - ".document"
40
+ - ".ruby-gemset"
41
+ - ".ruby-version"
42
+ - Gemfile
43
+ - Gemfile.lock
37
44
  - LICENSE
38
45
  - README.rdoc
39
46
  - Rakefile
@@ -45,40 +52,27 @@ files:
45
52
  - test/test_helper.rb
46
53
  - test/uea_stemmer_test.rb
47
54
  - uea-stemmer.gemspec
48
- has_rdoc: true
49
55
  homepage: http://github.com/ealdent/uea-stemmer
50
56
  licenses: []
51
-
57
+ metadata: {}
52
58
  post_install_message:
53
- rdoc_options:
54
- - --charset=UTF-8
55
- require_paths:
59
+ rdoc_options: []
60
+ require_paths:
56
61
  - lib
57
- required_ruby_version: !ruby/object:Gem::Requirement
58
- none: false
59
- requirements:
62
+ required_ruby_version: !ruby/object:Gem::Requirement
63
+ requirements:
60
64
  - - ">="
61
- - !ruby/object:Gem::Version
62
- hash: 3
63
- segments:
64
- - 0
65
- version: "0"
66
- required_rubygems_version: !ruby/object:Gem::Requirement
67
- none: false
68
- requirements:
65
+ - !ruby/object:Gem::Version
66
+ version: '0'
67
+ required_rubygems_version: !ruby/object:Gem::Requirement
68
+ requirements:
69
69
  - - ">="
70
- - !ruby/object:Gem::Version
71
- hash: 3
72
- segments:
73
- - 0
74
- version: "0"
70
+ - !ruby/object:Gem::Version
71
+ version: '0'
75
72
  requirements: []
76
-
77
73
  rubyforge_project:
78
- rubygems_version: 1.3.7
74
+ rubygems_version: 2.4.8
79
75
  signing_key:
80
- specification_version: 3
76
+ specification_version: 4
81
77
  summary: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.
82
- test_files:
83
- - test/test_helper.rb
84
- - test/uea_stemmer_test.rb
78
+ test_files: []
data/.gitignore DELETED
@@ -1,6 +0,0 @@
1
- *.sw?
2
- .DS_Store
3
- coverage
4
- rdoc
5
- pkg
6
- *.tmproj