uea-stemmer 0.10.0 → 0.10.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +5 -4
 - data/VERSION +1 -1
 - data/lib/uea-stemmer.rb +2 -0
 - data/test/uea_stemmer_test.rb +19 -0
 - data/uea-stemmer.gemspec +7 -6
 - metadata +17 -5
 
    
        data/README.rdoc
    CHANGED
    
    | 
         @@ -14,10 +14,9 @@ This is a port to Ruby from the port to Java from the original Perl script by Ma 
     | 
|
| 
       14 
14 
     | 
    
         | 
| 
       15 
15 
     | 
    
         
             
            == Installation
         
     | 
| 
       16 
16 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
            Install the gem 
     | 
| 
      
 17 
     | 
    
         
            +
            Install the gem:
         
     | 
| 
       18 
18 
     | 
    
         | 
| 
       19 
     | 
    
         
            -
              gem  
     | 
| 
       20 
     | 
    
         
            -
              sudo gem install ealdent-uea-stemmer
         
     | 
| 
      
 19 
     | 
    
         
            +
              gem install uea-stemmer
         
     | 
| 
       21 
20 
     | 
    
         | 
| 
       22 
21 
     | 
    
         
             
            Install the gem from source:
         
     | 
| 
       23 
22 
     | 
    
         | 
| 
         @@ -25,6 +24,8 @@ Install the gem from source: 
     | 
|
| 
       25 
24 
     | 
    
         
             
              cd uea-stemmer
         
     | 
| 
       26 
25 
     | 
    
         
             
              rake install
         
     | 
| 
       27 
26 
     | 
    
         | 
| 
      
 27 
     | 
    
         
            +
            Depending on your setup, you may need to use +sudo+ for either of these methods.
         
     | 
| 
      
 28 
     | 
    
         
            +
             
     | 
| 
       28 
29 
     | 
    
         
             
            == Example Usage
         
     | 
| 
       29 
30 
     | 
    
         | 
| 
       30 
31 
     | 
    
         
             
            Typical usage:
         
     | 
| 
         @@ -40,7 +41,7 @@ Typical usage: 
     | 
|
| 
       40 
41 
     | 
    
         
             
              'bodies'.stem             # body
         
     | 
| 
       41 
42 
     | 
    
         
             
              'ordained'.stem           # ordain
         
     | 
| 
       42 
43 
     | 
    
         | 
| 
       43 
     | 
    
         
            -
            You can also extract the stemmed word along with the rule by using the  
     | 
| 
      
 44 
     | 
    
         
            +
            You can also extract the stemmed word along with the rule by using the +stem_with_rule+ method.
         
     | 
| 
       44 
45 
     | 
    
         | 
| 
       45 
46 
     | 
    
         
             
              stem = stemmer.stem_with_rule('invited')   # Word('invite', Rule #22.3)
         
     | 
| 
       46 
47 
     | 
    
         
             
              puts stem.rule  # rule #22.3 (remove -d when the word ends in -ited)
         
     | 
    
        data/VERSION
    CHANGED
    
    | 
         @@ -1 +1 @@ 
     | 
|
| 
       1 
     | 
    
         
            -
            0.10.0
     | 
| 
      
 1 
     | 
    
         
            +
            0.10.1
         
     | 
    
        data/lib/uea-stemmer.rb
    CHANGED
    
    | 
         @@ -326,6 +326,8 @@ class UEAStemmer 
     | 
|
| 
       326 
326 
     | 
    
         
             
                @rules << EndingRule.new('oded', 1, 61.1)
         
     | 
| 
       327 
327 
     | 
    
         
             
                @rules << EndingRule.new('ated', 1, 61)
         
     | 
| 
       328 
328 
     | 
    
         
             
                @rules << CustomRule.new(/.*\w\weds?$/, 2, 62)
         
     | 
| 
      
 329 
     | 
    
         
            +
                @rules << EndingRule.new('des', 1, 63.10) # Fix for words like grades, escapades, abodes
         
     | 
| 
      
 330 
     | 
    
         
            +
                @rules << EndingRule.new('res', 1, 63.9) # Fix for words like fires, acres, wires, cares
         
     | 
| 
       329 
331 
     | 
    
         
             
                @rules << EndingRule.new('pes', 1, 63.8)
         
     | 
| 
       330 
332 
     | 
    
         
             
                @rules << EndingRule.new('mes', 1, 63.7)
         
     | 
| 
       331 
333 
     | 
    
         
             
                @rules << EndingRule.new('ones', 1, 63.6)
         
     | 
    
        data/test/uea_stemmer_test.rb
    CHANGED
    
    | 
         @@ -11,6 +11,11 @@ class UeaStemmerTest < Test::Unit::TestCase 
     | 
|
| 
       11 
11 
     | 
    
         
             
                  assert @stemmer.max_acronym_length == 'CAVASSOO'.size
         
     | 
| 
       12 
12 
     | 
    
         
             
                end
         
     | 
| 
       13 
13 
     | 
    
         | 
| 
      
 14 
     | 
    
         
            +
                should "allow setting options" do
         
     | 
| 
      
 15 
     | 
    
         
            +
                  @stemmer.options[:test] = true
         
     | 
| 
      
 16 
     | 
    
         
            +
                  assert @stemmer.options[:test]
         
     | 
| 
      
 17 
     | 
    
         
            +
                end
         
     | 
| 
      
 18 
     | 
    
         
            +
             
     | 
| 
       14 
19 
     | 
    
         
             
                context "stem method" do
         
     | 
| 
       15 
20 
     | 
    
         
             
                  should "stem words as Strings" do
         
     | 
| 
       16 
21 
     | 
    
         
             
                    assert @stemmer.stem('word').is_a?(String)
         
     | 
| 
         @@ -60,6 +65,20 @@ class UeaStemmerTest < Test::Unit::TestCase 
     | 
|
| 
       60 
65 
     | 
    
         
             
                    assert_equal @stemmer.stem('smokes'), 'smoke'
         
     | 
| 
       61 
66 
     | 
    
         
             
                    assert_equal @stemmer.stem('does'), 'do'
         
     | 
| 
       62 
67 
     | 
    
         
             
                  end
         
     | 
| 
      
 68 
     | 
    
         
            +
                  
         
     | 
| 
      
 69 
     | 
    
         
            +
                  should "stem various words with -des suffix" do
         
     | 
| 
      
 70 
     | 
    
         
            +
                    assert_equal @stemmer.stem('abodes'), 'abode'
         
     | 
| 
      
 71 
     | 
    
         
            +
                    assert_equal @stemmer.stem('escapades'), 'escapade'
         
     | 
| 
      
 72 
     | 
    
         
            +
                    assert_equal @stemmer.stem('crusades'), 'crusade'
         
     | 
| 
      
 73 
     | 
    
         
            +
                    assert_equal @stemmer.stem('grades'), 'grade'
         
     | 
| 
      
 74 
     | 
    
         
            +
                  end
         
     | 
| 
      
 75 
     | 
    
         
            +
                  
         
     | 
| 
      
 76 
     | 
    
         
            +
                  should "stem various words with -res suffix" do
         
     | 
| 
      
 77 
     | 
    
         
            +
                    assert_equal @stemmer.stem('wires'), 'wire'
         
     | 
| 
      
 78 
     | 
    
         
            +
                    assert_equal @stemmer.stem('acres'), 'acre'
         
     | 
| 
      
 79 
     | 
    
         
            +
                    assert_equal @stemmer.stem('fires'), 'fire'
         
     | 
| 
      
 80 
     | 
    
         
            +
                    assert_equal @stemmer.stem('cares'), 'care'
         
     | 
| 
      
 81 
     | 
    
         
            +
                  end
         
     | 
| 
       63 
82 
     | 
    
         | 
| 
       64 
83 
     | 
    
         
             
                  should "stem acronyms when pluralized otherwise they should be left alone" do
         
     | 
| 
       65 
84 
     | 
    
         
             
                    assert_equal @stemmer.stem('USA'), 'USA'
         
     | 
    
        data/uea-stemmer.gemspec
    CHANGED
    
    | 
         @@ -1,15 +1,15 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            # Generated by jeweler
         
     | 
| 
       2 
     | 
    
         
            -
            # DO NOT EDIT THIS FILE
         
     | 
| 
       3 
     | 
    
         
            -
            # Instead, edit Jeweler::Tasks in Rakefile, and run  
     | 
| 
      
 2 
     | 
    
         
            +
            # DO NOT EDIT THIS FILE DIRECTLY
         
     | 
| 
      
 3 
     | 
    
         
            +
            # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
         
     | 
| 
       4 
4 
     | 
    
         
             
            # -*- encoding: utf-8 -*-
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
6 
     | 
    
         
             
            Gem::Specification.new do |s|
         
     | 
| 
       7 
7 
     | 
    
         
             
              s.name = %q{uea-stemmer}
         
     | 
| 
       8 
     | 
    
         
            -
              s.version = "0.10.0"
     | 
| 
      
 8 
     | 
    
         
            +
              s.version = "0.10.1"
         
     | 
| 
       9 
9 
     | 
    
         | 
| 
       10 
10 
     | 
    
         
             
              s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
         
     | 
| 
       11 
11 
     | 
    
         
             
              s.authors = ["Marie-Claire Jenkins", "Dan J. Smith", "Richard Churchill", "Jason Adams"]
         
     | 
| 
       12 
     | 
    
         
            -
              s.date = %q{ 
     | 
| 
      
 12 
     | 
    
         
            +
              s.date = %q{2011-01-19}
         
     | 
| 
       13 
13 
     | 
    
         
             
              s.description = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
         
     | 
| 
       14 
14 
     | 
    
         
             
              s.email = %q{jasonmadams@gmail.com}
         
     | 
| 
       15 
15 
     | 
    
         
             
              s.extra_rdoc_files = [
         
     | 
| 
         @@ -34,7 +34,7 @@ Gem::Specification.new do |s| 
     | 
|
| 
       34 
34 
     | 
    
         
             
              s.homepage = %q{http://github.com/ealdent/uea-stemmer}
         
     | 
| 
       35 
35 
     | 
    
         
             
              s.rdoc_options = ["--charset=UTF-8"]
         
     | 
| 
       36 
36 
     | 
    
         
             
              s.require_paths = ["lib"]
         
     | 
| 
       37 
     | 
    
         
            -
              s.rubygems_version = %q{1.3. 
     | 
| 
      
 37 
     | 
    
         
            +
              s.rubygems_version = %q{1.3.7}
         
     | 
| 
       38 
38 
     | 
    
         
             
              s.summary = %q{Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.}
         
     | 
| 
       39 
39 
     | 
    
         
             
              s.test_files = [
         
     | 
| 
       40 
40 
     | 
    
         
             
                "test/test_helper.rb",
         
     | 
| 
         @@ -45,9 +45,10 @@ Gem::Specification.new do |s| 
     | 
|
| 
       45 
45 
     | 
    
         
             
                current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
         
     | 
| 
       46 
46 
     | 
    
         
             
                s.specification_version = 3
         
     | 
| 
       47 
47 
     | 
    
         | 
| 
       48 
     | 
    
         
            -
                if Gem::Version.new(Gem:: 
     | 
| 
      
 48 
     | 
    
         
            +
                if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
         
     | 
| 
       49 
49 
     | 
    
         
             
                else
         
     | 
| 
       50 
50 
     | 
    
         
             
                end
         
     | 
| 
       51 
51 
     | 
    
         
             
              else
         
     | 
| 
       52 
52 
     | 
    
         
             
              end
         
     | 
| 
       53 
53 
     | 
    
         
             
            end
         
     | 
| 
      
 54 
     | 
    
         
            +
             
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification 
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: uea-stemmer
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version 
         
     | 
| 
       4 
     | 
    
         
            -
               
     | 
| 
      
 4 
     | 
    
         
            +
              hash: 53
         
     | 
| 
      
 5 
     | 
    
         
            +
              prerelease: false
         
     | 
| 
      
 6 
     | 
    
         
            +
              segments: 
         
     | 
| 
      
 7 
     | 
    
         
            +
              - 0
         
     | 
| 
      
 8 
     | 
    
         
            +
              - 10
         
     | 
| 
      
 9 
     | 
    
         
            +
              - 1
         
     | 
| 
      
 10 
     | 
    
         
            +
              version: 0.10.1
         
     | 
| 
       5 
11 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
12 
     | 
    
         
             
            authors: 
         
     | 
| 
       7 
13 
     | 
    
         
             
            - Marie-Claire Jenkins
         
     | 
| 
         @@ -12,7 +18,7 @@ autorequire: 
     | 
|
| 
       12 
18 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       13 
19 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       14 
20 
     | 
    
         | 
| 
       15 
     | 
    
         
            -
            date:  
     | 
| 
      
 21 
     | 
    
         
            +
            date: 2011-01-19 00:00:00 -05:00
         
     | 
| 
       16 
22 
     | 
    
         
             
            default_executable: 
         
     | 
| 
       17 
23 
     | 
    
         
             
            dependencies: []
         
     | 
| 
       18 
24 
     | 
    
         | 
| 
         @@ -49,21 +55,27 @@ rdoc_options: 
     | 
|
| 
       49 
55 
     | 
    
         
             
            require_paths: 
         
     | 
| 
       50 
56 
     | 
    
         
             
            - lib
         
     | 
| 
       51 
57 
     | 
    
         
             
            required_ruby_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 58 
     | 
    
         
            +
              none: false
         
     | 
| 
       52 
59 
     | 
    
         
             
              requirements: 
         
     | 
| 
       53 
60 
     | 
    
         
             
              - - ">="
         
     | 
| 
       54 
61 
     | 
    
         
             
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 62 
     | 
    
         
            +
                  hash: 3
         
     | 
| 
      
 63 
     | 
    
         
            +
                  segments: 
         
     | 
| 
      
 64 
     | 
    
         
            +
                  - 0
         
     | 
| 
       55 
65 
     | 
    
         
             
                  version: "0"
         
     | 
| 
       56 
     | 
    
         
            -
              version: 
         
     | 
| 
       57 
66 
     | 
    
         
             
            required_rubygems_version: !ruby/object:Gem::Requirement 
         
     | 
| 
      
 67 
     | 
    
         
            +
              none: false
         
     | 
| 
       58 
68 
     | 
    
         
             
              requirements: 
         
     | 
| 
       59 
69 
     | 
    
         
             
              - - ">="
         
     | 
| 
       60 
70 
     | 
    
         
             
                - !ruby/object:Gem::Version 
         
     | 
| 
      
 71 
     | 
    
         
            +
                  hash: 3
         
     | 
| 
      
 72 
     | 
    
         
            +
                  segments: 
         
     | 
| 
      
 73 
     | 
    
         
            +
                  - 0
         
     | 
| 
       61 
74 
     | 
    
         
             
                  version: "0"
         
     | 
| 
       62 
     | 
    
         
            -
              version: 
         
     | 
| 
       63 
75 
     | 
    
         
             
            requirements: []
         
     | 
| 
       64 
76 
     | 
    
         | 
| 
       65 
77 
     | 
    
         
             
            rubyforge_project: 
         
     | 
| 
       66 
     | 
    
         
            -
            rubygems_version: 1.3. 
     | 
| 
      
 78 
     | 
    
         
            +
            rubygems_version: 1.3.7
         
     | 
| 
       67 
79 
     | 
    
         
             
            signing_key: 
         
     | 
| 
       68 
80 
     | 
    
         
             
            specification_version: 3
         
     | 
| 
       69 
81 
     | 
    
         
             
            summary: Port of UEA-Lite Stemmer to Ruby, a conservative stemmer for search and indexing.
         
     |