picky 4.20.0 → 4.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/picky/tokenizer.rb +8 -6
 - data/spec/lib/tokenizer_spec.rb +52 -1
 - metadata +2 -2
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: f25d9203dd2ec9711b17ff15a7f8efe8e4625e63
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: 3422b02dc81bf9af884d0f54db2d94b53eedf284
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 8b6b0d73bbd233558fc64cbb65bd1975c5f37b260071d90c363933ac3863435a30b6067574ce48cdb16e117976da6713f05d5108eb47d0f9620de65e8666ab6b
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: 214ca1310b052232bafef670a2dc8013bd0c09c70f9ce7ea2e90338c858d71fd81e7e1a0a080bb91aff64c458b63f7a329abad8f9be543c558aee3747daf3996
         
     | 
    
        data/lib/picky/tokenizer.rb
    CHANGED
    
    | 
         @@ -59,11 +59,10 @@ Case sensitive?     #{@case_sensitive ? "Yes." : "-"} 
     | 
|
| 
       59 
59 
     | 
    
         | 
| 
       60 
60 
     | 
    
         
             
                # Stopwords.
         
     | 
| 
       61 
61 
     | 
    
         
             
                #
         
     | 
| 
       62 
     | 
    
         
            -
                # We  
     | 
| 
       63 
     | 
    
         
            -
                # too for gsub! - it's too hard to understand)
         
     | 
| 
      
 62 
     | 
    
         
            +
                # We even allow Strings even if it's hard to understand.
         
     | 
| 
       64 
63 
     | 
    
         
             
                #
         
     | 
| 
       65 
64 
     | 
    
         
             
                def stopwords regexp
         
     | 
| 
       66 
     | 
    
         
            -
                  check_argument_in __method__, Regexp, regexp
         
     | 
| 
      
 65 
     | 
    
         
            +
                  check_argument_in __method__, [Regexp, String, FalseClass], regexp
         
     | 
| 
       67 
66 
     | 
    
         
             
                  @remove_stopwords_regexp = regexp
         
     | 
| 
       68 
67 
     | 
    
         
             
                end
         
     | 
| 
       69 
68 
     | 
    
         
             
                def remove_stopwords text
         
     | 
| 
         @@ -83,7 +82,7 @@ Case sensitive?     #{@case_sensitive ? "Yes." : "-"} 
     | 
|
| 
       83 
82 
     | 
    
         
             
                # too for gsub! - it's too hard to understand)
         
     | 
| 
       84 
83 
     | 
    
         
             
                #
         
     | 
| 
       85 
84 
     | 
    
         
             
                def removes_characters regexp
         
     | 
| 
       86 
     | 
    
         
            -
                  check_argument_in __method__, Regexp, regexp
         
     | 
| 
      
 85 
     | 
    
         
            +
                  check_argument_in __method__, [Regexp, FalseClass], regexp
         
     | 
| 
       87 
86 
     | 
    
         
             
                  @removes_characters_regexp = regexp
         
     | 
| 
       88 
87 
     | 
    
         
             
                end
         
     | 
| 
       89 
88 
     | 
    
         
             
                def remove_illegals text
         
     | 
| 
         @@ -192,8 +191,11 @@ Case sensitive?     #{@case_sensitive ? "Yes." : "-"} 
     | 
|
| 
       192 
191 
     | 
    
         | 
| 
       193 
192 
     | 
    
         
             
                # Checks if the right argument type has been given.
         
     | 
| 
       194 
193 
     | 
    
         
             
                #
         
     | 
| 
       195 
     | 
    
         
            -
                def check_argument_in method,  
     | 
| 
       196 
     | 
    
         
            -
                   
     | 
| 
      
 194 
     | 
    
         
            +
                def check_argument_in method, types, argument, &condition
         
     | 
| 
      
 195 
     | 
    
         
            +
                  types = [*types]
         
     | 
| 
      
 196 
     | 
    
         
            +
                  unless types.any? { |type| type === argument }
         
     | 
| 
      
 197 
     | 
    
         
            +
                    raise ArgumentError.new "Application##{method} takes any of #{types.join(', ')} as argument, but not a #{argument.class}."
         
     | 
| 
      
 198 
     | 
    
         
            +
                  end
         
     | 
| 
       197 
199 
     | 
    
         
             
                end
         
     | 
| 
       198 
200 
     | 
    
         | 
| 
       199 
201 
     | 
    
         
             
                attr_reader :substituter, :stemmer
         
     | 
    
        data/spec/lib/tokenizer_spec.rb
    CHANGED
    
    | 
         @@ -236,11 +236,22 @@ ERROR 
     | 
|
| 
       236 
236 
     | 
    
         
             
                      tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
         
     | 
| 
       237 
237 
     | 
    
         
             
                    end
         
     | 
| 
       238 
238 
     | 
    
         
             
                  end
         
     | 
| 
      
 239 
     | 
    
         
            +
                  context "with removes_characters called using false" do
         
     | 
| 
      
 240 
     | 
    
         
            +
                    before(:each) do
         
     | 
| 
      
 241 
     | 
    
         
            +
                      tokenizer.removes_characters false
         
     | 
| 
      
 242 
     | 
    
         
            +
                    end
         
     | 
| 
      
 243 
     | 
    
         
            +
                    it "has remove_illegals" do
         
     | 
| 
      
 244 
     | 
    
         
            +
                      expect { tokenizer.remove_illegals('abcdefghijklmnop') }.to_not raise_error
         
     | 
| 
      
 245 
     | 
    
         
            +
                    end
         
     | 
| 
      
 246 
     | 
    
         
            +
                    it "removes illegal characters" do
         
     | 
| 
      
 247 
     | 
    
         
            +
                      tokenizer.remove_illegals('abcdefghijklmnop').should == 'abcdefghijklmnop'
         
     | 
| 
      
 248 
     | 
    
         
            +
                    end
         
     | 
| 
      
 249 
     | 
    
         
            +
                  end
         
     | 
| 
       239 
250 
     | 
    
         
             
                end
         
     | 
| 
       240 
251 
     | 
    
         | 
| 
       241 
252 
     | 
    
         
             
                describe 'stopwords' do
         
     | 
| 
       242 
253 
     | 
    
         
             
                  it 'handles broken arguments' do
         
     | 
| 
       243 
     | 
    
         
            -
                    expect { tokenizer.stopwords( 
     | 
| 
      
 254 
     | 
    
         
            +
                    expect { tokenizer.stopwords(1) }.to raise_error(ArgumentError)
         
     | 
| 
       244 
255 
     | 
    
         
             
                  end
         
     | 
| 
       245 
256 
     | 
    
         
             
                  context 'without stopwords given' do
         
     | 
| 
       246 
257 
     | 
    
         
             
                    it 'should define a method remove_stopwords' do
         
     | 
| 
         @@ -253,6 +264,26 @@ ERROR 
     | 
|
| 
       253 
264 
     | 
    
         
             
                      expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
         
     | 
| 
       254 
265 
     | 
    
         
             
                    end
         
     | 
| 
       255 
266 
     | 
    
         
             
                  end
         
     | 
| 
      
 267 
     | 
    
         
            +
                  context 'with stopwords given' do
         
     | 
| 
      
 268 
     | 
    
         
            +
                    before(:each) do
         
     | 
| 
      
 269 
     | 
    
         
            +
                      tokenizer.stopwords('t')
         
     | 
| 
      
 270 
     | 
    
         
            +
                    end
         
     | 
| 
      
 271 
     | 
    
         
            +
                    it 'should define a method remove_stopwords' do
         
     | 
| 
      
 272 
     | 
    
         
            +
                      lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
         
     | 
| 
      
 273 
     | 
    
         
            +
                    end
         
     | 
| 
      
 274 
     | 
    
         
            +
                    it 'should define a method stopwords that removes stopwords' do
         
     | 
| 
      
 275 
     | 
    
         
            +
                      tokenizer.remove_stopwords('from this text').should == 'from his ex'
         
     | 
| 
      
 276 
     | 
    
         
            +
                    end
         
     | 
| 
      
 277 
     | 
    
         
            +
                    it 'should define a method remove_non_single_stopwords' do
         
     | 
| 
      
 278 
     | 
    
         
            +
                      expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
         
     | 
| 
      
 279 
     | 
    
         
            +
                    end
         
     | 
| 
      
 280 
     | 
    
         
            +
                    it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
         
     | 
| 
      
 281 
     | 
    
         
            +
                      tokenizer.remove_non_single_stopwords('rerere rerere').should == 'rerere rerere'
         
     | 
| 
      
 282 
     | 
    
         
            +
                    end
         
     | 
| 
      
 283 
     | 
    
         
            +
                    it 'should define a method remove_non_single_stopwords that does not single stopwords' do
         
     | 
| 
      
 284 
     | 
    
         
            +
                      tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
         
     | 
| 
      
 285 
     | 
    
         
            +
                    end
         
     | 
| 
      
 286 
     | 
    
         
            +
                  end
         
     | 
| 
       256 
287 
     | 
    
         
             
                  context 'with stopwords given' do
         
     | 
| 
       257 
288 
     | 
    
         
             
                    before(:each) do
         
     | 
| 
       258 
289 
     | 
    
         
             
                      tokenizer.stopwords(/r|e/)
         
     | 
| 
         @@ -273,6 +304,26 @@ ERROR 
     | 
|
| 
       273 
304 
     | 
    
         
             
                      tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
         
     | 
| 
       274 
305 
     | 
    
         
             
                    end
         
     | 
| 
       275 
306 
     | 
    
         
             
                  end
         
     | 
| 
      
 307 
     | 
    
         
            +
                  context 'with stopwords explicitly not given' do
         
     | 
| 
      
 308 
     | 
    
         
            +
                    before(:each) do
         
     | 
| 
      
 309 
     | 
    
         
            +
                      tokenizer.stopwords(false)
         
     | 
| 
      
 310 
     | 
    
         
            +
                    end
         
     | 
| 
      
 311 
     | 
    
         
            +
                    it 'should define a method remove_stopwords' do
         
     | 
| 
      
 312 
     | 
    
         
            +
                      lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
         
     | 
| 
      
 313 
     | 
    
         
            +
                    end
         
     | 
| 
      
 314 
     | 
    
         
            +
                    it 'should define a method stopwords that removes stopwords' do
         
     | 
| 
      
 315 
     | 
    
         
            +
                      tokenizer.remove_stopwords('from this text').should == 'from this text'
         
     | 
| 
      
 316 
     | 
    
         
            +
                    end
         
     | 
| 
      
 317 
     | 
    
         
            +
                    it 'should define a method remove_non_single_stopwords' do
         
     | 
| 
      
 318 
     | 
    
         
            +
                      expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
         
     | 
| 
      
 319 
     | 
    
         
            +
                    end
         
     | 
| 
      
 320 
     | 
    
         
            +
                    it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
         
     | 
| 
      
 321 
     | 
    
         
            +
                      tokenizer.remove_non_single_stopwords('rerere rerere').should == 'rerere rerere'
         
     | 
| 
      
 322 
     | 
    
         
            +
                    end
         
     | 
| 
      
 323 
     | 
    
         
            +
                    it 'should define a method remove_non_single_stopwords that does not single stopwords' do
         
     | 
| 
      
 324 
     | 
    
         
            +
                      tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
         
     | 
| 
      
 325 
     | 
    
         
            +
                    end
         
     | 
| 
      
 326 
     | 
    
         
            +
                  end
         
     | 
| 
       276 
327 
     | 
    
         
             
                  context 'error case' do
         
     | 
| 
       277 
328 
     | 
    
         
             
                    before(:each) do
         
     | 
| 
       278 
329 
     | 
    
         
             
                      tokenizer.stopwords(/any/)
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,14 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: picky
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 4.20.0
     | 
| 
      
 4 
     | 
    
         
            +
              version: 4.20.1
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Florian Hanke
         
     | 
| 
       8 
8 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
9 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
10 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2014-02- 
     | 
| 
      
 11 
     | 
    
         
            +
            date: 2014-02-13 00:00:00.000000000 Z
         
     | 
| 
       12 
12 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
13 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
14 
     | 
    
         
             
              name: activesupport
         
     |