picky 4.20.0 → 4.20.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/picky/tokenizer.rb +8 -6
- data/spec/lib/tokenizer_spec.rb +52 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f25d9203dd2ec9711b17ff15a7f8efe8e4625e63
|
4
|
+
data.tar.gz: 3422b02dc81bf9af884d0f54db2d94b53eedf284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b6b0d73bbd233558fc64cbb65bd1975c5f37b260071d90c363933ac3863435a30b6067574ce48cdb16e117976da6713f05d5108eb47d0f9620de65e8666ab6b
|
7
|
+
data.tar.gz: 214ca1310b052232bafef670a2dc8013bd0c09c70f9ce7ea2e90338c858d71fd81e7e1a0a080bb91aff64c458b63f7a329abad8f9be543c558aee3747daf3996
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -59,11 +59,10 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
59
59
|
|
60
60
|
# Stopwords.
|
61
61
|
#
|
62
|
-
# We only allow regexps (even if string would be okay
|
63
|
-
# too for gsub! - it's too hard to understand)
|
62
|
+
# We even allow Strings even if it's hard to understand.
|
64
63
|
#
|
65
64
|
def stopwords regexp
|
66
|
-
check_argument_in __method__, Regexp, regexp
|
65
|
+
check_argument_in __method__, [Regexp, String, FalseClass], regexp
|
67
66
|
@remove_stopwords_regexp = regexp
|
68
67
|
end
|
69
68
|
def remove_stopwords text
|
@@ -83,7 +82,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
83
82
|
# too for gsub! - it's too hard to understand)
|
84
83
|
#
|
85
84
|
def removes_characters regexp
|
86
|
-
check_argument_in __method__, Regexp, regexp
|
85
|
+
check_argument_in __method__, [Regexp, FalseClass], regexp
|
87
86
|
@removes_characters_regexp = regexp
|
88
87
|
end
|
89
88
|
def remove_illegals text
|
@@ -192,8 +191,11 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
192
191
|
|
193
192
|
# Checks if the right argument type has been given.
|
194
193
|
#
|
195
|
-
def check_argument_in method,
|
196
|
-
|
194
|
+
def check_argument_in method, types, argument, &condition
|
195
|
+
types = [*types]
|
196
|
+
unless types.any? { |type| type === argument }
|
197
|
+
raise ArgumentError.new "Application##{method} takes any of #{types.join(', ')} as argument, but not a #{argument.class}."
|
198
|
+
end
|
197
199
|
end
|
198
200
|
|
199
201
|
attr_reader :substituter, :stemmer
|
data/spec/lib/tokenizer_spec.rb
CHANGED
@@ -236,11 +236,22 @@ ERROR
|
|
236
236
|
tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
237
237
|
end
|
238
238
|
end
|
239
|
+
context "with removes_characters called using false" do
|
240
|
+
before(:each) do
|
241
|
+
tokenizer.removes_characters false
|
242
|
+
end
|
243
|
+
it "has remove_illegals" do
|
244
|
+
expect { tokenizer.remove_illegals('abcdefghijklmnop') }.to_not raise_error
|
245
|
+
end
|
246
|
+
it "removes illegal characters" do
|
247
|
+
tokenizer.remove_illegals('abcdefghijklmnop').should == 'abcdefghijklmnop'
|
248
|
+
end
|
249
|
+
end
|
239
250
|
end
|
240
251
|
|
241
252
|
describe 'stopwords' do
|
242
253
|
it 'handles broken arguments' do
|
243
|
-
expect { tokenizer.stopwords(
|
254
|
+
expect { tokenizer.stopwords(1) }.to raise_error(ArgumentError)
|
244
255
|
end
|
245
256
|
context 'without stopwords given' do
|
246
257
|
it 'should define a method remove_stopwords' do
|
@@ -253,6 +264,26 @@ ERROR
|
|
253
264
|
expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
|
254
265
|
end
|
255
266
|
end
|
267
|
+
context 'with stopwords given' do
|
268
|
+
before(:each) do
|
269
|
+
tokenizer.stopwords('t')
|
270
|
+
end
|
271
|
+
it 'should define a method remove_stopwords' do
|
272
|
+
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
273
|
+
end
|
274
|
+
it 'should define a method stopwords that removes stopwords' do
|
275
|
+
tokenizer.remove_stopwords('from this text').should == 'from his ex'
|
276
|
+
end
|
277
|
+
it 'should define a method remove_non_single_stopwords' do
|
278
|
+
expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
|
279
|
+
end
|
280
|
+
it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
|
281
|
+
tokenizer.remove_non_single_stopwords('rerere rerere').should == 'rerere rerere'
|
282
|
+
end
|
283
|
+
it 'should define a method remove_non_single_stopwords that does not single stopwords' do
|
284
|
+
tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
285
|
+
end
|
286
|
+
end
|
256
287
|
context 'with stopwords given' do
|
257
288
|
before(:each) do
|
258
289
|
tokenizer.stopwords(/r|e/)
|
@@ -273,6 +304,26 @@ ERROR
|
|
273
304
|
tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
274
305
|
end
|
275
306
|
end
|
307
|
+
context 'with stopwords explicitly not given' do
|
308
|
+
before(:each) do
|
309
|
+
tokenizer.stopwords(false)
|
310
|
+
end
|
311
|
+
it 'should define a method remove_stopwords' do
|
312
|
+
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
313
|
+
end
|
314
|
+
it 'should define a method stopwords that removes stopwords' do
|
315
|
+
tokenizer.remove_stopwords('from this text').should == 'from this text'
|
316
|
+
end
|
317
|
+
it 'should define a method remove_non_single_stopwords' do
|
318
|
+
expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
|
319
|
+
end
|
320
|
+
it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
|
321
|
+
tokenizer.remove_non_single_stopwords('rerere rerere').should == 'rerere rerere'
|
322
|
+
end
|
323
|
+
it 'should define a method remove_non_single_stopwords that does not single stopwords' do
|
324
|
+
tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
325
|
+
end
|
326
|
+
end
|
276
327
|
context 'error case' do
|
277
328
|
before(:each) do
|
278
329
|
tokenizer.stopwords(/any/)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.20.0
|
4
|
+
version: 4.20.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|