picky 4.20.0 → 4.20.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/picky/tokenizer.rb +8 -6
- data/spec/lib/tokenizer_spec.rb +52 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: f25d9203dd2ec9711b17ff15a7f8efe8e4625e63
|
4
|
+
data.tar.gz: 3422b02dc81bf9af884d0f54db2d94b53eedf284
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8b6b0d73bbd233558fc64cbb65bd1975c5f37b260071d90c363933ac3863435a30b6067574ce48cdb16e117976da6713f05d5108eb47d0f9620de65e8666ab6b
|
7
|
+
data.tar.gz: 214ca1310b052232bafef670a2dc8013bd0c09c70f9ce7ea2e90338c858d71fd81e7e1a0a080bb91aff64c458b63f7a329abad8f9be543c558aee3747daf3996
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -59,11 +59,10 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
59
59
|
|
60
60
|
# Stopwords.
|
61
61
|
#
|
62
|
-
# We
|
63
|
-
# too for gsub! - it's too hard to understand)
|
62
|
+
# We even allow Strings even if it's hard to understand.
|
64
63
|
#
|
65
64
|
def stopwords regexp
|
66
|
-
check_argument_in __method__, Regexp, regexp
|
65
|
+
check_argument_in __method__, [Regexp, String, FalseClass], regexp
|
67
66
|
@remove_stopwords_regexp = regexp
|
68
67
|
end
|
69
68
|
def remove_stopwords text
|
@@ -83,7 +82,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
83
82
|
# too for gsub! - it's too hard to understand)
|
84
83
|
#
|
85
84
|
def removes_characters regexp
|
86
|
-
check_argument_in __method__, Regexp, regexp
|
85
|
+
check_argument_in __method__, [Regexp, FalseClass], regexp
|
87
86
|
@removes_characters_regexp = regexp
|
88
87
|
end
|
89
88
|
def remove_illegals text
|
@@ -192,8 +191,11 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
192
191
|
|
193
192
|
# Checks if the right argument type has been given.
|
194
193
|
#
|
195
|
-
def check_argument_in method,
|
196
|
-
|
194
|
+
def check_argument_in method, types, argument, &condition
|
195
|
+
types = [*types]
|
196
|
+
unless types.any? { |type| type === argument }
|
197
|
+
raise ArgumentError.new "Application##{method} takes any of #{types.join(', ')} as argument, but not a #{argument.class}."
|
198
|
+
end
|
197
199
|
end
|
198
200
|
|
199
201
|
attr_reader :substituter, :stemmer
|
data/spec/lib/tokenizer_spec.rb
CHANGED
@@ -236,11 +236,22 @@ ERROR
|
|
236
236
|
tokenizer.remove_illegals('abcdefghijklmnop').should == 'bcdeghijklmnp'
|
237
237
|
end
|
238
238
|
end
|
239
|
+
context "with removes_characters called using false" do
|
240
|
+
before(:each) do
|
241
|
+
tokenizer.removes_characters false
|
242
|
+
end
|
243
|
+
it "has remove_illegals" do
|
244
|
+
expect { tokenizer.remove_illegals('abcdefghijklmnop') }.to_not raise_error
|
245
|
+
end
|
246
|
+
it "removes illegal characters" do
|
247
|
+
tokenizer.remove_illegals('abcdefghijklmnop').should == 'abcdefghijklmnop'
|
248
|
+
end
|
249
|
+
end
|
239
250
|
end
|
240
251
|
|
241
252
|
describe 'stopwords' do
|
242
253
|
it 'handles broken arguments' do
|
243
|
-
expect { tokenizer.stopwords(
|
254
|
+
expect { tokenizer.stopwords(1) }.to raise_error(ArgumentError)
|
244
255
|
end
|
245
256
|
context 'without stopwords given' do
|
246
257
|
it 'should define a method remove_stopwords' do
|
@@ -253,6 +264,26 @@ ERROR
|
|
253
264
|
expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
|
254
265
|
end
|
255
266
|
end
|
267
|
+
context 'with stopwords given' do
|
268
|
+
before(:each) do
|
269
|
+
tokenizer.stopwords('t')
|
270
|
+
end
|
271
|
+
it 'should define a method remove_stopwords' do
|
272
|
+
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
273
|
+
end
|
274
|
+
it 'should define a method stopwords that removes stopwords' do
|
275
|
+
tokenizer.remove_stopwords('from this text').should == 'from his ex'
|
276
|
+
end
|
277
|
+
it 'should define a method remove_non_single_stopwords' do
|
278
|
+
expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
|
279
|
+
end
|
280
|
+
it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
|
281
|
+
tokenizer.remove_non_single_stopwords('rerere rerere').should == 'rerere rerere'
|
282
|
+
end
|
283
|
+
it 'should define a method remove_non_single_stopwords that does not single stopwords' do
|
284
|
+
tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
285
|
+
end
|
286
|
+
end
|
256
287
|
context 'with stopwords given' do
|
257
288
|
before(:each) do
|
258
289
|
tokenizer.stopwords(/r|e/)
|
@@ -273,6 +304,26 @@ ERROR
|
|
273
304
|
tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
274
305
|
end
|
275
306
|
end
|
307
|
+
context 'with stopwords explicitly not given' do
|
308
|
+
before(:each) do
|
309
|
+
tokenizer.stopwords(false)
|
310
|
+
end
|
311
|
+
it 'should define a method remove_stopwords' do
|
312
|
+
lambda { tokenizer.remove_stopwords('from this text') }.should_not raise_error
|
313
|
+
end
|
314
|
+
it 'should define a method stopwords that removes stopwords' do
|
315
|
+
tokenizer.remove_stopwords('from this text').should == 'from this text'
|
316
|
+
end
|
317
|
+
it 'should define a method remove_non_single_stopwords' do
|
318
|
+
expect { tokenizer.remove_non_single_stopwords('from this text') }.to_not raise_error
|
319
|
+
end
|
320
|
+
it 'should define a method remove_non_single_stopwords that removes non-single stopwords' do
|
321
|
+
tokenizer.remove_non_single_stopwords('rerere rerere').should == 'rerere rerere'
|
322
|
+
end
|
323
|
+
it 'should define a method remove_non_single_stopwords that does not single stopwords' do
|
324
|
+
tokenizer.remove_non_single_stopwords('rerere').should == 'rerere'
|
325
|
+
end
|
326
|
+
end
|
276
327
|
context 'error case' do
|
277
328
|
before(:each) do
|
278
329
|
tokenizer.stopwords(/any/)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.20.0
|
4
|
+
version: 4.20.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2014-02-
|
11
|
+
date: 2014-02-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|