picky 4.25.3 → 4.26.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/picky/category_indexed.rb +11 -3
- data/lib/picky/query/token.rb +16 -0
- data/lib/picky/tokenizer.rb +1 -1
- data/spec/functional/stemming_spec.rb +50 -32
- data/spec/functional/symbol_keys_spec.rb +41 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 361f560a9705c05e6e4105e283edb8af462f6d12
|
4
|
+
data.tar.gz: 6e7430ed38b525ed918cfc8633659ce6f978f142
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7fd4d1022e3ea493a015579a4020b2d30a6058c0231fe3429e4ca255f697e30e78391afc5591b6f229ad504fed5b3f2b1a35c12e373243a494bb2275c54df80
|
7
|
+
data.tar.gz: fa28a3b92a92ba1d6668ee52e5dd1c9dfc3802b1cd47e2262b092c6469af9c79a1c56fbfda8838f1e545b1ced50a813dc98b48ea130cb0b735254c85f377d3a5
|
data/lib/picky/category_indexed.rb
CHANGED
@@ -25,7 +25,11 @@ module Picky
|
|
25
25
|
weight && (weight + (sum || 0)) || sum
|
26
26
|
end
|
27
27
|
else
|
28
|
-
|
28
|
+
if tokenizer && tokenizer.stemmer?
|
29
|
+
bundle.weight token.stem(tokenizer)
|
30
|
+
else
|
31
|
+
bundle.weight token.text
|
32
|
+
end
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -45,14 +49,18 @@ module Picky
|
|
45
49
|
ids.empty? ? result : result << ids
|
46
50
|
end.flatten
|
47
51
|
else
|
48
|
-
|
52
|
+
# Optimization
|
53
|
+
if tokenizer && tokenizer.stemmer?
|
54
|
+
bundle.ids token.stem(tokenizer)
|
55
|
+
else
|
56
|
+
bundle.ids token.text
|
57
|
+
end
|
49
58
|
end
|
50
59
|
end
|
51
60
|
|
52
61
|
# Returns the right index bundle for this token.
|
53
62
|
#
|
54
63
|
def bundle_for token
|
55
|
-
# token.partial? ? partial : exact
|
56
64
|
token.select_bundle exact, partial
|
57
65
|
end
|
58
66
|
|
data/lib/picky/query/token.rb
CHANGED
@@ -82,6 +82,22 @@ module Picky
|
|
82
82
|
def select_bundle exact, partial
|
83
83
|
@partial ? partial : exact
|
84
84
|
end
|
85
|
+
|
86
|
+
# Generates a reused stem.
|
87
|
+
#
|
88
|
+
# TODO Probably should not cache, as not
|
89
|
+
# the same stemmer will be used always.
|
90
|
+
#
|
91
|
+
def stem tokenizer
|
92
|
+
if stem?
|
93
|
+
@stem ||= tokenizer.stem(@text)
|
94
|
+
else
|
95
|
+
@text
|
96
|
+
end
|
97
|
+
end
|
98
|
+
def stem?
|
99
|
+
@text !~ @@no_partial
|
100
|
+
end
|
85
101
|
|
86
102
|
# Partial is a conditional setter.
|
87
103
|
#
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -282,7 +282,7 @@ ERROR
|
|
282
282
|
#
|
283
283
|
def tokens_for words
|
284
284
|
words.collect! { |word| word.downcase!; word } if downcase?
|
285
|
-
words.collect! { |word| stem word } if stemmer?
|
285
|
+
words.collect! { |word| stem word } if stemmer? # Usually only done in indexing step.
|
286
286
|
words
|
287
287
|
end
|
288
288
|
|
data/spec/functional/stemming_spec.rb
CHANGED
@@ -6,18 +6,19 @@ require 'stemmer'
|
|
6
6
|
require 'lingua/stemmer'
|
7
7
|
|
8
8
|
describe 'stemming' do
|
9
|
-
let(:stemmer) {
|
10
|
-
# Fast stemmer does not conform with the API.
|
11
|
-
#
|
12
|
-
module Stemmer
|
13
|
-
class << self
|
14
|
-
alias_method :stem, :stem_word
|
15
|
-
end
|
16
|
-
end
|
17
|
-
Stemmer
|
18
|
-
}
|
19
9
|
|
20
|
-
describe '
|
10
|
+
describe 'per-index stemming' do
|
11
|
+
let(:stemmer) {
|
12
|
+
# Fast stemmer does not conform with the API.
|
13
|
+
#
|
14
|
+
module Stemmer
|
15
|
+
class << self
|
16
|
+
alias_method :stem, :stem_word
|
17
|
+
end
|
18
|
+
end
|
19
|
+
Stemmer
|
20
|
+
}
|
21
|
+
|
21
22
|
it 'works correctly' do
|
22
23
|
tokenizer = Picky::Tokenizer.new(stems_with: stemmer)
|
23
24
|
|
@@ -58,15 +59,7 @@ describe 'stemming' do
|
|
58
59
|
|
59
60
|
try = Picky::Search.new index
|
60
61
|
|
61
|
-
#
|
62
|
-
#
|
63
|
-
try.search("text:stemming").ids.should == []
|
64
|
-
|
65
|
-
try = Picky::Search.new index do
|
66
|
-
searching stems_with: Stemmer
|
67
|
-
end
|
68
|
-
|
69
|
-
# With stemming in search AND indexing, it works :)
|
62
|
+
# Stems for both, so finds both.
|
70
63
|
#
|
71
64
|
try.search("text:stemming").ids.should == [2, 1]
|
72
65
|
try.search("text:lem").ids.should == [2]
|
@@ -80,27 +73,52 @@ describe 'stemming' do
|
|
80
73
|
# eg. Lemming!, then stemming won't work.
|
81
74
|
#
|
82
75
|
indexing removes_characters: /[^a-z\s]/i,
|
83
|
-
stems_with: Lingua::Stemmer.new
|
76
|
+
stems_with: Lingua::Stemmer.new # Both stem
|
84
77
|
category :text
|
85
78
|
end
|
86
|
-
|
79
|
+
|
87
80
|
index.replace_from id: 1, text: "Hello good Sirs, these things here need stems to work!"
|
88
81
|
index.replace_from id: 2, text: "Stemming Lemming!"
|
89
82
|
|
90
83
|
try = Picky::Search.new index
|
91
|
-
|
92
|
-
# If you don't stem in the search, it should not be found!
|
93
|
-
#
|
94
|
-
try.search("text:stemming").ids.should == []
|
95
84
|
|
96
|
-
try = Picky::Search.new index do
|
97
|
-
searching stems_with: Lingua::Stemmer.new
|
98
|
-
end
|
99
|
-
|
100
|
-
# With stemming in search AND indexing, it works :)
|
101
|
-
#
|
102
85
|
try.search("text:stemming").ids.should == [2, 1]
|
103
86
|
try.search("text:lem").ids.should == [2]
|
104
87
|
end
|
105
88
|
end
|
89
|
+
|
90
|
+
describe 'per-category stemming' do
|
91
|
+
describe 'mixed stemming categories' do
|
92
|
+
it 'stems some but not others' do
|
93
|
+
index = Picky::Index.new :stemming do
|
94
|
+
# Be aware that if !s are not removed from
|
95
|
+
# eg. Lemming!, then stemming won't work.
|
96
|
+
#
|
97
|
+
indexing removes_characters: /[^a-z\s]/i
|
98
|
+
category :text1,
|
99
|
+
partial: Picky::Partial::None.new,
|
100
|
+
indexing: { stems_with: Lingua::Stemmer.new }
|
101
|
+
category :text2,
|
102
|
+
partial: Picky::Partial::None.new
|
103
|
+
end
|
104
|
+
|
105
|
+
index.replace_from id: 1, text1: 'stemming', text2: 'ios'
|
106
|
+
index.replace_from id: 2, text1: 'ios', text2: 'stemming'
|
107
|
+
|
108
|
+
try = Picky::Search.new index
|
109
|
+
|
110
|
+
try.search("text1:stemming").ids.should == [1]
|
111
|
+
try.search("text2:ios").ids.should == [1]
|
112
|
+
|
113
|
+
try.search("text1:ios").ids.should == [2]
|
114
|
+
try.search("text2:stemming").ids.should == [2]
|
115
|
+
|
116
|
+
try.search("text1:stem").ids.should == [1]
|
117
|
+
try.search("text2:io").ids.should == []
|
118
|
+
|
119
|
+
try.search("text1:io").ids.should == [2]
|
120
|
+
try.search("text2:stem").ids.should == []
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
106
124
|
end
|
data/spec/functional/symbol_keys_spec.rb
CHANGED
@@ -12,9 +12,7 @@ describe "Option symbol_keys" do
|
|
12
12
|
Picky::Search.new(index) { symbol_keys }
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
#
|
17
|
-
it 'can enumerate through the allocations' do
|
15
|
+
it 'returns results' do
|
18
16
|
index.category :text
|
19
17
|
|
20
18
|
thing = OpenStruct.new id: 1, text: "ohai"
|
@@ -25,5 +23,45 @@ describe "Option symbol_keys" do
|
|
25
23
|
|
26
24
|
try.search("text:ohai").ids.should == [2, 1]
|
27
25
|
end
|
26
|
+
|
27
|
+
it 'works with facets' do
|
28
|
+
index.category :text
|
29
|
+
|
30
|
+
thing = OpenStruct.new id: 1, text: "ohai"
|
31
|
+
other = OpenStruct.new id: 2, text: "ohai kthxbye"
|
32
|
+
|
33
|
+
index.add thing
|
34
|
+
index.add other
|
35
|
+
|
36
|
+
index.facets(:text).should == { ohai: 2, kthxbye: 1 }
|
37
|
+
try.facets(:text).should == { ohai: 2, kthxbye: 1 }
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'actually uses symbols - paranoia' do
|
41
|
+
index.category :text
|
42
|
+
|
43
|
+
thing = OpenStruct.new id: 1, text: "ohai"
|
44
|
+
|
45
|
+
index.add thing
|
46
|
+
|
47
|
+
index[:text].exact.inverted[:ohai].should == [1]
|
48
|
+
index[:text].exact.weights[:ohai].should == 0.0
|
49
|
+
index[:text].exact.realtime[1].should == [:ohai]
|
50
|
+
index[:text].exact.similarity[:ohai].should == nil
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'does the internals right - uses symbols' do
|
54
|
+
index.category :text
|
55
|
+
|
56
|
+
thing = OpenStruct.new id: 1, text: "ohai"
|
57
|
+
|
58
|
+
index.add thing
|
59
|
+
|
60
|
+
index[:text].exact.inverted.should == { ohai: [1] }
|
61
|
+
index[:text].exact.weights.should == { ohai: 0.0 }
|
62
|
+
# TODO This could be removed if sorting was always explicitly done.
|
63
|
+
index[:text].exact.realtime.should == { 1 => [:ohai] }
|
64
|
+
index[:text].exact.similarity.should == {}
|
65
|
+
end
|
28
66
|
|
29
67
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.25.3
|
4
|
+
version: 4.26.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|