picky 4.25.3 → 4.26.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/picky/category_indexed.rb +11 -3
- data/lib/picky/query/token.rb +16 -0
- data/lib/picky/tokenizer.rb +1 -1
- data/spec/functional/stemming_spec.rb +50 -32
- data/spec/functional/symbol_keys_spec.rb +41 -3
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 361f560a9705c05e6e4105e283edb8af462f6d12
|
4
|
+
data.tar.gz: 6e7430ed38b525ed918cfc8633659ce6f978f142
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d7fd4d1022e3ea493a015579a4020b2d30a6058c0231fe3429e4ca255f697e30e78391afc5591b6f229ad504fed5b3f2b1a35c12e373243a494bb2275c54df80
|
7
|
+
data.tar.gz: fa28a3b92a92ba1d6668ee52e5dd1c9dfc3802b1cd47e2262b092c6469af9c79a1c56fbfda8838f1e545b1ced50a813dc98b48ea130cb0b735254c85f377d3a5
|
data/lib/picky/category_indexed.rb
CHANGED
@@ -25,7 +25,11 @@ module Picky
|
|
25
25
|
weight && (weight + (sum || 0)) || sum
|
26
26
|
end
|
27
27
|
else
|
28
|
-
|
28
|
+
if tokenizer && tokenizer.stemmer?
|
29
|
+
bundle.weight token.stem(tokenizer)
|
30
|
+
else
|
31
|
+
bundle.weight token.text
|
32
|
+
end
|
29
33
|
end
|
30
34
|
end
|
31
35
|
|
@@ -45,14 +49,18 @@ module Picky
|
|
45
49
|
ids.empty? ? result : result << ids
|
46
50
|
end.flatten
|
47
51
|
else
|
48
|
-
|
52
|
+
# Optimization
|
53
|
+
if tokenizer && tokenizer.stemmer?
|
54
|
+
bundle.ids token.stem(tokenizer)
|
55
|
+
else
|
56
|
+
bundle.ids token.text
|
57
|
+
end
|
49
58
|
end
|
50
59
|
end
|
51
60
|
|
52
61
|
# Returns the right index bundle for this token.
|
53
62
|
#
|
54
63
|
def bundle_for token
|
55
|
-
# token.partial? ? partial : exact
|
56
64
|
token.select_bundle exact, partial
|
57
65
|
end
|
58
66
|
|
data/lib/picky/query/token.rb
CHANGED
@@ -82,6 +82,22 @@ module Picky
|
|
82
82
|
def select_bundle exact, partial
|
83
83
|
@partial ? partial : exact
|
84
84
|
end
|
85
|
+
|
86
|
+
# Generates a reused stem.
|
87
|
+
#
|
88
|
+
# TODO Probably should not cache, as not
|
89
|
+
# the same stemmer will be used always.
|
90
|
+
#
|
91
|
+
def stem tokenizer
|
92
|
+
if stem?
|
93
|
+
@stem ||= tokenizer.stem(@text)
|
94
|
+
else
|
95
|
+
@text
|
96
|
+
end
|
97
|
+
end
|
98
|
+
def stem?
|
99
|
+
@text !~ @@no_partial
|
100
|
+
end
|
85
101
|
|
86
102
|
# Partial is a conditional setter.
|
87
103
|
#
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -282,7 +282,7 @@ ERROR
|
|
282
282
|
#
|
283
283
|
def tokens_for words
|
284
284
|
words.collect! { |word| word.downcase!; word } if downcase?
|
285
|
-
words.collect! { |word| stem word } if stemmer?
|
285
|
+
words.collect! { |word| stem word } if stemmer? # Usually only done in indexing step.
|
286
286
|
words
|
287
287
|
end
|
288
288
|
|
data/spec/functional/stemming_spec.rb
CHANGED
@@ -6,18 +6,19 @@ require 'stemmer'
|
|
6
6
|
require 'lingua/stemmer'
|
7
7
|
|
8
8
|
describe 'stemming' do
|
9
|
-
let(:stemmer) {
|
10
|
-
# Fast stemmer does not conform with the API.
|
11
|
-
#
|
12
|
-
module Stemmer
|
13
|
-
class << self
|
14
|
-
alias_method :stem, :stem_word
|
15
|
-
end
|
16
|
-
end
|
17
|
-
Stemmer
|
18
|
-
}
|
19
9
|
|
20
|
-
describe '
|
10
|
+
describe 'per-index stemming' do
|
11
|
+
let(:stemmer) {
|
12
|
+
# Fast stemmer does not conform with the API.
|
13
|
+
#
|
14
|
+
module Stemmer
|
15
|
+
class << self
|
16
|
+
alias_method :stem, :stem_word
|
17
|
+
end
|
18
|
+
end
|
19
|
+
Stemmer
|
20
|
+
}
|
21
|
+
|
21
22
|
it 'works correctly' do
|
22
23
|
tokenizer = Picky::Tokenizer.new(stems_with: stemmer)
|
23
24
|
|
@@ -58,15 +59,7 @@ describe 'stemming' do
|
|
58
59
|
|
59
60
|
try = Picky::Search.new index
|
60
61
|
|
61
|
-
#
|
62
|
-
#
|
63
|
-
try.search("text:stemming").ids.should == []
|
64
|
-
|
65
|
-
try = Picky::Search.new index do
|
66
|
-
searching stems_with: Stemmer
|
67
|
-
end
|
68
|
-
|
69
|
-
# With stemming in search AND indexing, it works :)
|
62
|
+
# Stems for both, so finds both.
|
70
63
|
#
|
71
64
|
try.search("text:stemming").ids.should == [2, 1]
|
72
65
|
try.search("text:lem").ids.should == [2]
|
@@ -80,27 +73,52 @@ describe 'stemming' do
|
|
80
73
|
# eg. Lemming!, then stemming won't work.
|
81
74
|
#
|
82
75
|
indexing removes_characters: /[^a-z\s]/i,
|
83
|
-
stems_with: Lingua::Stemmer.new
|
76
|
+
stems_with: Lingua::Stemmer.new # Both stem
|
84
77
|
category :text
|
85
78
|
end
|
86
|
-
|
79
|
+
|
87
80
|
index.replace_from id: 1, text: "Hello good Sirs, these things here need stems to work!"
|
88
81
|
index.replace_from id: 2, text: "Stemming Lemming!"
|
89
82
|
|
90
83
|
try = Picky::Search.new index
|
91
|
-
|
92
|
-
# If you don't stem in the search, it should not be found!
|
93
|
-
#
|
94
|
-
try.search("text:stemming").ids.should == []
|
95
84
|
|
96
|
-
try = Picky::Search.new index do
|
97
|
-
searching stems_with: Lingua::Stemmer.new
|
98
|
-
end
|
99
|
-
|
100
|
-
# With stemming in search AND indexing, it works :)
|
101
|
-
#
|
102
85
|
try.search("text:stemming").ids.should == [2, 1]
|
103
86
|
try.search("text:lem").ids.should == [2]
|
104
87
|
end
|
105
88
|
end
|
89
|
+
|
90
|
+
describe 'per-category stemming' do
|
91
|
+
describe 'mixed stemming categories' do
|
92
|
+
it 'stems some but not others' do
|
93
|
+
index = Picky::Index.new :stemming do
|
94
|
+
# Be aware that if !s are not removed from
|
95
|
+
# eg. Lemming!, then stemming won't work.
|
96
|
+
#
|
97
|
+
indexing removes_characters: /[^a-z\s]/i
|
98
|
+
category :text1,
|
99
|
+
partial: Picky::Partial::None.new,
|
100
|
+
indexing: { stems_with: Lingua::Stemmer.new }
|
101
|
+
category :text2,
|
102
|
+
partial: Picky::Partial::None.new
|
103
|
+
end
|
104
|
+
|
105
|
+
index.replace_from id: 1, text1: 'stemming', text2: 'ios'
|
106
|
+
index.replace_from id: 2, text1: 'ios', text2: 'stemming'
|
107
|
+
|
108
|
+
try = Picky::Search.new index
|
109
|
+
|
110
|
+
try.search("text1:stemming").ids.should == [1]
|
111
|
+
try.search("text2:ios").ids.should == [1]
|
112
|
+
|
113
|
+
try.search("text1:ios").ids.should == [2]
|
114
|
+
try.search("text2:stemming").ids.should == [2]
|
115
|
+
|
116
|
+
try.search("text1:stem").ids.should == [1]
|
117
|
+
try.search("text2:io").ids.should == []
|
118
|
+
|
119
|
+
try.search("text1:io").ids.should == [2]
|
120
|
+
try.search("text2:stem").ids.should == []
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|
106
124
|
end
|
data/spec/functional/symbol_keys_spec.rb
CHANGED
@@ -12,9 +12,7 @@ describe "Option symbol_keys" do
|
|
12
12
|
Picky::Search.new(index) { symbol_keys }
|
13
13
|
end
|
14
14
|
|
15
|
-
|
16
|
-
#
|
17
|
-
it 'can enumerate through the allocations' do
|
15
|
+
it 'returns results' do
|
18
16
|
index.category :text
|
19
17
|
|
20
18
|
thing = OpenStruct.new id: 1, text: "ohai"
|
@@ -25,5 +23,45 @@ describe "Option symbol_keys" do
|
|
25
23
|
|
26
24
|
try.search("text:ohai").ids.should == [2, 1]
|
27
25
|
end
|
26
|
+
|
27
|
+
it 'works with facets' do
|
28
|
+
index.category :text
|
29
|
+
|
30
|
+
thing = OpenStruct.new id: 1, text: "ohai"
|
31
|
+
other = OpenStruct.new id: 2, text: "ohai kthxbye"
|
32
|
+
|
33
|
+
index.add thing
|
34
|
+
index.add other
|
35
|
+
|
36
|
+
index.facets(:text).should == { ohai: 2, kthxbye: 1 }
|
37
|
+
try.facets(:text).should == { ohai: 2, kthxbye: 1 }
|
38
|
+
end
|
39
|
+
|
40
|
+
it 'actually uses symbols - paranoia' do
|
41
|
+
index.category :text
|
42
|
+
|
43
|
+
thing = OpenStruct.new id: 1, text: "ohai"
|
44
|
+
|
45
|
+
index.add thing
|
46
|
+
|
47
|
+
index[:text].exact.inverted[:ohai].should == [1]
|
48
|
+
index[:text].exact.weights[:ohai].should == 0.0
|
49
|
+
index[:text].exact.realtime[1].should == [:ohai]
|
50
|
+
index[:text].exact.similarity[:ohai].should == nil
|
51
|
+
end
|
52
|
+
|
53
|
+
it 'does the internals right - uses symbols' do
|
54
|
+
index.category :text
|
55
|
+
|
56
|
+
thing = OpenStruct.new id: 1, text: "ohai"
|
57
|
+
|
58
|
+
index.add thing
|
59
|
+
|
60
|
+
index[:text].exact.inverted.should == { ohai: [1] }
|
61
|
+
index[:text].exact.weights.should == { ohai: 0.0 }
|
62
|
+
# TODO This could be removed if sorting was always explicitly done.
|
63
|
+
index[:text].exact.realtime.should == { 1 => [:ohai] }
|
64
|
+
index[:text].exact.similarity.should == {}
|
65
|
+
end
|
28
66
|
|
29
67
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.25.3
|
4
|
+
version: 4.26.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Florian Hanke
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-01-
|
11
|
+
date: 2015-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: activesupport
|