picky 4.25.3 → 4.26.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c80310289f311bff593cc485ba9a16a528748ac5
4
- data.tar.gz: 81cbd3f12b05b4b7c709b1eefede543ba0b18ae2
3
+ metadata.gz: 361f560a9705c05e6e4105e283edb8af462f6d12
4
+ data.tar.gz: 6e7430ed38b525ed918cfc8633659ce6f978f142
5
5
  SHA512:
6
- metadata.gz: 5c1193dd935e6cec656979c20c78bcd0d7913ed1205573add458adaccea6e987842bfe356d6fb6dd500a9bf932e8d760917eff0024f9539eea5afe8026ab7176
7
- data.tar.gz: 4aaf6e8987190d760ce4c60f07a7c4d694de5943bda4458176caf5db961e3693abc0c2bda91d879b48ee5625d603ec179b3cdbcb357d7a0c049c6e340468dbaa
6
+ metadata.gz: d7fd4d1022e3ea493a015579a4020b2d30a6058c0231fe3429e4ca255f697e30e78391afc5591b6f229ad504fed5b3f2b1a35c12e373243a494bb2275c54df80
7
+ data.tar.gz: fa28a3b92a92ba1d6668ee52e5dd1c9dfc3802b1cd47e2262b092c6469af9c79a1c56fbfda8838f1e545b1ced50a813dc98b48ea130cb0b735254c85f377d3a5
@@ -25,7 +25,11 @@ module Picky
25
25
  weight && (weight + (sum || 0)) || sum
26
26
  end
27
27
  else
28
- bundle.weight token.text
28
+ if tokenizer && tokenizer.stemmer?
29
+ bundle.weight token.stem(tokenizer)
30
+ else
31
+ bundle.weight token.text
32
+ end
29
33
  end
30
34
  end
31
35
 
@@ -45,14 +49,18 @@ module Picky
45
49
  ids.empty? ? result : result << ids
46
50
  end.flatten
47
51
  else
48
- bundle.ids token.text
52
+ # Optimization
53
+ if tokenizer && tokenizer.stemmer?
54
+ bundle.ids token.stem(tokenizer)
55
+ else
56
+ bundle.ids token.text
57
+ end
49
58
  end
50
59
  end
51
60
 
52
61
  # Returns the right index bundle for this token.
53
62
  #
54
63
  def bundle_for token
55
- # token.partial? ? partial : exact
56
64
  token.select_bundle exact, partial
57
65
  end
58
66
 
@@ -82,6 +82,22 @@ module Picky
82
82
  def select_bundle exact, partial
83
83
  @partial ? partial : exact
84
84
  end
85
+
86
+ # Generates a reused stem.
87
+ #
88
+ # TODO Probably should not cache, as not
89
+ # the same stemmer will be used always.
90
+ #
91
+ def stem tokenizer
92
+ if stem?
93
+ @stem ||= tokenizer.stem(@text)
94
+ else
95
+ @text
96
+ end
97
+ end
98
+ def stem?
99
+ @text !~ @@no_partial
100
+ end
85
101
 
86
102
  # Partial is a conditional setter.
87
103
  #
@@ -282,7 +282,7 @@ ERROR
282
282
  #
283
283
  def tokens_for words
284
284
  words.collect! { |word| word.downcase!; word } if downcase?
285
- words.collect! { |word| stem word } if stemmer?
285
+ words.collect! { |word| stem word } if stemmer? # Usually only done in indexing step.
286
286
  words
287
287
  end
288
288
 
@@ -6,18 +6,19 @@ require 'stemmer'
6
6
  require 'lingua/stemmer'
7
7
 
8
8
  describe 'stemming' do
9
- let(:stemmer) {
10
- # Fast stemmer does not conform with the API.
11
- #
12
- module Stemmer
13
- class << self
14
- alias_method :stem, :stem_word
15
- end
16
- end
17
- Stemmer
18
- }
19
9
 
20
- describe 'examples' do
10
+ describe 'per-index stemming' do
11
+ let(:stemmer) {
12
+ # Fast stemmer does not conform with the API.
13
+ #
14
+ module Stemmer
15
+ class << self
16
+ alias_method :stem, :stem_word
17
+ end
18
+ end
19
+ Stemmer
20
+ }
21
+
21
22
  it 'works correctly' do
22
23
  tokenizer = Picky::Tokenizer.new(stems_with: stemmer)
23
24
 
@@ -58,15 +59,7 @@ describe 'stemming' do
58
59
 
59
60
  try = Picky::Search.new index
60
61
 
61
- # If you don't stem in the search, it should not be found!
62
- #
63
- try.search("text:stemming").ids.should == []
64
-
65
- try = Picky::Search.new index do
66
- searching stems_with: Stemmer
67
- end
68
-
69
- # With stemming in search AND indexing, it works :)
62
+ # Stems for both, so finds both.
70
63
  #
71
64
  try.search("text:stemming").ids.should == [2, 1]
72
65
  try.search("text:lem").ids.should == [2]
@@ -80,27 +73,52 @@ describe 'stemming' do
80
73
  # eg. Lemming!, then stemming won't work.
81
74
  #
82
75
  indexing removes_characters: /[^a-z\s]/i,
83
- stems_with: Lingua::Stemmer.new
76
+ stems_with: Lingua::Stemmer.new # Both stem
84
77
  category :text
85
78
  end
86
-
79
+
87
80
  index.replace_from id: 1, text: "Hello good Sirs, these things here need stems to work!"
88
81
  index.replace_from id: 2, text: "Stemming Lemming!"
89
82
 
90
83
  try = Picky::Search.new index
91
-
92
- # If you don't stem in the search, it should not be found!
93
- #
94
- try.search("text:stemming").ids.should == []
95
84
 
96
- try = Picky::Search.new index do
97
- searching stems_with: Lingua::Stemmer.new
98
- end
99
-
100
- # With stemming in search AND indexing, it works :)
101
- #
102
85
  try.search("text:stemming").ids.should == [2, 1]
103
86
  try.search("text:lem").ids.should == [2]
104
87
  end
105
88
  end
89
+
90
+ describe 'per-category stemming' do
91
+ describe 'mixed stemming categories' do
92
+ it 'stems some but not others' do
93
+ index = Picky::Index.new :stemming do
94
+ # Be aware that if !s are not removed from
95
+ # eg. Lemming!, then stemming won't work.
96
+ #
97
+ indexing removes_characters: /[^a-z\s]/i
98
+ category :text1,
99
+ partial: Picky::Partial::None.new,
100
+ indexing: { stems_with: Lingua::Stemmer.new }
101
+ category :text2,
102
+ partial: Picky::Partial::None.new
103
+ end
104
+
105
+ index.replace_from id: 1, text1: 'stemming', text2: 'ios'
106
+ index.replace_from id: 2, text1: 'ios', text2: 'stemming'
107
+
108
+ try = Picky::Search.new index
109
+
110
+ try.search("text1:stemming").ids.should == [1]
111
+ try.search("text2:ios").ids.should == [1]
112
+
113
+ try.search("text1:ios").ids.should == [2]
114
+ try.search("text2:stemming").ids.should == [2]
115
+
116
+ try.search("text1:stem").ids.should == [1]
117
+ try.search("text2:io").ids.should == []
118
+
119
+ try.search("text1:io").ids.should == [2]
120
+ try.search("text2:stem").ids.should == []
121
+ end
122
+ end
123
+ end
106
124
  end
@@ -12,9 +12,7 @@ describe "Option symbol_keys" do
12
12
  Picky::Search.new(index) { symbol_keys }
13
13
  end
14
14
 
15
- # Test the enumerator abilities.
16
- #
17
- it 'can enumerate through the allocations' do
15
+ it 'returns results' do
18
16
  index.category :text
19
17
 
20
18
  thing = OpenStruct.new id: 1, text: "ohai"
@@ -25,5 +23,45 @@ describe "Option symbol_keys" do
25
23
 
26
24
  try.search("text:ohai").ids.should == [2, 1]
27
25
  end
26
+
27
+ it 'works with facets' do
28
+ index.category :text
29
+
30
+ thing = OpenStruct.new id: 1, text: "ohai"
31
+ other = OpenStruct.new id: 2, text: "ohai kthxbye"
32
+
33
+ index.add thing
34
+ index.add other
35
+
36
+ index.facets(:text).should == { ohai: 2, kthxbye: 1 }
37
+ try.facets(:text).should == { ohai: 2, kthxbye: 1 }
38
+ end
39
+
40
+ it 'actually uses symbols - paranoia' do
41
+ index.category :text
42
+
43
+ thing = OpenStruct.new id: 1, text: "ohai"
44
+
45
+ index.add thing
46
+
47
+ index[:text].exact.inverted[:ohai].should == [1]
48
+ index[:text].exact.weights[:ohai].should == 0.0
49
+ index[:text].exact.realtime[1].should == [:ohai]
50
+ index[:text].exact.similarity[:ohai].should == nil
51
+ end
52
+
53
+ it 'does the internals right - uses symbols' do
54
+ index.category :text
55
+
56
+ thing = OpenStruct.new id: 1, text: "ohai"
57
+
58
+ index.add thing
59
+
60
+ index[:text].exact.inverted.should == { ohai: [1] }
61
+ index[:text].exact.weights.should == { ohai: 0.0 }
62
+ # TODO This could be removed if sorting was always explicitly done.
63
+ index[:text].exact.realtime.should == { 1 => [:ohai] }
64
+ index[:text].exact.similarity.should == {}
65
+ end
28
66
 
29
67
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.25.3
4
+ version: 4.26.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Hanke
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-20 00:00:00.000000000 Z
11
+ date: 2015-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport