picky 4.25.3 → 4.26.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: c80310289f311bff593cc485ba9a16a528748ac5
4
- data.tar.gz: 81cbd3f12b05b4b7c709b1eefede543ba0b18ae2
3
+ metadata.gz: 361f560a9705c05e6e4105e283edb8af462f6d12
4
+ data.tar.gz: 6e7430ed38b525ed918cfc8633659ce6f978f142
5
5
  SHA512:
6
- metadata.gz: 5c1193dd935e6cec656979c20c78bcd0d7913ed1205573add458adaccea6e987842bfe356d6fb6dd500a9bf932e8d760917eff0024f9539eea5afe8026ab7176
7
- data.tar.gz: 4aaf6e8987190d760ce4c60f07a7c4d694de5943bda4458176caf5db961e3693abc0c2bda91d879b48ee5625d603ec179b3cdbcb357d7a0c049c6e340468dbaa
6
+ metadata.gz: d7fd4d1022e3ea493a015579a4020b2d30a6058c0231fe3429e4ca255f697e30e78391afc5591b6f229ad504fed5b3f2b1a35c12e373243a494bb2275c54df80
7
+ data.tar.gz: fa28a3b92a92ba1d6668ee52e5dd1c9dfc3802b1cd47e2262b092c6469af9c79a1c56fbfda8838f1e545b1ced50a813dc98b48ea130cb0b735254c85f377d3a5
@@ -25,7 +25,11 @@ module Picky
25
25
  weight && (weight + (sum || 0)) || sum
26
26
  end
27
27
  else
28
- bundle.weight token.text
28
+ if tokenizer && tokenizer.stemmer?
29
+ bundle.weight token.stem(tokenizer)
30
+ else
31
+ bundle.weight token.text
32
+ end
29
33
  end
30
34
  end
31
35
 
@@ -45,14 +49,18 @@ module Picky
45
49
  ids.empty? ? result : result << ids
46
50
  end.flatten
47
51
  else
48
- bundle.ids token.text
52
+ # Optimization
53
+ if tokenizer && tokenizer.stemmer?
54
+ bundle.ids token.stem(tokenizer)
55
+ else
56
+ bundle.ids token.text
57
+ end
49
58
  end
50
59
  end
51
60
 
52
61
  # Returns the right index bundle for this token.
53
62
  #
54
63
  def bundle_for token
55
- # token.partial? ? partial : exact
56
64
  token.select_bundle exact, partial
57
65
  end
58
66
 
@@ -82,6 +82,22 @@ module Picky
82
82
  def select_bundle exact, partial
83
83
  @partial ? partial : exact
84
84
  end
85
+
86
+ # Generates a reused stem.
87
+ #
88
+ # TODO Probably should not cache, as not
89
+ # the same stemmer will be used always.
90
+ #
91
+ def stem tokenizer
92
+ if stem?
93
+ @stem ||= tokenizer.stem(@text)
94
+ else
95
+ @text
96
+ end
97
+ end
98
+ def stem?
99
+ @text !~ @@no_partial
100
+ end
85
101
 
86
102
  # Partial is a conditional setter.
87
103
  #
@@ -282,7 +282,7 @@ ERROR
282
282
  #
283
283
  def tokens_for words
284
284
  words.collect! { |word| word.downcase!; word } if downcase?
285
- words.collect! { |word| stem word } if stemmer?
285
+ words.collect! { |word| stem word } if stemmer? # Usually only done in indexing step.
286
286
  words
287
287
  end
288
288
 
@@ -6,18 +6,19 @@ require 'stemmer'
6
6
  require 'lingua/stemmer'
7
7
 
8
8
  describe 'stemming' do
9
- let(:stemmer) {
10
- # Fast stemmer does not conform with the API.
11
- #
12
- module Stemmer
13
- class << self
14
- alias_method :stem, :stem_word
15
- end
16
- end
17
- Stemmer
18
- }
19
9
 
20
- describe 'examples' do
10
+ describe 'per-index stemming' do
11
+ let(:stemmer) {
12
+ # Fast stemmer does not conform with the API.
13
+ #
14
+ module Stemmer
15
+ class << self
16
+ alias_method :stem, :stem_word
17
+ end
18
+ end
19
+ Stemmer
20
+ }
21
+
21
22
  it 'works correctly' do
22
23
  tokenizer = Picky::Tokenizer.new(stems_with: stemmer)
23
24
 
@@ -58,15 +59,7 @@ describe 'stemming' do
58
59
 
59
60
  try = Picky::Search.new index
60
61
 
61
- # If you don't stem in the search, it should not be found!
62
- #
63
- try.search("text:stemming").ids.should == []
64
-
65
- try = Picky::Search.new index do
66
- searching stems_with: Stemmer
67
- end
68
-
69
- # With stemming in search AND indexing, it works :)
62
+ # Stems for both, so finds both.
70
63
  #
71
64
  try.search("text:stemming").ids.should == [2, 1]
72
65
  try.search("text:lem").ids.should == [2]
@@ -80,27 +73,52 @@ describe 'stemming' do
80
73
  # eg. Lemming!, then stemming won't work.
81
74
  #
82
75
  indexing removes_characters: /[^a-z\s]/i,
83
- stems_with: Lingua::Stemmer.new
76
+ stems_with: Lingua::Stemmer.new # Both stem
84
77
  category :text
85
78
  end
86
-
79
+
87
80
  index.replace_from id: 1, text: "Hello good Sirs, these things here need stems to work!"
88
81
  index.replace_from id: 2, text: "Stemming Lemming!"
89
82
 
90
83
  try = Picky::Search.new index
91
-
92
- # If you don't stem in the search, it should not be found!
93
- #
94
- try.search("text:stemming").ids.should == []
95
84
 
96
- try = Picky::Search.new index do
97
- searching stems_with: Lingua::Stemmer.new
98
- end
99
-
100
- # With stemming in search AND indexing, it works :)
101
- #
102
85
  try.search("text:stemming").ids.should == [2, 1]
103
86
  try.search("text:lem").ids.should == [2]
104
87
  end
105
88
  end
89
+
90
+ describe 'per-category stemming' do
91
+ describe 'mixed stemming categories' do
92
+ it 'stems some but not others' do
93
+ index = Picky::Index.new :stemming do
94
+ # Be aware that if !s are not removed from
95
+ # eg. Lemming!, then stemming won't work.
96
+ #
97
+ indexing removes_characters: /[^a-z\s]/i
98
+ category :text1,
99
+ partial: Picky::Partial::None.new,
100
+ indexing: { stems_with: Lingua::Stemmer.new }
101
+ category :text2,
102
+ partial: Picky::Partial::None.new
103
+ end
104
+
105
+ index.replace_from id: 1, text1: 'stemming', text2: 'ios'
106
+ index.replace_from id: 2, text1: 'ios', text2: 'stemming'
107
+
108
+ try = Picky::Search.new index
109
+
110
+ try.search("text1:stemming").ids.should == [1]
111
+ try.search("text2:ios").ids.should == [1]
112
+
113
+ try.search("text1:ios").ids.should == [2]
114
+ try.search("text2:stemming").ids.should == [2]
115
+
116
+ try.search("text1:stem").ids.should == [1]
117
+ try.search("text2:io").ids.should == []
118
+
119
+ try.search("text1:io").ids.should == [2]
120
+ try.search("text2:stem").ids.should == []
121
+ end
122
+ end
123
+ end
106
124
  end
@@ -12,9 +12,7 @@ describe "Option symbol_keys" do
12
12
  Picky::Search.new(index) { symbol_keys }
13
13
  end
14
14
 
15
- # Test the enumerator abilities.
16
- #
17
- it 'can enumerate through the allocations' do
15
+ it 'returns results' do
18
16
  index.category :text
19
17
 
20
18
  thing = OpenStruct.new id: 1, text: "ohai"
@@ -25,5 +23,45 @@ describe "Option symbol_keys" do
25
23
 
26
24
  try.search("text:ohai").ids.should == [2, 1]
27
25
  end
26
+
27
+ it 'works with facets' do
28
+ index.category :text
29
+
30
+ thing = OpenStruct.new id: 1, text: "ohai"
31
+ other = OpenStruct.new id: 2, text: "ohai kthxbye"
32
+
33
+ index.add thing
34
+ index.add other
35
+
36
+ index.facets(:text).should == { ohai: 2, kthxbye: 1 }
37
+ try.facets(:text).should == { ohai: 2, kthxbye: 1 }
38
+ end
39
+
40
+ it 'actually uses symbols - paranoia' do
41
+ index.category :text
42
+
43
+ thing = OpenStruct.new id: 1, text: "ohai"
44
+
45
+ index.add thing
46
+
47
+ index[:text].exact.inverted[:ohai].should == [1]
48
+ index[:text].exact.weights[:ohai].should == 0.0
49
+ index[:text].exact.realtime[1].should == [:ohai]
50
+ index[:text].exact.similarity[:ohai].should == nil
51
+ end
52
+
53
+ it 'does the internals right - uses symbols' do
54
+ index.category :text
55
+
56
+ thing = OpenStruct.new id: 1, text: "ohai"
57
+
58
+ index.add thing
59
+
60
+ index[:text].exact.inverted.should == { ohai: [1] }
61
+ index[:text].exact.weights.should == { ohai: 0.0 }
62
+ # TODO This could be removed if sorting was always explicitly done.
63
+ index[:text].exact.realtime.should == { 1 => [:ohai] }
64
+ index[:text].exact.similarity.should == {}
65
+ end
28
66
 
29
67
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.25.3
4
+ version: 4.26.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Florian Hanke
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-01-20 00:00:00.000000000 Z
11
+ date: 2015-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: activesupport