picky 3.1.4 → 3.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -15,23 +15,8 @@ module Picky
15
15
 
16
16
  # A list of indexed categories.
17
17
  #
18
- # Options:
19
- # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
20
- # The default behaviour is that if a token does not match to
21
- # any category, the query will not return anything (since a
22
- # single token cannot be matched). If you set this option to
23
- # true, any token that cannot be matched to a category will be
24
- # simply ignored.
25
- # Use this if only a few matched words are important, like for
26
- # example of the query "Jonathan Myers 86455 Las Cucarachas"
27
- # you only want to match the zipcode, to have the search engine
28
- # display advertisements on the side for the zipcode.
29
- # Nifty! :)
30
- #
31
18
  def initialize options = {}
32
19
  clear_categories
33
-
34
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
35
20
  end
36
21
 
37
22
  # Clears both the array of categories and the hash of categories.
@@ -2,8 +2,6 @@ module Picky
2
2
 
3
3
  class Categories
4
4
 
5
- attr_reader :ignore_unassigned_tokens
6
-
7
5
  each_delegate :load_from_cache,
8
6
  :analyze,
9
7
  :to => :categories
@@ -56,20 +54,13 @@ module Picky
56
54
  # Returns possible Combinations for the token.
57
55
  #
58
56
  # Note: The preselected_categories param is an optimization.
59
- #
60
57
  # Note: Returns [] if no categories matched (will produce no result).
61
- # Returns nil if this token needs to be removed from the query.
62
- # (Also none of the categories matched, but the ignore unassigned
63
- # tokens option is true)
64
58
  #
65
59
  def possible_for token, preselected_categories = nil
66
60
  possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
67
61
  combination = category.combination_for token
68
62
  combination ? combinations << combination : combinations
69
63
  end
70
- # This is an optimization to mark tokens that are ignored.
71
- #
72
- return if ignore_unassigned_tokens && possible.empty?
73
64
  possible
74
65
  end
75
66
 
data/lib/picky/index.rb CHANGED
@@ -98,7 +98,7 @@ module Picky
98
98
  # === Parameters
99
99
  # * name: A name that will be used for the index directory and in the Picky front end.
100
100
  #
101
- # === Options (all are used in the block, see examples)
101
+ # === Options (all are used in the block - not passed as a Hash, see examples)
102
102
  # * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
103
103
  # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
104
104
  # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
@@ -113,12 +113,9 @@ module Picky
113
113
  # result_identifier :my_special_results
114
114
  # end
115
115
  #
116
- def initialize name, options = {}
117
- @name = name.to_sym
118
-
119
- # TODO Move ignore_unassigned_tokens to query, somehow. Then, remove options.
120
- #
121
- @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
116
+ def initialize name
117
+ @name = name.to_sym
118
+ @categories = Categories.new
122
119
 
123
120
  # Centralized registry.
124
121
  #
@@ -8,20 +8,23 @@ module Picky
8
8
  #
9
9
  class Tokens # :nodoc:all
10
10
 
11
+ attr_reader :ignore_unassigned
12
+
11
13
  # Basically delegates to its internal tokens array.
12
14
  #
13
15
  self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
14
16
 
15
17
  # Create a new Tokens object with the array of tokens passed in.
16
18
  #
17
- def initialize tokens = []
18
- @tokens = tokens
19
+ def initialize tokens, ignore_unassigned = false
20
+ @tokens = tokens
21
+ @ignore_unassigned = ignore_unassigned
19
22
  end
20
23
 
21
24
  # Creates a new Tokens object from a number of Strings.
22
25
  #
23
- def self.processed words, originals
24
- new words.zip(originals).collect! { |word, original| Token.processed word, original }
26
+ def self.processed words, originals, ignore_unassigned = false
27
+ new words.zip(originals).collect! { |word, original| Token.processed word, original }, ignore_unassigned
25
28
  end
26
29
 
27
30
  # Tokenizes each token.
@@ -45,14 +48,16 @@ module Picky
45
48
  @tokens.inject([]) do |combinations, token|
46
49
  possible_combinations = token.possible_combinations_in index
47
50
 
48
- # TODO Could move the ignore_unassigned_tokens here!
49
- #
50
51
  # Note: Optimization for ignoring tokens that allocate to nothing and
51
52
  # can be ignored.
52
53
  # For example in a special search, where "florian" is not
53
54
  # mapped to any category.
54
55
  #
55
- possible_combinations ? combinations << possible_combinations : combinations
56
+ if ignore_unassigned && possible_combinations.empty?
57
+ combinations
58
+ else
59
+ combinations << possible_combinations
60
+ end
56
61
  end
57
62
  end
58
63
 
data/lib/picky/search.rb CHANGED
@@ -44,6 +44,7 @@ module Picky
44
44
 
45
45
  @tokenizer ||= Tokenizer.query_default # THINK Not dynamic. Ok?
46
46
  @weights ||= Query::Weights.new
47
+ @ignore_unassigned = false if @ignore_unassigned.nil?
47
48
 
48
49
  self
49
50
  end
@@ -99,6 +100,33 @@ module Picky
99
100
  end
100
101
  end
101
102
 
103
+ # Ignore the given token if it cannot be matched to a category.
104
+ # The default behaviour is that if a token does not match
105
+ # any category, the query will not return anything (since a
106
+ # single token cannot be matched). If you set this option to
107
+ # true, any token that cannot be matched to a category will be
108
+ # simply ignored.
109
+ #
110
+ # Use this if only a few matched words are important. For
111
+ # example, in the query "Jonathan Myers 86455 Las Cucarachas"
112
+ # you might only want to match the zipcode, so that the search
113
+ # engine can display advertisements for that zipcode on the side.
114
+ #
115
+ # False by default.
116
+ #
117
+ # Example:
118
+ # search = Search.new(books_index, dvd_index, mp3_index) do
119
+ # ignore_unassigned_tokens true
120
+ # end
121
+ #
122
+ # With this set to true, if in "Peter Flunder", "Flunder"
123
+ # couldn't be assigned to any category, it will simply be
124
+ # ignored. This is done for each categorization.
125
+ #
126
+ def ignore_unassigned_tokens value
127
+ @ignore_unassigned = value
128
+ end
129
+
102
130
  # This is the main entry point for a query.
103
131
  # Use this in specs and also for running queries.
104
132
  #
@@ -146,7 +174,7 @@ module Picky
146
174
  #
147
175
  def tokenized text
148
176
  tokens, originals = tokenizer.tokenize text
149
- tokens = Query::Tokens.processed tokens, originals || tokens
177
+ tokens = Query::Tokens.processed tokens, originals || tokens, @ignore_unassigned
150
178
  tokens.partialize_last # Note: In the standard Picky search, the last token is always partial.
151
179
  tokens
152
180
  end
@@ -1,24 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Picky::Categories do
4
- context 'with option ignore_unassigned_tokens' do
5
- context 'ignore_unassigned_tokens true' do
6
- before(:each) do
7
- @categories = described_class.new ignore_unassigned_tokens: true
8
- end
9
- it 'should return the right value' do
10
- @categories.ignore_unassigned_tokens.should == true
11
- end
12
- end
13
- context 'ignore_unassigned_tokens false' do
14
- before(:each) do
15
- @categories = described_class.new ignore_unassigned_tokens: false
16
- end
17
- it 'should return the right value' do
18
- @categories.ignore_unassigned_tokens.should == false
19
- end
20
- end
21
- end
22
4
 
23
5
  context "with real categories" do
24
6
  before(:each) do
@@ -2,6 +2,53 @@ require 'spec_helper'
2
2
 
3
3
  describe Picky::Query::Tokens do
4
4
 
5
+ context 'with ignore_unassigned_tokens true' do
6
+ it 'generates processed tokens from all words' do
7
+ expected = [
8
+ Picky::Query::Token.processed('this~'),
9
+ Picky::Query::Token.processed('is'),
10
+ Picky::Query::Token.processed('a'),
11
+ Picky::Query::Token.processed('sp:solr'),
12
+ Picky::Query::Token.processed('query"')
13
+ ]
14
+
15
+ described_class.should_receive(:new).once.with expected, true
16
+
17
+ described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], [], true
18
+ end
19
+
20
+ describe 'possible_combinations_in' do
21
+ before(:each) do
22
+ @token1 = stub :token1
23
+ @token2 = stub :token2
24
+ @token3 = stub :token3
25
+
26
+ @tokens = described_class.new [@token1, @token2, @token3], true
27
+ end
28
+ it 'should work correctly' do
29
+ @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
30
+ @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination21]
31
+ @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
32
+
33
+ @tokens.possible_combinations_in(:some_index).should == [
34
+ [:combination11, :combination12],
35
+ [:combination21],
36
+ [:combination31, :combination32, :combination33]
37
+ ]
38
+ end
39
+ it 'should work correctly' do
40
+ @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
41
+ @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return []
42
+ @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
43
+
44
+ @tokens.possible_combinations_in(:some_index).should == [
45
+ [:combination11, :combination12],
46
+ [:combination31, :combination32, :combination33]
47
+ ]
48
+ end
49
+ end
50
+ end
51
+
5
52
  describe '.processed' do
6
53
  it 'generates processed tokens from all words' do
7
54
  expected = [
@@ -12,7 +59,7 @@ describe Picky::Query::Tokens do
12
59
  Picky::Query::Token.processed('query"')
13
60
  ]
14
61
 
15
- described_class.should_receive(:new).once.with expected
62
+ described_class.should_receive(:new).once.with expected, false
16
63
 
17
64
  described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
18
65
  end
@@ -25,7 +72,7 @@ describe Picky::Query::Tokens do
25
72
  Picky::Query::Token.processed('query"')
26
73
  ]
27
74
 
28
- described_class.should_receive(:new).once.with expected
75
+ described_class.should_receive(:new).once.with expected, false
29
76
 
30
77
  described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
31
78
  end
@@ -114,6 +161,7 @@ describe Picky::Query::Tokens do
114
161
 
115
162
  @tokens.possible_combinations_in(:some_index).should == [
116
163
  [:combination11, :combination12],
164
+ nil,
117
165
  [:combination31, :combination32, :combination33]
118
166
  ]
119
167
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 3.1.4
5
+ version: 3.1.5
6
6
  platform: ruby
7
7
  authors:
8
8
  - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-09-02 00:00:00 +02:00
13
+ date: 2011-09-04 00:00:00 +02:00
14
14
  default_executable: picky
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
32
32
  requirements:
33
33
  - - "="
34
34
  - !ruby/object:Gem::Version
35
- version: 3.1.4
35
+ version: 3.1.5
36
36
  type: :development
37
37
  version_requirements: *id002
38
38
  description: Fast Ruby semantic text search engine with comfortable single field interface.