picky 3.1.4 → 3.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -15,23 +15,8 @@ module Picky
15
15
 
16
16
  # A list of indexed categories.
17
17
  #
18
- # Options:
19
- # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
20
- # The default behaviour is that if a token does not match to
21
- # any category, the query will not return anything (since a
22
- # single token cannot be matched). If you set this option to
23
- # true, any token that cannot be matched to a category will be
24
- # simply ignored.
25
- # Use this if only a few matched words are important, like for
26
- # example of the query "Jonathan Myers 86455 Las Cucarachas"
27
- # you only want to match the zipcode, to have the search engine
28
- # display advertisements on the side for the zipcode.
29
- # Nifty! :)
30
- #
31
18
  def initialize options = {}
32
19
  clear_categories
33
-
34
- @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
35
20
  end
36
21
 
37
22
  # Clears both the array of categories and the hash of categories.
@@ -2,8 +2,6 @@ module Picky
2
2
 
3
3
  class Categories
4
4
 
5
- attr_reader :ignore_unassigned_tokens
6
-
7
5
  each_delegate :load_from_cache,
8
6
  :analyze,
9
7
  :to => :categories
@@ -56,20 +54,13 @@ module Picky
56
54
  # Returns possible Combinations for the token.
57
55
  #
58
56
  # Note: The preselected_categories param is an optimization.
59
- #
60
57
  # Note: Returns [] if no categories matched (will produce no result).
61
- # Returns nil if this token needs to be removed from the query.
62
- # (Also none of the categories matched, but the ignore unassigned
63
- # tokens option is true)
64
58
  #
65
59
  def possible_for token, preselected_categories = nil
66
60
  possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
67
61
  combination = category.combination_for token
68
62
  combination ? combinations << combination : combinations
69
63
  end
70
- # This is an optimization to mark tokens that are ignored.
71
- #
72
- return if ignore_unassigned_tokens && possible.empty?
73
64
  possible
74
65
  end
75
66
 
data/lib/picky/index.rb CHANGED
@@ -98,7 +98,7 @@ module Picky
98
98
  # === Parameters
99
99
  # * name: A name that will be used for the index directory and in the Picky front end.
100
100
  #
101
- # === Options (all are used in the block, see examples)
101
+ # === Options (all are used in the block - not passed as a Hash, see examples)
102
102
  # * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
103
103
  # * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
104
104
  # * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
@@ -113,12 +113,9 @@ module Picky
113
113
  # result_identifier :my_special_results
114
114
  # end
115
115
  #
116
- def initialize name, options = {}
117
- @name = name.to_sym
118
-
119
- # TODO Move ignore_unassigned_tokens to query, somehow. Then, remove options.
120
- #
121
- @categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
116
+ def initialize name
117
+ @name = name.to_sym
118
+ @categories = Categories.new
122
119
 
123
120
  # Centralized registry.
124
121
  #
@@ -8,20 +8,23 @@ module Picky
8
8
  #
9
9
  class Tokens # :nodoc:all
10
10
 
11
+ attr_reader :ignore_unassigned
12
+
11
13
  # Basically delegates to its internal tokens array.
12
14
  #
13
15
  self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
14
16
 
15
17
  # Create a new Tokens object with the array of tokens passed in.
16
18
  #
17
- def initialize tokens = []
18
- @tokens = tokens
19
+ def initialize tokens, ignore_unassigned = false
20
+ @tokens = tokens
21
+ @ignore_unassigned = ignore_unassigned
19
22
  end
20
23
 
21
24
  # Creates a new Tokens object from a number of Strings.
22
25
  #
23
- def self.processed words, originals
24
- new words.zip(originals).collect! { |word, original| Token.processed word, original }
26
+ def self.processed words, originals, ignore_unassigned = false
27
+ new words.zip(originals).collect! { |word, original| Token.processed word, original }, ignore_unassigned
25
28
  end
26
29
 
27
30
  # Tokenizes each token.
@@ -45,14 +48,16 @@ module Picky
45
48
  @tokens.inject([]) do |combinations, token|
46
49
  possible_combinations = token.possible_combinations_in index
47
50
 
48
- # TODO Could move the ignore_unassigned_tokens here!
49
- #
50
51
  # Note: Optimization for ignoring tokens that allocate to nothing and
51
52
  # can be ignored.
52
53
  # For example in a special search, where "florian" is not
53
54
  # mapped to any category.
54
55
  #
55
- possible_combinations ? combinations << possible_combinations : combinations
56
+ if ignore_unassigned && possible_combinations.empty?
57
+ combinations
58
+ else
59
+ combinations << possible_combinations
60
+ end
56
61
  end
57
62
  end
58
63
 
data/lib/picky/search.rb CHANGED
@@ -44,6 +44,7 @@ module Picky
44
44
 
45
45
  @tokenizer ||= Tokenizer.query_default # THINK Not dynamic. Ok?
46
46
  @weights ||= Query::Weights.new
47
+ @ignore_unassigned = false if @ignore_unassigned.nil?
47
48
 
48
49
  self
49
50
  end
@@ -99,6 +100,33 @@ module Picky
99
100
  end
100
101
  end
101
102
 
103
+ # Ignore the given token if it cannot be matched to a category.
104
+ # The default behaviour is that if a token does not match
105
+ # any category, the query will not return anything (since a
106
+ # single token cannot be matched). If you set this option to
107
+ # true, any token that cannot be matched to a category will be
108
+ # simply ignored.
109
+ #
110
+ # Use this if only a few matched words are important. For
111
+ # example, in the query "Jonathan Myers 86455 Las Cucarachas"
112
+ # you may only want to match the zipcode, so that the search engine
113
+ # can display advertisements on the side for that zipcode.
114
+ #
115
+ # False by default.
116
+ #
117
+ # Example:
118
+ # search = Search.new(books_index, dvd_index, mp3_index) do
119
+ # ignore_unassigned_tokens true
120
+ # end
121
+ #
122
+ # With this set to true, if "Flunder" in the query "Peter Flunder"
123
+ # cannot be assigned to any category, it will simply be
124
+ # ignored. This is done for each categorization.
125
+ #
126
+ def ignore_unassigned_tokens value
127
+ @ignore_unassigned = value
128
+ end
129
+
102
130
  # This is the main entry point for a query.
103
131
  # Use this in specs and also for running queries.
104
132
  #
@@ -146,7 +174,7 @@ module Picky
146
174
  #
147
175
  def tokenized text
148
176
  tokens, originals = tokenizer.tokenize text
149
- tokens = Query::Tokens.processed tokens, originals || tokens
177
+ tokens = Query::Tokens.processed tokens, originals || tokens, @ignore_unassigned
150
178
  tokens.partialize_last # Note: In the standard Picky search, the last token is always partial.
151
179
  tokens
152
180
  end
@@ -1,24 +1,6 @@
1
1
  require 'spec_helper'
2
2
 
3
3
  describe Picky::Categories do
4
- context 'with option ignore_unassigned_tokens' do
5
- context 'ignore_unassigned_tokens true' do
6
- before(:each) do
7
- @categories = described_class.new ignore_unassigned_tokens: true
8
- end
9
- it 'should return the right value' do
10
- @categories.ignore_unassigned_tokens.should == true
11
- end
12
- end
13
- context 'ignore_unassigned_tokens false' do
14
- before(:each) do
15
- @categories = described_class.new ignore_unassigned_tokens: false
16
- end
17
- it 'should return the right value' do
18
- @categories.ignore_unassigned_tokens.should == false
19
- end
20
- end
21
- end
22
4
 
23
5
  context "with real categories" do
24
6
  before(:each) do
@@ -2,6 +2,53 @@ require 'spec_helper'
2
2
 
3
3
  describe Picky::Query::Tokens do
4
4
 
5
+ context 'with ignore_unassigned_tokens true' do
6
+ it 'generates processed tokens from all words' do
7
+ expected = [
8
+ Picky::Query::Token.processed('this~'),
9
+ Picky::Query::Token.processed('is'),
10
+ Picky::Query::Token.processed('a'),
11
+ Picky::Query::Token.processed('sp:solr'),
12
+ Picky::Query::Token.processed('query"')
13
+ ]
14
+
15
+ described_class.should_receive(:new).once.with expected, true
16
+
17
+ described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], [], true
18
+ end
19
+
20
+ describe 'possible_combinations_in' do
21
+ before(:each) do
22
+ @token1 = stub :token1
23
+ @token2 = stub :token2
24
+ @token3 = stub :token3
25
+
26
+ @tokens = described_class.new [@token1, @token2, @token3], true
27
+ end
28
+ it 'should work correctly' do
29
+ @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
30
+ @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination21]
31
+ @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
32
+
33
+ @tokens.possible_combinations_in(:some_index).should == [
34
+ [:combination11, :combination12],
35
+ [:combination21],
36
+ [:combination31, :combination32, :combination33]
37
+ ]
38
+ end
39
+ it 'should work correctly' do
40
+ @token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
41
+ @token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return []
42
+ @token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
43
+
44
+ @tokens.possible_combinations_in(:some_index).should == [
45
+ [:combination11, :combination12],
46
+ [:combination31, :combination32, :combination33]
47
+ ]
48
+ end
49
+ end
50
+ end
51
+
5
52
  describe '.processed' do
6
53
  it 'generates processed tokens from all words' do
7
54
  expected = [
@@ -12,7 +59,7 @@ describe Picky::Query::Tokens do
12
59
  Picky::Query::Token.processed('query"')
13
60
  ]
14
61
 
15
- described_class.should_receive(:new).once.with expected
62
+ described_class.should_receive(:new).once.with expected, false
16
63
 
17
64
  described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
18
65
  end
@@ -25,7 +72,7 @@ describe Picky::Query::Tokens do
25
72
  Picky::Query::Token.processed('query"')
26
73
  ]
27
74
 
28
- described_class.should_receive(:new).once.with expected
75
+ described_class.should_receive(:new).once.with expected, false
29
76
 
30
77
  described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
31
78
  end
@@ -114,6 +161,7 @@ describe Picky::Query::Tokens do
114
161
 
115
162
  @tokens.possible_combinations_in(:some_index).should == [
116
163
  [:combination11, :combination12],
164
+ nil,
117
165
  [:combination31, :combination32, :combination33]
118
166
  ]
119
167
  end
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: picky
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 3.1.4
5
+ version: 3.1.5
6
6
  platform: ruby
7
7
  authors:
8
8
  - Florian Hanke
@@ -10,7 +10,7 @@ autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
12
 
13
- date: 2011-09-02 00:00:00 +02:00
13
+ date: 2011-09-04 00:00:00 +02:00
14
14
  default_executable: picky
15
15
  dependencies:
16
16
  - !ruby/object:Gem::Dependency
@@ -32,7 +32,7 @@ dependencies:
32
32
  requirements:
33
33
  - - "="
34
34
  - !ruby/object:Gem::Version
35
- version: 3.1.4
35
+ version: 3.1.5
36
36
  type: :development
37
37
  version_requirements: *id002
38
38
  description: Fast Ruby semantic text search engine with comfortable single field interface.