picky 3.1.4 → 3.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/categories.rb +0 -15
- data/lib/picky/categories_indexed.rb +0 -9
- data/lib/picky/index.rb +4 -7
- data/lib/picky/query/tokens.rb +12 -7
- data/lib/picky/search.rb +29 -1
- data/spec/lib/categories_indexed_spec.rb +0 -18
- data/spec/lib/query/tokens_spec.rb +50 -2
- metadata +3 -3
data/lib/picky/categories.rb
CHANGED
@@ -15,23 +15,8 @@ module Picky
|
|
15
15
|
|
16
16
|
# A list of indexed categories.
|
17
17
|
#
|
18
|
-
# Options:
|
19
|
-
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
20
|
-
# The default behaviour is that if a token does not match to
|
21
|
-
# any category, the query will not return anything (since a
|
22
|
-
# single token cannot be matched). If you set this option to
|
23
|
-
# true, any token that cannot be matched to a category will be
|
24
|
-
# simply ignored.
|
25
|
-
# Use this if only a few matched words are important, like for
|
26
|
-
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
27
|
-
# you only want to match the zipcode, to have the search engine
|
28
|
-
# display advertisements on the side for the zipcode.
|
29
|
-
# Nifty! :)
|
30
|
-
#
|
31
18
|
def initialize options = {}
|
32
19
|
clear_categories
|
33
|
-
|
34
|
-
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
35
20
|
end
|
36
21
|
|
37
22
|
# Clears both the array of categories and the hash of categories.
|
@@ -2,8 +2,6 @@ module Picky
|
|
2
2
|
|
3
3
|
class Categories
|
4
4
|
|
5
|
-
attr_reader :ignore_unassigned_tokens
|
6
|
-
|
7
5
|
each_delegate :load_from_cache,
|
8
6
|
:analyze,
|
9
7
|
:to => :categories
|
@@ -56,20 +54,13 @@ module Picky
|
|
56
54
|
# Returns possible Combinations for the token.
|
57
55
|
#
|
58
56
|
# Note: The preselected_categories param is an optimization.
|
59
|
-
#
|
60
57
|
# Note: Returns [] if no categories matched (will produce no result).
|
61
|
-
# Returns nil if this token needs to be removed from the query.
|
62
|
-
# (Also none of the categories matched, but the ignore unassigned
|
63
|
-
# tokens option is true)
|
64
58
|
#
|
65
59
|
def possible_for token, preselected_categories = nil
|
66
60
|
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
67
61
|
combination = category.combination_for token
|
68
62
|
combination ? combinations << combination : combinations
|
69
63
|
end
|
70
|
-
# This is an optimization to mark tokens that are ignored.
|
71
|
-
#
|
72
|
-
return if ignore_unassigned_tokens && possible.empty?
|
73
64
|
possible
|
74
65
|
end
|
75
66
|
|
data/lib/picky/index.rb
CHANGED
@@ -98,7 +98,7 @@ module Picky
|
|
98
98
|
# === Parameters
|
99
99
|
# * name: A name that will be used for the index directory and in the Picky front end.
|
100
100
|
#
|
101
|
-
# === Options (all are used in the block, see examples)
|
101
|
+
# === Options (all are used in the block - not passed as a Hash, see examples)
|
102
102
|
# * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
|
103
103
|
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
104
104
|
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
@@ -113,12 +113,9 @@ module Picky
|
|
113
113
|
# result_identifier :my_special_results
|
114
114
|
# end
|
115
115
|
#
|
116
|
-
def initialize name, options = {}
|
117
|
-
@name = name.to_sym
|
118
|
-
|
119
|
-
# TODO Move ignore_unassigned_tokens to query, somehow. Then, remove options.
|
120
|
-
#
|
121
|
-
@categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
|
116
|
+
def initialize name
|
117
|
+
@name = name.to_sym
|
118
|
+
@categories = Categories.new
|
122
119
|
|
123
120
|
# Centralized registry.
|
124
121
|
#
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -8,20 +8,23 @@ module Picky
|
|
8
8
|
#
|
9
9
|
class Tokens # :nodoc:all
|
10
10
|
|
11
|
+
attr_reader :ignore_unassigned
|
12
|
+
|
11
13
|
# Basically delegates to its internal tokens array.
|
12
14
|
#
|
13
15
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
14
16
|
|
15
17
|
# Create a new Tokens object with the array of tokens passed in.
|
16
18
|
#
|
17
|
-
def initialize tokens = []
|
18
|
-
@tokens = tokens
|
19
|
+
def initialize tokens, ignore_unassigned = false
|
20
|
+
@tokens = tokens
|
21
|
+
@ignore_unassigned = ignore_unassigned
|
19
22
|
end
|
20
23
|
|
21
24
|
# Creates a new Tokens object from a number of Strings.
|
22
25
|
#
|
23
|
-
def self.processed words, originals
|
24
|
-
new words.zip(originals).collect! { |word, original| Token.processed word, original }
|
26
|
+
def self.processed words, originals, ignore_unassigned = false
|
27
|
+
new words.zip(originals).collect! { |word, original| Token.processed word, original }, ignore_unassigned
|
25
28
|
end
|
26
29
|
|
27
30
|
# Tokenizes each token.
|
@@ -45,14 +48,16 @@ module Picky
|
|
45
48
|
@tokens.inject([]) do |combinations, token|
|
46
49
|
possible_combinations = token.possible_combinations_in index
|
47
50
|
|
48
|
-
# TODO Could move the ignore_unassigned_tokens here!
|
49
|
-
#
|
50
51
|
# Note: Optimization for ignoring tokens that allocate to nothing and
|
51
52
|
# can be ignored.
|
52
53
|
# For example in a special search, where "florian" is not
|
53
54
|
# mapped to any category.
|
54
55
|
#
|
55
|
-
|
56
|
+
if ignore_unassigned && possible_combinations.empty?
|
57
|
+
combinations
|
58
|
+
else
|
59
|
+
combinations << possible_combinations
|
60
|
+
end
|
56
61
|
end
|
57
62
|
end
|
58
63
|
|
data/lib/picky/search.rb
CHANGED
@@ -44,6 +44,7 @@ module Picky
|
|
44
44
|
|
45
45
|
@tokenizer ||= Tokenizer.query_default # THINK Not dynamic. Ok?
|
46
46
|
@weights ||= Query::Weights.new
|
47
|
+
@ignore_unassigned = false if @ignore_unassigned.nil?
|
47
48
|
|
48
49
|
self
|
49
50
|
end
|
@@ -99,6 +100,33 @@ module Picky
|
|
99
100
|
end
|
100
101
|
end
|
101
102
|
|
103
|
+
# Ignore the given token if it cannot be matched to a category.
|
104
|
+
# The default behaviour is that if a token does not match to
|
105
|
+
# any category, the query will not return anything (since a
|
106
|
+
# single token cannot be matched). If you set this option to
|
107
|
+
# true, any token that cannot be matched to a category will be
|
108
|
+
# simply ignored.
|
109
|
+
#
|
110
|
+
# Use this if only a few matched words are important, like for
|
111
|
+
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
112
|
+
# you only want to match the zipcode, to have the search engine
|
113
|
+
# display advertisements on the side for the zipcode.
|
114
|
+
#
|
115
|
+
# False by default.
|
116
|
+
#
|
117
|
+
# Example:
|
118
|
+
# search = Search.new(books_index, dvd_index, mp3_index) do
|
119
|
+
# ignore_unassigned_tokens true
|
120
|
+
# end
|
121
|
+
#
|
122
|
+
# With this set to true, if in "Peter Flunder", "Flunder"
|
123
|
+
# couldn't be assigned to any category, it will simply be
|
124
|
+
# ignored. This is done for each categorization.
|
125
|
+
#
|
126
|
+
def ignore_unassigned_tokens value
|
127
|
+
@ignore_unassigned = value
|
128
|
+
end
|
129
|
+
|
102
130
|
# This is the main entry point for a query.
|
103
131
|
# Use this in specs and also for running queries.
|
104
132
|
#
|
@@ -146,7 +174,7 @@ module Picky
|
|
146
174
|
#
|
147
175
|
def tokenized text
|
148
176
|
tokens, originals = tokenizer.tokenize text
|
149
|
-
tokens = Query::Tokens.processed tokens, originals || tokens
|
177
|
+
tokens = Query::Tokens.processed tokens, originals || tokens, @ignore_unassigned
|
150
178
|
tokens.partialize_last # Note: In the standard Picky search, the last token is always partial.
|
151
179
|
tokens
|
152
180
|
end
|
@@ -1,24 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Picky::Categories do
|
4
|
-
context 'with option ignore_unassigned_tokens' do
|
5
|
-
context 'ignore_unassigned_tokens true' do
|
6
|
-
before(:each) do
|
7
|
-
@categories = described_class.new ignore_unassigned_tokens: true
|
8
|
-
end
|
9
|
-
it 'should return the right value' do
|
10
|
-
@categories.ignore_unassigned_tokens.should == true
|
11
|
-
end
|
12
|
-
end
|
13
|
-
context 'ignore_unassigned_tokens false' do
|
14
|
-
before(:each) do
|
15
|
-
@categories = described_class.new ignore_unassigned_tokens: false
|
16
|
-
end
|
17
|
-
it 'should return the right value' do
|
18
|
-
@categories.ignore_unassigned_tokens.should == false
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
4
|
|
23
5
|
context "with real categories" do
|
24
6
|
before(:each) do
|
@@ -2,6 +2,53 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Picky::Query::Tokens do
|
4
4
|
|
5
|
+
context 'with ignore_unassigned_tokens true' do
|
6
|
+
it 'generates processed tokens from all words' do
|
7
|
+
expected = [
|
8
|
+
Picky::Query::Token.processed('this~'),
|
9
|
+
Picky::Query::Token.processed('is'),
|
10
|
+
Picky::Query::Token.processed('a'),
|
11
|
+
Picky::Query::Token.processed('sp:solr'),
|
12
|
+
Picky::Query::Token.processed('query"')
|
13
|
+
]
|
14
|
+
|
15
|
+
described_class.should_receive(:new).once.with expected, true
|
16
|
+
|
17
|
+
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], [], true
|
18
|
+
end
|
19
|
+
|
20
|
+
describe 'possible_combinations_in' do
|
21
|
+
before(:each) do
|
22
|
+
@token1 = stub :token1
|
23
|
+
@token2 = stub :token2
|
24
|
+
@token3 = stub :token3
|
25
|
+
|
26
|
+
@tokens = described_class.new [@token1, @token2, @token3], true
|
27
|
+
end
|
28
|
+
it 'should work correctly' do
|
29
|
+
@token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
|
30
|
+
@token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination21]
|
31
|
+
@token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
|
32
|
+
|
33
|
+
@tokens.possible_combinations_in(:some_index).should == [
|
34
|
+
[:combination11, :combination12],
|
35
|
+
[:combination21],
|
36
|
+
[:combination31, :combination32, :combination33]
|
37
|
+
]
|
38
|
+
end
|
39
|
+
it 'should work correctly' do
|
40
|
+
@token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
|
41
|
+
@token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return []
|
42
|
+
@token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
|
43
|
+
|
44
|
+
@tokens.possible_combinations_in(:some_index).should == [
|
45
|
+
[:combination11, :combination12],
|
46
|
+
[:combination31, :combination32, :combination33]
|
47
|
+
]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
5
52
|
describe '.processed' do
|
6
53
|
it 'generates processed tokens from all words' do
|
7
54
|
expected = [
|
@@ -12,7 +59,7 @@ describe Picky::Query::Tokens do
|
|
12
59
|
Picky::Query::Token.processed('query"')
|
13
60
|
]
|
14
61
|
|
15
|
-
described_class.should_receive(:new).once.with expected
|
62
|
+
described_class.should_receive(:new).once.with expected, false
|
16
63
|
|
17
64
|
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
|
18
65
|
end
|
@@ -25,7 +72,7 @@ describe Picky::Query::Tokens do
|
|
25
72
|
Picky::Query::Token.processed('query"')
|
26
73
|
]
|
27
74
|
|
28
|
-
described_class.should_receive(:new).once.with expected
|
75
|
+
described_class.should_receive(:new).once.with expected, false
|
29
76
|
|
30
77
|
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
|
31
78
|
end
|
@@ -114,6 +161,7 @@ describe Picky::Query::Tokens do
|
|
114
161
|
|
115
162
|
@tokens.possible_combinations_in(:some_index).should == [
|
116
163
|
[:combination11, :combination12],
|
164
|
+
nil,
|
117
165
|
[:combination31, :combination32, :combination33]
|
118
166
|
]
|
119
167
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 3.1.4
|
5
|
+
version: 3.1.5
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-09-
|
13
|
+
date: 2011-09-04 00:00:00 +02:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "="
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version: 3.1.4
|
35
|
+
version: 3.1.5
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
38
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|