picky 3.1.4 → 3.1.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/categories.rb +0 -15
- data/lib/picky/categories_indexed.rb +0 -9
- data/lib/picky/index.rb +4 -7
- data/lib/picky/query/tokens.rb +12 -7
- data/lib/picky/search.rb +29 -1
- data/spec/lib/categories_indexed_spec.rb +0 -18
- data/spec/lib/query/tokens_spec.rb +50 -2
- metadata +3 -3
data/lib/picky/categories.rb
CHANGED
@@ -15,23 +15,8 @@ module Picky
|
|
15
15
|
|
16
16
|
# A list of indexed categories.
|
17
17
|
#
|
18
|
-
# Options:
|
19
|
-
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
20
|
-
# The default behaviour is that if a token does not match to
|
21
|
-
# any category, the query will not return anything (since a
|
22
|
-
# single token cannot be matched). If you set this option to
|
23
|
-
# true, any token that cannot be matched to a category will be
|
24
|
-
# simply ignored.
|
25
|
-
# Use this if only a few matched words are important, like for
|
26
|
-
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
27
|
-
# you only want to match the zipcode, to have the search engine
|
28
|
-
# display advertisements on the side for the zipcode.
|
29
|
-
# Nifty! :)
|
30
|
-
#
|
31
18
|
def initialize options = {}
|
32
19
|
clear_categories
|
33
|
-
|
34
|
-
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
35
20
|
end
|
36
21
|
|
37
22
|
# Clears both the array of categories and the hash of categories.
|
@@ -2,8 +2,6 @@ module Picky
|
|
2
2
|
|
3
3
|
class Categories
|
4
4
|
|
5
|
-
attr_reader :ignore_unassigned_tokens
|
6
|
-
|
7
5
|
each_delegate :load_from_cache,
|
8
6
|
:analyze,
|
9
7
|
:to => :categories
|
@@ -56,20 +54,13 @@ module Picky
|
|
56
54
|
# Returns possible Combinations for the token.
|
57
55
|
#
|
58
56
|
# Note: The preselected_categories param is an optimization.
|
59
|
-
#
|
60
57
|
# Note: Returns [] if no categories matched (will produce no result).
|
61
|
-
# Returns nil if this token needs to be removed from the query.
|
62
|
-
# (Also none of the categories matched, but the ignore unassigned
|
63
|
-
# tokens option is true)
|
64
58
|
#
|
65
59
|
def possible_for token, preselected_categories = nil
|
66
60
|
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
67
61
|
combination = category.combination_for token
|
68
62
|
combination ? combinations << combination : combinations
|
69
63
|
end
|
70
|
-
# This is an optimization to mark tokens that are ignored.
|
71
|
-
#
|
72
|
-
return if ignore_unassigned_tokens && possible.empty?
|
73
64
|
possible
|
74
65
|
end
|
75
66
|
|
data/lib/picky/index.rb
CHANGED
@@ -98,7 +98,7 @@ module Picky
|
|
98
98
|
# === Parameters
|
99
99
|
# * name: A name that will be used for the index directory and in the Picky front end.
|
100
100
|
#
|
101
|
-
# === Options (all are used in the block, see examples)
|
101
|
+
# === Options (all are used in the block - not passed as a Hash, see examples)
|
102
102
|
# * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
|
103
103
|
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
104
104
|
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
@@ -113,12 +113,9 @@ module Picky
|
|
113
113
|
# result_identifier :my_special_results
|
114
114
|
# end
|
115
115
|
#
|
116
|
-
def initialize name, options = {}
|
117
|
-
@name
|
118
|
-
|
119
|
-
# TODO Move ignore_unassigned_tokens to query, somehow. Then, remove options.
|
120
|
-
#
|
121
|
-
@categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
|
116
|
+
def initialize name
|
117
|
+
@name = name.to_sym
|
118
|
+
@categories = Categories.new
|
122
119
|
|
123
120
|
# Centralized registry.
|
124
121
|
#
|
data/lib/picky/query/tokens.rb
CHANGED
@@ -8,20 +8,23 @@ module Picky
|
|
8
8
|
#
|
9
9
|
class Tokens # :nodoc:all
|
10
10
|
|
11
|
+
attr_reader :ignore_unassigned
|
12
|
+
|
11
13
|
# Basically delegates to its internal tokens array.
|
12
14
|
#
|
13
15
|
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
14
16
|
|
15
17
|
# Create a new Tokens object with the array of tokens passed in.
|
16
18
|
#
|
17
|
-
def initialize tokens = []
|
18
|
-
@tokens
|
19
|
+
def initialize tokens, ignore_unassigned = false
|
20
|
+
@tokens = tokens
|
21
|
+
@ignore_unassigned = ignore_unassigned
|
19
22
|
end
|
20
23
|
|
21
24
|
# Creates a new Tokens object from a number of Strings.
|
22
25
|
#
|
23
|
-
def self.processed words, originals
|
24
|
-
new words.zip(originals).collect! { |word, original| Token.processed word, original }
|
26
|
+
def self.processed words, originals, ignore_unassigned = false
|
27
|
+
new words.zip(originals).collect! { |word, original| Token.processed word, original }, ignore_unassigned
|
25
28
|
end
|
26
29
|
|
27
30
|
# Tokenizes each token.
|
@@ -45,14 +48,16 @@ module Picky
|
|
45
48
|
@tokens.inject([]) do |combinations, token|
|
46
49
|
possible_combinations = token.possible_combinations_in index
|
47
50
|
|
48
|
-
# TODO Could move the ignore_unassigned_tokens here!
|
49
|
-
#
|
50
51
|
# Note: Optimization for ignoring tokens that allocate to nothing and
|
51
52
|
# can be ignored.
|
52
53
|
# For example in a special search, where "florian" is not
|
53
54
|
# mapped to any category.
|
54
55
|
#
|
55
|
-
|
56
|
+
if ignore_unassigned && possible_combinations.empty?
|
57
|
+
combinations
|
58
|
+
else
|
59
|
+
combinations << possible_combinations
|
60
|
+
end
|
56
61
|
end
|
57
62
|
end
|
58
63
|
|
data/lib/picky/search.rb
CHANGED
@@ -44,6 +44,7 @@ module Picky
|
|
44
44
|
|
45
45
|
@tokenizer ||= Tokenizer.query_default # THINK Not dynamic. Ok?
|
46
46
|
@weights ||= Query::Weights.new
|
47
|
+
@ignore_unassigned = false if @ignore_unassigned.nil?
|
47
48
|
|
48
49
|
self
|
49
50
|
end
|
@@ -99,6 +100,33 @@ module Picky
|
|
99
100
|
end
|
100
101
|
end
|
101
102
|
|
103
|
+
# Ignore the given token if it cannot be matched to a category.
|
104
|
+
# The default behaviour is that if a token does not match to
|
105
|
+
# any category, the query will not return anything (since a
|
106
|
+
# single token cannot be matched). If you set this option to
|
107
|
+
# true, any token that cannot be matched to a category will be
|
108
|
+
# simply ignored.
|
109
|
+
#
|
110
|
+
# Use this if only a few matched words are important, like for
|
111
|
+
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
112
|
+
# you only want to match the zipcode, to have the search engine
|
113
|
+
# display advertisements on the side for the zipcode.
|
114
|
+
#
|
115
|
+
# False by default.
|
116
|
+
#
|
117
|
+
# Example:
|
118
|
+
# search = Search.new(books_index, dvd_index, mp3_index) do
|
119
|
+
# ignore_unassigned_tokens true
|
120
|
+
# end
|
121
|
+
#
|
122
|
+
# With this set to true, if in "Peter Flunder", "Flunder"
|
123
|
+
# couldn't be assigned to any category, it will simply be
|
124
|
+
# ignored. This is done for each categorization.
|
125
|
+
#
|
126
|
+
def ignore_unassigned_tokens value
|
127
|
+
@ignore_unassigned = value
|
128
|
+
end
|
129
|
+
|
102
130
|
# This is the main entry point for a query.
|
103
131
|
# Use this in specs and also for running queries.
|
104
132
|
#
|
@@ -146,7 +174,7 @@ module Picky
|
|
146
174
|
#
|
147
175
|
def tokenized text
|
148
176
|
tokens, originals = tokenizer.tokenize text
|
149
|
-
tokens = Query::Tokens.processed tokens, originals || tokens
|
177
|
+
tokens = Query::Tokens.processed tokens, originals || tokens, @ignore_unassigned
|
150
178
|
tokens.partialize_last # Note: In the standard Picky search, the last token is always partial.
|
151
179
|
tokens
|
152
180
|
end
|
@@ -1,24 +1,6 @@
|
|
1
1
|
require 'spec_helper'
|
2
2
|
|
3
3
|
describe Picky::Categories do
|
4
|
-
context 'with option ignore_unassigned_tokens' do
|
5
|
-
context 'ignore_unassigned_tokens true' do
|
6
|
-
before(:each) do
|
7
|
-
@categories = described_class.new ignore_unassigned_tokens: true
|
8
|
-
end
|
9
|
-
it 'should return the right value' do
|
10
|
-
@categories.ignore_unassigned_tokens.should == true
|
11
|
-
end
|
12
|
-
end
|
13
|
-
context 'ignore_unassigned_tokens false' do
|
14
|
-
before(:each) do
|
15
|
-
@categories = described_class.new ignore_unassigned_tokens: false
|
16
|
-
end
|
17
|
-
it 'should return the right value' do
|
18
|
-
@categories.ignore_unassigned_tokens.should == false
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
4
|
|
23
5
|
context "with real categories" do
|
24
6
|
before(:each) do
|
@@ -2,6 +2,53 @@ require 'spec_helper'
|
|
2
2
|
|
3
3
|
describe Picky::Query::Tokens do
|
4
4
|
|
5
|
+
context 'with ignore_unassigned_tokens true' do
|
6
|
+
it 'generates processed tokens from all words' do
|
7
|
+
expected = [
|
8
|
+
Picky::Query::Token.processed('this~'),
|
9
|
+
Picky::Query::Token.processed('is'),
|
10
|
+
Picky::Query::Token.processed('a'),
|
11
|
+
Picky::Query::Token.processed('sp:solr'),
|
12
|
+
Picky::Query::Token.processed('query"')
|
13
|
+
]
|
14
|
+
|
15
|
+
described_class.should_receive(:new).once.with expected, true
|
16
|
+
|
17
|
+
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], [], true
|
18
|
+
end
|
19
|
+
|
20
|
+
describe 'possible_combinations_in' do
|
21
|
+
before(:each) do
|
22
|
+
@token1 = stub :token1
|
23
|
+
@token2 = stub :token2
|
24
|
+
@token3 = stub :token3
|
25
|
+
|
26
|
+
@tokens = described_class.new [@token1, @token2, @token3], true
|
27
|
+
end
|
28
|
+
it 'should work correctly' do
|
29
|
+
@token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
|
30
|
+
@token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination21]
|
31
|
+
@token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
|
32
|
+
|
33
|
+
@tokens.possible_combinations_in(:some_index).should == [
|
34
|
+
[:combination11, :combination12],
|
35
|
+
[:combination21],
|
36
|
+
[:combination31, :combination32, :combination33]
|
37
|
+
]
|
38
|
+
end
|
39
|
+
it 'should work correctly' do
|
40
|
+
@token1.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination11, :combination12]
|
41
|
+
@token2.should_receive(:possible_combinations_in).once.with(:some_index).and_return []
|
42
|
+
@token3.should_receive(:possible_combinations_in).once.with(:some_index).and_return [:combination31, :combination32, :combination33]
|
43
|
+
|
44
|
+
@tokens.possible_combinations_in(:some_index).should == [
|
45
|
+
[:combination11, :combination12],
|
46
|
+
[:combination31, :combination32, :combination33]
|
47
|
+
]
|
48
|
+
end
|
49
|
+
end
|
50
|
+
end
|
51
|
+
|
5
52
|
describe '.processed' do
|
6
53
|
it 'generates processed tokens from all words' do
|
7
54
|
expected = [
|
@@ -12,7 +59,7 @@ describe Picky::Query::Tokens do
|
|
12
59
|
Picky::Query::Token.processed('query"')
|
13
60
|
]
|
14
61
|
|
15
|
-
described_class.should_receive(:new).once.with expected
|
62
|
+
described_class.should_receive(:new).once.with expected, false
|
16
63
|
|
17
64
|
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
|
18
65
|
end
|
@@ -25,7 +72,7 @@ describe Picky::Query::Tokens do
|
|
25
72
|
Picky::Query::Token.processed('query"')
|
26
73
|
]
|
27
74
|
|
28
|
-
described_class.should_receive(:new).once.with expected
|
75
|
+
described_class.should_receive(:new).once.with expected, false
|
29
76
|
|
30
77
|
described_class.processed ['this~', 'is', 'a', 'sp:solr', 'query"'], []
|
31
78
|
end
|
@@ -114,6 +161,7 @@ describe Picky::Query::Tokens do
|
|
114
161
|
|
115
162
|
@tokens.possible_combinations_in(:some_index).should == [
|
116
163
|
[:combination11, :combination12],
|
164
|
+
nil,
|
117
165
|
[:combination31, :combination32, :combination33]
|
118
166
|
]
|
119
167
|
end
|
metadata
CHANGED
@@ -2,7 +2,7 @@
|
|
2
2
|
name: picky
|
3
3
|
version: !ruby/object:Gem::Version
|
4
4
|
prerelease:
|
5
|
-
version: 3.1.4
|
5
|
+
version: 3.1.5
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Florian Hanke
|
@@ -10,7 +10,7 @@ autorequire:
|
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
12
|
|
13
|
-
date: 2011-09-
|
13
|
+
date: 2011-09-04 00:00:00 +02:00
|
14
14
|
default_executable: picky
|
15
15
|
dependencies:
|
16
16
|
- !ruby/object:Gem::Dependency
|
@@ -32,7 +32,7 @@ dependencies:
|
|
32
32
|
requirements:
|
33
33
|
- - "="
|
34
34
|
- !ruby/object:Gem::Version
|
35
|
-
version: 3.1.4
|
35
|
+
version: 3.1.5
|
36
36
|
type: :development
|
37
37
|
version_requirements: *id002
|
38
38
|
description: Fast Ruby semantic text search engine with comfortable single field interface.
|