picky 4.0.0pre6 → 4.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/api/category/partial.rb +26 -0
- data/lib/picky/api/category/similarity.rb +26 -0
- data/lib/picky/api/category/weight.rb +26 -0
- data/lib/picky/api/search/boost.rb +28 -0
- data/lib/picky/api/source.rb +35 -0
- data/lib/picky/api/tokenizer/character_substituter.rb +22 -0
- data/lib/picky/api/tokenizer.rb +37 -0
- data/lib/picky/bundle.rb +1 -1
- data/lib/picky/bundle_realtime.rb +2 -2
- data/lib/picky/category.rb +12 -6
- data/lib/picky/category_indexing.rb +2 -8
- data/lib/picky/generators/similarity/double_metaphone.rb +1 -1
- data/lib/picky/generators/similarity/metaphone.rb +1 -1
- data/lib/picky/generators/similarity/none.rb +1 -1
- data/lib/picky/generators/similarity/soundex.rb +1 -1
- data/lib/picky/index_indexing.rb +5 -25
- data/lib/picky/loader.rb +15 -5
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/{weights.rb → boosts.rb} +17 -17
- data/lib/picky/query/combinations.rb +2 -2
- data/lib/picky/search.rb +17 -19
- data/lib/picky/tokenizer.rb +7 -4
- data/spec/lib/api/category/partial_spec.rb +49 -0
- data/spec/lib/api/category/similarity_spec.rb +50 -0
- data/spec/lib/api/category/weight_spec.rb +47 -0
- data/spec/lib/api/search/boost_spec.rb +44 -0
- data/spec/lib/api/source_spec.rb +68 -0
- data/spec/lib/api/tokenizer/character_substituter_spec.rb +34 -0
- data/spec/lib/api/tokenizer_spec.rb +42 -0
- data/spec/lib/category_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +11 -24
- data/spec/lib/category_spec.rb +48 -11
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/index_indexing_spec.rb +10 -14
- data/spec/lib/index_spec.rb +1 -1
- data/spec/lib/query/allocation_spec.rb +2 -2
- data/spec/lib/query/boosts_spec.rb +79 -0
- data/spec/lib/query/combinations_spec.rb +3 -3
- data/spec/lib/search_spec.rb +13 -13
- data/spec/lib/tokenizer_spec.rb +12 -8
- metadata +44 -23
- data/spec/lib/query/weights_spec.rb +0 -81
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky
  module API
    module Category

      # Mixin that validates the partial option handed to a category.
      #
      # The including object must provide #index_name and #name, as
      # both are interpolated into the error message.
      #
      module Partial

        # Returns the partial strategy to use for the given option.
        #
        # * A falsy thing (nil/false) yields Generators::Partial::Default.
        # * A thing that responds to #each_partial is returned unchanged.
        # * Anything else raises a RuntimeError describing what is expected.
        #
        def extract_partial thing
          return Generators::Partial::Default unless thing
          return thing if thing.respond_to? :each_partial

          raise <<-ERROR
partial options for #{index_name}:#{name} should be either
* for example a Partial::Substring.new(from: m, to: n), Partial::Postfix.new(from: n), Partial::Infix.new(min: m, max: n) etc.
or
* an object that responds to #each_partial(str_or_sym) and yields each partial
ERROR
        end

      end

    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky
  module API
    module Category

      # Mixin that validates the similarity option handed to a category.
      #
      # The including object must provide #index_name and #name, as
      # both are interpolated into the error message.
      #
      module Similarity

        # Returns the similarity strategy to use for the given option.
        #
        # * A falsy thing (nil/false) yields Generators::Similarity::Default.
        # * A thing that responds to both #encode and #prioritize is
        #   returned unchanged.
        # * Anything else raises a RuntimeError describing what is expected.
        #
        def extract_similarity thing
          return Generators::Similarity::Default unless thing
          return thing if thing.respond_to?(:encode) && thing.respond_to?(:prioritize)

          raise <<-ERROR
similarity options for #{index_name}:#{name} should be either
* for example a Similarity::Phonetic.new(n), Similarity::Metaphone.new(n), Similarity::DoubleMetaphone.new(n) etc.
or
* an object that responds to #encode(text) => encoded_text and #prioritize(array_of_encoded, encoded)
ERROR
        end

      end

    end
  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky
  module API
    module Category

      # Mixin that validates the weight option handed to a category.
      #
      # The including object must provide #index_name and #name, as
      # both are interpolated into the error message.
      #
      module Weight

        # Returns the weight strategy to use for the given option.
        #
        # * A falsy thing (nil/false) yields Generators::Weights::Default.
        # * A thing that responds to #weight_for is returned unchanged.
        # * Anything else raises a RuntimeError describing what is expected.
        #
        def extract_weight thing
          return Generators::Weights::Default unless thing
          return thing if thing.respond_to? :weight_for

          raise <<-ERROR
weight options for #{index_name}:#{name} should be either
* for example a Weights::Logarithmic.new, Weights::Constant.new(int = 0), Weights::Dynamic.new(&block) etc.
or
* an object that responds to #weight_for(amount_of_ids_for_token) => float
ERROR
        end

      end

    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Picky
  module API
    module Search

      # Mixin that validates the boost option handed to a search.
      #
      module Boost

        # Returns the boosts object to use for the given option.
        #
        # * A thing that responds to #boost_for is returned unchanged.
        # * A thing that responds to #[] (for example a Hash) is wrapped
        #   in a Query::Boosts.
        # * Anything else raises a RuntimeError describing what is expected.
        #
        def extract_boosts thing
          return thing if thing.respond_to?(:boost_for)
          return Query::Boosts.new(thing) if thing.respond_to?(:[])

          raise <<-ERROR
boost options for a Search should be either
* for example a Hash { [:name, :surname] => +3 }
or
* an object that responds to #boost_for(combinations) and returns a boost float
ERROR
        end

      end

    end
  end
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Picky
  module API

    # Mixin that validates and unwraps data sources.
    #
    # Reads the @index and @source instance variables of the including
    # object and, if the object responds to it, its #name.
    #
    module Source

      # Returns the given source if it is usable.
      #
      # * A thing that responds to #each or #call is returned unchanged.
      # * nil is accepted (and nil returned) if options[:nil_ok] is truthy.
      # * Anything else raises an ArgumentError. Note that an invalid
      #   non-nil source raises even with nil_ok, so a misconfigured
      #   source is not silently dropped.
      #
      def extract_source thing, options = {}
        return thing if thing.respond_to?(:each) || thing.respond_to?(:call)
        return if thing.nil? && options[:nil_ok]

        # Describe where the source was configured, as far as we can tell.
        #
        location = if respond_to?(:name)
          @index ? " #{@index.name}:#{name}" : " #{name}"
        else
          ''
        end

        raise ArgumentError, <<-ERROR
The#{location} source should respond to either the method #each or
it can be a lambda/block, returning such a source.
ERROR
      end

      # Get the actual source if it is wrapped in a time
      # capsule, i.e. a block/lambda.
      #
      def unblock_source
        @source.respond_to?(:call) ? @source.call : @source
      end

    end
  end
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Picky
  module API
    module Tokenizer

      # Mixin that validates the character substituter handed to a
      # tokenizer.
      #
      module CharacterSubstituter

        # Returns the given substituter if it responds to #substitute.
        # Anything else raises an ArgumentError describing what is
        # expected.
        #
        def extract_character_substituter thing
          return thing if thing.respond_to? :substitute

          raise ArgumentError, <<-ERROR
The substitutes_characters_with option needs a character substituter,
which responds to #substitute(text) and returns substituted_text.
ERROR
        end

      end

    end
  end
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Picky
  module API

    # Mixin that validates the tokenizer (indexing/searching) option.
    #
    # Reads the @index instance variable of the including object and,
    # if the object responds to it, its #name.
    #
    module Tokenizer

      # Returns the tokenizer to use for the given option.
      #
      # * A falsy thing (nil/false) yields nil.
      # * A thing that responds to #tokenize is returned unchanged.
      # * A thing that responds to #[] (for example an options Hash) is
      #   handed to Picky::Tokenizer.new.
      # * Anything else raises a RuntimeError describing what is expected.
      #
      def extract_tokenizer thing
        return unless thing
        return thing if thing.respond_to? :tokenize
        return Picky::Tokenizer.new(thing) if thing.respond_to? :[]

        # Describe where the option was configured, as far as we can tell.
        #
        location = if respond_to?(:name)
          @index ? " for #{@index.name}:#{name}" : " for #{name}"
        else
          ''
        end

        raise <<-ERROR
indexing options#{location} should be either
* a Hash
or
* an object that responds to #tokenize(text) => [[token1, ...], [original1, ...]]
ERROR
      end

    end

  end
end
|
data/lib/picky/bundle.rb
CHANGED
@@ -126,7 +126,7 @@ module Picky
|
|
126
126
|
# Note: Does not return itself.
|
127
127
|
#
|
128
128
|
def similar text
|
129
|
-
code = similarity_strategy.
|
129
|
+
code = similarity_strategy.encode text
|
130
130
|
return [] unless code
|
131
131
|
similar_codes = @similarity[code]
|
132
132
|
if similar_codes.blank?
|
@@ -22,7 +22,7 @@ module Picky
|
|
22
22
|
# Since no element uses this sym anymore, we can delete the similarity for it.
|
23
23
|
# TODO Not really. Since multiple syms can point to the same encoded.
|
24
24
|
#
|
25
|
-
@similarity.delete self.similarity_strategy.
|
25
|
+
@similarity.delete self.similarity_strategy.encode(str_or_sym)
|
26
26
|
else
|
27
27
|
@weights[str_or_sym] = self.weight_strategy.weight_for ids.size
|
28
28
|
end
|
@@ -67,7 +67,7 @@ module Picky
|
|
67
67
|
# Add string/symbol to similarity index.
|
68
68
|
#
|
69
69
|
def add_similarity str_or_sym, where = :unshift
|
70
|
-
if encoded = self.similarity_strategy.
|
70
|
+
if encoded = self.similarity_strategy.encode(str_or_sym)
|
71
71
|
similars = @similarity[encoded] ||= []
|
72
72
|
|
73
73
|
# Not completely correct, as others will also be affected, but meh.
|
data/lib/picky/category.rb
CHANGED
@@ -2,6 +2,12 @@ module Picky
|
|
2
2
|
|
3
3
|
class Category
|
4
4
|
|
5
|
+
include API::Tokenizer
|
6
|
+
include API::Source
|
7
|
+
include API::Category::Weight
|
8
|
+
include API::Category::Partial
|
9
|
+
include API::Category::Similarity
|
10
|
+
|
5
11
|
attr_accessor :exact,
|
6
12
|
:partial
|
7
13
|
attr_reader :name,
|
@@ -33,9 +39,9 @@ module Picky
|
|
33
39
|
|
34
40
|
# Indexing.
|
35
41
|
#
|
36
|
-
@source
|
37
|
-
@from
|
38
|
-
@tokenizer
|
42
|
+
@source = extract_source options[:source], nil_ok: true
|
43
|
+
@from = options[:from]
|
44
|
+
@tokenizer = extract_tokenizer options[:indexing]
|
39
45
|
|
40
46
|
@key_format = options.delete :key_format
|
41
47
|
@backend = options.delete :backend
|
@@ -44,9 +50,9 @@ module Picky
|
|
44
50
|
|
45
51
|
# @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
|
46
52
|
|
47
|
-
weights = options[:weight]
|
48
|
-
partial = options[:partial]
|
49
|
-
similarity = options[:similarity]
|
53
|
+
weights = extract_weight options[:weight]
|
54
|
+
partial = extract_partial options[:partial]
|
55
|
+
similarity = extract_similarity options[:similarity]
|
50
56
|
|
51
57
|
no_partial = Generators::Partial::None.new
|
52
58
|
no_similarity = Generators::Similarity::None.new
|
@@ -78,13 +78,7 @@ module Picky
|
|
78
78
|
# If we have no explicit source, we'll check the index for one.
|
79
79
|
#
|
80
80
|
def source
|
81
|
-
extract_source || @index.source
|
82
|
-
end
|
83
|
-
# Extract the actual source if it is wrapped in a time
|
84
|
-
# capsule, i.e. a block/lambda.
|
85
|
-
#
|
86
|
-
def extract_source
|
87
|
-
@source = @source.respond_to?(:call) ? @source.call : @source
|
81
|
+
(@source = extract_source(@source, nil_ok: true)) || @index.source
|
88
82
|
end
|
89
83
|
|
90
84
|
# Return the key format.
|
@@ -96,7 +90,7 @@ module Picky
|
|
96
90
|
# Default is to_i.
|
97
91
|
#
|
98
92
|
def key_format
|
99
|
-
@key_format ||=
|
93
|
+
@key_format ||= @index.key_format || :to_i
|
100
94
|
end
|
101
95
|
|
102
96
|
# Where the data is taken from.
|
data/lib/picky/index_indexing.rb
CHANGED
@@ -4,6 +4,9 @@ module Picky
|
|
4
4
|
#
|
5
5
|
class Index
|
6
6
|
|
7
|
+
include API::Tokenizer
|
8
|
+
include API::Source
|
9
|
+
|
7
10
|
include Helpers::Indexing
|
8
11
|
|
9
12
|
# Delegators for indexing.
|
@@ -17,11 +20,7 @@ module Picky
|
|
17
20
|
# Parameters are the exact same as for indexing.
|
18
21
|
#
|
19
22
|
def indexing options = {}
|
20
|
-
@tokenizer =
|
21
|
-
options
|
22
|
-
else
|
23
|
-
options && Tokenizer.new(options)
|
24
|
-
end
|
23
|
+
@tokenizer = extract_tokenizer options
|
25
24
|
end
|
26
25
|
|
27
26
|
#
|
@@ -99,26 +98,7 @@ module Picky
|
|
99
98
|
#
|
100
99
|
def source some_source = nil, &block
|
101
100
|
some_source ||= block
|
102
|
-
some_source ? (
|
103
|
-
end
|
104
|
-
# Extract the actual source if it is wrapped in a time
|
105
|
-
# capsule, i.e. a block/lambda.
|
106
|
-
#
|
107
|
-
def extract_source
|
108
|
-
@source.respond_to?(:call) ? @source.call : @source
|
109
|
-
end
|
110
|
-
def check_source source # :nodoc:
|
111
|
-
raise ArgumentError.new(<<-SOURCE
|
112
|
-
|
113
|
-
|
114
|
-
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text), OR it can be a lambda/block, returning such a source.
|
115
|
-
Or it could use one of the built-in sources:
|
116
|
-
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
117
|
-
Sources::')}
|
118
|
-
|
119
|
-
|
120
|
-
SOURCE
|
121
|
-
) unless source.respond_to?(:each) || source.respond_to?(:harvest) || source.respond_to?(:call)
|
101
|
+
some_source ? (@source = extract_source(some_source)) : unblock_source
|
122
102
|
end
|
123
103
|
|
124
104
|
# Define a key_format on the index.
|
data/lib/picky/loader.rb
CHANGED
@@ -165,10 +165,6 @@ module Picky
|
|
165
165
|
load_relative 'query/token'
|
166
166
|
load_relative 'query/tokens'
|
167
167
|
|
168
|
-
# Tokenizer.
|
169
|
-
#
|
170
|
-
load_relative 'tokenizer'
|
171
|
-
|
172
168
|
# Query combinations, qualifiers, weigher.
|
173
169
|
#
|
174
170
|
load_relative 'query/combination'
|
@@ -179,7 +175,7 @@ module Picky
|
|
179
175
|
|
180
176
|
load_relative 'query/qualifier_category_mapper'
|
181
177
|
|
182
|
-
load_relative 'query/
|
178
|
+
load_relative 'query/boosts'
|
183
179
|
|
184
180
|
load_relative 'query/indexes'
|
185
181
|
load_relative 'query/indexes_check'
|
@@ -187,6 +183,20 @@ module Picky
|
|
187
183
|
# Loads the user interface parts.
|
188
184
|
#
|
189
185
|
def load_user_interface
|
186
|
+
# Load API parts.
|
187
|
+
#
|
188
|
+
load_relative 'api/tokenizer'
|
189
|
+
load_relative 'api/tokenizer/character_substituter'
|
190
|
+
load_relative 'api/source'
|
191
|
+
load_relative 'api/category/weight'
|
192
|
+
load_relative 'api/category/partial'
|
193
|
+
load_relative 'api/category/similarity'
|
194
|
+
load_relative 'api/search/boost'
|
195
|
+
|
196
|
+
# Tokenizer.
|
197
|
+
#
|
198
|
+
load_relative 'tokenizer'
|
199
|
+
|
190
200
|
# Load harakiri.
|
191
201
|
#
|
192
202
|
load_relative 'rack/harakiri'
|
@@ -2,7 +2,7 @@ module Picky
|
|
2
2
|
|
3
3
|
module Query
|
4
4
|
|
5
|
-
# Calculates
|
5
|
+
# Calculates boosts for combinations.
|
6
6
|
#
|
7
7
|
# Example:
|
8
8
|
# Someone searches for peter fish.
|
@@ -11,34 +11,34 @@ module Picky
|
|
11
11
|
# and
|
12
12
|
# [:name, :surname]
|
13
13
|
#
|
14
|
-
# This class is concerned with calculating
|
14
|
+
# This class is concerned with calculating boosts
|
15
15
|
# for the category combinations.
|
16
16
|
#
|
17
17
|
# Implement either
|
18
|
-
# #
|
18
|
+
# #boost_for(combinations)
|
19
19
|
# or
|
20
|
-
# #
|
20
|
+
# #boost_for_categories(category_names) # Subclass this class for this.
|
21
21
|
#
|
22
|
-
# And return a
|
22
|
+
# And return a boost (float).
|
23
23
|
#
|
24
|
-
class
|
24
|
+
class Boosts
|
25
25
|
|
26
|
-
attr_reader :
|
26
|
+
attr_reader :boosts
|
27
27
|
|
28
28
|
delegate :empty?,
|
29
|
-
:to => :
|
29
|
+
:to => :boosts
|
30
30
|
|
31
31
|
# Needs a Hash of
|
32
32
|
# [:category_name1, :category_name2] => +3
|
33
33
|
# (some positive or negative weight)
|
34
34
|
#
|
35
|
-
def initialize
|
36
|
-
@
|
35
|
+
def initialize boosts = {}
|
36
|
+
@boosts = boosts
|
37
37
|
end
|
38
38
|
|
39
39
|
# API.
|
40
40
|
#
|
41
|
-
# Get the
|
41
|
+
# Get the boost for an array of category names.
|
42
42
|
#
|
43
43
|
# Example:
|
44
44
|
# [:name, :height, :color] returns +3, but
|
@@ -47,8 +47,8 @@ module Picky
|
|
47
47
|
# Note: Use Array#clustered_uniq_fast to make
|
48
48
|
# [:a, :a, :b, :a] => [:a, :b, :a]
|
49
49
|
#
|
50
|
-
def
|
51
|
-
@
|
50
|
+
def boost_for_categories names
|
51
|
+
@boosts[names.clustered_uniq_fast] || 0
|
52
52
|
end
|
53
53
|
|
54
54
|
# API.
|
@@ -60,22 +60,22 @@ module Picky
|
|
60
60
|
# Note: Cache this if more complicated weighings become necessary.
|
61
61
|
# Note: Maybe make combinations comparable to Symbols?
|
62
62
|
#
|
63
|
-
def
|
64
|
-
|
63
|
+
def boost_for combinations
|
64
|
+
boost_for_categories combinations.map(&:category_name)
|
65
65
|
end
|
66
66
|
|
67
67
|
# A Boosts instance is == to another if
|
68
68
|
# the boosts are the same.
|
69
69
|
#
|
70
70
|
def == other
|
71
|
-
@
|
71
|
+
@boosts == other.boosts
|
72
72
|
end
|
73
73
|
|
74
74
|
# Prints out a nice representation of the
|
75
75
|
# configured boosts.
|
76
76
|
#
|
77
77
|
def to_s
|
78
|
-
"#{self.class}(#{@
|
78
|
+
"#{self.class}(#{@boosts})"
|
79
79
|
end
|
80
80
|
|
81
81
|
end
|
@@ -28,8 +28,8 @@ module Picky
|
|
28
28
|
def score
|
29
29
|
@combinations.sum &:weight
|
30
30
|
end
|
31
|
-
def
|
32
|
-
weights.
|
31
|
+
def boost_for weights
|
32
|
+
weights.boost_for @combinations
|
33
33
|
end
|
34
34
|
|
35
35
|
# Filters the tokens and categories such that categories
|
data/lib/picky/search.rb
CHANGED
@@ -15,11 +15,13 @@ module Picky
|
|
15
15
|
#
|
16
16
|
class Search
|
17
17
|
|
18
|
+
include API::Search::Boost
|
19
|
+
|
18
20
|
include Helpers::Measuring
|
19
21
|
|
20
22
|
attr_reader :indexes
|
21
23
|
attr_accessor :tokenizer,
|
22
|
-
:
|
24
|
+
:boosts
|
23
25
|
|
24
26
|
delegate :ignore,
|
25
27
|
:to => :indexes
|
@@ -27,11 +29,11 @@ module Picky
|
|
27
29
|
# Takes:
|
28
30
|
# * A number of indexes
|
29
31
|
#
|
30
|
-
# It is also possible to define the tokenizer and
|
32
|
+
# It is also possible to define the tokenizer and boosts like so.
|
31
33
|
# Example:
|
32
34
|
# search = Search.new(index1, index2, index3) do
|
33
35
|
# searching removes_characters: /[^a-z]/ # etc.
|
34
|
-
#
|
36
|
+
# boosts [:author, :title] => +3,
|
35
37
|
# [:title, :isbn] => +1
|
36
38
|
# end
|
37
39
|
#
|
@@ -41,7 +43,7 @@ module Picky
|
|
41
43
|
instance_eval(&Proc.new) if block_given?
|
42
44
|
|
43
45
|
@tokenizer ||= Tokenizer.searching # THINK Not dynamic. Ok?
|
44
|
-
@
|
46
|
+
@boosts ||= Query::Boosts.new
|
45
47
|
@ignore_unassigned = false if @ignore_unassigned.nil?
|
46
48
|
|
47
49
|
self
|
@@ -87,7 +89,7 @@ module Picky
|
|
87
89
|
# Note: When using the Picky interface, do not terminate too
|
88
90
|
# early as this will kill off the allocation selections.
|
89
91
|
# A value of
|
90
|
-
#
|
92
|
+
# terminate_early 5
|
91
93
|
# is probably a good idea to show the user 5 extra
|
92
94
|
# beyond the needed ones.
|
93
95
|
#
|
@@ -117,29 +119,25 @@ module Picky
|
|
117
119
|
#
|
118
120
|
# or
|
119
121
|
#
|
120
|
-
# # Explicitly add a random number (0...1) to the
|
122
|
+
# # Explicitly add a random number (0...1) to the boosts.
|
121
123
|
# #
|
122
|
-
#
|
124
|
+
# my_boosts = Class.new do
|
123
125
|
# # Instance only needs to implement
|
124
|
-
# #
|
126
|
+
# # boost_for combinations
|
125
127
|
# # and return a number that is
|
126
|
-
# # added to the
|
128
|
+
# # added to the score.
|
127
129
|
# #
|
128
|
-
# def
|
130
|
+
# def boost_for combinations
|
129
131
|
# rand
|
130
132
|
# end
|
131
133
|
# end.new
|
132
134
|
#
|
133
135
|
# search = Search.new(books_index, dvd_index, mp3_index) do
|
134
|
-
# boost
|
136
|
+
# boost my_boosts
|
135
137
|
# end
|
136
138
|
#
|
137
|
-
def boost
|
138
|
-
@
|
139
|
-
weights
|
140
|
-
else
|
141
|
-
Query::Weights.new weights
|
142
|
-
end
|
139
|
+
def boost boosts
|
140
|
+
@boosts = extract_boosts boosts
|
143
141
|
end
|
144
142
|
|
145
143
|
# Ignore the given token if it cannot be matched to a category.
|
@@ -225,7 +223,7 @@ module Picky
|
|
225
223
|
# Gets sorted allocations for the tokens.
|
226
224
|
#
|
227
225
|
def sorted_allocations tokens, amount = nil # :nodoc:
|
228
|
-
indexes.prepared_allocations_for tokens,
|
226
|
+
indexes.prepared_allocations_for tokens, boosts, amount
|
229
227
|
end
|
230
228
|
|
231
229
|
# Display some nice information for the user.
|
@@ -234,7 +232,7 @@ module Picky
|
|
234
232
|
s = "#{self.class}("
|
235
233
|
ary = []
|
236
234
|
ary << @indexes.indexes.map(&:name).join(', ') unless @indexes.indexes.empty?
|
237
|
-
ary << "
|
235
|
+
ary << "boosts: #{@boosts}" if @boosts
|
238
236
|
s << ary.join(', ')
|
239
237
|
s << ")"
|
240
238
|
s
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -6,15 +6,19 @@ module Picky
|
|
6
6
|
#
|
7
7
|
class Tokenizer
|
8
8
|
|
9
|
+
extend API::Tokenizer
|
10
|
+
|
11
|
+
include API::Tokenizer::CharacterSubstituter
|
12
|
+
|
9
13
|
def self.default_indexing_with options = {}
|
10
|
-
@indexing =
|
14
|
+
@indexing = extract_tokenizer options
|
11
15
|
end
|
12
16
|
def self.indexing
|
13
17
|
@indexing ||= new
|
14
18
|
end
|
15
19
|
|
16
20
|
def self.default_searching_with options = {}
|
17
|
-
@searching =
|
21
|
+
@searching = extract_tokenizer options
|
18
22
|
end
|
19
23
|
def self.searching
|
20
24
|
@searching ||= new
|
@@ -108,8 +112,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
108
112
|
# Default is European Character substitution.
|
109
113
|
#
|
110
114
|
def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
|
111
|
-
|
112
|
-
@substituter = substituter
|
115
|
+
@substituter = extract_character_substituter substituter
|
113
116
|
end
|
114
117
|
def substitute_characters text
|
115
118
|
substituter?? substituter.substitute(text) : text
|