picky 4.0.0pre6 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/api/category/partial.rb +26 -0
- data/lib/picky/api/category/similarity.rb +26 -0
- data/lib/picky/api/category/weight.rb +26 -0
- data/lib/picky/api/search/boost.rb +28 -0
- data/lib/picky/api/source.rb +35 -0
- data/lib/picky/api/tokenizer/character_substituter.rb +22 -0
- data/lib/picky/api/tokenizer.rb +37 -0
- data/lib/picky/bundle.rb +1 -1
- data/lib/picky/bundle_realtime.rb +2 -2
- data/lib/picky/category.rb +12 -6
- data/lib/picky/category_indexing.rb +2 -8
- data/lib/picky/generators/similarity/double_metaphone.rb +1 -1
- data/lib/picky/generators/similarity/metaphone.rb +1 -1
- data/lib/picky/generators/similarity/none.rb +1 -1
- data/lib/picky/generators/similarity/soundex.rb +1 -1
- data/lib/picky/index_indexing.rb +5 -25
- data/lib/picky/loader.rb +15 -5
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/{weights.rb → boosts.rb} +17 -17
- data/lib/picky/query/combinations.rb +2 -2
- data/lib/picky/search.rb +17 -19
- data/lib/picky/tokenizer.rb +7 -4
- data/spec/lib/api/category/partial_spec.rb +49 -0
- data/spec/lib/api/category/similarity_spec.rb +50 -0
- data/spec/lib/api/category/weight_spec.rb +47 -0
- data/spec/lib/api/search/boost_spec.rb +44 -0
- data/spec/lib/api/source_spec.rb +68 -0
- data/spec/lib/api/tokenizer/character_substituter_spec.rb +34 -0
- data/spec/lib/api/tokenizer_spec.rb +42 -0
- data/spec/lib/category_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +11 -24
- data/spec/lib/category_spec.rb +48 -11
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/index_indexing_spec.rb +10 -14
- data/spec/lib/index_spec.rb +1 -1
- data/spec/lib/query/allocation_spec.rb +2 -2
- data/spec/lib/query/boosts_spec.rb +79 -0
- data/spec/lib/query/combinations_spec.rb +3 -3
- data/spec/lib/search_spec.rb +13 -13
- data/spec/lib/tokenizer_spec.rb +12 -8
- metadata +44 -23
- data/spec/lib/query/weights_spec.rb +0 -81
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
module Category
|
4
|
+
|
5
|
+
module Partial
|
6
|
+
|
7
|
+
def extract_partial thing
|
8
|
+
return Generators::Partial::Default unless thing
|
9
|
+
|
10
|
+
if thing.respond_to? :each_partial
|
11
|
+
thing
|
12
|
+
else
|
13
|
+
raise <<-ERROR
|
14
|
+
partial options for #{index_name}:#{name} should be either
|
15
|
+
* for example a Partial::Substring.new(from: m, to: n), Partial::Postfix.new(from: n), Partial::Infix.new(min: m, max: n) etc.
|
16
|
+
or
|
17
|
+
* an object that responds to #each_partial(str_or_sym) and yields each partial
|
18
|
+
ERROR
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
module Category
|
4
|
+
|
5
|
+
module Similarity
|
6
|
+
|
7
|
+
def extract_similarity thing
|
8
|
+
return Generators::Similarity::Default unless thing
|
9
|
+
|
10
|
+
if thing.respond_to?(:encode) && thing.respond_to?(:prioritize)
|
11
|
+
thing
|
12
|
+
else
|
13
|
+
raise <<-ERROR
|
14
|
+
similarity options for #{index_name}:#{name} should be either
|
15
|
+
* for example a Similarity::Phonetic.new(n), Similarity::Metaphone.new(n), Similarity::DoubleMetaphone.new(n) etc.
|
16
|
+
or
|
17
|
+
* an object that responds to #encode(text) => encoded_text and #prioritize(array_of_encoded, encoded)
|
18
|
+
ERROR
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
module Category
|
4
|
+
|
5
|
+
module Weight
|
6
|
+
|
7
|
+
def extract_weight thing
|
8
|
+
return Generators::Weights::Default unless thing
|
9
|
+
|
10
|
+
if thing.respond_to? :weight_for
|
11
|
+
thing
|
12
|
+
else
|
13
|
+
raise <<-ERROR
|
14
|
+
weight options for #{index_name}:#{name} should be either
|
15
|
+
* for example a Weights::Logarithmic.new, Weights::Constant.new(int = 0), Weights::Dynamic.new(&block) etc.
|
16
|
+
or
|
17
|
+
* an object that responds to #weight_for(amount_of_ids_for_token) => float
|
18
|
+
ERROR
|
19
|
+
end
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
end
|
26
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
module Search
|
4
|
+
|
5
|
+
module Boost
|
6
|
+
|
7
|
+
def extract_boosts thing
|
8
|
+
if thing.respond_to?(:boost_for)
|
9
|
+
thing
|
10
|
+
else
|
11
|
+
if thing.respond_to?(:[])
|
12
|
+
Query::Boosts.new thing
|
13
|
+
else
|
14
|
+
raise <<-ERROR
|
15
|
+
boost options for a Search should be either
|
16
|
+
* for example a Hash { [:name, :surname] => +3 }
|
17
|
+
or
|
18
|
+
* an object that responds to #boost_for(combinations) and returns a boost float
|
19
|
+
ERROR
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
module Source
|
4
|
+
|
5
|
+
def extract_source thing, options = {}
|
6
|
+
if thing.respond_to?(:each) || thing.respond_to?(:call)
|
7
|
+
thing
|
8
|
+
else
|
9
|
+
return if options[:nil_ok]
|
10
|
+
if respond_to? :name
|
11
|
+
if @index
|
12
|
+
location = " #{@index.name}:#{name}"
|
13
|
+
else
|
14
|
+
location = " #{name}"
|
15
|
+
end
|
16
|
+
else
|
17
|
+
location = ''
|
18
|
+
end
|
19
|
+
raise ArgumentError.new(<<-ERROR)
|
20
|
+
The#{location} source should respond to either the method #each or
|
21
|
+
it can be a lambda/block, returning such a source.
|
22
|
+
ERROR
|
23
|
+
end
|
24
|
+
end
|
25
|
+
|
26
|
+
# Get the actual source if it is wrapped in a time
|
27
|
+
# capsule, i.e. a block/lambda.
|
28
|
+
#
|
29
|
+
def unblock_source
|
30
|
+
@source.respond_to?(:call) ? @source.call : @source
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,22 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
module Tokenizer
|
4
|
+
|
5
|
+
module CharacterSubstituter
|
6
|
+
|
7
|
+
def extract_character_substituter thing
|
8
|
+
if thing.respond_to? :substitute
|
9
|
+
thing
|
10
|
+
else
|
11
|
+
raise ArgumentError.new <<-ERROR
|
12
|
+
The substitutes_characters_with option needs a character substituter,
|
13
|
+
which responds to #substitute(text) and returns substituted_text."
|
14
|
+
ERROR
|
15
|
+
end
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
21
|
+
end
|
22
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
module Picky
|
2
|
+
module API
|
3
|
+
|
4
|
+
module Tokenizer
|
5
|
+
|
6
|
+
def extract_tokenizer thing
|
7
|
+
return unless thing
|
8
|
+
if thing.respond_to? :tokenize
|
9
|
+
thing
|
10
|
+
else
|
11
|
+
if thing.respond_to? :[]
|
12
|
+
Picky::Tokenizer.new thing
|
13
|
+
else
|
14
|
+
if respond_to? :name
|
15
|
+
location = ' for '
|
16
|
+
if @index
|
17
|
+
location += "#{@index.name}:#{name}"
|
18
|
+
else
|
19
|
+
location += "#{name}"
|
20
|
+
end
|
21
|
+
else
|
22
|
+
location = ''
|
23
|
+
end
|
24
|
+
raise <<-ERROR
|
25
|
+
indexing options#{location} should be either
|
26
|
+
* a Hash
|
27
|
+
or
|
28
|
+
* an object that responds to #tokenize(text) => [[token1, ...], [original1, ...]]
|
29
|
+
ERROR
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
end
|
data/lib/picky/bundle.rb
CHANGED
@@ -126,7 +126,7 @@ module Picky
|
|
126
126
|
# Note: Does not return itself.
|
127
127
|
#
|
128
128
|
def similar text
|
129
|
-
code = similarity_strategy.
|
129
|
+
code = similarity_strategy.encode text
|
130
130
|
return [] unless code
|
131
131
|
similar_codes = @similarity[code]
|
132
132
|
if similar_codes.blank?
|
@@ -22,7 +22,7 @@ module Picky
|
|
22
22
|
# Since no element uses this sym anymore, we can delete the similarity for it.
|
23
23
|
# TODO Not really. Since multiple syms can point to the same encoded.
|
24
24
|
#
|
25
|
-
@similarity.delete self.similarity_strategy.
|
25
|
+
@similarity.delete self.similarity_strategy.encode(str_or_sym)
|
26
26
|
else
|
27
27
|
@weights[str_or_sym] = self.weight_strategy.weight_for ids.size
|
28
28
|
end
|
@@ -67,7 +67,7 @@ module Picky
|
|
67
67
|
# Add string/symbol to similarity index.
|
68
68
|
#
|
69
69
|
def add_similarity str_or_sym, where = :unshift
|
70
|
-
if encoded = self.similarity_strategy.
|
70
|
+
if encoded = self.similarity_strategy.encode(str_or_sym)
|
71
71
|
similars = @similarity[encoded] ||= []
|
72
72
|
|
73
73
|
# Not completely correct, as others will also be affected, but meh.
|
data/lib/picky/category.rb
CHANGED
@@ -2,6 +2,12 @@ module Picky
|
|
2
2
|
|
3
3
|
class Category
|
4
4
|
|
5
|
+
include API::Tokenizer
|
6
|
+
include API::Source
|
7
|
+
include API::Category::Weight
|
8
|
+
include API::Category::Partial
|
9
|
+
include API::Category::Similarity
|
10
|
+
|
5
11
|
attr_accessor :exact,
|
6
12
|
:partial
|
7
13
|
attr_reader :name,
|
@@ -33,9 +39,9 @@ module Picky
|
|
33
39
|
|
34
40
|
# Indexing.
|
35
41
|
#
|
36
|
-
@source
|
37
|
-
@from
|
38
|
-
@tokenizer
|
42
|
+
@source = extract_source options[:source], nil_ok: true
|
43
|
+
@from = options[:from]
|
44
|
+
@tokenizer = extract_tokenizer options[:indexing]
|
39
45
|
|
40
46
|
@key_format = options.delete :key_format
|
41
47
|
@backend = options.delete :backend
|
@@ -44,9 +50,9 @@ module Picky
|
|
44
50
|
|
45
51
|
# @symbols = options[:use_symbols] || index.use_symbols? # TODO Symbols.
|
46
52
|
|
47
|
-
weights = options[:weight]
|
48
|
-
partial = options[:partial]
|
49
|
-
similarity = options[:similarity]
|
53
|
+
weights = extract_weight options[:weight]
|
54
|
+
partial = extract_partial options[:partial]
|
55
|
+
similarity = extract_similarity options[:similarity]
|
50
56
|
|
51
57
|
no_partial = Generators::Partial::None.new
|
52
58
|
no_similarity = Generators::Similarity::None.new
|
@@ -78,13 +78,7 @@ module Picky
|
|
78
78
|
# If we have no explicit source, we'll check the index for one.
|
79
79
|
#
|
80
80
|
def source
|
81
|
-
extract_source || @index.source
|
82
|
-
end
|
83
|
-
# Extract the actual source if it is wrapped in a time
|
84
|
-
# capsule, i.e. a block/lambda.
|
85
|
-
#
|
86
|
-
def extract_source
|
87
|
-
@source = @source.respond_to?(:call) ? @source.call : @source
|
81
|
+
(@source = extract_source(@source, nil_ok: true)) || @index.source
|
88
82
|
end
|
89
83
|
|
90
84
|
# Return the key format.
|
@@ -96,7 +90,7 @@ module Picky
|
|
96
90
|
# Default is to_i.
|
97
91
|
#
|
98
92
|
def key_format
|
99
|
-
@key_format ||=
|
93
|
+
@key_format ||= @index.key_format || :to_i
|
100
94
|
end
|
101
95
|
|
102
96
|
# Where the data is taken from.
|
data/lib/picky/index_indexing.rb
CHANGED
@@ -4,6 +4,9 @@ module Picky
|
|
4
4
|
#
|
5
5
|
class Index
|
6
6
|
|
7
|
+
include API::Tokenizer
|
8
|
+
include API::Source
|
9
|
+
|
7
10
|
include Helpers::Indexing
|
8
11
|
|
9
12
|
# Delegators for indexing.
|
@@ -17,11 +20,7 @@ module Picky
|
|
17
20
|
# Parameters are the exact same as for indexing.
|
18
21
|
#
|
19
22
|
def indexing options = {}
|
20
|
-
@tokenizer =
|
21
|
-
options
|
22
|
-
else
|
23
|
-
options && Tokenizer.new(options)
|
24
|
-
end
|
23
|
+
@tokenizer = extract_tokenizer options
|
25
24
|
end
|
26
25
|
|
27
26
|
#
|
@@ -99,26 +98,7 @@ module Picky
|
|
99
98
|
#
|
100
99
|
def source some_source = nil, &block
|
101
100
|
some_source ||= block
|
102
|
-
some_source ? (
|
103
|
-
end
|
104
|
-
# Extract the actual source if it is wrapped in a time
|
105
|
-
# capsule, i.e. a block/lambda.
|
106
|
-
#
|
107
|
-
def extract_source
|
108
|
-
@source.respond_to?(:call) ? @source.call : @source
|
109
|
-
end
|
110
|
-
def check_source source # :nodoc:
|
111
|
-
raise ArgumentError.new(<<-SOURCE
|
112
|
-
|
113
|
-
|
114
|
-
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text), OR it can be a lambda/block, returning such a source.
|
115
|
-
Or it could use one of the built-in sources:
|
116
|
-
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
117
|
-
Sources::')}
|
118
|
-
|
119
|
-
|
120
|
-
SOURCE
|
121
|
-
) unless source.respond_to?(:each) || source.respond_to?(:harvest) || source.respond_to?(:call)
|
101
|
+
some_source ? (@source = extract_source(some_source)) : unblock_source
|
122
102
|
end
|
123
103
|
|
124
104
|
# Define a key_format on the index.
|
data/lib/picky/loader.rb
CHANGED
@@ -165,10 +165,6 @@ module Picky
|
|
165
165
|
load_relative 'query/token'
|
166
166
|
load_relative 'query/tokens'
|
167
167
|
|
168
|
-
# Tokenizer.
|
169
|
-
#
|
170
|
-
load_relative 'tokenizer'
|
171
|
-
|
172
168
|
# Query combinations, qualifiers, weigher.
|
173
169
|
#
|
174
170
|
load_relative 'query/combination'
|
@@ -179,7 +175,7 @@ module Picky
|
|
179
175
|
|
180
176
|
load_relative 'query/qualifier_category_mapper'
|
181
177
|
|
182
|
-
load_relative 'query/
|
178
|
+
load_relative 'query/boosts'
|
183
179
|
|
184
180
|
load_relative 'query/indexes'
|
185
181
|
load_relative 'query/indexes_check'
|
@@ -187,6 +183,20 @@ module Picky
|
|
187
183
|
# Loads the user interface parts.
|
188
184
|
#
|
189
185
|
def load_user_interface
|
186
|
+
# Load API parts.
|
187
|
+
#
|
188
|
+
load_relative 'api/tokenizer'
|
189
|
+
load_relative 'api/tokenizer/character_substituter'
|
190
|
+
load_relative 'api/source'
|
191
|
+
load_relative 'api/category/weight'
|
192
|
+
load_relative 'api/category/partial'
|
193
|
+
load_relative 'api/category/similarity'
|
194
|
+
load_relative 'api/search/boost'
|
195
|
+
|
196
|
+
# Tokenizer.
|
197
|
+
#
|
198
|
+
load_relative 'tokenizer'
|
199
|
+
|
190
200
|
# Load harakiri.
|
191
201
|
#
|
192
202
|
load_relative 'rack/harakiri'
|
@@ -2,7 +2,7 @@ module Picky
|
|
2
2
|
|
3
3
|
module Query
|
4
4
|
|
5
|
-
# Calculates
|
5
|
+
# Calculates boosts for combinations.
|
6
6
|
#
|
7
7
|
# Example:
|
8
8
|
# Someone searches for peter fish.
|
@@ -11,34 +11,34 @@ module Picky
|
|
11
11
|
# and
|
12
12
|
# [:name, :surname]
|
13
13
|
#
|
14
|
-
# This class is concerned with calculating
|
14
|
+
# This class is concerned with calculating boosts
|
15
15
|
# for the category combinations.
|
16
16
|
#
|
17
17
|
# Implement either
|
18
|
-
# #
|
18
|
+
# #boost_for(combinations)
|
19
19
|
# or
|
20
|
-
# #
|
20
|
+
# #boost_for_categories(category_names) # Subclass this class for this.
|
21
21
|
#
|
22
|
-
# And return a
|
22
|
+
# And return a boost (float).
|
23
23
|
#
|
24
|
-
class
|
24
|
+
class Boosts
|
25
25
|
|
26
|
-
attr_reader :
|
26
|
+
attr_reader :boosts
|
27
27
|
|
28
28
|
delegate :empty?,
|
29
|
-
:to => :
|
29
|
+
:to => :boosts
|
30
30
|
|
31
31
|
# Needs a Hash of
|
32
32
|
# [:category_name1, :category_name2] => +3
|
33
33
|
# (some positive or negative boost)
|
34
34
|
#
|
35
|
-
def initialize
|
36
|
-
@
|
35
|
+
def initialize boosts = {}
|
36
|
+
@boosts = boosts
|
37
37
|
end
|
38
38
|
|
39
39
|
# API.
|
40
40
|
#
|
41
|
-
# Get the
|
41
|
+
# Get the boost for an array of category names.
|
42
42
|
#
|
43
43
|
# Example:
|
44
44
|
# [:name, :height, :color] returns +3, but
|
@@ -47,8 +47,8 @@ module Picky
|
|
47
47
|
# Note: Use Array#clustered_uniq_fast to make
|
48
48
|
# [:a, :a, :b, :a] => [:a, :b, :a]
|
49
49
|
#
|
50
|
-
def
|
51
|
-
@
|
50
|
+
def boost_for_categories names
|
51
|
+
@boosts[names.clustered_uniq_fast] || 0
|
52
52
|
end
|
53
53
|
|
54
54
|
# API.
|
@@ -60,22 +60,22 @@ module Picky
|
|
60
60
|
# Note: Cache this if more complicated weighings become necessary.
|
61
61
|
# Note: Maybe make combinations comparable to Symbols?
|
62
62
|
#
|
63
|
-
def
|
64
|
-
|
63
|
+
def boost_for combinations
|
64
|
+
boost_for_categories combinations.map(&:category_name)
|
65
65
|
end
|
66
66
|
|
67
67
|
# A Boosts instance is == to another if
|
68
68
|
# the boosts are the same.
|
69
69
|
#
|
70
70
|
def == other
|
71
|
-
@
|
71
|
+
@boosts == other.boosts
|
72
72
|
end
|
73
73
|
|
74
74
|
# Prints out a nice representation of the
|
75
75
|
# configured boosts.
|
76
76
|
#
|
77
77
|
def to_s
|
78
|
-
"#{self.class}(#{@
|
78
|
+
"#{self.class}(#{@boosts})"
|
79
79
|
end
|
80
80
|
|
81
81
|
end
|
@@ -28,8 +28,8 @@ module Picky
|
|
28
28
|
def score
|
29
29
|
@combinations.sum &:weight
|
30
30
|
end
|
31
|
-
def
|
32
|
-
weights.
|
31
|
+
def boost_for weights
|
32
|
+
weights.boost_for @combinations
|
33
33
|
end
|
34
34
|
|
35
35
|
# Filters the tokens and categories such that categories
|
data/lib/picky/search.rb
CHANGED
@@ -15,11 +15,13 @@ module Picky
|
|
15
15
|
#
|
16
16
|
class Search
|
17
17
|
|
18
|
+
include API::Search::Boost
|
19
|
+
|
18
20
|
include Helpers::Measuring
|
19
21
|
|
20
22
|
attr_reader :indexes
|
21
23
|
attr_accessor :tokenizer,
|
22
|
-
:
|
24
|
+
:boosts
|
23
25
|
|
24
26
|
delegate :ignore,
|
25
27
|
:to => :indexes
|
@@ -27,11 +29,11 @@ module Picky
|
|
27
29
|
# Takes:
|
28
30
|
# * A number of indexes
|
29
31
|
#
|
30
|
-
# It is also possible to define the tokenizer and
|
32
|
+
# It is also possible to define the tokenizer and boosts like so.
|
31
33
|
# Example:
|
32
34
|
# search = Search.new(index1, index2, index3) do
|
33
35
|
# searching removes_characters: /[^a-z]/ # etc.
|
34
|
-
#
|
36
|
+
# boost [:author, :title] => +3,
|
35
37
|
# [:title, :isbn] => +1
|
36
38
|
# end
|
37
39
|
#
|
@@ -41,7 +43,7 @@ module Picky
|
|
41
43
|
instance_eval(&Proc.new) if block_given?
|
42
44
|
|
43
45
|
@tokenizer ||= Tokenizer.searching # THINK Not dynamic. Ok?
|
44
|
-
@
|
46
|
+
@boosts ||= Query::Boosts.new
|
45
47
|
@ignore_unassigned = false if @ignore_unassigned.nil?
|
46
48
|
|
47
49
|
self
|
@@ -87,7 +89,7 @@ module Picky
|
|
87
89
|
# Note: When using the Picky interface, do not terminate too
|
88
90
|
# early as this will kill off the allocation selections.
|
89
91
|
# A value of
|
90
|
-
#
|
92
|
+
# terminate_early 5
|
91
93
|
# is probably a good idea to show the user 5 extra
|
92
94
|
# beyond the needed ones.
|
93
95
|
#
|
@@ -117,29 +119,25 @@ module Picky
|
|
117
119
|
#
|
118
120
|
# or
|
119
121
|
#
|
120
|
-
# # Explicitly add a random number (0...1) to the
|
122
|
+
# # Explicitly add a random number (0...1) to the boosts.
|
121
123
|
# #
|
122
|
-
#
|
124
|
+
# my_boosts = Class.new do
|
123
125
|
# # Instance only needs to implement
|
124
|
-
# #
|
126
|
+
# # boost_for combinations
|
125
127
|
# # and return a number that is
|
126
|
-
# # added to the
|
128
|
+
# # added to the score.
|
127
129
|
# #
|
128
|
-
# def
|
130
|
+
# def boost_for combinations
|
129
131
|
# rand
|
130
132
|
# end
|
131
133
|
# end.new
|
132
134
|
#
|
133
135
|
# search = Search.new(books_index, dvd_index, mp3_index) do
|
134
|
-
# boost
|
136
|
+
# boost my_boosts
|
135
137
|
# end
|
136
138
|
#
|
137
|
-
def boost
|
138
|
-
@
|
139
|
-
weights
|
140
|
-
else
|
141
|
-
Query::Weights.new weights
|
142
|
-
end
|
139
|
+
def boost boosts
|
140
|
+
@boosts = extract_boosts boosts
|
143
141
|
end
|
144
142
|
|
145
143
|
# Ignore the given token if it cannot be matched to a category.
|
@@ -225,7 +223,7 @@ module Picky
|
|
225
223
|
# Gets sorted allocations for the tokens.
|
226
224
|
#
|
227
225
|
def sorted_allocations tokens, amount = nil # :nodoc:
|
228
|
-
indexes.prepared_allocations_for tokens,
|
226
|
+
indexes.prepared_allocations_for tokens, boosts, amount
|
229
227
|
end
|
230
228
|
|
231
229
|
# Display some nice information for the user.
|
@@ -234,7 +232,7 @@ module Picky
|
|
234
232
|
s = "#{self.class}("
|
235
233
|
ary = []
|
236
234
|
ary << @indexes.indexes.map(&:name).join(', ') unless @indexes.indexes.empty?
|
237
|
-
ary << "
|
235
|
+
ary << "boosts: #{@boosts}" if @boosts
|
238
236
|
s << ary.join(', ')
|
239
237
|
s << ")"
|
240
238
|
s
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -6,15 +6,19 @@ module Picky
|
|
6
6
|
#
|
7
7
|
class Tokenizer
|
8
8
|
|
9
|
+
extend API::Tokenizer
|
10
|
+
|
11
|
+
include API::Tokenizer::CharacterSubstituter
|
12
|
+
|
9
13
|
def self.default_indexing_with options = {}
|
10
|
-
@indexing =
|
14
|
+
@indexing = extract_tokenizer options
|
11
15
|
end
|
12
16
|
def self.indexing
|
13
17
|
@indexing ||= new
|
14
18
|
end
|
15
19
|
|
16
20
|
def self.default_searching_with options = {}
|
17
|
-
@searching =
|
21
|
+
@searching = extract_tokenizer options
|
18
22
|
end
|
19
23
|
def self.searching
|
20
24
|
@searching ||= new
|
@@ -108,8 +112,7 @@ Case sensitive? #{@case_sensitive ? "Yes." : "-"}
|
|
108
112
|
# Default is European Character substitution.
|
109
113
|
#
|
110
114
|
def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
|
111
|
-
|
112
|
-
@substituter = substituter
|
115
|
+
@substituter = extract_character_substituter substituter
|
113
116
|
end
|
114
117
|
def substitute_characters text
|
115
118
|
substituter?? substituter.substitute(text) : text
|