picky 4.17.1 → 4.18.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +6 -14
- data/lib/picky/backends/backend.rb +9 -15
- data/lib/picky/backends/prepared/text.rb +1 -1
- data/lib/picky/backends/sqlite/basic.rb +3 -10
- data/lib/picky/bundle.rb +2 -2
- data/lib/picky/category.rb +10 -5
- data/lib/picky/category_indexed.rb +2 -1
- data/lib/picky/category_realtime.rb +10 -3
- data/lib/picky/helpers/measuring.rb +4 -6
- data/lib/picky/index_indexing.rb +0 -2
- data/lib/picky/indexes.rb +2 -2
- data/lib/picky/query/allocation.rb +3 -2
- data/lib/picky/query/allocations.rb +2 -2
- data/lib/picky/query/boosts.rb +2 -0
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/combinations.rb +1 -3
- data/lib/picky/query/indexes.rb +1 -1
- data/lib/picky/query/token.rb +44 -23
- data/lib/picky/search.rb +2 -0
- data/lib/picky/search_facets.rb +3 -1
- data/lib/picky/tokenizer.rb +3 -3
- data/spec/functional/allocations_uniq_by_definition_spec.rb +19 -14
- data/spec/functional/arrays_as_ids_spec.rb +8 -17
- data/spec/functional/automatic_segmentation_spec.rb +40 -37
- data/spec/functional/custom_delimiters_spec.rb +30 -20
- data/spec/functional/no_tokenize_spec.rb +2 -2
- data/spec/functional/or_spec.rb +74 -75
- data/spec/functional/pool_spec.rb +54 -53
- data/spec/functional/realtime_spec.rb +1 -1
- data/spec/lib/backends/backend_spec.rb +9 -9
- data/spec/lib/backends/sqlite/array_spec.rb +2 -8
- data/spec/lib/backends/sqlite/value_spec.rb +2 -2
- data/spec/lib/category_indexed_spec.rb +0 -12
- data/spec/lib/query/allocation_spec.rb +3 -3
- data/spec/lib/query/combinations_spec.rb +0 -17
- data/spec/lib/query/token_spec.rb +9 -4
- data/spec/lib/tokenizer_spec.rb +3 -3
- metadata +20 -21
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
metadata.gz: !binary |-
|
9
|
-
ZmE5NDZlZjVhN2I2OGM2YzUyMmM0ZWFjMGU4Mjc5NzA0MmEwMmU5MTg1ZDg0
|
10
|
-
ZWJiYzhiZTc4MDgzMjY3MGZlM2U3N2QxOTMyZWY2NzQzN2UwZmVkYWRjZWVl
|
11
|
-
YzdkNGFjYjYwZTM5ODdlOGMzZGY4MjljNjk5ZDYzZmU3MzcwMGE=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZDY1OThhODliOWI4M2VmOTM3MWYzZTVmNjlmMDM4NzhiNmZkY2UzOTdkYTJl
|
14
|
-
MTA0N2U0MjVhYTU4YTYxZDI2ZDBiYjMxNGYzODFkNGUyM2VjMGJiZTI5NDZl
|
15
|
-
OGVlZWJlODhiNzY5YmJjODk3NjMzMTQ5NjBkYWM5MTk0ZmYyOTI=
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5771adfbe24b649d5377b9d1233783ff5edb0c1e
|
4
|
+
data.tar.gz: 140aca46019f8b09496b508c9842a3ac84f2a933
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 07d12c4437ba880e486d73626873f1e1fc16e26512a37d564a172cb23bf517ad2c1503d841d0bb3dcd8ef9653a2f776979a2467761a01c412df2acf5a6837547
|
7
|
+
data.tar.gz: bf488e2fac14e6d1f4a26a354a8579e5931b18779c3e115e92062008bc1535f0e6ebdcbb21bfa60758be8df5557c64779a420957ef4c3b106ace7afe12b6a384
|
@@ -48,13 +48,13 @@ module Picky
|
|
48
48
|
json bundle.index_path(:realtime)
|
49
49
|
end
|
50
50
|
|
51
|
-
# Returns the total score of the combinations.
|
52
|
-
#
|
53
|
-
# Default implementation. Override to speed up.
|
54
|
-
#
|
55
|
-
def
|
56
|
-
|
57
|
-
end
|
51
|
+
# # Returns the total score of the combinations.
|
52
|
+
# #
|
53
|
+
# # Default implementation. Override to speed up.
|
54
|
+
# #
|
55
|
+
# def score combinations
|
56
|
+
# combinations.score
|
57
|
+
# end
|
58
58
|
|
59
59
|
# Returns the result ids for the allocation.
|
60
60
|
#
|
@@ -72,17 +72,11 @@ module Picky
|
|
72
72
|
# unfortunately.
|
73
73
|
#
|
74
74
|
def ids combinations, _, _
|
75
|
-
# Get the ids for each combination.
|
76
|
-
#
|
77
|
-
id_arrays = combinations.inject([]) do |total, combination|
|
78
|
-
total << combination.ids
|
79
|
-
end
|
80
|
-
|
81
|
-
# Call the optimized C algorithm.
|
75
|
+
# Get the ids for each combination and pass to the optimized C algorithm.
|
82
76
|
#
|
83
77
|
# Note: It orders the passed arrays by size.
|
84
78
|
#
|
85
|
-
Performant::Array.memory_efficient_intersect
|
79
|
+
Performant::Array.memory_efficient_intersect combinations.map { |combination| combination.ids }
|
86
80
|
end
|
87
81
|
|
88
82
|
#
|
@@ -15,8 +15,6 @@ module Picky
|
|
15
15
|
@empty = options[:empty]
|
16
16
|
@initial = options[:initial]
|
17
17
|
@realtime = options[:realtime]
|
18
|
-
|
19
|
-
lazily_initialize_client
|
20
18
|
|
21
19
|
# Note: If on OSX, too many files get opened during
|
22
20
|
# the specs -> ulimit -n 3000
|
@@ -46,9 +44,10 @@ module Picky
|
|
46
44
|
db.execute 'delete from key_value'
|
47
45
|
end
|
48
46
|
|
49
|
-
#
|
47
|
+
# Lazily creates SQLite client.
|
48
|
+
# Note: Perhaps it would be advisable to create only one, when initialising.
|
50
49
|
#
|
51
|
-
def lazily_initialize_client
|
50
|
+
def db
|
52
51
|
@db ||= (create_directory cache_path; SQLite3::Database.new cache_path)
|
53
52
|
end
|
54
53
|
|
@@ -67,13 +66,7 @@ module Picky
|
|
67
66
|
end
|
68
67
|
|
69
68
|
def reset
|
70
|
-
# TODO Still necessary?
|
71
|
-
#
|
72
|
-
create_directory cache_path
|
73
|
-
lazily_initialize_client
|
74
|
-
|
75
69
|
truncate_db
|
76
|
-
|
77
70
|
self
|
78
71
|
end
|
79
72
|
|
data/lib/picky/bundle.rb
CHANGED
@@ -145,10 +145,10 @@ module Picky
|
|
145
145
|
|
146
146
|
# If a key format is set, use it, else forward to the category.
|
147
147
|
#
|
148
|
-
# TODO
|
148
|
+
# TODO Remove optimisation? @category.key_format
|
149
149
|
#
|
150
150
|
def key_format
|
151
|
-
@category.key_format
|
151
|
+
@key_format ||= @category.key_format
|
152
152
|
end
|
153
153
|
|
154
154
|
# Path and partial filename of a specific subindex.
|
data/lib/picky/category.rb
CHANGED
@@ -7,7 +7,6 @@ module Picky
|
|
7
7
|
attr_accessor :exact,
|
8
8
|
:partial
|
9
9
|
attr_reader :name,
|
10
|
-
:prepared,
|
11
10
|
:backend
|
12
11
|
attr_writer :source
|
13
12
|
|
@@ -71,22 +70,22 @@ module Picky
|
|
71
70
|
|
72
71
|
@exact = exact_for weights, similarity, options
|
73
72
|
@partial = partial_for @exact, partial, weights, options
|
74
|
-
|
75
|
-
@prepared = Backends::Prepared::Text.new prepared_index_path
|
76
73
|
end
|
77
74
|
# Since the options hash might contain options that do not exist,
|
78
75
|
# we should warn people if they use the wrong options.
|
79
76
|
# (Problem is that if the option is not found, then Picky will use the default)
|
80
77
|
#
|
81
|
-
# TODO Rewrite it such that this does not need to be maintained separately.
|
78
|
+
# TODO Rewrite it such that this does not need to be maintained separately (and gets available options automatically).
|
82
79
|
#
|
83
80
|
@@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight]
|
84
81
|
def warn_if_unknown options
|
85
|
-
|
82
|
+
if options && (options.keys - @@known_keys).size > 0
|
83
|
+
warn <<-WARNING
|
86
84
|
|
87
85
|
Warning: Category options #{options} for category #{name} contain an unknown option.
|
88
86
|
Working options are: #@@known_keys.
|
89
87
|
WARNING
|
88
|
+
end
|
90
89
|
end
|
91
90
|
def weights_from options
|
92
91
|
Generators::Weights.from options[:weight], index_name, name
|
@@ -112,6 +111,12 @@ WARNING
|
|
112
111
|
end
|
113
112
|
end
|
114
113
|
|
114
|
+
# Lazily create a prepared index proxy.
|
115
|
+
#
|
116
|
+
def prepared
|
117
|
+
@prepared ||= Backends::Prepared::Text.new prepared_index_path
|
118
|
+
end
|
119
|
+
|
115
120
|
# Indexes and loads the category.
|
116
121
|
#
|
117
122
|
def reindex
|
@@ -74,9 +74,16 @@ module Picky
|
|
74
74
|
|
75
75
|
format = self.key_format?
|
76
76
|
tokens.each { |text| add_tokenized_token id, text, where, format }
|
77
|
-
rescue NoMethodError
|
78
|
-
|
79
|
-
|
77
|
+
rescue NoMethodError => e
|
78
|
+
show_informative_add_text_error_message_for e
|
79
|
+
end
|
80
|
+
|
81
|
+
def show_informative_add_text_error_message_for e
|
82
|
+
if e.name == :each
|
83
|
+
raise %Q{#{e.message}. You probably set tokenize: false on category "#{name}". It will need an Enumerator of previously tokenized tokens.}
|
84
|
+
else
|
85
|
+
raise e
|
86
|
+
end
|
80
87
|
end
|
81
88
|
|
82
89
|
#
|
@@ -7,14 +7,12 @@ module Picky
|
|
7
7
|
|
8
8
|
# Returns a duration in seconds.
|
9
9
|
#
|
10
|
-
def timed
|
11
|
-
|
10
|
+
def timed
|
11
|
+
time_begin = Time.new
|
12
12
|
|
13
|
-
|
13
|
+
yield
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
Time.now.to_f - time_begin
|
15
|
+
(Time.new - time_begin).to_f
|
18
16
|
end
|
19
17
|
|
20
18
|
end
|
data/lib/picky/index_indexing.rb
CHANGED
data/lib/picky/indexes.rb
CHANGED
@@ -11,7 +11,7 @@ module Picky
|
|
11
11
|
attr_reader :indexes,
|
12
12
|
:index_mapping
|
13
13
|
|
14
|
-
forward :size, :each, :to => :indexes
|
14
|
+
forward :size, :each, :map, :to => :indexes
|
15
15
|
each_forward :reindex, :to => :indexes
|
16
16
|
instance_forward :clear,
|
17
17
|
:clear_indexes,
|
@@ -47,7 +47,7 @@ module Picky
|
|
47
47
|
# Registers an index with the indexes.
|
48
48
|
#
|
49
49
|
def register index
|
50
|
-
# TODO
|
50
|
+
# TODO Do not store duplicate indexes.
|
51
51
|
#
|
52
52
|
# self.indexes.delete_if { |existing| existing.name == index.name }
|
53
53
|
self.indexes << index
|
@@ -40,8 +40,9 @@ module Picky
|
|
40
40
|
@score ||= if @combinations.empty?
|
41
41
|
0 # Optimization.
|
42
42
|
else
|
43
|
-
@backend.
|
44
|
-
|
43
|
+
# TODO Was @backend.score(@combinations) - indirection for maximum flexibility.
|
44
|
+
@combinations.score + weights.boost_for(@combinations)
|
45
|
+
end
|
45
46
|
end
|
46
47
|
|
47
48
|
# Asks the backend for the (intersected) ids.
|
@@ -60,7 +60,7 @@ module Picky
|
|
60
60
|
#
|
61
61
|
# TODO Rewrite, speed up.
|
62
62
|
#
|
63
|
-
def remove_allocations qualifiers_array
|
63
|
+
def remove_allocations qualifiers_array
|
64
64
|
return if qualifiers_array.empty?
|
65
65
|
@allocations.select! do |allocation|
|
66
66
|
allocation_qualifiers = allocation.combinations.to_qualifiers.clustered_uniq
|
@@ -77,7 +77,7 @@ module Picky
|
|
77
77
|
#
|
78
78
|
# TODO Rewrite, speed up.
|
79
79
|
#
|
80
|
-
def keep_allocations qualifiers_array
|
80
|
+
def keep_allocations qualifiers_array
|
81
81
|
return if qualifiers_array.empty?
|
82
82
|
@allocations.select! do |allocation|
|
83
83
|
allocation_qualifiers = allocation.combinations.to_qualifiers.clustered_uniq
|
data/lib/picky/query/boosts.rb
CHANGED
@@ -59,6 +59,8 @@ module Picky
|
|
59
59
|
# Note: Cache this if more complicated weighings become necessary.
|
60
60
|
# Note: Maybe make combinations comparable to Symbols?
|
61
61
|
#
|
62
|
+
# TODO Push into categories? Store boosts in categories?
|
63
|
+
#
|
62
64
|
def boost_for combinations
|
63
65
|
boost_for_categories combinations.map { |combination| combination.category_name }
|
64
66
|
end
|
@@ -5,7 +5,7 @@ module Picky
|
|
5
5
|
# Describes the Combination of:
|
6
6
|
# * a token
|
7
7
|
# * a category
|
8
|
-
# * the weight of the token in the category (cached)
|
8
|
+
# * the weight of the token in the category (cached from earlier)
|
9
9
|
#
|
10
10
|
# An Allocation consists of an ordered number of Combinations.
|
11
11
|
#
|
@@ -16,6 +16,7 @@ module Picky
|
|
16
16
|
|
17
17
|
forward :empty?,
|
18
18
|
:inject,
|
19
|
+
:map,
|
19
20
|
:to => :@combinations
|
20
21
|
|
21
22
|
def initialize combinations = []
|
@@ -29,9 +30,6 @@ module Picky
|
|
29
30
|
def score
|
30
31
|
@combinations.inject(0) { |total, combination| total + combination.weight }
|
31
32
|
end
|
32
|
-
def boost_for weights
|
33
|
-
weights.boost_for @combinations
|
34
|
-
end
|
35
33
|
|
36
34
|
# Filters the tokens and categories such that categories
|
37
35
|
# that are passed in, are removed.
|
data/lib/picky/query/indexes.rb
CHANGED
@@ -183,7 +183,7 @@ module Picky
|
|
183
183
|
# If an element has size 0, this means one of the
|
184
184
|
# tokens could not be allocated.
|
185
185
|
#
|
186
|
-
return [] if possible_combinations.any?
|
186
|
+
return [] if possible_combinations.any? { |possible_combination| possible_combination.empty? }
|
187
187
|
|
188
188
|
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
189
189
|
#
|
data/lib/picky/query/token.rb
CHANGED
@@ -41,8 +41,8 @@ module Picky
|
|
41
41
|
end
|
42
42
|
def process
|
43
43
|
qualify
|
44
|
-
partialize
|
45
44
|
similarize
|
45
|
+
partialize
|
46
46
|
rangify
|
47
47
|
remove_illegals
|
48
48
|
self
|
@@ -64,8 +64,8 @@ module Picky
|
|
64
64
|
#
|
65
65
|
# TODO Do we really need to set the predefined categories on the token?
|
66
66
|
#
|
67
|
-
def predefined_categories mapper
|
68
|
-
@predefined_categories || extract_predefined(mapper)
|
67
|
+
def predefined_categories mapper = nil
|
68
|
+
@predefined_categories || mapper && extract_predefined(mapper)
|
69
69
|
end
|
70
70
|
def extract_predefined mapper
|
71
71
|
user_qualified = categorize_with mapper, @qualifiers
|
@@ -76,6 +76,12 @@ module Picky
|
|
76
76
|
mapper.map qualifier
|
77
77
|
end.compact
|
78
78
|
end
|
79
|
+
|
80
|
+
# Selects the bundle to be used.
|
81
|
+
#
|
82
|
+
def select_bundle exact, partial
|
83
|
+
@partial ? partial : exact
|
84
|
+
end
|
79
85
|
|
80
86
|
# Partial is a conditional setter.
|
81
87
|
#
|
@@ -90,8 +96,11 @@ module Picky
|
|
90
96
|
#
|
91
97
|
# It can't be similar and partial at the same time.
|
92
98
|
#
|
99
|
+
# Note: @partial is calculated at processing time (see Token#process).
|
100
|
+
#
|
93
101
|
def partial?
|
94
|
-
!@similar && @partial
|
102
|
+
# Was: !@similar && @partial
|
103
|
+
@partial
|
95
104
|
end
|
96
105
|
|
97
106
|
# If the text ends with *, partialize it. If with ",
|
@@ -106,15 +115,21 @@ module Picky
|
|
106
115
|
@@no_partial = /\"\z/
|
107
116
|
@@partial = /\*\z/
|
108
117
|
def partialize
|
109
|
-
|
110
|
-
|
118
|
+
# A token is partial? only if it is not similar
|
119
|
+
# and is partial.
|
120
|
+
#
|
121
|
+
# It can't be similar and partial at the same time.
|
122
|
+
#
|
123
|
+
self.partial = false or return if @similar
|
124
|
+
self.partial = false or return if @text =~ @@no_partial
|
125
|
+
self.partial = true if @text =~ @@partial # TODO Move this one line up since it occurs more often?
|
111
126
|
end
|
112
127
|
# Define a character which stops a token from
|
113
128
|
# being a partial token, even if it is the last token.
|
114
129
|
#
|
115
130
|
# Default is '"'.
|
116
131
|
#
|
117
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
132
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
118
133
|
# so escape the character.
|
119
134
|
#
|
120
135
|
# Example:
|
@@ -124,12 +139,13 @@ module Picky
|
|
124
139
|
def self.no_partial_character= character
|
125
140
|
@@no_partial_character = character
|
126
141
|
@@no_partial = %r{#{character}\z}
|
142
|
+
redefine_illegals
|
127
143
|
end
|
128
144
|
# Define a character which makes a token a partial token.
|
129
145
|
#
|
130
146
|
# Default is '*'.
|
131
147
|
#
|
132
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
148
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
133
149
|
# so escape the character.
|
134
150
|
#
|
135
151
|
# Example:
|
@@ -151,15 +167,15 @@ module Picky
|
|
151
167
|
@@no_similar = %r{#@@no_similar_character\z}
|
152
168
|
@@similar = %r{#@@similar_character\z}
|
153
169
|
def similarize
|
154
|
-
self.similar = false or return
|
155
|
-
self.similar = true
|
170
|
+
self.similar = false or return if @text =~ @@no_similar
|
171
|
+
self.similar = true if @text =~ @@similar
|
156
172
|
end
|
157
173
|
# Define a character which stops a token from
|
158
174
|
# being a similar token, even if it is the last token.
|
159
175
|
#
|
160
176
|
# Default is '"'.
|
161
177
|
#
|
162
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
178
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
163
179
|
# so escape the character.
|
164
180
|
#
|
165
181
|
# Example:
|
@@ -169,12 +185,13 @@ module Picky
|
|
169
185
|
def self.no_similar_character= character
|
170
186
|
@@no_similar_character = character
|
171
187
|
@@no_similar = %r{#{character}\z}
|
188
|
+
redefine_illegals
|
172
189
|
end
|
173
190
|
# Define a character which makes a token a similar token.
|
174
191
|
#
|
175
192
|
# Default is '~'.
|
176
193
|
#
|
177
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
194
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
178
195
|
# so escape the character.
|
179
196
|
#
|
180
197
|
# Example:
|
@@ -200,9 +217,7 @@ module Picky
|
|
200
217
|
@@range_character = character
|
201
218
|
end
|
202
219
|
def rangify
|
203
|
-
if @text.include? @@range_character
|
204
|
-
@range = @text.split(@@range_character, 2)
|
205
|
-
end
|
220
|
+
@range = @text.split(@@range_character, 2) if @text.include? @@range_character
|
206
221
|
end
|
207
222
|
def range
|
208
223
|
@range
|
@@ -222,7 +237,7 @@ module Picky
|
|
222
237
|
@text.gsub! @@illegals, EMPTY_STRING unless @text == EMPTY_STRING
|
223
238
|
end
|
224
239
|
def self.redefine_illegals
|
225
|
-
@@illegals = %r{[#@@no_similar_character#@@partial_character
|
240
|
+
@@illegals = %r{[#@@no_similar_character#@@similar_character#@@no_partial_character#@@partial_character]}
|
226
241
|
end
|
227
242
|
redefine_illegals
|
228
243
|
|
@@ -236,7 +251,9 @@ module Picky
|
|
236
251
|
similar? ? categories.similar_possible_for(self) : categories.possible_for(self)
|
237
252
|
end
|
238
253
|
|
239
|
-
#
|
254
|
+
# If the Token has weight for the given category,
|
255
|
+
# it will return a new combination for the tuple
|
256
|
+
# (self, category, weight).
|
240
257
|
#
|
241
258
|
def combination_for category
|
242
259
|
weight = category.weight self
|
@@ -259,12 +276,16 @@ module Picky
|
|
259
276
|
@@qualifier_text_delimiter = ':'
|
260
277
|
@@qualifiers_delimiter = ','
|
261
278
|
def qualify
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
@text = @
|
267
|
-
@
|
279
|
+
# TODO Is this actually an optimization?
|
280
|
+
# Check using include? + split, and split alone.
|
281
|
+
#
|
282
|
+
if @text.include? @@qualifier_text_delimiter
|
283
|
+
@qualifiers, @text = @text.split @@qualifier_text_delimiter, 2
|
284
|
+
if @text
|
285
|
+
@qualifiers = @qualifiers.split @@qualifiers_delimiter
|
286
|
+
else
|
287
|
+
@text, @qualifiers = @qualifiers, nil
|
288
|
+
end
|
268
289
|
end
|
269
290
|
end
|
270
291
|
# Define a character which separates the qualifier
|
data/lib/picky/search.rb
CHANGED
@@ -265,6 +265,8 @@ module Picky
|
|
265
265
|
|
266
266
|
# Gets sorted allocations for the tokens.
|
267
267
|
#
|
268
|
+
# TODO Remove and just call prepared (and rename to sorted)?
|
269
|
+
#
|
268
270
|
def sorted_allocations tokens, amount = nil
|
269
271
|
indexes.prepared_allocations_for tokens, boosts, amount
|
270
272
|
end
|
data/lib/picky/search_facets.rb
CHANGED
@@ -36,7 +36,9 @@ module Picky
|
|
36
36
|
# Pre-tokenize key token – replace text below.
|
37
37
|
# Note: The original is not important.
|
38
38
|
#
|
39
|
-
# TODO Don't use predefined.
|
39
|
+
# TODO Don't use predefined. Perhaps do:
|
40
|
+
# key_token = Query::Token.new ''
|
41
|
+
# key_token.predefined_categories = [index[category_identifier]]
|
40
42
|
#
|
41
43
|
key_token = Query::Token.new '', nil, predefined_categories
|
42
44
|
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -212,7 +212,7 @@ A short overview:
|
|
212
212
|
stopwords /regexp/
|
213
213
|
splits_text_on /regexp/ or "String", default /\s/
|
214
214
|
normalizes_words [[/replace (this)/, 'with this \\1'], ...]
|
215
|
-
rejects_token_if Proc/lambda, default :blank?.to_proc
|
215
|
+
rejects_token_if Proc/lambda, default :empty?.to_proc
|
216
216
|
substitutes_characters_with Picky::CharacterSubstituter or responds to #substitute(String)
|
217
217
|
stems_with Instance responds to #stem(String)
|
218
218
|
case_sensitive true/false
|
@@ -222,7 +222,7 @@ ERROR
|
|
222
222
|
def default_options
|
223
223
|
{
|
224
224
|
splits_text_on: /\s/,
|
225
|
-
rejects_token_if: :blank?.to_proc
|
225
|
+
rejects_token_if: :empty?.to_proc
|
226
226
|
}
|
227
227
|
end
|
228
228
|
|
@@ -234,7 +234,7 @@ ERROR
|
|
234
234
|
#
|
235
235
|
def tokenize text
|
236
236
|
text = preprocess text.to_s # processing the text
|
237
|
-
return empty_tokens if text.blank?
|
237
|
+
return empty_tokens if text.empty? # TODO blank?
|
238
238
|
words = pretokenize text # splitting and preparations for tokenizing
|
239
239
|
return empty_tokens if words.empty?
|
240
240
|
tokens = tokens_for words # creating tokens / strings
|
@@ -7,30 +7,35 @@ require 'spec_helper'
|
|
7
7
|
#
|
8
8
|
describe 'uniqueness of allocations' do
|
9
9
|
|
10
|
-
|
11
|
-
index
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
context 'is already uniq' do
|
11
|
+
let(:index) do
|
12
|
+
index = Picky::Index.new :already_uniq do
|
13
|
+
category :category1
|
14
|
+
category :category2
|
15
|
+
category :category3
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
thing = Struct.new(:id, :category1, :category2, :category3)
|
19
|
+
index.add thing.new(1, 'text1', 'text2', 'text3')
|
20
|
+
|
21
|
+
index
|
22
|
+
end
|
23
|
+
let(:try) do
|
24
|
+
Picky::Search.new index do
|
25
|
+
max_allocations 100
|
26
|
+
end
|
22
27
|
end
|
23
28
|
|
24
29
|
# Picky finds three categories.
|
25
30
|
#
|
26
|
-
try.search('text*').ids.should == [1,1,1]
|
31
|
+
it { try.search('text*').ids.should == [1,1,1] }
|
27
32
|
|
28
33
|
# Picky finds 9 possible allocations.
|
29
34
|
#
|
30
|
-
try.search('text* text*').ids.should == [1,1,1]*3
|
35
|
+
it { try.search('text* text*').ids.should == [1,1,1]*3 }
|
31
36
|
|
32
37
|
# Picky finds 27 possible allocations.
|
33
38
|
#
|
34
|
-
try.search('text* text* text*', 100).ids.should == [1,1,1]*3*3
|
39
|
+
it { try.search('text* text* text*', 100).ids.should == [1,1,1]*3*3 }
|
35
40
|
end
|
36
41
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
require 'spec_helper'
|
4
|
+
require 'ostruct'
|
4
5
|
|
5
6
|
describe "Array IDs" do
|
7
|
+
|
8
|
+
let(:index) { Picky::Index.new :arrays }
|
9
|
+
let(:try) { Picky::Search.new index }
|
6
10
|
|
7
11
|
# This tests the weights option.
|
8
12
|
#
|
9
13
|
it 'can use Arrays as IDs' do
|
10
|
-
index
|
11
|
-
category :text1
|
12
|
-
end
|
13
|
-
|
14
|
-
require 'ostruct'
|
14
|
+
index.category :text1
|
15
15
|
|
16
16
|
thing = OpenStruct.new id: ['id1', 'thing1'], text1: "ohai"
|
17
17
|
other = OpenStruct.new id: ['id2', 'thing2'], text1: "ohai kthxbye"
|
@@ -19,8 +19,6 @@ describe "Array IDs" do
|
|
19
19
|
index.add thing
|
20
20
|
index.add other
|
21
21
|
|
22
|
-
try = Picky::Search.new index
|
23
|
-
|
24
22
|
try.search("text1:ohai").ids.should == [
|
25
23
|
["id2", "thing2"],
|
26
24
|
["id1", "thing1"]
|
@@ -30,22 +28,15 @@ describe "Array IDs" do
|
|
30
28
|
# This tests the weights option.
|
31
29
|
#
|
32
30
|
it 'can use split as key_format' do
|
33
|
-
index
|
34
|
-
|
35
|
-
|
36
|
-
category :text1
|
37
|
-
end
|
38
|
-
|
39
|
-
require 'ostruct'
|
40
|
-
|
31
|
+
index.key_format :split
|
32
|
+
index.category :text1
|
33
|
+
|
41
34
|
thing = OpenStruct.new id: "id1 thing1", text1: "ohai"
|
42
35
|
other = OpenStruct.new id: "id2 thing2", text1: "ohai kthxbye"
|
43
36
|
|
44
37
|
index.add thing
|
45
38
|
index.add other
|
46
39
|
|
47
|
-
try = Picky::Search.new index
|
48
|
-
|
49
40
|
try.search("text1:ohai").ids.should == [
|
50
41
|
["id2", "thing2"],
|
51
42
|
["id1", "thing1"]
|