picky 4.17.1 → 4.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +6 -14
- data/lib/picky/backends/backend.rb +9 -15
- data/lib/picky/backends/prepared/text.rb +1 -1
- data/lib/picky/backends/sqlite/basic.rb +3 -10
- data/lib/picky/bundle.rb +2 -2
- data/lib/picky/category.rb +10 -5
- data/lib/picky/category_indexed.rb +2 -1
- data/lib/picky/category_realtime.rb +10 -3
- data/lib/picky/helpers/measuring.rb +4 -6
- data/lib/picky/index_indexing.rb +0 -2
- data/lib/picky/indexes.rb +2 -2
- data/lib/picky/query/allocation.rb +3 -2
- data/lib/picky/query/allocations.rb +2 -2
- data/lib/picky/query/boosts.rb +2 -0
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/combinations.rb +1 -3
- data/lib/picky/query/indexes.rb +1 -1
- data/lib/picky/query/token.rb +44 -23
- data/lib/picky/search.rb +2 -0
- data/lib/picky/search_facets.rb +3 -1
- data/lib/picky/tokenizer.rb +3 -3
- data/spec/functional/allocations_uniq_by_definition_spec.rb +19 -14
- data/spec/functional/arrays_as_ids_spec.rb +8 -17
- data/spec/functional/automatic_segmentation_spec.rb +40 -37
- data/spec/functional/custom_delimiters_spec.rb +30 -20
- data/spec/functional/no_tokenize_spec.rb +2 -2
- data/spec/functional/or_spec.rb +74 -75
- data/spec/functional/pool_spec.rb +54 -53
- data/spec/functional/realtime_spec.rb +1 -1
- data/spec/lib/backends/backend_spec.rb +9 -9
- data/spec/lib/backends/sqlite/array_spec.rb +2 -8
- data/spec/lib/backends/sqlite/value_spec.rb +2 -2
- data/spec/lib/category_indexed_spec.rb +0 -12
- data/spec/lib/query/allocation_spec.rb +3 -3
- data/spec/lib/query/combinations_spec.rb +0 -17
- data/spec/lib/query/token_spec.rb +9 -4
- data/spec/lib/tokenizer_spec.rb +3 -3
- metadata +20 -21
checksums.yaml
CHANGED
@@ -1,15 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
metadata.gz: !binary |-
|
9
|
-
ZmE5NDZlZjVhN2I2OGM2YzUyMmM0ZWFjMGU4Mjc5NzA0MmEwMmU5MTg1ZDg0
|
10
|
-
ZWJiYzhiZTc4MDgzMjY3MGZlM2U3N2QxOTMyZWY2NzQzN2UwZmVkYWRjZWVl
|
11
|
-
YzdkNGFjYjYwZTM5ODdlOGMzZGY4MjljNjk5ZDYzZmU3MzcwMGE=
|
12
|
-
data.tar.gz: !binary |-
|
13
|
-
ZDY1OThhODliOWI4M2VmOTM3MWYzZTVmNjlmMDM4NzhiNmZkY2UzOTdkYTJl
|
14
|
-
MTA0N2U0MjVhYTU4YTYxZDI2ZDBiYjMxNGYzODFkNGUyM2VjMGJiZTI5NDZl
|
15
|
-
OGVlZWJlODhiNzY5YmJjODk3NjMzMTQ5NjBkYWM5MTk0ZmYyOTI=
|
2
|
+
SHA1:
|
3
|
+
metadata.gz: 5771adfbe24b649d5377b9d1233783ff5edb0c1e
|
4
|
+
data.tar.gz: 140aca46019f8b09496b508c9842a3ac84f2a933
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 07d12c4437ba880e486d73626873f1e1fc16e26512a37d564a172cb23bf517ad2c1503d841d0bb3dcd8ef9653a2f776979a2467761a01c412df2acf5a6837547
|
7
|
+
data.tar.gz: bf488e2fac14e6d1f4a26a354a8579e5931b18779c3e115e92062008bc1535f0e6ebdcbb21bfa60758be8df5557c64779a420957ef4c3b106ace7afe12b6a384
|
@@ -48,13 +48,13 @@ module Picky
|
|
48
48
|
json bundle.index_path(:realtime)
|
49
49
|
end
|
50
50
|
|
51
|
-
# Returns the total score of the combinations.
|
52
|
-
#
|
53
|
-
# Default implementation. Override to speed up.
|
54
|
-
#
|
55
|
-
def
|
56
|
-
|
57
|
-
end
|
51
|
+
# # Returns the total score of the combinations.
|
52
|
+
# #
|
53
|
+
# # Default implementation. Override to speed up.
|
54
|
+
# #
|
55
|
+
# def score combinations
|
56
|
+
# combinations.score
|
57
|
+
# end
|
58
58
|
|
59
59
|
# Returns the result ids for the allocation.
|
60
60
|
#
|
@@ -72,17 +72,11 @@ module Picky
|
|
72
72
|
# unfortunately.
|
73
73
|
#
|
74
74
|
def ids combinations, _, _
|
75
|
-
# Get the ids for each combination.
|
76
|
-
#
|
77
|
-
id_arrays = combinations.inject([]) do |total, combination|
|
78
|
-
total << combination.ids
|
79
|
-
end
|
80
|
-
|
81
|
-
# Call the optimized C algorithm.
|
75
|
+
# Get the ids for each combination and pass to the optimized C algorithm.
|
82
76
|
#
|
83
77
|
# Note: It orders the passed arrays by size.
|
84
78
|
#
|
85
|
-
Performant::Array.memory_efficient_intersect
|
79
|
+
Performant::Array.memory_efficient_intersect combinations.map { |combination| combination.ids }
|
86
80
|
end
|
87
81
|
|
88
82
|
#
|
@@ -15,8 +15,6 @@ module Picky
|
|
15
15
|
@empty = options[:empty]
|
16
16
|
@initial = options[:initial]
|
17
17
|
@realtime = options[:realtime]
|
18
|
-
|
19
|
-
lazily_initialize_client
|
20
18
|
|
21
19
|
# Note: If on OSX, too many files get opened during
|
22
20
|
# the specs -> ulimit -n 3000
|
@@ -46,9 +44,10 @@ module Picky
|
|
46
44
|
db.execute 'delete from key_value'
|
47
45
|
end
|
48
46
|
|
49
|
-
#
|
47
|
+
# Lazily creates SQLite client.
|
48
|
+
# Note: Perhaps it would be advisable to create only one, when initialising.
|
50
49
|
#
|
51
|
-
def
|
50
|
+
def db
|
52
51
|
@db ||= (create_directory cache_path; SQLite3::Database.new cache_path)
|
53
52
|
end
|
54
53
|
|
@@ -67,13 +66,7 @@ module Picky
|
|
67
66
|
end
|
68
67
|
|
69
68
|
def reset
|
70
|
-
# TODO Still necessary?
|
71
|
-
#
|
72
|
-
create_directory cache_path
|
73
|
-
lazily_initialize_client
|
74
|
-
|
75
69
|
truncate_db
|
76
|
-
|
77
70
|
self
|
78
71
|
end
|
79
72
|
|
data/lib/picky/bundle.rb
CHANGED
@@ -145,10 +145,10 @@ module Picky
|
|
145
145
|
|
146
146
|
# If a key format is set, use it, else forward to the category.
|
147
147
|
#
|
148
|
-
# TODO
|
148
|
+
# TODO Remove optimisation? @category.key_format
|
149
149
|
#
|
150
150
|
def key_format
|
151
|
-
@category.key_format
|
151
|
+
@key_format ||= @category.key_format
|
152
152
|
end
|
153
153
|
|
154
154
|
# Path and partial filename of a specific subindex.
|
data/lib/picky/category.rb
CHANGED
@@ -7,7 +7,6 @@ module Picky
|
|
7
7
|
attr_accessor :exact,
|
8
8
|
:partial
|
9
9
|
attr_reader :name,
|
10
|
-
:prepared,
|
11
10
|
:backend
|
12
11
|
attr_writer :source
|
13
12
|
|
@@ -71,22 +70,22 @@ module Picky
|
|
71
70
|
|
72
71
|
@exact = exact_for weights, similarity, options
|
73
72
|
@partial = partial_for @exact, partial, weights, options
|
74
|
-
|
75
|
-
@prepared = Backends::Prepared::Text.new prepared_index_path
|
76
73
|
end
|
77
74
|
# Since the options hash might contain options that do not exist,
|
78
75
|
# we should warn people if they use the wrong options.
|
79
76
|
# (Problem is that if the option is not found, then Picky will use the default)
|
80
77
|
#
|
81
|
-
# TODO Rewrite it such that this does not need to be maintained separately.
|
78
|
+
# TODO Rewrite it such that this does not need to be maintained separately (and gets available options automatically).
|
82
79
|
#
|
83
80
|
@@known_keys = [:indexing, :partial, :qualifier, :qualifiers, :ranging, :similarity, :source, :tokenize, :tokenizer, :weight]
|
84
81
|
def warn_if_unknown options
|
85
|
-
|
82
|
+
if options && (options.keys - @@known_keys).size > 0
|
83
|
+
warn <<-WARNING
|
86
84
|
|
87
85
|
Warning: Category options #{options} for category #{name} contain an unknown option.
|
88
86
|
Working options are: #@@known_keys.
|
89
87
|
WARNING
|
88
|
+
end
|
90
89
|
end
|
91
90
|
def weights_from options
|
92
91
|
Generators::Weights.from options[:weight], index_name, name
|
@@ -112,6 +111,12 @@ WARNING
|
|
112
111
|
end
|
113
112
|
end
|
114
113
|
|
114
|
+
# Lazily create a prepared index proxy.
|
115
|
+
#
|
116
|
+
def prepared
|
117
|
+
@prepared ||= Backends::Prepared::Text.new prepared_index_path
|
118
|
+
end
|
119
|
+
|
115
120
|
# Indexes and loads the category.
|
116
121
|
#
|
117
122
|
def reindex
|
@@ -74,9 +74,16 @@ module Picky
|
|
74
74
|
|
75
75
|
format = self.key_format?
|
76
76
|
tokens.each { |text| add_tokenized_token id, text, where, format }
|
77
|
-
rescue NoMethodError
|
78
|
-
|
79
|
-
|
77
|
+
rescue NoMethodError => e
|
78
|
+
show_informative_add_text_error_message_for e
|
79
|
+
end
|
80
|
+
|
81
|
+
def show_informative_add_text_error_message_for e
|
82
|
+
if e.name == :each
|
83
|
+
raise %Q{#{e.message}. You probably set tokenize: false on category "#{name}". It will need an Enumerator of previously tokenized tokens.}
|
84
|
+
else
|
85
|
+
raise e
|
86
|
+
end
|
80
87
|
end
|
81
88
|
|
82
89
|
#
|
@@ -7,14 +7,12 @@ module Picky
|
|
7
7
|
|
8
8
|
# Returns a duration in seconds.
|
9
9
|
#
|
10
|
-
def timed
|
11
|
-
|
10
|
+
def timed
|
11
|
+
time_begin = Time.new
|
12
12
|
|
13
|
-
|
13
|
+
yield
|
14
14
|
|
15
|
-
|
16
|
-
|
17
|
-
Time.now.to_f - time_begin
|
15
|
+
(Time.new - time_begin).to_f
|
18
16
|
end
|
19
17
|
|
20
18
|
end
|
data/lib/picky/index_indexing.rb
CHANGED
data/lib/picky/indexes.rb
CHANGED
@@ -11,7 +11,7 @@ module Picky
|
|
11
11
|
attr_reader :indexes,
|
12
12
|
:index_mapping
|
13
13
|
|
14
|
-
forward :size, :each, :to => :indexes
|
14
|
+
forward :size, :each, :map, :to => :indexes
|
15
15
|
each_forward :reindex, :to => :indexes
|
16
16
|
instance_forward :clear,
|
17
17
|
:clear_indexes,
|
@@ -47,7 +47,7 @@ module Picky
|
|
47
47
|
# Registers an index with the indexes.
|
48
48
|
#
|
49
49
|
def register index
|
50
|
-
# TODO
|
50
|
+
# TODO Do not store duplicate indexes.
|
51
51
|
#
|
52
52
|
# self.indexes.delete_if { |existing| existing.name == index.name }
|
53
53
|
self.indexes << index
|
@@ -40,8 +40,9 @@ module Picky
|
|
40
40
|
@score ||= if @combinations.empty?
|
41
41
|
0 # Optimization.
|
42
42
|
else
|
43
|
-
@backend.
|
44
|
-
|
43
|
+
# TODO Was @backend.score(@combinations) - indirection for maximum flexibility.
|
44
|
+
@combinations.score + weights.boost_for(@combinations)
|
45
|
+
end
|
45
46
|
end
|
46
47
|
|
47
48
|
# Asks the backend for the (intersected) ids.
|
@@ -60,7 +60,7 @@ module Picky
|
|
60
60
|
#
|
61
61
|
# TODO Rewrite, speed up.
|
62
62
|
#
|
63
|
-
def remove_allocations qualifiers_array
|
63
|
+
def remove_allocations qualifiers_array
|
64
64
|
return if qualifiers_array.empty?
|
65
65
|
@allocations.select! do |allocation|
|
66
66
|
allocation_qualifiers = allocation.combinations.to_qualifiers.clustered_uniq
|
@@ -77,7 +77,7 @@ module Picky
|
|
77
77
|
#
|
78
78
|
# TODO Rewrite, speed up.
|
79
79
|
#
|
80
|
-
def keep_allocations qualifiers_array
|
80
|
+
def keep_allocations qualifiers_array
|
81
81
|
return if qualifiers_array.empty?
|
82
82
|
@allocations.select! do |allocation|
|
83
83
|
allocation_qualifiers = allocation.combinations.to_qualifiers.clustered_uniq
|
data/lib/picky/query/boosts.rb
CHANGED
@@ -59,6 +59,8 @@ module Picky
|
|
59
59
|
# Note: Cache this if more complicated weighings become necessary.
|
60
60
|
# Note: Maybe make combinations comparable to Symbols?
|
61
61
|
#
|
62
|
+
# TODO Push into categories? Store boosts in categories?
|
63
|
+
#
|
62
64
|
def boost_for combinations
|
63
65
|
boost_for_categories combinations.map { |combination| combination.category_name }
|
64
66
|
end
|
@@ -5,7 +5,7 @@ module Picky
|
|
5
5
|
# Describes the Combination of:
|
6
6
|
# * a token
|
7
7
|
# * a category
|
8
|
-
# * the weight of the token in the category (cached)
|
8
|
+
# * the weight of the token in the category (cached from earlier)
|
9
9
|
#
|
10
10
|
# An Allocation consists of an ordered number of Combinations.
|
11
11
|
#
|
@@ -16,6 +16,7 @@ module Picky
|
|
16
16
|
|
17
17
|
forward :empty?,
|
18
18
|
:inject,
|
19
|
+
:map,
|
19
20
|
:to => :@combinations
|
20
21
|
|
21
22
|
def initialize combinations = []
|
@@ -29,9 +30,6 @@ module Picky
|
|
29
30
|
def score
|
30
31
|
@combinations.inject(0) { |total, combination| total + combination.weight }
|
31
32
|
end
|
32
|
-
def boost_for weights
|
33
|
-
weights.boost_for @combinations
|
34
|
-
end
|
35
33
|
|
36
34
|
# Filters the tokens and categories such that categories
|
37
35
|
# that are passed in, are removed.
|
data/lib/picky/query/indexes.rb
CHANGED
@@ -183,7 +183,7 @@ module Picky
|
|
183
183
|
# If an element has size 0, this means one of the
|
184
184
|
# tokens could not be allocated.
|
185
185
|
#
|
186
|
-
return [] if possible_combinations.any?
|
186
|
+
return [] if possible_combinations.any? { |possible_combination| possible_combination.empty? }
|
187
187
|
|
188
188
|
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
189
189
|
#
|
data/lib/picky/query/token.rb
CHANGED
@@ -41,8 +41,8 @@ module Picky
|
|
41
41
|
end
|
42
42
|
def process
|
43
43
|
qualify
|
44
|
-
partialize
|
45
44
|
similarize
|
45
|
+
partialize
|
46
46
|
rangify
|
47
47
|
remove_illegals
|
48
48
|
self
|
@@ -64,8 +64,8 @@ module Picky
|
|
64
64
|
#
|
65
65
|
# TODO Do we really need to set the predefined categories on the token?
|
66
66
|
#
|
67
|
-
def predefined_categories mapper
|
68
|
-
@predefined_categories || extract_predefined(mapper)
|
67
|
+
def predefined_categories mapper = nil
|
68
|
+
@predefined_categories || mapper && extract_predefined(mapper)
|
69
69
|
end
|
70
70
|
def extract_predefined mapper
|
71
71
|
user_qualified = categorize_with mapper, @qualifiers
|
@@ -76,6 +76,12 @@ module Picky
|
|
76
76
|
mapper.map qualifier
|
77
77
|
end.compact
|
78
78
|
end
|
79
|
+
|
80
|
+
# Selects the bundle to be used.
|
81
|
+
#
|
82
|
+
def select_bundle exact, partial
|
83
|
+
@partial ? partial : exact
|
84
|
+
end
|
79
85
|
|
80
86
|
# Partial is a conditional setter.
|
81
87
|
#
|
@@ -90,8 +96,11 @@ module Picky
|
|
90
96
|
#
|
91
97
|
# It can't be similar and partial at the same time.
|
92
98
|
#
|
99
|
+
# Note: @partial is calculated at processing time (see Token#process).
|
100
|
+
#
|
93
101
|
def partial?
|
94
|
-
!@similar && @partial
|
102
|
+
# Was: !@similar && @partial
|
103
|
+
@partial
|
95
104
|
end
|
96
105
|
|
97
106
|
# If the text ends with *, partialize it. If with ",
|
@@ -106,15 +115,21 @@ module Picky
|
|
106
115
|
@@no_partial = /\"\z/
|
107
116
|
@@partial = /\*\z/
|
108
117
|
def partialize
|
109
|
-
|
110
|
-
|
118
|
+
# A token is partial? only if it not similar
|
119
|
+
# and is partial.
|
120
|
+
#
|
121
|
+
# It can't be similar and partial at the same time.
|
122
|
+
#
|
123
|
+
self.partial = false or return if @similar
|
124
|
+
self.partial = false or return if @text =~ @@no_partial
|
125
|
+
self.partial = true if @text =~ @@partial # TODO Move this one line up since it occurs more often?
|
111
126
|
end
|
112
127
|
# Define a character which stops a token from
|
113
128
|
# being a partial token, even if it is the last token.
|
114
129
|
#
|
115
130
|
# Default is '"'.
|
116
131
|
#
|
117
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
132
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
118
133
|
# so escape the character.
|
119
134
|
#
|
120
135
|
# Example:
|
@@ -124,12 +139,13 @@ module Picky
|
|
124
139
|
def self.no_partial_character= character
|
125
140
|
@@no_partial_character = character
|
126
141
|
@@no_partial = %r{#{character}\z}
|
142
|
+
redefine_illegals
|
127
143
|
end
|
128
144
|
# Define a character which makes a token a partial token.
|
129
145
|
#
|
130
146
|
# Default is '*'.
|
131
147
|
#
|
132
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
148
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
133
149
|
# so escape the character.
|
134
150
|
#
|
135
151
|
# Example:
|
@@ -151,15 +167,15 @@ module Picky
|
|
151
167
|
@@no_similar = %r{#@@no_similar_character\z}
|
152
168
|
@@similar = %r{#@@similar_character\z}
|
153
169
|
def similarize
|
154
|
-
self.similar = false or return
|
155
|
-
self.similar = true
|
170
|
+
self.similar = false or return if @text =~ @@no_similar
|
171
|
+
self.similar = true if @text =~ @@similar
|
156
172
|
end
|
157
173
|
# Define a character which stops a token from
|
158
174
|
# being a similar token, even if it is the last token.
|
159
175
|
#
|
160
176
|
# Default is '"'.
|
161
177
|
#
|
162
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
178
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
163
179
|
# so escape the character.
|
164
180
|
#
|
165
181
|
# Example:
|
@@ -169,12 +185,13 @@ module Picky
|
|
169
185
|
def self.no_similar_character= character
|
170
186
|
@@no_similar_character = character
|
171
187
|
@@no_similar = %r{#{character}\z}
|
188
|
+
redefine_illegals
|
172
189
|
end
|
173
190
|
# Define a character which makes a token a similar token.
|
174
191
|
#
|
175
192
|
# Default is '~'.
|
176
193
|
#
|
177
|
-
# This is used in a regexp (%r{#{char}\z}) for String
|
194
|
+
# This is used in a regexp (%r{#{char}\z}) for String#=~,
|
178
195
|
# so escape the character.
|
179
196
|
#
|
180
197
|
# Example:
|
@@ -200,9 +217,7 @@ module Picky
|
|
200
217
|
@@range_character = character
|
201
218
|
end
|
202
219
|
def rangify
|
203
|
-
if @text.include? @@range_character
|
204
|
-
@range = @text.split(@@range_character, 2)
|
205
|
-
end
|
220
|
+
@range = @text.split(@@range_character, 2) if @text.include? @@range_character
|
206
221
|
end
|
207
222
|
def range
|
208
223
|
@range
|
@@ -222,7 +237,7 @@ module Picky
|
|
222
237
|
@text.gsub! @@illegals, EMPTY_STRING unless @text == EMPTY_STRING
|
223
238
|
end
|
224
239
|
def self.redefine_illegals
|
225
|
-
@@illegals = %r{[#@@no_similar_character#@@partial_character
|
240
|
+
@@illegals = %r{[#@@no_similar_character#@@similar_character#@@no_partial_character#@@partial_character]}
|
226
241
|
end
|
227
242
|
redefine_illegals
|
228
243
|
|
@@ -236,7 +251,9 @@ module Picky
|
|
236
251
|
similar? ? categories.similar_possible_for(self) : categories.possible_for(self)
|
237
252
|
end
|
238
253
|
|
239
|
-
#
|
254
|
+
# If the Token has weight for the given category,
|
255
|
+
# it will return a new combination for the tuple
|
256
|
+
# (self, category, weight).
|
240
257
|
#
|
241
258
|
def combination_for category
|
242
259
|
weight = category.weight self
|
@@ -259,12 +276,16 @@ module Picky
|
|
259
276
|
@@qualifier_text_delimiter = ':'
|
260
277
|
@@qualifiers_delimiter = ','
|
261
278
|
def qualify
|
262
|
-
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
@text = @
|
267
|
-
@
|
279
|
+
# TODO Is this actually an optimization?
|
280
|
+
# Check using include? + split, and split alone.
|
281
|
+
#
|
282
|
+
if @text.include? @@qualifier_text_delimiter
|
283
|
+
@qualifiers, @text = @text.split @@qualifier_text_delimiter, 2
|
284
|
+
if @text
|
285
|
+
@qualifiers = @qualifiers.split @@qualifiers_delimiter
|
286
|
+
else
|
287
|
+
@text, @qualifiers = @qualifiers, nil
|
288
|
+
end
|
268
289
|
end
|
269
290
|
end
|
270
291
|
# Define a character which separates the qualifier
|
data/lib/picky/search.rb
CHANGED
@@ -265,6 +265,8 @@ module Picky
|
|
265
265
|
|
266
266
|
# Gets sorted allocations for the tokens.
|
267
267
|
#
|
268
|
+
# TODO Remove and just call prepared (and rename to sorted)?
|
269
|
+
#
|
268
270
|
def sorted_allocations tokens, amount = nil
|
269
271
|
indexes.prepared_allocations_for tokens, boosts, amount
|
270
272
|
end
|
data/lib/picky/search_facets.rb
CHANGED
@@ -36,7 +36,9 @@ module Picky
|
|
36
36
|
# Pre-tokenize key token – replace text below.
|
37
37
|
# Note: The original is not important.
|
38
38
|
#
|
39
|
-
# TODO Don't use predefined.
|
39
|
+
# TODO Don't use predefined. Perhaps do:
|
40
|
+
# key_token = Query::Token.new ''
|
41
|
+
# key_token.predefined_categories = [index[category_identifier]]
|
40
42
|
#
|
41
43
|
key_token = Query::Token.new '', nil, predefined_categories
|
42
44
|
|
data/lib/picky/tokenizer.rb
CHANGED
@@ -212,7 +212,7 @@ A short overview:
|
|
212
212
|
stopwords /regexp/
|
213
213
|
splits_text_on /regexp/ or "String", default /\s/
|
214
214
|
normalizes_words [[/replace (this)/, 'with this \\1'], ...]
|
215
|
-
rejects_token_if Proc/lambda, default :
|
215
|
+
rejects_token_if Proc/lambda, default :empty?.to_proc
|
216
216
|
substitutes_characters_with Picky::CharacterSubstituter or responds to #substitute(String)
|
217
217
|
stems_with Instance responds to #stem(String)
|
218
218
|
case_sensitive true/false
|
@@ -222,7 +222,7 @@ ERROR
|
|
222
222
|
def default_options
|
223
223
|
{
|
224
224
|
splits_text_on: /\s/,
|
225
|
-
rejects_token_if: :
|
225
|
+
rejects_token_if: :empty?.to_proc
|
226
226
|
}
|
227
227
|
end
|
228
228
|
|
@@ -234,7 +234,7 @@ ERROR
|
|
234
234
|
#
|
235
235
|
def tokenize text
|
236
236
|
text = preprocess text.to_s # processing the text
|
237
|
-
return empty_tokens if text.blank?
|
237
|
+
return empty_tokens if text.empty? # TODO blank?
|
238
238
|
words = pretokenize text # splitting and preparations for tokenizing
|
239
239
|
return empty_tokens if words.empty?
|
240
240
|
tokens = tokens_for words # creating tokens / strings
|
@@ -7,30 +7,35 @@ require 'spec_helper'
|
|
7
7
|
#
|
8
8
|
describe 'uniqueness of allocations' do
|
9
9
|
|
10
|
-
|
11
|
-
index
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
10
|
+
context 'is already uniq' do
|
11
|
+
let(:index) do
|
12
|
+
index = Picky::Index.new :already_uniq do
|
13
|
+
category :category1
|
14
|
+
category :category2
|
15
|
+
category :category3
|
16
|
+
end
|
16
17
|
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
18
|
+
thing = Struct.new(:id, :category1, :category2, :category3)
|
19
|
+
index.add thing.new(1, 'text1', 'text2', 'text3')
|
20
|
+
|
21
|
+
index
|
22
|
+
end
|
23
|
+
let(:try) do
|
24
|
+
Picky::Search.new index do
|
25
|
+
max_allocations 100
|
26
|
+
end
|
22
27
|
end
|
23
28
|
|
24
29
|
# Picky finds three categories.
|
25
30
|
#
|
26
|
-
try.search('text*').ids.should == [1,1,1]
|
31
|
+
it { try.search('text*').ids.should == [1,1,1] }
|
27
32
|
|
28
33
|
# Picky finds 9 possible allocations.
|
29
34
|
#
|
30
|
-
try.search('text* text*').ids.should == [1,1,1]*3
|
35
|
+
it { try.search('text* text*').ids.should == [1,1,1]*3 }
|
31
36
|
|
32
37
|
# Picky finds 27 possible allocations.
|
33
38
|
#
|
34
|
-
try.search('text* text* text*', 100).ids.should == [1,1,1]*3*3
|
39
|
+
it { try.search('text* text* text*', 100).ids.should == [1,1,1]*3*3 }
|
35
40
|
end
|
36
41
|
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
3
|
require 'spec_helper'
|
4
|
+
require 'ostruct'
|
4
5
|
|
5
6
|
describe "Array IDs" do
|
7
|
+
|
8
|
+
let(:index) { Picky::Index.new :arrays }
|
9
|
+
let(:try) { Picky::Search.new index }
|
6
10
|
|
7
11
|
# This tests the weights option.
|
8
12
|
#
|
9
13
|
it 'can use Arrays as IDs' do
|
10
|
-
index
|
11
|
-
category :text1
|
12
|
-
end
|
13
|
-
|
14
|
-
require 'ostruct'
|
14
|
+
index.category :text1
|
15
15
|
|
16
16
|
thing = OpenStruct.new id: ['id1', 'thing1'], text1: "ohai"
|
17
17
|
other = OpenStruct.new id: ['id2', 'thing2'], text1: "ohai kthxbye"
|
@@ -19,8 +19,6 @@ describe "Array IDs" do
|
|
19
19
|
index.add thing
|
20
20
|
index.add other
|
21
21
|
|
22
|
-
try = Picky::Search.new index
|
23
|
-
|
24
22
|
try.search("text1:ohai").ids.should == [
|
25
23
|
["id2", "thing2"],
|
26
24
|
["id1", "thing1"]
|
@@ -30,22 +28,15 @@ describe "Array IDs" do
|
|
30
28
|
# This tests the weights option.
|
31
29
|
#
|
32
30
|
it 'can use split as key_format' do
|
33
|
-
index
|
34
|
-
|
35
|
-
|
36
|
-
category :text1
|
37
|
-
end
|
38
|
-
|
39
|
-
require 'ostruct'
|
40
|
-
|
31
|
+
index.key_format :split
|
32
|
+
index.category :text1
|
33
|
+
|
41
34
|
thing = OpenStruct.new id: "id1 thing1", text1: "ohai"
|
42
35
|
other = OpenStruct.new id: "id2 thing2", text1: "ohai kthxbye"
|
43
36
|
|
44
37
|
index.add thing
|
45
38
|
index.add other
|
46
39
|
|
47
|
-
try = Picky::Search.new index
|
48
|
-
|
49
40
|
try.search("text1:ohai").ids.should == [
|
50
41
|
["id2", "thing2"],
|
51
42
|
["id1", "thing1"]
|