picky 3.6.16 → 4.0.0pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/application.rb +1 -1
- data/lib/picky/backends/backend.rb +2 -0
- data/lib/picky/backends/memory.rb +14 -7
- data/lib/picky/backends/{memory → prepared}/text.rb +10 -4
- data/lib/picky/backends/redis/directly_manipulable.rb +3 -5
- data/lib/picky/backends/redis/list.rb +5 -1
- data/lib/picky/backends/sqlite/basic.rb +4 -2
- data/lib/picky/bundle.rb +6 -7
- data/lib/picky/bundle_indexed.rb +2 -2
- data/lib/picky/bundle_realtime.rb +8 -7
- data/lib/picky/categories.rb +0 -1
- data/lib/picky/categories_indexing.rb +14 -0
- data/lib/picky/category.rb +3 -5
- data/lib/picky/category_indexed.rb +2 -5
- data/lib/picky/category_indexing.rb +28 -16
- data/lib/picky/constants.rb +3 -1
- data/lib/picky/frontend_adapters/rack.rb +2 -2
- data/lib/picky/generators/similarity/phonetic.rb +6 -14
- data/lib/picky/generators/strategy.rb +1 -1
- data/lib/picky/generators/weights/runtime.rb +2 -2
- data/lib/picky/helpers/indexing.rb +20 -0
- data/lib/picky/index.rb +7 -10
- data/lib/picky/index_indexed.rb +1 -8
- data/lib/picky/index_indexing.rb +44 -42
- data/lib/picky/indexers/base.rb +5 -6
- data/lib/picky/indexers/parallel.rb +35 -32
- data/lib/picky/indexers/serial.rb +38 -15
- data/lib/picky/indexes_indexed.rb +0 -7
- data/lib/picky/indexes_indexing.rb +16 -19
- data/lib/picky/loader.rb +6 -4
- data/lib/picky/query/allocation.rb +7 -2
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/indexes.rb +1 -1
- data/lib/picky/query/indexes_check.rb +12 -14
- data/lib/picky/query/token.rb +33 -15
- data/lib/picky/results/exact_first.rb +53 -0
- data/lib/picky/scheduler.rb +43 -0
- data/lib/picky/search.rb +0 -2
- data/lib/picky/sources/csv.rb +2 -3
- data/lib/picky/sources/db.rb +4 -3
- data/lib/picky/sources/mongo.rb +1 -1
- data/lib/picky/tokenizer.rb +0 -4
- data/lib/picky/wrappers/bundle/location.rb +1 -1
- data/lib/picky.rb +2 -2
- data/lib/tasks/index.rake +13 -14
- data/spec/functional/backends/file_spec.rb +2 -4
- data/spec/functional/backends/memory_spec.rb +2 -2
- data/spec/functional/backends/redis_spec.rb +1 -1
- data/spec/functional/exact_first_spec.rb +24 -4
- data/spec/functional/realtime_spec.rb +7 -3
- data/spec/lib/application_spec.rb +30 -30
- data/spec/lib/backends/backend_spec.rb +25 -27
- data/spec/lib/backends/{memory → prepared}/text_spec.rb +1 -1
- data/spec/lib/category_indexing_spec.rb +1 -1
- data/spec/lib/extensions/symbol_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +46 -0
- data/spec/lib/index_indexed_spec.rb +5 -5
- data/spec/lib/index_indexing_spec.rb +13 -12
- data/spec/lib/index_spec.rb +8 -8
- data/spec/lib/indexers/base_spec.rb +5 -6
- data/spec/lib/indexers/parallel_spec.rb +10 -10
- data/spec/lib/indexes_indexed_spec.rb +1 -7
- data/spec/lib/indexes_indexing_spec.rb +10 -5
- data/spec/lib/query/indexes_check_spec.rb +44 -15
- data/spec/lib/query/indexes_spec.rb +11 -11
- data/spec/lib/query/token_spec.rb +10 -0
- data/spec/lib/{indexed/wrappers → results}/exact_first_spec.rb +18 -21
- data/spec/lib/scheduler_spec.rb +92 -0
- metadata +45 -34
- data/lib/picky/cores.rb +0 -127
- data/lib/picky/tokenizers/location.rb +0 -53
- data/lib/picky/wrappers/category/exact_first.rb +0 -94
- data/spec/lib/cores_spec.rb +0 -185
data/lib/picky/index_indexing.rb
CHANGED
@@ -4,30 +4,14 @@ module Picky
   #
   class Index

+    include Helpers::Indexing
+
     # Delegators for indexing.
     #
     delegate :cache,
              :clear,
-             :prepare,
              :to => :categories

-    # Calling index on an index will call index
-    # on every category.
-    #
-    # Decides whether to use a parallel indexer or whether to
-    # delegate to each category to index themselves.
-    #
-    def index
-      if source.respond_to?(:each)
-        check_source_empty
-        index_in_parallel
-      else
-        with_data_snapshot do
-          categories.index
-        end
-      end
-    end
-
     # Define an index tokenizer on the index.
     #
     # Parameters are the exact same as for indexing.
@@ -39,7 +23,35 @@ module Picky
         options && Tokenizer.new(options)
       end
     end
-
+
+    #
+    #
+    def index scheduler = Scheduler.new
+      timed_indexing scheduler do
+        prepare scheduler
+        scheduler.finish
+
+        cache scheduler
+        scheduler.finish
+      end
+    end
+
+    # Calling prepare on an index will call prepare
+    # on every category.
+    #
+    # Decides whether to use a parallel indexer or whether to
+    # delegate to each category to prepare themselves.
+    #
+    def prepare scheduler = Scheduler.new
+      if source.respond_to?(:each)
+        check_source_empty
+        prepare_in_parallel scheduler
+      else
+        with_data_snapshot do
+          categories.prepare scheduler
+        end
+      end
+    end

     # Check if the given enumerable source is empty.
     #
@@ -50,6 +62,15 @@ module Picky
       warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
     end

+    # Indexes the categories in parallel.
+    #
+    # Only use where the category does have a #each source defined.
+    #
+    def prepare_in_parallel scheduler
+      indexer = Indexers::Parallel.new self
+      indexer.prepare categories, scheduler
+    end
+
     # Note: Duplicated in category_indexing.rb.
     #
     # Take a data snapshot if the source offers it.
@@ -64,15 +85,6 @@ module Picky
       end
     end

-    # Indexes the categories in parallel.
-    #
-    # Only use where the category does have a #each source defined.
-    #
-    def index_in_parallel
-      indexer = Indexers::Parallel.new self
-      indexer.index categories
-    end
-
     # Returns the installed tokenizer or the default.
     #
     def tokenizer
@@ -87,7 +99,7 @@ module Picky
     #
     def source some_source = nil, &block
       some_source ||= block
-      some_source ?
+      some_source ? (check_source(some_source); @source = some_source) : (@source && extract_source)
     end
     # Extract the actual source if it is wrapped in a time
     # capsule, i.e. a block/lambda.
@@ -97,10 +109,6 @@ module Picky
     def extract_source
       @source = @source.respond_to?(:call) ? @source.call : @source
     end
-    def define_source source
-      check_source source
-      @source = source
-    end
     def check_source source # :nodoc:
       raise ArgumentError.new(<<-SOURCE

@@ -119,21 +127,15 @@ SOURCE
     #
     # Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
     #
-    def key_format
-
-    end
-    def define_key_format key_format
-      @key_format = key_format
+    def key_format key_format = nil
+      key_format ? (@key_format = key_format) : @key_format
     end

     # Define what to do after indexing.
     # (Only used in the Sources::DB)
     #
     def after_indexing after_indexing = nil
-      after_indexing ?
-    end
-    def define_after_indexing after_indexing
-      @after_indexing = after_indexing
+      after_indexing ? (@after_indexing = after_indexing) : @after_indexing
     end

   end
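
Taken together, these hunks split indexing into a prepare pass and a cache pass driven by a Scheduler, and fold the old define_source / define_key_format / define_after_indexing setters into combined getter/setter methods. A minimal sketch of how an index definition reads against the 4.0.0pre1 API follows; the :books index, its inline source, and the :title category are invented for illustration, and only calls visible in the hunks above are relied on.

    require 'picky'
    require 'ostruct'

    # Hypothetical index, for illustration only.
    books = Picky::Index.new :books do
      source     { [OpenStruct.new(id: 1, title: 'Tokenize me')] }
      key_format :to_i    # combined getter/setter, replaces define_key_format
      category   :title
    end

    books.index   # 4.0: prepare + scheduler.finish, then cache + scheduler.finish
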
data/lib/picky/indexers/base.rb
CHANGED
@@ -19,21 +19,20 @@ module Picky

       # Starts the indexing process.
       #
-      def
+      def prepare categories, scheduler = Scheduler.new
        check_source
        categories.empty
-        process categories do |
-          notify_finished
+        process categories, scheduler do |prepared_file|
+          notify_finished prepared_file
        end
-        categories.cache
      end

      def check_source # :nodoc:
        raise "Trying to index without a source for #{@index_or_category.name}." unless source
      end

-      def notify_finished
-        timed_exclaim %Q{"#{@index_or_category.identifier}": Tokenized -> #{
+      def notify_finished prepared_file
+        timed_exclaim %Q{ "#{@index_or_category.identifier}": Tokenized -> #{prepared_file.path.gsub("#{PICKY_ROOT}/", '')}.}
      end

    end

data/lib/picky/indexers/parallel.rb
CHANGED
@@ -15,62 +15,65 @@ module Picky
       # Parameters:
       # * categories: An Enumerable of Category-s.
       #
-      def process categories
-        comma = ?,
-        newline = ?\n
-
+      def process categories, scheduler = Scheduler.new
        # Prepare a combined object - array.
        #
        combined = categories.map do |category|
-          [category,
+          [category, category.prepared_index_file, [], (category.tokenizer || tokenizer)]
        end

-        # Index.
-        #
-        # TODO Extract into flush_every(100_000) do
-        #
-        i = 0
-
        # Explicitly reset the source to avoid caching trouble.
        #
        source.reset if source.respond_to?(:reset)

        # Go through each object in the source.
        #
+        objects = []
+
        source.each do |object|
-          id = object.id

-          #
+          # Accumulate objects.
          #
-
+          objects << object
+          next if objects.size < 10_000
+
+          # THINK Is it a good idea that not the tokenizer has
+          # control over when he gets the next text?
          #
-          combined.each do |category,
-
-            tokens.each do |token_text|
-              next unless token_text
-              cache << id << comma << token_text << newline
-            end
+          combined.each do |category, file, cache, tokenizer|
+            index_flush objects, file, category, cache, tokenizer
          end

-
-
-            i = 0
-          end
-          i += 1
+          objects.clear

        end
-
-
+
+        # Close all files.
+        #
+        combined.each do |category, file, cache, tokenizer|
+          index_flush objects, file, category, cache, tokenizer
          yield file
          file.close
        end
      end

-
-
-
-
-
+      def index_flush objects, file, category, cache, tokenizer
+        comma = ?,
+        newline = ?\n
+
+        objects.each do |object|
+          tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
+          tokens.each do |token_text|
+            next unless token_text
+            cache << object.id << comma << token_text << newline
+          end
        end
+
+        flush file, cache
+      end
+
+      def flush file, cache
+        file.write(cache.join) && cache.clear
      end

    end
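
The parallel indexer no longer writes tokens line by line while counting with a manual i; it accumulates source objects and flushes every category's token cache to its prepared file every 10,000 objects, with a final flush-and-close pass. A standalone sketch of just that batching control flow, in plain Ruby rather than Picky's own classes:

    # Accumulate-and-flush pattern used by the new Indexers::Parallel#process:
    # buffer objects, flush every 10_000, then flush the remainder at the end.
    def each_batch(source, size = 10_000)
      batch = []
      source.each do |object|
        batch << object
        next if batch.size < size
        yield batch          # corresponds to the index_flush calls above
        batch.clear
      end
      yield batch            # final pass, like the "Close all files" loop
    end

    each_batch(1..25_000) { |batch| puts "flushing #{batch.size} objects" }
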

data/lib/picky/indexers/serial.rb
CHANGED
@@ -16,33 +16,56 @@ module Picky
       # Parameters:
       # * categories: An enumerable of Category-s.
       #
-      def process categories
-        comma = ?,
-        newline = ?\n
-
+      def process categories, scheduler = Scheduler.new
        categories.each do |category|

-          tokenizer = category.tokenizer
-
          category.prepared_index_file do |file|
+
+            datas  = []
            result = []
+            tokenizer = category.tokenizer
+
+            source.harvest(category) do |*data|
+
+              # Accumulate data.
+              #
+              datas << data
+              next if datas.size < 10_000
+
+              # Opening the file inside the scheduler to
+              # have it automagically closed.
+              #
+              index_flush datas, file, result, tokenizer
+
+              datas.clear

-            source.harvest(category) do |indexed_id, text|
-              tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
-              tokens.each do |token_text|
-                next unless token_text
-                result << indexed_id << comma << token_text << newline
-              end
-              file.write(result.join) && result.clear if result.size > 100_000
            end

-
+            index_flush datas, file, result, tokenizer

-            file
+            yield file
          end
+        end
+
+      end
+
+      def index_flush datas, file, cache, tokenizer
+        comma = ?,
+        newline = ?\n

+        datas.each do |indexed_id, text|
+          tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
+          tokens.each do |token_text|
+            next unless token_text
+            cache << indexed_id << comma << token_text << newline
+          end
        end

+        flush file, cache
+      end
+
+      def flush prepared_file, cache
+        prepared_file.write(cache.join) && cache.clear
      end

    end
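
Both indexers now funnel their output through index_flush/flush, writing one id,token pair per line into the category's prepared index file (which the renamed backends/prepared/text.rb presumably reads back). A small illustration of that file format; the file name and the reader below are invented, only the id,token layout comes from the flush code above.

    # Illustration only: the "id,token" lines mirror
    # `cache << id << comma << token_text << newline` in the hunks above.
    File.write 'books_title_prepared.txt', "1,tokenize\n1,me\n2,another\n"

    File.foreach('books_title_prepared.txt') do |line|
      id, token = line.chomp.split(',', 2)
      puts "#{id} => #{token}"
    end
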

data/lib/picky/indexes_indexing.rb
CHANGED
@@ -4,35 +4,32 @@ module Picky
   #
   class Indexes

-
-
-
+    extend Helpers::Indexing
+
+    instance_delegate :clear,
                      :tokenizer

    each_delegate :clear,
                  :to => :indexes

-    # Runs the indexers in parallel (prepare + cache).
    #
-
-
-
-
-
-      # Run indexing/caching forked.
-      #
-      Cores.forked self.indexes, { randomly: randomly }, &:index
-
-      timed_exclaim "Indexing finished."
+    #
+    def self.index scheduler = Scheduler.new
+      timed_indexing scheduler do
+        instance.index scheduler
+      end
    end

-    # For integration testing – indexes for the tests
-    # without forking and shouting ;)
    #
-    # TODO Rename to #index_without_forking, or just #index.
    #
-    def
-      indexes.each
+    def index scheduler = Scheduler.new
+      indexes.each { |index| index.prepare scheduler }
+      scheduler.finish
+
+      timed_exclaim "Tokenizing finished, generating data for indexes from tokenized data."
+
+      indexes.each { |index| index.cache scheduler }
+      scheduler.finish
    end

    #
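
Forked indexing via Cores is gone; Picky::Indexes.index now runs a prepare pass over all indexes, waits for the scheduler, then runs a cache pass. The Scheduler itself lives in the new data/lib/picky/scheduler.rb, which this diff only lists, so the sketch below relies on nothing beyond the calls visible here.

    Picky::Indexes.index   # uses the default Picky::Scheduler.new

    # What that does, per the hunk above:
    #   indexes.each { |index| index.prepare scheduler }   # tokenize into prepared files
    #   scheduler.finish
    #   indexes.each { |index| index.cache scheduler }     # generate index data from them
    #   scheduler.finish
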
data/lib/picky/loader.rb
CHANGED
@@ -64,6 +64,7 @@ module Picky
     # Requiring Helpers
     #
     load_relative 'helpers/measuring'
+    load_relative 'helpers/indexing'

     # Calculations.
     #
@@ -112,9 +113,10 @@ module Picky
     load_relative 'backends/helpers/file'
     load_relative 'backends/backend'

+    load_relative 'backends/prepared/text'
+
     load_relative 'backends/memory'
     load_relative 'backends/memory/basic'
-    load_relative 'backends/memory/text'
     load_relative 'backends/memory/marshal'
     load_relative 'backends/memory/json'

@@ -147,7 +149,6 @@ module Picky
     # Wrappers.
     #
     load_relative 'wrappers/category/location'
-    load_relative 'wrappers/category/exact_first'

     load_relative 'wrappers/bundle/delegators'
     load_relative 'wrappers/bundle/wrapper'
@@ -237,6 +238,7 @@ module Picky
     # Results.
     #
     load_relative 'results'
+    load_relative 'results/exact_first'

     # Search.
     #
@@ -259,9 +261,9 @@ module Picky
     #
     load_relative 'application'

-    # Load tools. Load
+    # Load tools. Load specifically?
     #
-    load_relative '
+    load_relative 'scheduler'

     # Load migration notices.
     #

data/lib/picky/query/allocation.rb
CHANGED
@@ -16,8 +16,11 @@ module Picky
     #
     def initialize index, combinations
       @combinations = combinations
-
-
+
+      # Could this be rewritten?
+      #
+      @result_identifier = index.result_identifier
+      @backend = index.backend
     end

     def hash
@@ -49,6 +52,8 @@ module Picky

     # This starts the searching process.
     #
+    # Returns the calculated ids (from the offset).
+    #
     def process! amount, offset
       ids = calculate_ids amount, offset
       @count = ids.size # cache the count before throwing away the ids
data/lib/picky/query/indexes_check.rb
CHANGED
@@ -2,8 +2,6 @@ module Picky

   module Query

-    # TODO Remove.
-    #
     class IndexesCheck

       class << self
@@ -14,16 +12,16 @@ module Picky
         # Currently it isn't possible using Memory and Redis etc.
         # indexes in the same query index group.
         #
-        # Picky will raise a Query::Indexes::
+        # Picky will raise a Query::Indexes::DifferentBackendsError.
         #
-        def
-
-
-          raise_different
-
+        def check_backends indexes # :nodoc:
+          backends = indexes.map &:backend
+          backends.uniq! &:class
+          raise_different backends if backends.size > 1
+          backends
         end
-        def raise_different
-          raise
+        def raise_different backends # :nodoc:
+          raise DifferentBackendsError.new(backends)
         end

       end
@@ -33,12 +31,12 @@ module Picky
     # Currently it isn't possible using Memory and Redis etc.
     # indexes in the same query index group.
     #
-    class
-      def initialize
-        @
+    class DifferentBackendsError < StandardError # :nodoc:all
+      def initialize backends
+        @backends = backends
      end
      def to_s
-        "Currently it isn't possible to mix Indexes with backends #{@
+        "Currently it isn't possible to mix Indexes with backends #{@backends.join(" and ")} in the same Search instance."
      end
    end

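
check_backends now compares the indexes' backends by class and raises the new DifferentBackendsError when they differ. A throwaway sketch of the check; real callers pass Picky::Index instances whose backend is, say, a memory or a Redis backend, and the stand-ins below exist only to trigger the error.

    # Stand-in "indexes" that only expose #backend; the differing backend
    # classes (Symbol vs String here) are what trips the check above.
    FakeIndex = Struct.new(:backend)
    mixed = [FakeIndex.new(:memory_backend), FakeIndex.new('redis backend')]

    begin
      Picky::Query::IndexesCheck.check_backends mixed
    rescue StandardError => e
      puts e   # "Currently it isn't possible to mix Indexes with backends ..."
    end
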
data/lib/picky/query/token.rb
CHANGED
@@ -6,7 +6,8 @@ module Picky
   #
   # It remembers the original form, and and a normalized form.
   #
-  # It also knows whether it needs to look for similarity (bla~),
+  # It also knows whether it needs to look for similarity (bla~),
+  # or whether it is a partial (bla*).
   #
   class Token # :nodoc:all

@@ -17,7 +18,8 @@ module Picky

     # Normal initializer.
     #
-    # Note:
+    # Note:
+    # Use this if you do not want a normalized token.
     #
     def initialize text, original = nil
       @text = text
@@ -26,21 +28,25 @@ module Picky

     # Returns a qualified and normalized token.
     #
-    # Note:
-    #
+    # Note:
+    # Use this in the search engine if you need a qualified
+    # and normalized token. I.e. one prepared for a search.
     #
     def self.processed text, original = nil
       new(text, original).process
     end
-    def process
-      qualify
-      partialize
-      similarize
-      remove_illegals
+    def process
+      qualify
+      partialize
+      similarize
+      remove_illegals
       self
     end

+    # Symbolizes this token's text.
     #
+    # Note:
+    # Call externally when Picky operates in Symbols mode.
     #
     def symbolize!
       @text = @text.to_sym
@@ -48,7 +54,10 @@ module Picky

     # Translates this token's qualifiers into actual categories.
     #
-    # Note:
+    # Note:
+    # If this is not done, there is no mapping.
+    #
+    # THINK Can this be improved somehow?
     #
     def categorize mapper
       @user_defined_categories = @qualifiers && @qualifiers.map do |qualifier|
@@ -63,13 +72,22 @@ module Picky
     def partial= partial
       @partial = partial if @partial.nil?
     end
+
+    # A token is partial? only if it not similar
+    # and is partial.
+    #
+    # It can't be similar and partial at the same time.
+    #
     def partial?
       !@similar && @partial
     end

-    # If the text ends with *, partialize it. If with ",
+    # If the text ends with *, partialize it. If with ",
+    # non-partialize it.
     #
-    # The
+    # The last one wins.
+    # So "hello*" will not be partially searched.
+    # So "hello"* will be partially searched.
     #
     @@no_partial = /\"\Z/
     @@partial = /\*\Z/
@@ -97,7 +115,7 @@ module Picky
     #
     @@illegals = /["*~]/
     def remove_illegals
-      @text.gsub! @@illegals,
+      @text.gsub! @@illegals, EMPTY_STRING unless @text.blank?
     end

     # Returns an array of possible combinations.
@@ -140,9 +158,9 @@ module Picky
     @@split_qualifier_text = ':'
     @@split_qualifiers = ','
     def qualify
-      @qualifiers, @text = (@text ||
+      @qualifiers, @text = (@text || EMPTY_STRING).split(@@split_qualifier_text, 2)
       @qualifiers, @text = if @text.blank?
-        [nil, (@qualifiers ||
+        [nil, (@qualifiers || EMPTY_STRING)]
       else
        [@qualifiers.split(@@split_qualifiers), @text]
      end
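
The expanded comments spell out the partialize rule: a trailing * makes a token partial, a trailing " forces it non-partial, and the last marker wins. A short sketch of that behaviour through the processed constructor shown above, with the expected results taken from those comments rather than re-verified against the full file:

    # Per the partialize comments above: the last trailing marker wins.
    Picky::Query::Token.processed('"hello"*').partial?   # => true  (ends in *)
    Picky::Query::Token.processed('"hello*"').partial?   # => false (ends in ")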