picky 3.6.16 → 4.0.0pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/application.rb +1 -1
- data/lib/picky/backends/backend.rb +2 -0
- data/lib/picky/backends/memory.rb +14 -7
- data/lib/picky/backends/{memory → prepared}/text.rb +10 -4
- data/lib/picky/backends/redis/directly_manipulable.rb +3 -5
- data/lib/picky/backends/redis/list.rb +5 -1
- data/lib/picky/backends/sqlite/basic.rb +4 -2
- data/lib/picky/bundle.rb +6 -7
- data/lib/picky/bundle_indexed.rb +2 -2
- data/lib/picky/bundle_realtime.rb +8 -7
- data/lib/picky/categories.rb +0 -1
- data/lib/picky/categories_indexing.rb +14 -0
- data/lib/picky/category.rb +3 -5
- data/lib/picky/category_indexed.rb +2 -5
- data/lib/picky/category_indexing.rb +28 -16
- data/lib/picky/constants.rb +3 -1
- data/lib/picky/frontend_adapters/rack.rb +2 -2
- data/lib/picky/generators/similarity/phonetic.rb +6 -14
- data/lib/picky/generators/strategy.rb +1 -1
- data/lib/picky/generators/weights/runtime.rb +2 -2
- data/lib/picky/helpers/indexing.rb +20 -0
- data/lib/picky/index.rb +7 -10
- data/lib/picky/index_indexed.rb +1 -8
- data/lib/picky/index_indexing.rb +44 -42
- data/lib/picky/indexers/base.rb +5 -6
- data/lib/picky/indexers/parallel.rb +35 -32
- data/lib/picky/indexers/serial.rb +38 -15
- data/lib/picky/indexes_indexed.rb +0 -7
- data/lib/picky/indexes_indexing.rb +16 -19
- data/lib/picky/loader.rb +6 -4
- data/lib/picky/query/allocation.rb +7 -2
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/indexes.rb +1 -1
- data/lib/picky/query/indexes_check.rb +12 -14
- data/lib/picky/query/token.rb +33 -15
- data/lib/picky/results/exact_first.rb +53 -0
- data/lib/picky/scheduler.rb +43 -0
- data/lib/picky/search.rb +0 -2
- data/lib/picky/sources/csv.rb +2 -3
- data/lib/picky/sources/db.rb +4 -3
- data/lib/picky/sources/mongo.rb +1 -1
- data/lib/picky/tokenizer.rb +0 -4
- data/lib/picky/wrappers/bundle/location.rb +1 -1
- data/lib/picky.rb +2 -2
- data/lib/tasks/index.rake +13 -14
- data/spec/functional/backends/file_spec.rb +2 -4
- data/spec/functional/backends/memory_spec.rb +2 -2
- data/spec/functional/backends/redis_spec.rb +1 -1
- data/spec/functional/exact_first_spec.rb +24 -4
- data/spec/functional/realtime_spec.rb +7 -3
- data/spec/lib/application_spec.rb +30 -30
- data/spec/lib/backends/backend_spec.rb +25 -27
- data/spec/lib/backends/{memory → prepared}/text_spec.rb +1 -1
- data/spec/lib/category_indexing_spec.rb +1 -1
- data/spec/lib/extensions/symbol_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +46 -0
- data/spec/lib/index_indexed_spec.rb +5 -5
- data/spec/lib/index_indexing_spec.rb +13 -12
- data/spec/lib/index_spec.rb +8 -8
- data/spec/lib/indexers/base_spec.rb +5 -6
- data/spec/lib/indexers/parallel_spec.rb +10 -10
- data/spec/lib/indexes_indexed_spec.rb +1 -7
- data/spec/lib/indexes_indexing_spec.rb +10 -5
- data/spec/lib/query/indexes_check_spec.rb +44 -15
- data/spec/lib/query/indexes_spec.rb +11 -11
- data/spec/lib/query/token_spec.rb +10 -0
- data/spec/lib/{indexed/wrappers → results}/exact_first_spec.rb +18 -21
- data/spec/lib/scheduler_spec.rb +92 -0
- metadata +45 -34
- data/lib/picky/cores.rb +0 -127
- data/lib/picky/tokenizers/location.rb +0 -53
- data/lib/picky/wrappers/category/exact_first.rb +0 -94
- data/spec/lib/cores_spec.rb +0 -185
data/lib/picky/index_indexing.rb
CHANGED
@@ -4,30 +4,14 @@ module Picky
|
|
4
4
|
#
|
5
5
|
class Index
|
6
6
|
|
7
|
+
include Helpers::Indexing
|
8
|
+
|
7
9
|
# Delegators for indexing.
|
8
10
|
#
|
9
11
|
delegate :cache,
|
10
12
|
:clear,
|
11
|
-
:prepare,
|
12
13
|
:to => :categories
|
13
14
|
|
14
|
-
# Calling index on an index will call index
|
15
|
-
# on every category.
|
16
|
-
#
|
17
|
-
# Decides whether to use a parallel indexer or whether to
|
18
|
-
# delegate to each category to index themselves.
|
19
|
-
#
|
20
|
-
def index
|
21
|
-
if source.respond_to?(:each)
|
22
|
-
check_source_empty
|
23
|
-
index_in_parallel
|
24
|
-
else
|
25
|
-
with_data_snapshot do
|
26
|
-
categories.index
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
30
|
-
|
31
15
|
# Define an index tokenizer on the index.
|
32
16
|
#
|
33
17
|
# Parameters are the exact same as for indexing.
|
@@ -39,7 +23,35 @@ module Picky
|
|
39
23
|
options && Tokenizer.new(options)
|
40
24
|
end
|
41
25
|
end
|
42
|
-
|
26
|
+
|
27
|
+
#
|
28
|
+
#
|
29
|
+
def index scheduler = Scheduler.new
|
30
|
+
timed_indexing scheduler do
|
31
|
+
prepare scheduler
|
32
|
+
scheduler.finish
|
33
|
+
|
34
|
+
cache scheduler
|
35
|
+
scheduler.finish
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
# Calling prepare on an index will call prepare
|
40
|
+
# on every category.
|
41
|
+
#
|
42
|
+
# Decides whether to use a parallel indexer or whether to
|
43
|
+
# delegate to each category to prepare themselves.
|
44
|
+
#
|
45
|
+
def prepare scheduler = Scheduler.new
|
46
|
+
if source.respond_to?(:each)
|
47
|
+
check_source_empty
|
48
|
+
prepare_in_parallel scheduler
|
49
|
+
else
|
50
|
+
with_data_snapshot do
|
51
|
+
categories.prepare scheduler
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
43
55
|
|
44
56
|
# Check if the given enumerable source is empty.
|
45
57
|
#
|
@@ -50,6 +62,15 @@ module Picky
|
|
50
62
|
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
51
63
|
end
|
52
64
|
|
65
|
+
# Indexes the categories in parallel.
|
66
|
+
#
|
67
|
+
# Only use where the category does have a #each source defined.
|
68
|
+
#
|
69
|
+
def prepare_in_parallel scheduler
|
70
|
+
indexer = Indexers::Parallel.new self
|
71
|
+
indexer.prepare categories, scheduler
|
72
|
+
end
|
73
|
+
|
53
74
|
# Note: Duplicated in category_indexing.rb.
|
54
75
|
#
|
55
76
|
# Take a data snapshot if the source offers it.
|
@@ -64,15 +85,6 @@ module Picky
|
|
64
85
|
end
|
65
86
|
end
|
66
87
|
|
67
|
-
# Indexes the categories in parallel.
|
68
|
-
#
|
69
|
-
# Only use where the category does have a #each source defined.
|
70
|
-
#
|
71
|
-
def index_in_parallel
|
72
|
-
indexer = Indexers::Parallel.new self
|
73
|
-
indexer.index categories
|
74
|
-
end
|
75
|
-
|
76
88
|
# Returns the installed tokenizer or the default.
|
77
89
|
#
|
78
90
|
def tokenizer
|
@@ -87,7 +99,7 @@ module Picky
|
|
87
99
|
#
|
88
100
|
def source some_source = nil, &block
|
89
101
|
some_source ||= block
|
90
|
-
some_source ?
|
102
|
+
some_source ? (check_source(some_source); @source = some_source) : (@source && extract_source)
|
91
103
|
end
|
92
104
|
# Extract the actual source if it is wrapped in a time
|
93
105
|
# capsule, i.e. a block/lambda.
|
@@ -97,10 +109,6 @@ module Picky
|
|
97
109
|
def extract_source
|
98
110
|
@source = @source.respond_to?(:call) ? @source.call : @source
|
99
111
|
end
|
100
|
-
def define_source source
|
101
|
-
check_source source
|
102
|
-
@source = source
|
103
|
-
end
|
104
112
|
def check_source source # :nodoc:
|
105
113
|
raise ArgumentError.new(<<-SOURCE
|
106
114
|
|
@@ -119,21 +127,15 @@ SOURCE
|
|
119
127
|
#
|
120
128
|
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
121
129
|
#
|
122
|
-
def key_format
|
123
|
-
|
124
|
-
end
|
125
|
-
def define_key_format key_format
|
126
|
-
@key_format = key_format
|
130
|
+
def key_format key_format = nil
|
131
|
+
key_format ? (@key_format = key_format) : @key_format
|
127
132
|
end
|
128
133
|
|
129
134
|
# Define what to do after indexing.
|
130
135
|
# (Only used in the Sources::DB)
|
131
136
|
#
|
132
137
|
def after_indexing after_indexing = nil
|
133
|
-
after_indexing ?
|
134
|
-
end
|
135
|
-
def define_after_indexing after_indexing
|
136
|
-
@after_indexing = after_indexing
|
138
|
+
after_indexing ? (@after_indexing = after_indexing) : @after_indexing
|
137
139
|
end
|
138
140
|
|
139
141
|
end
|
data/lib/picky/indexers/base.rb
CHANGED
@@ -19,21 +19,20 @@ module Picky
|
|
19
19
|
|
20
20
|
# Starts the indexing process.
|
21
21
|
#
|
22
|
-
def
|
22
|
+
def prepare categories, scheduler = Scheduler.new
|
23
23
|
check_source
|
24
24
|
categories.empty
|
25
|
-
process categories do |
|
26
|
-
notify_finished
|
25
|
+
process categories, scheduler do |prepared_file|
|
26
|
+
notify_finished prepared_file
|
27
27
|
end
|
28
|
-
categories.cache
|
29
28
|
end
|
30
29
|
|
31
30
|
def check_source # :nodoc:
|
32
31
|
raise "Trying to index without a source for #{@index_or_category.name}." unless source
|
33
32
|
end
|
34
33
|
|
35
|
-
def notify_finished
|
36
|
-
timed_exclaim %Q{"#{@index_or_category.identifier}": Tokenized -> #{
|
34
|
+
def notify_finished prepared_file
|
35
|
+
timed_exclaim %Q{ "#{@index_or_category.identifier}": Tokenized -> #{prepared_file.path.gsub("#{PICKY_ROOT}/", '')}.}
|
37
36
|
end
|
38
37
|
|
39
38
|
end
|
@@ -15,62 +15,65 @@ module Picky
|
|
15
15
|
# Parameters:
|
16
16
|
# * categories: An Enumerable of Category-s.
|
17
17
|
#
|
18
|
-
def process categories
|
19
|
-
comma = ?,
|
20
|
-
newline = ?\n
|
21
|
-
|
18
|
+
def process categories, scheduler = Scheduler.new
|
22
19
|
# Prepare a combined object - array.
|
23
20
|
#
|
24
21
|
combined = categories.map do |category|
|
25
|
-
[category,
|
22
|
+
[category, category.prepared_index_file, [], (category.tokenizer || tokenizer)]
|
26
23
|
end
|
27
24
|
|
28
|
-
# Index.
|
29
|
-
#
|
30
|
-
# TODO Extract into flush_every(100_000) do
|
31
|
-
#
|
32
|
-
i = 0
|
33
|
-
|
34
25
|
# Explicitly reset the source to avoid caching trouble.
|
35
26
|
#
|
36
27
|
source.reset if source.respond_to?(:reset)
|
37
28
|
|
38
29
|
# Go through each object in the source.
|
39
30
|
#
|
31
|
+
objects = []
|
32
|
+
|
40
33
|
source.each do |object|
|
41
|
-
id = object.id
|
42
34
|
|
43
|
-
#
|
35
|
+
# Accumulate objects.
|
44
36
|
#
|
45
|
-
|
37
|
+
objects << object
|
38
|
+
next if objects.size < 10_000
|
39
|
+
|
40
|
+
# THINK Is it a good idea that the tokenizer does not have
|
41
|
+
# control over when it gets the next text?
|
46
42
|
#
|
47
|
-
combined.each do |category,
|
48
|
-
|
49
|
-
tokens.each do |token_text|
|
50
|
-
next unless token_text
|
51
|
-
cache << id << comma << token_text << newline
|
52
|
-
end
|
43
|
+
combined.each do |category, file, cache, tokenizer|
|
44
|
+
index_flush objects, file, category, cache, tokenizer
|
53
45
|
end
|
54
46
|
|
55
|
-
|
56
|
-
|
57
|
-
i = 0
|
58
|
-
end
|
59
|
-
i += 1
|
47
|
+
objects.clear
|
48
|
+
|
60
49
|
end
|
61
|
-
|
62
|
-
|
50
|
+
|
51
|
+
# Close all files.
|
52
|
+
#
|
53
|
+
combined.each do |category, file, cache, tokenizer|
|
54
|
+
index_flush objects, file, category, cache, tokenizer
|
63
55
|
yield file
|
64
56
|
file.close
|
65
57
|
end
|
66
58
|
end
|
67
59
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
60
|
+
def index_flush objects, file, category, cache, tokenizer
|
61
|
+
comma = ?,
|
62
|
+
newline = ?\n
|
63
|
+
|
64
|
+
objects.each do |object|
|
65
|
+
tokens, _ = tokenizer.tokenize object.send(category.from) # Note: Originals not needed.
|
66
|
+
tokens.each do |token_text|
|
67
|
+
next unless token_text
|
68
|
+
cache << object.id << comma << token_text << newline
|
69
|
+
end
|
73
70
|
end
|
71
|
+
|
72
|
+
flush file, cache
|
73
|
+
end
|
74
|
+
|
75
|
+
def flush file, cache
|
76
|
+
file.write(cache.join) && cache.clear
|
74
77
|
end
|
75
78
|
|
76
79
|
end
|
@@ -16,33 +16,56 @@ module Picky
|
|
16
16
|
# Parameters:
|
17
17
|
# * categories: An enumerable of Category-s.
|
18
18
|
#
|
19
|
-
def process categories
|
20
|
-
comma = ?,
|
21
|
-
newline = ?\n
|
22
|
-
|
19
|
+
def process categories, scheduler = Scheduler.new
|
23
20
|
categories.each do |category|
|
24
21
|
|
25
|
-
tokenizer = category.tokenizer
|
26
|
-
|
27
22
|
category.prepared_index_file do |file|
|
23
|
+
|
24
|
+
datas = []
|
28
25
|
result = []
|
26
|
+
tokenizer = category.tokenizer
|
27
|
+
|
28
|
+
source.harvest(category) do |*data|
|
29
|
+
|
30
|
+
# Accumulate data.
|
31
|
+
#
|
32
|
+
datas << data
|
33
|
+
next if datas.size < 10_000
|
34
|
+
|
35
|
+
# Opening the file inside the scheduler to
|
36
|
+
# have it automagically closed.
|
37
|
+
#
|
38
|
+
index_flush datas, file, result, tokenizer
|
39
|
+
|
40
|
+
datas.clear
|
29
41
|
|
30
|
-
source.harvest(category) do |indexed_id, text|
|
31
|
-
tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
|
32
|
-
tokens.each do |token_text|
|
33
|
-
next unless token_text
|
34
|
-
result << indexed_id << comma << token_text << newline
|
35
|
-
end
|
36
|
-
file.write(result.join) && result.clear if result.size > 100_000
|
37
42
|
end
|
38
43
|
|
39
|
-
|
44
|
+
index_flush datas, file, result, tokenizer
|
40
45
|
|
41
|
-
file
|
46
|
+
yield file
|
42
47
|
end
|
48
|
+
end
|
49
|
+
|
50
|
+
end
|
51
|
+
|
52
|
+
def index_flush datas, file, cache, tokenizer
|
53
|
+
comma = ?,
|
54
|
+
newline = ?\n
|
43
55
|
|
56
|
+
datas.each do |indexed_id, text|
|
57
|
+
tokens, _ = tokenizer.tokenize text # Note: Originals not needed.
|
58
|
+
tokens.each do |token_text|
|
59
|
+
next unless token_text
|
60
|
+
cache << indexed_id << comma << token_text << newline
|
61
|
+
end
|
44
62
|
end
|
45
63
|
|
64
|
+
flush file, cache
|
65
|
+
end
|
66
|
+
|
67
|
+
def flush prepared_file, cache
|
68
|
+
prepared_file.write(cache.join) && cache.clear
|
46
69
|
end
|
47
70
|
|
48
71
|
end
|
@@ -4,35 +4,32 @@ module Picky
|
|
4
4
|
#
|
5
5
|
class Indexes
|
6
6
|
|
7
|
-
|
8
|
-
|
9
|
-
|
7
|
+
extend Helpers::Indexing
|
8
|
+
|
9
|
+
instance_delegate :clear,
|
10
10
|
:tokenizer
|
11
11
|
|
12
12
|
each_delegate :clear,
|
13
13
|
:to => :indexes
|
14
14
|
|
15
|
-
# Runs the indexers in parallel (prepare + cache).
|
16
15
|
#
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
# Run indexing/caching forked.
|
23
|
-
#
|
24
|
-
Cores.forked self.indexes, { randomly: randomly }, &:index
|
25
|
-
|
26
|
-
timed_exclaim "Indexing finished."
|
16
|
+
#
|
17
|
+
def self.index scheduler = Scheduler.new
|
18
|
+
timed_indexing scheduler do
|
19
|
+
instance.index scheduler
|
20
|
+
end
|
27
21
|
end
|
28
22
|
|
29
|
-
# For integration testing – indexes for the tests
|
30
|
-
# without forking and shouting ;)
|
31
23
|
#
|
32
|
-
# TODO Rename to #index_without_forking, or just #index.
|
33
24
|
#
|
34
|
-
def
|
35
|
-
indexes.each
|
25
|
+
def index scheduler = Scheduler.new
|
26
|
+
indexes.each { |index| index.prepare scheduler }
|
27
|
+
scheduler.finish
|
28
|
+
|
29
|
+
timed_exclaim "Tokenizing finished, generating data for indexes from tokenized data."
|
30
|
+
|
31
|
+
indexes.each { |index| index.cache scheduler }
|
32
|
+
scheduler.finish
|
36
33
|
end
|
37
34
|
|
38
35
|
#
|
data/lib/picky/loader.rb
CHANGED
@@ -64,6 +64,7 @@ module Picky
|
|
64
64
|
# Requiring Helpers
|
65
65
|
#
|
66
66
|
load_relative 'helpers/measuring'
|
67
|
+
load_relative 'helpers/indexing'
|
67
68
|
|
68
69
|
# Calculations.
|
69
70
|
#
|
@@ -112,9 +113,10 @@ module Picky
|
|
112
113
|
load_relative 'backends/helpers/file'
|
113
114
|
load_relative 'backends/backend'
|
114
115
|
|
116
|
+
load_relative 'backends/prepared/text'
|
117
|
+
|
115
118
|
load_relative 'backends/memory'
|
116
119
|
load_relative 'backends/memory/basic'
|
117
|
-
load_relative 'backends/memory/text'
|
118
120
|
load_relative 'backends/memory/marshal'
|
119
121
|
load_relative 'backends/memory/json'
|
120
122
|
|
@@ -147,7 +149,6 @@ module Picky
|
|
147
149
|
# Wrappers.
|
148
150
|
#
|
149
151
|
load_relative 'wrappers/category/location'
|
150
|
-
load_relative 'wrappers/category/exact_first'
|
151
152
|
|
152
153
|
load_relative 'wrappers/bundle/delegators'
|
153
154
|
load_relative 'wrappers/bundle/wrapper'
|
@@ -237,6 +238,7 @@ module Picky
|
|
237
238
|
# Results.
|
238
239
|
#
|
239
240
|
load_relative 'results'
|
241
|
+
load_relative 'results/exact_first'
|
240
242
|
|
241
243
|
# Search.
|
242
244
|
#
|
@@ -259,9 +261,9 @@ module Picky
|
|
259
261
|
#
|
260
262
|
load_relative 'application'
|
261
263
|
|
262
|
-
# Load tools. Load
|
264
|
+
# Load tools. Load specifically?
|
263
265
|
#
|
264
|
-
load_relative '
|
266
|
+
load_relative 'scheduler'
|
265
267
|
|
266
268
|
# Load migration notices.
|
267
269
|
#
|
@@ -16,8 +16,11 @@ module Picky
|
|
16
16
|
#
|
17
17
|
def initialize index, combinations
|
18
18
|
@combinations = combinations
|
19
|
-
|
20
|
-
|
19
|
+
|
20
|
+
# Could this be rewritten?
|
21
|
+
#
|
22
|
+
@result_identifier = index.result_identifier
|
23
|
+
@backend = index.backend
|
21
24
|
end
|
22
25
|
|
23
26
|
def hash
|
@@ -49,6 +52,8 @@ module Picky
|
|
49
52
|
|
50
53
|
# This starts the searching process.
|
51
54
|
#
|
55
|
+
# Returns the calculated ids (from the offset).
|
56
|
+
#
|
52
57
|
def process! amount, offset
|
53
58
|
ids = calculate_ids amount, offset
|
54
59
|
@count = ids.size # cache the count before throwing away the ids
|
data/lib/picky/query/indexes.rb
CHANGED
@@ -2,8 +2,6 @@ module Picky
|
|
2
2
|
|
3
3
|
module Query
|
4
4
|
|
5
|
-
# TODO Remove.
|
6
|
-
#
|
7
5
|
class IndexesCheck
|
8
6
|
|
9
7
|
class << self
|
@@ -14,16 +12,16 @@ module Picky
|
|
14
12
|
# Currently it isn't possible using Memory and Redis etc.
|
15
13
|
# indexes in the same query index group.
|
16
14
|
#
|
17
|
-
# Picky will raise a Query::Indexes::
|
15
|
+
# Picky will raise a Query::Indexes::DifferentBackendsError.
|
18
16
|
#
|
19
|
-
def
|
20
|
-
|
21
|
-
|
22
|
-
raise_different
|
23
|
-
|
17
|
+
def check_backends indexes # :nodoc:
|
18
|
+
backends = indexes.map &:backend
|
19
|
+
backends.uniq! &:class
|
20
|
+
raise_different backends if backends.size > 1
|
21
|
+
backends
|
24
22
|
end
|
25
|
-
def raise_different
|
26
|
-
raise
|
23
|
+
def raise_different backends # :nodoc:
|
24
|
+
raise DifferentBackendsError.new(backends)
|
27
25
|
end
|
28
26
|
|
29
27
|
end
|
@@ -33,12 +31,12 @@ module Picky
|
|
33
31
|
# Currently it isn't possible using Memory and Redis etc.
|
34
32
|
# indexes in the same query index group.
|
35
33
|
#
|
36
|
-
class
|
37
|
-
def initialize
|
38
|
-
@
|
34
|
+
class DifferentBackendsError < StandardError # :nodoc:all
|
35
|
+
def initialize backends
|
36
|
+
@backends = backends
|
39
37
|
end
|
40
38
|
def to_s
|
41
|
-
"Currently it isn't possible to mix Indexes with backends #{@
|
39
|
+
"Currently it isn't possible to mix Indexes with backends #{@backends.join(" and ")} in the same Search instance."
|
42
40
|
end
|
43
41
|
end
|
44
42
|
|
data/lib/picky/query/token.rb
CHANGED
@@ -6,7 +6,8 @@ module Picky
|
|
6
6
|
#
|
7
7
|
# It remembers the original form, and a normalized form.
|
8
8
|
#
|
9
|
-
# It also knows whether it needs to look for similarity (bla~),
|
9
|
+
# It also knows whether it needs to look for similarity (bla~),
|
10
|
+
# or whether it is a partial (bla*).
|
10
11
|
#
|
11
12
|
class Token # :nodoc:all
|
12
13
|
|
@@ -17,7 +18,8 @@ module Picky
|
|
17
18
|
|
18
19
|
# Normal initializer.
|
19
20
|
#
|
20
|
-
# Note:
|
21
|
+
# Note:
|
22
|
+
# Use this if you do not want a normalized token.
|
21
23
|
#
|
22
24
|
def initialize text, original = nil
|
23
25
|
@text = text
|
@@ -26,21 +28,25 @@ module Picky
|
|
26
28
|
|
27
29
|
# Returns a qualified and normalized token.
|
28
30
|
#
|
29
|
-
# Note:
|
30
|
-
#
|
31
|
+
# Note:
|
32
|
+
# Use this in the search engine if you need a qualified
|
33
|
+
# and normalized token. I.e. one prepared for a search.
|
31
34
|
#
|
32
35
|
def self.processed text, original = nil
|
33
36
|
new(text, original).process
|
34
37
|
end
|
35
|
-
def process
|
36
|
-
qualify
|
37
|
-
partialize
|
38
|
-
similarize
|
39
|
-
remove_illegals
|
38
|
+
def process
|
39
|
+
qualify
|
40
|
+
partialize
|
41
|
+
similarize
|
42
|
+
remove_illegals
|
40
43
|
self
|
41
44
|
end
|
42
45
|
|
46
|
+
# Symbolizes this token's text.
|
43
47
|
#
|
48
|
+
# Note:
|
49
|
+
# Call externally when Picky operates in Symbols mode.
|
44
50
|
#
|
45
51
|
def symbolize!
|
46
52
|
@text = @text.to_sym
|
@@ -48,7 +54,10 @@ module Picky
|
|
48
54
|
|
49
55
|
# Translates this token's qualifiers into actual categories.
|
50
56
|
#
|
51
|
-
# Note:
|
57
|
+
# Note:
|
58
|
+
# If this is not done, there is no mapping.
|
59
|
+
#
|
60
|
+
# THINK Can this be improved somehow?
|
52
61
|
#
|
53
62
|
def categorize mapper
|
54
63
|
@user_defined_categories = @qualifiers && @qualifiers.map do |qualifier|
|
@@ -63,13 +72,22 @@ module Picky
|
|
63
72
|
def partial= partial
|
64
73
|
@partial = partial if @partial.nil?
|
65
74
|
end
|
75
|
+
|
76
|
+
# A token is partial? only if it is not similar
|
77
|
+
# and is partial.
|
78
|
+
#
|
79
|
+
# It can't be similar and partial at the same time.
|
80
|
+
#
|
66
81
|
def partial?
|
67
82
|
!@similar && @partial
|
68
83
|
end
|
69
84
|
|
70
|
-
# If the text ends with *, partialize it. If with ",
|
85
|
+
# If the text ends with *, partialize it. If with ",
|
86
|
+
# non-partialize it.
|
71
87
|
#
|
72
|
-
# The
|
88
|
+
# The last one wins.
|
89
|
+
# So "hello*" will not be partially searched.
|
90
|
+
# So "hello"* will be partially searched.
|
73
91
|
#
|
74
92
|
@@no_partial = /\"\Z/
|
75
93
|
@@partial = /\*\Z/
|
@@ -97,7 +115,7 @@ module Picky
|
|
97
115
|
#
|
98
116
|
@@illegals = /["*~]/
|
99
117
|
def remove_illegals
|
100
|
-
@text.gsub! @@illegals,
|
118
|
+
@text.gsub! @@illegals, EMPTY_STRING unless @text.blank?
|
101
119
|
end
|
102
120
|
|
103
121
|
# Returns an array of possible combinations.
|
@@ -140,9 +158,9 @@ module Picky
|
|
140
158
|
@@split_qualifier_text = ':'
|
141
159
|
@@split_qualifiers = ','
|
142
160
|
def qualify
|
143
|
-
@qualifiers, @text = (@text ||
|
161
|
+
@qualifiers, @text = (@text || EMPTY_STRING).split(@@split_qualifier_text, 2)
|
144
162
|
@qualifiers, @text = if @text.blank?
|
145
|
-
[nil, (@qualifiers ||
|
163
|
+
[nil, (@qualifiers || EMPTY_STRING)]
|
146
164
|
else
|
147
165
|
[@qualifiers.split(@@split_qualifiers), @text]
|
148
166
|
end
|