picky 1.5.2 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
@@ -0,0 +1,154 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
|
4
|
+
# Analyzes indexes (index bundles, actually).
#
# Collects statistics about a bundle's index, weights and similarity
# hashes into #analysis and human readable warnings into #comments.
#
# Can be output using to_s.
#
class Analyzer

  # analysis: Hash of the collected statistics, keyed by :__keys, :index,
  #           :weights and :similarity (only the analyzed parts are present).
  # comments: Array of (ANSI colored) warning strings.
  #
  attr_reader :analysis, :comments

  # Starts out with no statistics and no comments.
  #
  def initialize
    @analysis = {}
    @comments = []
  end

  # Analyzes the given bundle.
  #
  # The bundle needs to respond to size, index, weights, similarity and
  # the respective load_*/clear_* methods. Each part is loaded, analyzed
  # and cleared again before the next part is loaded.
  #
  # Returns self, so that it can be chained with to_s.
  #
  def analyze bundle
    bundle.load_index
    analysis[:__keys] = bundle.size
    cardinality :index, bundle.index
    index_analysis
    bundle.clear_index

    bundle.load_weights
    weights bundle.weights
    weights_analysis
    bundle.clear_weights

    bundle.load_similarity
    cardinality :similarity, bundle.similarity
    bundle.clear_similarity

    # bundle.load_configuration
    # analysis[:configuration] = bundle.configuration
    # bundle.clear_configuration

    self
  end

  # Records key length and ids-per-key statistics of the given
  # key => ids hash under analysis[identifier].
  #
  # Does nothing if the index is empty.
  #
  # Minimum and maximum are tracked independently of each other, so a
  # single entry (or entries in decreasing order) still result in a
  # valid, non-empty range.
  #
  def cardinality identifier, index
    return if index.size.zero?

    key_lengths = new_stats
    ids_lengths = new_stats

    index.each_pair do |key, ids|
      track key_lengths, key.size
      track ids_lengths, ids.size
    end

    index_size = index.size

    analysis[identifier] ||= {}
    analysis[identifier][:key_length]         = (key_lengths[:min]..key_lengths[:max])
    analysis[identifier][:ids_length]         = (ids_lengths[:min]..ids_lengths[:max])
    analysis[identifier][:key_length_average] = key_lengths[:sum].to_f / index_size
    analysis[identifier][:ids_length_average] = ids_lengths[:sum].to_f / index_size
  end

  # Adds comments regarding the index statistics.
  #
  # Does nothing if no index statistics have been collected.
  #
  def index_analysis
    return unless analysis[:index]

    # The key count is only set by #analyze, so it might be missing
    # if #cardinality has been called directly.
    #
    keys = analysis[:__keys]
    if keys && keys < 100
      comments << "\033[33mVery small index (< 100 keys).\033[m"
    end

    range = analysis[:index][:key_length]
    if range.min == 1
      comments << "\033[33mIndex matches single characters.\033[m"
    end
  end

  # Records the weight range and the average weight of the given
  # key => weight hash under analysis[:weights].
  #
  # Does nothing if the index is empty.
  #
  def weights index
    return if index.size.zero?

    stats = new_stats

    index.each_pair do |_key, value|
      track stats, value
    end

    analysis[:weights] ||= {}
    analysis[:weights][:weight_range]   = (stats[:min]..stats[:max])
    # to_f: Integer weights must not result in a truncated average.
    #
    analysis[:weights][:weight_average] = stats[:sum].to_f / index.size
  end

  # Adds comments regarding the weights statistics.
  #
  # Does nothing if no weights statistics have been collected.
  #
  def weights_analysis
    return unless analysis[:weights]

    range = analysis[:weights][:weight_range]

    if range.max == 0.0
      comments << "\033[31mThere's only one id per key – you'll only get single results.\033[m"
    end
  end

  # Returns all comments and statistics, one per line.
  #
  def to_s
    [*comments, index_to_s, weights_to_s, similarity_to_s, configuration_to_s].compact.join "\n"
  end

  # Returns the index statistics as text, or nil if there
  # is nothing to report (no keys or nothing analyzed yet).
  #
  def index_to_s
    keys  = analysis[:__keys]
    index = analysis[:index]
    return if keys.nil? || keys.zero? || index.nil?

    [
      "index key cardinality:                #{"%10d" % keys}",
      "index key length range (avg):         #{"%10s" % index[:key_length]} (#{index[:key_length_average].round(2)})",
      "index ids per key length range (avg): #{"%10s" % index[:ids_length]} (#{index[:ids_length_average].round(2)})"
    ].join("\n")
  end

  # Returns the weights statistics as text, or nil if there are none.
  #
  def weights_to_s
    return unless analysis[:weights]
    %Q{weights range (avg):                  #{"%10s" % analysis[:weights][:weight_range]} (#{analysis[:weights][:weight_average].round(2)})}
  end

  # Returns the similarity statistics as text, or nil if there are none.
  #
  def similarity_to_s
    return unless analysis[:similarity]
    %Q{similarity key length range (avg):    #{"%10s" % analysis[:similarity][:key_length]} (#{analysis[:similarity][:key_length_average].round(2)})}
  end

  # Configuration is currently not analyzed, so this returns nil.
  #
  def configuration_to_s
    # analysis[:configuration]
  end

  private

  # Returns a fresh min/max/sum accumulator.
  #
  def new_stats
    { min: nil, max: nil, sum: 0 }
  end

  # Folds a single value into the given accumulator.
  #
  # Min and max are deliberately checked independently: the first
  # value is both the smallest and the largest value seen so far.
  #
  def track stats, value
    stats[:min] = value if stats[:min].nil? || value < stats[:min]
    stats[:max] = value if stats[:max].nil? || value > stats[:max]
    stats[:sum] += value
  end

end
|
data/lib/picky/application.rb
CHANGED
@@ -19,12 +19,12 @@
|
|
19
19
|
# my_index = Index::Memory.new :some_index_name, some_source
|
20
20
|
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
21
21
|
# class MyGreatSearch < Application
|
22
|
-
#
|
22
|
+
#
|
23
23
|
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
24
|
-
#
|
24
|
+
#
|
25
25
|
# end
|
26
26
|
# Now we have an index <tt>books</tt>.
|
27
|
-
#
|
27
|
+
#
|
28
28
|
# That on itself won't do much good.
|
29
29
|
#
|
30
30
|
# Note that a Redis index is also available: Index::Redis.new.
|
@@ -38,7 +38,7 @@
|
|
38
38
|
#
|
39
39
|
# Let's go ahead and define a category:
|
40
40
|
# class MyGreatSearch < Application
|
41
|
-
#
|
41
|
+
#
|
42
42
|
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
43
43
|
# books.define_category :title
|
44
44
|
#
|
@@ -56,16 +56,16 @@
|
|
56
56
|
# full_books_query = Query::Full.new books
|
57
57
|
# Full just means that the ids are returned with the results.
|
58
58
|
# Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
|
59
|
-
#
|
59
|
+
#
|
60
60
|
# Now we have somebody we can ask about the index. But no external interface.
|
61
|
-
#
|
61
|
+
#
|
62
62
|
# == route(/regexp1/ => query1, /regexp2/ => query2, ...)
|
63
63
|
#
|
64
64
|
# Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
|
65
65
|
# route %r{^/books/full$} => full_books_query
|
66
66
|
# In full glory:
|
67
67
|
# class MyGreatSearch < Application
|
68
|
-
#
|
68
|
+
#
|
69
69
|
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
70
70
|
# books.define_category :title
|
71
71
|
#
|
@@ -110,7 +110,7 @@
|
|
110
110
|
#
|
111
111
|
# Our example, fully fleshed out with indexing, querying, and weights:
|
112
112
|
# class MyGreatSearch < Application
|
113
|
-
#
|
113
|
+
#
|
114
114
|
# default_indexing removes_characters: /[^a-zA-Z0-9\.]/,
|
115
115
|
# stopwords: /\b(and|or|in|on|is|has)\b/,
|
116
116
|
# splits_text_on: /\s/,
|
@@ -127,7 +127,7 @@
|
|
127
127
|
# removes_characters_after_splitting: /\./,
|
128
128
|
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
129
129
|
# maximum_tokens: 4
|
130
|
-
#
|
130
|
+
#
|
131
131
|
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
132
132
|
# books.define_category :title,
|
133
133
|
# qualifiers: [:t, :title, :titre],
|
@@ -136,36 +136,36 @@
|
|
136
136
|
# books.define_category :author,
|
137
137
|
# partial: Partial::Substring.new(:from => -2)
|
138
138
|
# books.define_category :isbn
|
139
|
-
#
|
139
|
+
#
|
140
140
|
# query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
|
141
|
-
#
|
141
|
+
#
|
142
142
|
# route %r{^/books/full$} => Query::Full.new(books, query_options)
|
143
143
|
# route %r{^/books/live$} => Query::Live.new(books, query_options)
|
144
|
-
#
|
144
|
+
#
|
145
145
|
# end
|
146
146
|
# That's actually already a full-blown Picky App!
|
147
147
|
#
|
148
148
|
class Application
|
149
|
-
|
149
|
+
|
150
150
|
class << self
|
151
|
-
|
151
|
+
|
152
152
|
# API
|
153
153
|
#
|
154
|
-
|
154
|
+
|
155
155
|
# Returns a configured tokenizer that
|
156
156
|
# is used for indexing by default.
|
157
|
-
#
|
157
|
+
#
|
158
158
|
def default_indexing options = {}
|
159
159
|
Internals::Tokenizers::Index.default = Internals::Tokenizers::Index.new(options)
|
160
160
|
end
|
161
|
-
|
161
|
+
|
162
162
|
# Returns a configured tokenizer that
|
163
163
|
# is used for querying by default.
|
164
|
-
#
|
164
|
+
#
|
165
165
|
def default_querying options = {}
|
166
166
|
Internals::Tokenizers::Query.default = Internals::Tokenizers::Query.new(options)
|
167
167
|
end
|
168
|
-
|
168
|
+
|
169
169
|
# Create a new index for indexing and for querying.
|
170
170
|
#
|
171
171
|
# Parameters:
|
@@ -176,22 +176,23 @@ class Application
|
|
176
176
|
# * source: The source the data comes from. See Sources::Base.
|
177
177
|
#
|
178
178
|
# Options:
|
179
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
179
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
180
180
|
#
|
181
|
-
# TODO
|
181
|
+
# TODO Remove in 1.6.
|
182
182
|
#
|
183
183
|
def index name, source, options = {}
|
184
|
+
raise "the Picky application method #index is deprecated, please use Index::Memory.new instead."
|
184
185
|
Index::Memory.new name, source, options
|
185
186
|
end
|
186
|
-
|
187
|
+
|
187
188
|
# Routes.
|
188
189
|
#
|
189
190
|
delegate :route, :root, :to => :rack_adapter
|
190
|
-
|
191
|
+
|
191
192
|
#
|
192
193
|
# API
|
193
|
-
|
194
|
-
|
194
|
+
|
195
|
+
|
195
196
|
# A Picky application implements the Rack interface.
|
196
197
|
#
|
197
198
|
# Delegates to its routing to handle a request.
|
@@ -202,7 +203,7 @@ class Application
|
|
202
203
|
def rack_adapter # :nodoc:
|
203
204
|
@rack_adapter ||= Internals::FrontendAdapters::Rack.new
|
204
205
|
end
|
205
|
-
|
206
|
+
|
206
207
|
# Finalize the subclass as soon as it
|
207
208
|
# has finished loading.
|
208
209
|
#
|
@@ -233,18 +234,37 @@ class Application
|
|
233
234
|
def check # :nodoc:
|
234
235
|
warnings = []
|
235
236
|
warnings << check_external_interface
|
236
|
-
|
237
|
+
warn "\n#{warnings.join(?\n)}\n\n" unless warnings.all? &:nil?
|
237
238
|
end
|
238
239
|
def check_external_interface
|
239
240
|
"WARNING: No routes defined for application configuration in #{self.class}." if rack_adapter.empty?
|
240
241
|
end
|
241
|
-
|
242
|
-
# TODO Add more info if possible.
|
243
|
-
#
|
242
|
+
|
244
243
|
def to_s # :nodoc:
|
245
|
-
|
244
|
+
<<-APPLICATION
|
245
|
+
\033[1m#{name}\033[m
|
246
|
+
#{to_stats.indented_to_s}
|
247
|
+
APPLICATION
|
248
|
+
end
|
249
|
+
def to_stats
|
250
|
+
<<-APP
|
251
|
+
\033[1mIndexing (default)\033[m:
|
252
|
+
#{Internals::Tokenizers::Index.default.indented_to_s}
|
253
|
+
|
254
|
+
\033[1mQuerying (default)\033[m:
|
255
|
+
#{Internals::Tokenizers::Query.default.indented_to_s}
|
256
|
+
|
257
|
+
\033[1mIndexes\033[m:
|
258
|
+
#{Indexes.to_s.indented_to_s}
|
259
|
+
|
260
|
+
\033[1mRoutes\033[m:
|
261
|
+
#{to_routes.indented_to_s}
|
262
|
+
APP
|
263
|
+
end
|
264
|
+
def to_routes
|
265
|
+
rack_adapter.to_s
|
246
266
|
end
|
247
|
-
|
267
|
+
|
248
268
|
end
|
249
|
-
|
269
|
+
|
250
270
|
end
|
@@ -6,28 +6,32 @@ module CharacterSubstituters # :nodoc:all
|
|
6
6
|
# (and more, see specs)
|
7
7
|
#
|
8
8
|
class WestEuropean
|
9
|
-
|
9
|
+
|
10
10
|
def initialize
|
11
11
|
@chars = ActiveSupport::Multibyte.proxy_class
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
|
+
def to_s
|
15
|
+
self.class.name
|
16
|
+
end
|
17
|
+
|
14
18
|
def substitute text
|
15
19
|
trans = @chars.new(text).normalize(:kd)
|
16
|
-
|
20
|
+
|
17
21
|
# substitute special cases
|
18
22
|
#
|
19
23
|
trans.gsub!('ß', 'ss')
|
20
|
-
|
24
|
+
|
21
25
|
# substitute umlauts (of A,O,U,a,o,u)
|
22
26
|
#
|
23
27
|
trans.gsub!(/([AOUaou])\314\210/u, '\1e')
|
24
|
-
|
28
|
+
|
25
29
|
# get rid of ecutes, graves and …
|
26
30
|
#
|
27
31
|
trans.unpack('U*').select { |cp|
|
28
32
|
cp < 0x0300 || cp > 0x035F
|
29
33
|
}.pack('U*')
|
30
34
|
end
|
31
|
-
|
35
|
+
|
32
36
|
end
|
33
37
|
end
|
data/lib/picky/cli.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Picky
|
2
|
-
|
2
|
+
|
3
3
|
# A very simple CLI selector.
|
4
4
|
#
|
5
5
|
class CLI # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
# Execute a command.
|
8
8
|
#
|
9
9
|
# Note: By default, help is displayed. I.e. when no command is given.
|
@@ -16,7 +16,7 @@ module Picky
|
|
16
16
|
def executor_class_for selector = nil
|
17
17
|
selector && @@mapping[selector.to_sym] || [Help]
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
class Base
|
21
21
|
def usage name, params
|
22
22
|
puts "Usage\n picky #{name} #{params_to_s(params)}"
|
@@ -31,20 +31,20 @@ module Picky
|
|
31
31
|
def execute name, args, params
|
32
32
|
relative_log_file = args.shift
|
33
33
|
port = args.shift
|
34
|
-
|
34
|
+
|
35
35
|
usage(name, params) || exit(1) unless relative_log_file
|
36
|
-
|
37
|
-
ENV['PICKY_LOG_FILE'] = File.expand_path relative_log_file
|
36
|
+
|
37
|
+
ENV['PICKY_LOG_FILE'] = File.expand_path relative_log_file
|
38
38
|
ENV['PICKY_STATISTICS_PORT'] = port
|
39
|
-
|
39
|
+
|
40
40
|
begin
|
41
41
|
require 'picky-statistics'
|
42
42
|
rescue LoadError => e
|
43
43
|
require 'picky/extensions/object'
|
44
|
-
|
44
|
+
warn_gem_missing 'picky-statistics', 'the Picky statistics'
|
45
45
|
exit 1
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
require 'picky-statistics/application/app'
|
49
49
|
end
|
50
50
|
end
|
@@ -52,20 +52,20 @@ module Picky
|
|
52
52
|
def execute name, args, params
|
53
53
|
url = args.shift
|
54
54
|
port = args.shift
|
55
|
-
|
55
|
+
|
56
56
|
usage(name, params) || exit(1) unless args.empty?
|
57
|
-
|
57
|
+
|
58
58
|
ENV['PICKY_LIVE_URL'] = url
|
59
59
|
ENV['PICKY_LIVE_PORT'] = port
|
60
|
-
|
60
|
+
|
61
61
|
begin
|
62
62
|
require 'picky-live'
|
63
63
|
rescue LoadError => e
|
64
64
|
require 'picky/extensions/object'
|
65
|
-
|
65
|
+
warn_gem_missing 'picky-live', 'the Picky Live Interface'
|
66
66
|
exit 1
|
67
67
|
end
|
68
|
-
|
68
|
+
|
69
69
|
require 'picky-live/application/app'
|
70
70
|
end
|
71
71
|
end
|
@@ -82,11 +82,11 @@ module Picky
|
|
82
82
|
_, *params = object_and_params
|
83
83
|
" picky #{command} #{params_to_s(params)}"
|
84
84
|
end.join(?\n)
|
85
|
-
|
85
|
+
|
86
86
|
puts "Possible commands:\n" + commands
|
87
87
|
end
|
88
88
|
end
|
89
|
-
|
89
|
+
|
90
90
|
# Maps commands to the other gem's command.
|
91
91
|
#
|
92
92
|
@@mapping = {
|
@@ -98,7 +98,7 @@ module Picky
|
|
98
98
|
def self.mapping
|
99
99
|
@@mapping
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
end
|
103
|
-
|
103
|
+
|
104
104
|
end
|
data/lib/picky/index/base.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Index
|
2
|
-
|
2
|
+
|
3
3
|
# This class defines the indexing and index API that is exposed to the user
|
4
4
|
# as the #index method inside the Application class.
|
5
5
|
#
|
6
6
|
# It provides a single front for both indexing and index options. We suggest to always use the index API.
|
7
7
|
#
|
8
8
|
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
|
9
|
-
#
|
9
|
+
#
|
10
10
|
class Base
|
11
11
|
|
12
12
|
attr_reader :name, :indexing, :indexed
|
@@ -22,14 +22,45 @@ module Index
|
|
22
22
|
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
23
23
|
#
|
24
24
|
def initialize name, source, options = {}
|
25
|
-
|
25
|
+
check name, source
|
26
|
+
|
27
|
+
@name = name.to_sym
|
26
28
|
@indexing = Internals::Indexing::Index.new name, source, options
|
27
29
|
@indexed = Internals::Indexed::Index.new name, options
|
28
|
-
|
30
|
+
|
29
31
|
# Centralized registry.
|
30
32
|
#
|
31
33
|
Indexes.register self
|
32
34
|
end
|
35
|
+
#
|
36
|
+
# Since this is an API, we fail hard quickly.
|
37
|
+
#
|
38
|
+
def check name, source
|
39
|
+
raise ArgumentError.new(<<-NAME
|
40
|
+
The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a String/Symbol,
|
41
|
+
Examples:
|
42
|
+
Index::Memory.new(:my_cool_index, ...) # Recommended
|
43
|
+
Index::Redis.new("a-redis-index", ...)
|
44
|
+
NAME
|
45
|
+
) unless name.respond_to?(:to_sym)
|
46
|
+
raise ArgumentError.new(<<-SOURCE
|
47
|
+
The index "#{name}" should use a data source that responds to the method #harvest, which yields(id, text).
|
48
|
+
Or it could use one of the built-in sources:
|
49
|
+
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
50
|
+
Sources::')}
|
51
|
+
SOURCE
|
52
|
+
) unless source.respond_to?(:harvest)
|
53
|
+
end
|
54
|
+
|
55
|
+
def to_stats
|
56
|
+
stats = <<-INDEX
|
57
|
+
#{name} (#{self.class}):
|
58
|
+
#{"source: #{indexing.source}".indented_to_s}
|
59
|
+
#{"categories: #{indexing.categories.categories.map(&:name).join(', ')}".indented_to_s}
|
60
|
+
INDEX
|
61
|
+
stats << " result identifier: \"#{indexed.result_identifier}\"".indented_to_s unless indexed.result_identifier.to_s == indexed.name.to_s
|
62
|
+
stats
|
63
|
+
end
|
33
64
|
|
34
65
|
# Defines a searchable category on the index.
|
35
66
|
#
|
@@ -46,12 +77,12 @@ module Index
|
|
46
77
|
#
|
47
78
|
def define_category category_name, options = {}
|
48
79
|
category_name = category_name.to_sym
|
49
|
-
|
80
|
+
|
50
81
|
indexing_category = indexing.define_category category_name, options
|
51
82
|
indexed_category = indexed.define_category category_name, options
|
52
|
-
|
83
|
+
|
53
84
|
yield indexing_category, indexed_category if block_given?
|
54
|
-
|
85
|
+
|
55
86
|
self
|
56
87
|
end
|
57
88
|
alias category define_category
|
@@ -87,7 +118,7 @@ module Index
|
|
87
118
|
# x:133, y:120
|
88
119
|
#
|
89
120
|
# This will search this square area (* = 133, 120: The "search" point entered):
|
90
|
-
#
|
121
|
+
#
|
91
122
|
# 132 134
|
92
123
|
# | |
|
93
124
|
# --|---------|-- 121
|
@@ -95,7 +126,7 @@ module Index
|
|
95
126
|
# | * |
|
96
127
|
# | |
|
97
128
|
# --|---------|-- 119
|
98
|
-
# | |
|
129
|
+
# | |
|
99
130
|
#
|
100
131
|
# Note: The area does not need to be square, but can be rectangular.
|
101
132
|
#
|
@@ -117,13 +148,13 @@ module Index
|
|
117
148
|
#
|
118
149
|
def define_ranged_category category_name, range, options = {}
|
119
150
|
precision = options[:precision]
|
120
|
-
|
151
|
+
|
121
152
|
options = { partial: Partial::None.new }.merge options
|
122
|
-
|
153
|
+
|
123
154
|
define_category category_name, options do |indexing, indexed|
|
124
155
|
indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
|
125
156
|
indexing.tokenizer = Internals::Tokenizers::Index.new
|
126
|
-
|
157
|
+
|
127
158
|
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
|
128
159
|
indexed.exact = exact_bundle
|
129
160
|
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
@@ -175,5 +206,5 @@ module Index
|
|
175
206
|
end
|
176
207
|
alias map_location define_map_location
|
177
208
|
end
|
178
|
-
|
209
|
+
|
179
210
|
end
|
data/lib/picky/index_bundle.rb
CHANGED
@@ -4,7 +4,12 @@ class IndexBundle # :nodoc:all
|
|
4
4
|
|
5
5
|
attr_reader :indexes, :index_mapping, :indexing, :indexed
|
6
6
|
|
7
|
-
delegate :
|
7
|
+
delegate :size,
|
8
|
+
:each,
|
9
|
+
:to => :indexes
|
10
|
+
|
11
|
+
delegate :analyze,
|
12
|
+
:reload,
|
8
13
|
:load_from_cache,
|
9
14
|
:to => :indexed
|
10
15
|
|
@@ -19,22 +24,26 @@ class IndexBundle # :nodoc:all
|
|
19
24
|
def initialize
|
20
25
|
@indexes = []
|
21
26
|
@index_mapping = {}
|
22
|
-
|
27
|
+
|
23
28
|
@indexed = Indexed::Indexes.new
|
24
29
|
@indexing = Indexing::Indexes.new
|
25
30
|
end
|
26
31
|
|
32
|
+
def to_s
|
33
|
+
indexes.map &:to_stats
|
34
|
+
end
|
35
|
+
|
27
36
|
def register index
|
28
37
|
self.indexes << index
|
29
38
|
self.index_mapping[index.name] = index
|
30
|
-
|
39
|
+
|
31
40
|
indexing.register index.indexing
|
32
41
|
indexed.register index.indexed
|
33
42
|
end
|
34
43
|
|
35
44
|
def [] name
|
36
45
|
name = name.to_sym
|
37
|
-
|
46
|
+
|
38
47
|
self.index_mapping[name]
|
39
48
|
end
|
40
49
|
|