picky 1.5.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
@@ -0,0 +1,154 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
|
4
|
+
# Analyzes indexes (index bundles, actually).
|
5
|
+
#
|
6
|
+
# Can be output using to_s.
|
7
|
+
#
|
8
|
+
class Analyzer

  # analysis: Hash of the statistics gathered so far. Keys written by this
  #           class are :__keys, :index, :weights and :similarity.
  # comments: Array of ANSI-colored remarks added by the *_analysis methods.
  #
  attr_reader :analysis, :comments

  # Starts out with an empty analysis and no comments.
  #
  def initialize
    @analysis = {}
    @comments = []
  end

  # Analyzes the given bundle: its index, its weights and its similarity,
  # in that order. Each part is loaded, analyzed and cleared again before
  # the next part is loaded.
  #
  # The bundle needs to respond to size, load_*/clear_* and index/weights/
  # similarity, where the latter three respond to size and each_pair.
  #
  # Returns self, so the result can directly be printed using to_s.
  #
  def analyze bundle
    bundle.load_index
    analysis[:__keys] = bundle.size
    cardinality :index, bundle.index
    index_analysis
    bundle.clear_index

    bundle.load_weights
    weights bundle.weights
    weights_analysis
    bundle.clear_weights

    bundle.load_similarity
    cardinality :similarity, bundle.similarity
    bundle.clear_similarity

    # bundle.load_configuration
    # analysis[:configuration] = bundle.configuration
    # bundle.clear_configuration

    self
  end

  # Collects key length and ids-per-key statistics of the given index
  # (anything that responds to size and each_pair) and stores them in
  # analysis[identifier] as:
  #   * :key_length          Range, shortest..longest key.
  #   * :ids_length          Range, fewest..most ids per key.
  #   * :key_length_average  Float.
  #   * :ids_length_average  Float.
  #
  # Does nothing if the index is empty.
  #
  def cardinality identifier, index
    return if index.size.zero?

    key_length_sum = 0
    ids_length_sum = 0

    min_key_length = min_ids_length = Float::INFINITY
    max_key_length = max_ids_length = 0

    index.each_pair do |key, ids|
      # Minimum and maximum are tracked independently of each other:
      # The very first entry (or a new minimum) can also be the maximum.
      #
      key_size = key.size
      min_key_length = key_size if key_size < min_key_length
      max_key_length = key_size if key_size > max_key_length
      key_length_sum += key_size

      ids_size = ids.size
      min_ids_length = ids_size if ids_size < min_ids_length
      max_ids_length = ids_size if ids_size > max_ids_length
      ids_length_sum += ids_size
    end

    index_size = index.size

    analysis[identifier] ||= {}
    analysis[identifier][:key_length]         = (min_key_length..max_key_length)
    analysis[identifier][:ids_length]         = (min_ids_length..max_ids_length)
    analysis[identifier][:key_length_average] = key_length_sum.to_f / index_size
    analysis[identifier][:ids_length_average] = ids_length_sum.to_f / index_size
  end

  # Adds comments on the index statistics gathered by cardinality:
  #   * if there are fewer than 100 keys,
  #   * if the shortest key consists of a single character.
  #
  # Does nothing if no index statistics are available.
  #
  def index_analysis
    return unless analysis[:index]

    if analysis[:__keys] < 100
      comments << "\033[33mVery small index (< 100 keys).\033[m"
    end

    if analysis[:index][:key_length].min == 1
      comments << "\033[33mIndex matches single characters.\033[m"
    end
  end

  # Collects the range and the average of the values of the given weights
  # index (anything that responds to size and each_pair) and stores them
  # in analysis[:weights] as :weight_range (Range) and :weight_average (Float).
  #
  # Does nothing if the weights index is empty.
  #
  def weights index
    return if index.size.zero?

    min_weight = Float::INFINITY
    max_weight = -Float::INFINITY
    weight_sum = 0

    index.each_pair do |_key, value|
      # See cardinality: Minimum and maximum need to be tracked independently.
      #
      min_weight = value if value < min_weight
      max_weight = value if value > max_weight
      weight_sum += value
    end

    analysis[:weights] ||= {}
    analysis[:weights][:weight_range]   = (min_weight..max_weight)
    # to_f: Do not truncate the average if the weights are Integers.
    #
    analysis[:weights][:weight_average] = weight_sum.to_f / index.size
  end

  # Adds a comment if the highest weight is 0.0.
  #
  # Note: The message presumes that a weight of 0.0 stands for a key with
  # exactly one id. How weights are calculated is not visible here.
  #
  # Does nothing if no weights statistics are available.
  #
  def weights_analysis
    return unless analysis[:weights]

    if analysis[:weights][:weight_range].max == 0.0
      comments << "\033[31mThere's only one id per key – you'll only get single results.\033[m"
    end
  end

  # All comments, followed by the index, weights, similarity and
  # configuration statistics, one per line. Parts without data are left out.
  #
  def to_s
    [*comments, index_to_s, weights_to_s, similarity_to_s, configuration_to_s].compact.join "\n"
  end

  # Index statistics as text, or nil if there is nothing to show (no keys,
  # or analyze has not been run yet).
  #
  def index_to_s
    return if analysis[:__keys].to_i.zero? || analysis[:index].nil?
    [
      "index key cardinality: #{"%10d" % analysis[:__keys]}",
      "index key length range (avg): #{"%10s" % analysis[:index][:key_length]} (#{analysis[:index][:key_length_average].round(2)})",
      "index ids per key length range (avg): #{"%10s" % analysis[:index][:ids_length]} (#{analysis[:index][:ids_length_average].round(2)})"
    ].join("\n")
  end

  # Weights statistics as text, or nil if there are none.
  #
  def weights_to_s
    return unless analysis[:weights]
    %Q{weights range (avg): #{"%10s" % analysis[:weights][:weight_range]} (#{analysis[:weights][:weight_average].round(2)})}
  end

  # Similarity statistics as text, or nil if there are none.
  #
  def similarity_to_s
    return unless analysis[:similarity]
    %Q{similarity key length range (avg): #{"%10s" % analysis[:similarity][:key_length]} (#{analysis[:similarity][:key_length_average].round(2)})}
  end

  # Placeholder: The configuration is currently not analyzed, so this
  # always returns nil.
  #
  def configuration_to_s
    # analysis[:configuration]
  end

end
|
data/lib/picky/application.rb
CHANGED
@@ -19,12 +19,12 @@
|
|
19
19
|
# my_index = Index::Memory.new :some_index_name, some_source
|
20
20
|
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
21
21
|
# class MyGreatSearch < Application
|
22
|
-
#
|
22
|
+
#
|
23
23
|
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
24
|
-
#
|
24
|
+
#
|
25
25
|
# end
|
26
26
|
# Now we have an index <tt>books</tt>.
|
27
|
-
#
|
27
|
+
#
|
28
28
|
# That in itself won't do much good.
|
29
29
|
#
|
30
30
|
# Note that a Redis index is also available: Index::Redis.new.
|
@@ -38,7 +38,7 @@
|
|
38
38
|
#
|
39
39
|
# Let's go ahead and define a category:
|
40
40
|
# class MyGreatSearch < Application
|
41
|
-
#
|
41
|
+
#
|
42
42
|
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
43
43
|
# books.define_category :title
|
44
44
|
#
|
@@ -56,16 +56,16 @@
|
|
56
56
|
# full_books_query = Query::Full.new books
|
57
57
|
# Full just means that the ids are returned with the results.
|
58
58
|
# Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
|
59
|
-
#
|
59
|
+
#
|
60
60
|
# Now we have somebody we can ask about the index. But no external interface.
|
61
|
-
#
|
61
|
+
#
|
62
62
|
# == route(/regexp1/ => query1, /regexp2/ => query2, ...)
|
63
63
|
#
|
64
64
|
# Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
|
65
65
|
# route %r{^/books/full$} => full_books_query
|
66
66
|
# In full glory:
|
67
67
|
# class MyGreatSearch < Application
|
68
|
-
#
|
68
|
+
#
|
69
69
|
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
70
70
|
# books.define_category :title
|
71
71
|
#
|
@@ -110,7 +110,7 @@
|
|
110
110
|
#
|
111
111
|
# Our example, fully fleshed out with indexing, querying, and weights:
|
112
112
|
# class MyGreatSearch < Application
|
113
|
-
#
|
113
|
+
#
|
114
114
|
# default_indexing removes_characters: /[^a-zA-Z0-9\.]/,
|
115
115
|
# stopwords: /\b(and|or|in|on|is|has)\b/,
|
116
116
|
# splits_text_on: /\s/,
|
@@ -127,7 +127,7 @@
|
|
127
127
|
# removes_characters_after_splitting: /\./,
|
128
128
|
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
129
129
|
# maximum_tokens: 4
|
130
|
-
#
|
130
|
+
#
|
131
131
|
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
132
132
|
# books.define_category :title,
|
133
133
|
# qualifiers: [:t, :title, :titre],
|
@@ -136,36 +136,36 @@
|
|
136
136
|
# books.define_category :author,
|
137
137
|
# partial: Partial::Substring.new(:from => -2)
|
138
138
|
# books.define_category :isbn
|
139
|
-
#
|
139
|
+
#
|
140
140
|
# query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
|
141
|
-
#
|
141
|
+
#
|
142
142
|
# route %r{^/books/full$} => Query::Full.new(books, query_options)
|
143
143
|
# route %r{^/books/live$} => Query::Live.new(books, query_options)
|
144
|
-
#
|
144
|
+
#
|
145
145
|
# end
|
146
146
|
# That's actually already a full-blown Picky App!
|
147
147
|
#
|
148
148
|
class Application
|
149
|
-
|
149
|
+
|
150
150
|
class << self
|
151
|
-
|
151
|
+
|
152
152
|
# API
|
153
153
|
#
|
154
|
-
|
154
|
+
|
155
155
|
# Returns a configured tokenizer that
|
156
156
|
# is used for indexing by default.
|
157
|
-
#
|
157
|
+
#
|
158
158
|
def default_indexing options = {}
|
159
159
|
Internals::Tokenizers::Index.default = Internals::Tokenizers::Index.new(options)
|
160
160
|
end
|
161
|
-
|
161
|
+
|
162
162
|
# Returns a configured tokenizer that
|
163
163
|
# is used for querying by default.
|
164
|
-
#
|
164
|
+
#
|
165
165
|
def default_querying options = {}
|
166
166
|
Internals::Tokenizers::Query.default = Internals::Tokenizers::Query.new(options)
|
167
167
|
end
|
168
|
-
|
168
|
+
|
169
169
|
# Create a new index for indexing and for querying.
|
170
170
|
#
|
171
171
|
# Parameters:
|
@@ -176,22 +176,23 @@ class Application
|
|
176
176
|
# * source: The source the data comes from. See Sources::Base.
|
177
177
|
#
|
178
178
|
# Options:
|
179
|
-
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
179
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
180
180
|
#
|
181
|
-
# TODO
|
181
|
+
# TODO Remove in 1.6.
|
182
182
|
#
|
183
183
|
def index name, source, options = {}
|
184
|
+
raise "the Picky application method #index is deprecated, please use Index::Memory.new instead."
|
184
185
|
Index::Memory.new name, source, options
|
185
186
|
end
|
186
|
-
|
187
|
+
|
187
188
|
# Routes.
|
188
189
|
#
|
189
190
|
delegate :route, :root, :to => :rack_adapter
|
190
|
-
|
191
|
+
|
191
192
|
#
|
192
193
|
# API
|
193
|
-
|
194
|
-
|
194
|
+
|
195
|
+
|
195
196
|
# A Picky application implements the Rack interface.
|
196
197
|
#
|
197
198
|
# Delegates to its routing to handle a request.
|
@@ -202,7 +203,7 @@ class Application
|
|
202
203
|
def rack_adapter # :nodoc:
|
203
204
|
@rack_adapter ||= Internals::FrontendAdapters::Rack.new
|
204
205
|
end
|
205
|
-
|
206
|
+
|
206
207
|
# Finalize the subclass as soon as it
|
207
208
|
# has finished loading.
|
208
209
|
#
|
@@ -233,18 +234,37 @@ class Application
|
|
233
234
|
def check # :nodoc:
|
234
235
|
warnings = []
|
235
236
|
warnings << check_external_interface
|
236
|
-
|
237
|
+
warn "\n#{warnings.join(?\n)}\n\n" unless warnings.all? &:nil?
|
237
238
|
end
|
238
239
|
def check_external_interface
|
239
240
|
"WARNING: No routes defined for application configuration in #{self.class}." if rack_adapter.empty?
|
240
241
|
end
|
241
|
-
|
242
|
-
# TODO Add more info if possible.
|
243
|
-
#
|
242
|
+
|
244
243
|
def to_s # :nodoc:
|
245
|
-
|
244
|
+
<<-APPLICATION
|
245
|
+
\033[1m#{name}\033[m
|
246
|
+
#{to_stats.indented_to_s}
|
247
|
+
APPLICATION
|
248
|
+
end
|
249
|
+
def to_stats
|
250
|
+
<<-APP
|
251
|
+
\033[1mIndexing (default)\033[m:
|
252
|
+
#{Internals::Tokenizers::Index.default.indented_to_s}
|
253
|
+
|
254
|
+
\033[1mQuerying (default)\033[m:
|
255
|
+
#{Internals::Tokenizers::Query.default.indented_to_s}
|
256
|
+
|
257
|
+
\033[1mIndexes\033[m:
|
258
|
+
#{Indexes.to_s.indented_to_s}
|
259
|
+
|
260
|
+
\033[1mRoutes\033[m:
|
261
|
+
#{to_routes.indented_to_s}
|
262
|
+
APP
|
263
|
+
end
|
264
|
+
def to_routes
|
265
|
+
rack_adapter.to_s
|
246
266
|
end
|
247
|
-
|
267
|
+
|
248
268
|
end
|
249
|
-
|
269
|
+
|
250
270
|
end
|
@@ -6,28 +6,32 @@ module CharacterSubstituters # :nodoc:all
|
|
6
6
|
# (and more, see specs)
|
7
7
|
#
|
8
8
|
class WestEuropean
|
9
|
-
|
9
|
+
|
10
10
|
def initialize
|
11
11
|
@chars = ActiveSupport::Multibyte.proxy_class
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
|
+
def to_s
|
15
|
+
self.class.name
|
16
|
+
end
|
17
|
+
|
14
18
|
def substitute text
|
15
19
|
trans = @chars.new(text).normalize(:kd)
|
16
|
-
|
20
|
+
|
17
21
|
# substitute special cases
|
18
22
|
#
|
19
23
|
trans.gsub!('ß', 'ss')
|
20
|
-
|
24
|
+
|
21
25
|
# substitute umlauts (of A,O,U,a,o,u)
|
22
26
|
#
|
23
27
|
trans.gsub!(/([AOUaou])\314\210/u, '\1e')
|
24
|
-
|
28
|
+
|
25
29
|
# get rid of acutes, graves and …
|
26
30
|
#
|
27
31
|
trans.unpack('U*').select { |cp|
|
28
32
|
cp < 0x0300 || cp > 0x035F
|
29
33
|
}.pack('U*')
|
30
34
|
end
|
31
|
-
|
35
|
+
|
32
36
|
end
|
33
37
|
end
|
data/lib/picky/cli.rb
CHANGED
@@ -1,9 +1,9 @@
|
|
1
1
|
module Picky
|
2
|
-
|
2
|
+
|
3
3
|
# A very simple CLI selector.
|
4
4
|
#
|
5
5
|
class CLI # :nodoc:all
|
6
|
-
|
6
|
+
|
7
7
|
# Execute a command.
|
8
8
|
#
|
9
9
|
# Note: By default, help is displayed. I.e. when no command is given.
|
@@ -16,7 +16,7 @@ module Picky
|
|
16
16
|
def executor_class_for selector = nil
|
17
17
|
selector && @@mapping[selector.to_sym] || [Help]
|
18
18
|
end
|
19
|
-
|
19
|
+
|
20
20
|
class Base
|
21
21
|
def usage name, params
|
22
22
|
puts "Usage\n picky #{name} #{params_to_s(params)}"
|
@@ -31,20 +31,20 @@ module Picky
|
|
31
31
|
def execute name, args, params
|
32
32
|
relative_log_file = args.shift
|
33
33
|
port = args.shift
|
34
|
-
|
34
|
+
|
35
35
|
usage(name, params) || exit(1) unless relative_log_file
|
36
|
-
|
37
|
-
ENV['PICKY_LOG_FILE'] = File.expand_path relative_log_file
|
36
|
+
|
37
|
+
ENV['PICKY_LOG_FILE'] = File.expand_path relative_log_file
|
38
38
|
ENV['PICKY_STATISTICS_PORT'] = port
|
39
|
-
|
39
|
+
|
40
40
|
begin
|
41
41
|
require 'picky-statistics'
|
42
42
|
rescue LoadError => e
|
43
43
|
require 'picky/extensions/object'
|
44
|
-
|
44
|
+
warn_gem_missing 'picky-statistics', 'the Picky statistics'
|
45
45
|
exit 1
|
46
46
|
end
|
47
|
-
|
47
|
+
|
48
48
|
require 'picky-statistics/application/app'
|
49
49
|
end
|
50
50
|
end
|
@@ -52,20 +52,20 @@ module Picky
|
|
52
52
|
def execute name, args, params
|
53
53
|
url = args.shift
|
54
54
|
port = args.shift
|
55
|
-
|
55
|
+
|
56
56
|
usage(name, params) || exit(1) unless args.empty?
|
57
|
-
|
57
|
+
|
58
58
|
ENV['PICKY_LIVE_URL'] = url
|
59
59
|
ENV['PICKY_LIVE_PORT'] = port
|
60
|
-
|
60
|
+
|
61
61
|
begin
|
62
62
|
require 'picky-live'
|
63
63
|
rescue LoadError => e
|
64
64
|
require 'picky/extensions/object'
|
65
|
-
|
65
|
+
warn_gem_missing 'picky-live', 'the Picky Live Interface'
|
66
66
|
exit 1
|
67
67
|
end
|
68
|
-
|
68
|
+
|
69
69
|
require 'picky-live/application/app'
|
70
70
|
end
|
71
71
|
end
|
@@ -82,11 +82,11 @@ module Picky
|
|
82
82
|
_, *params = object_and_params
|
83
83
|
" picky #{command} #{params_to_s(params)}"
|
84
84
|
end.join(?\n)
|
85
|
-
|
85
|
+
|
86
86
|
puts "Possible commands:\n" + commands
|
87
87
|
end
|
88
88
|
end
|
89
|
-
|
89
|
+
|
90
90
|
# Maps commands to the other gem's command.
|
91
91
|
#
|
92
92
|
@@mapping = {
|
@@ -98,7 +98,7 @@ module Picky
|
|
98
98
|
def self.mapping
|
99
99
|
@@mapping
|
100
100
|
end
|
101
|
-
|
101
|
+
|
102
102
|
end
|
103
|
-
|
103
|
+
|
104
104
|
end
|
data/lib/picky/index/base.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Index
|
2
|
-
|
2
|
+
|
3
3
|
# This class defines the indexing and index API that is exposed to the user
|
4
4
|
# as the #index method inside the Application class.
|
5
5
|
#
|
6
6
|
# It provides a single front for both indexing and index options. We suggest always using the index API.
|
7
7
|
#
|
8
8
|
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
|
9
|
-
#
|
9
|
+
#
|
10
10
|
class Base
|
11
11
|
|
12
12
|
attr_reader :name, :indexing, :indexed
|
@@ -22,14 +22,45 @@ module Index
|
|
22
22
|
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
23
23
|
#
|
24
24
|
def initialize name, source, options = {}
|
25
|
-
|
25
|
+
check name, source
|
26
|
+
|
27
|
+
@name = name.to_sym
|
26
28
|
@indexing = Internals::Indexing::Index.new name, source, options
|
27
29
|
@indexed = Internals::Indexed::Index.new name, options
|
28
|
-
|
30
|
+
|
29
31
|
# Centralized registry.
|
30
32
|
#
|
31
33
|
Indexes.register self
|
32
34
|
end
|
35
|
+
#
|
36
|
+
# Since this is an API, we fail hard quickly.
|
37
|
+
#
|
38
|
+
def check name, source
|
39
|
+
raise ArgumentError.new(<<-NAME
|
40
|
+
The index identifier (you gave "#{name}") for Index::Memory/Index::Redis should be a String/Symbol,
|
41
|
+
Examples:
|
42
|
+
Index::Memory.new(:my_cool_index, ...) # Recommended
|
43
|
+
Index::Redis.new("a-redis-index", ...)
|
44
|
+
NAME
|
45
|
+
) unless name.respond_to?(:to_sym)
|
46
|
+
raise ArgumentError.new(<<-SOURCE
|
47
|
+
The index "#{name}" should use a data source that responds to the method #harvest, which yields(id, text).
|
48
|
+
Or it could use one of the built-in sources:
|
49
|
+
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
50
|
+
Sources::')}
|
51
|
+
SOURCE
|
52
|
+
) unless source.respond_to?(:harvest)
|
53
|
+
end
|
54
|
+
|
55
|
+
def to_stats
|
56
|
+
stats = <<-INDEX
|
57
|
+
#{name} (#{self.class}):
|
58
|
+
#{"source: #{indexing.source}".indented_to_s}
|
59
|
+
#{"categories: #{indexing.categories.categories.map(&:name).join(', ')}".indented_to_s}
|
60
|
+
INDEX
|
61
|
+
stats << " result identifier: \"#{indexed.result_identifier}\"".indented_to_s unless indexed.result_identifier.to_s == indexed.name.to_s
|
62
|
+
stats
|
63
|
+
end
|
33
64
|
|
34
65
|
# Defines a searchable category on the index.
|
35
66
|
#
|
@@ -46,12 +77,12 @@ module Index
|
|
46
77
|
#
|
47
78
|
def define_category category_name, options = {}
|
48
79
|
category_name = category_name.to_sym
|
49
|
-
|
80
|
+
|
50
81
|
indexing_category = indexing.define_category category_name, options
|
51
82
|
indexed_category = indexed.define_category category_name, options
|
52
|
-
|
83
|
+
|
53
84
|
yield indexing_category, indexed_category if block_given?
|
54
|
-
|
85
|
+
|
55
86
|
self
|
56
87
|
end
|
57
88
|
alias category define_category
|
@@ -87,7 +118,7 @@ module Index
|
|
87
118
|
# x:133, y:120
|
88
119
|
#
|
89
120
|
# This will search this square area (* = 133, 120: The "search" point entered):
|
90
|
-
#
|
121
|
+
#
|
91
122
|
# 132 134
|
92
123
|
# | |
|
93
124
|
# --|---------|-- 121
|
@@ -95,7 +126,7 @@ module Index
|
|
95
126
|
# | * |
|
96
127
|
# | |
|
97
128
|
# --|---------|-- 119
|
98
|
-
# | |
|
129
|
+
# | |
|
99
130
|
#
|
100
131
|
# Note: The area does not need to be square, but can be rectangular.
|
101
132
|
#
|
@@ -117,13 +148,13 @@ module Index
|
|
117
148
|
#
|
118
149
|
def define_ranged_category category_name, range, options = {}
|
119
150
|
precision = options[:precision]
|
120
|
-
|
151
|
+
|
121
152
|
options = { partial: Partial::None.new }.merge options
|
122
|
-
|
153
|
+
|
123
154
|
define_category category_name, options do |indexing, indexed|
|
124
155
|
indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
|
125
156
|
indexing.tokenizer = Internals::Tokenizers::Index.new
|
126
|
-
|
157
|
+
|
127
158
|
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
|
128
159
|
indexed.exact = exact_bundle
|
129
160
|
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
@@ -175,5 +206,5 @@ module Index
|
|
175
206
|
end
|
176
207
|
alias map_location define_map_location
|
177
208
|
end
|
178
|
-
|
209
|
+
|
179
210
|
end
|
data/lib/picky/index_bundle.rb
CHANGED
@@ -4,7 +4,12 @@ class IndexBundle # :nodoc:all
|
|
4
4
|
|
5
5
|
attr_reader :indexes, :index_mapping, :indexing, :indexed
|
6
6
|
|
7
|
-
delegate :
|
7
|
+
delegate :size,
|
8
|
+
:each,
|
9
|
+
:to => :indexes
|
10
|
+
|
11
|
+
delegate :analyze,
|
12
|
+
:reload,
|
8
13
|
:load_from_cache,
|
9
14
|
:to => :indexed
|
10
15
|
|
@@ -19,22 +24,26 @@ class IndexBundle # :nodoc:all
|
|
19
24
|
def initialize
|
20
25
|
@indexes = []
|
21
26
|
@index_mapping = {}
|
22
|
-
|
27
|
+
|
23
28
|
@indexed = Indexed::Indexes.new
|
24
29
|
@indexing = Indexing::Indexes.new
|
25
30
|
end
|
26
31
|
|
32
|
+
def to_s
|
33
|
+
indexes.map &:to_stats
|
34
|
+
end
|
35
|
+
|
27
36
|
def register index
|
28
37
|
self.indexes << index
|
29
38
|
self.index_mapping[index.name] = index
|
30
|
-
|
39
|
+
|
31
40
|
indexing.register index.indexing
|
32
41
|
indexed.register index.indexed
|
33
42
|
end
|
34
43
|
|
35
44
|
def [] name
|
36
45
|
name = name.to_sym
|
37
|
-
|
46
|
+
|
38
47
|
self.index_mapping[name]
|
39
48
|
end
|
40
49
|
|