picky 4.0.0pre1 → 4.0.0pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. data/aux/picky/cli.rb +6 -2
  2. data/lib/picky.rb +10 -8
  3. data/lib/picky/backends/backend.rb +37 -0
  4. data/lib/picky/backends/file.rb +0 -20
  5. data/lib/picky/backends/memory.rb +0 -29
  6. data/lib/picky/backends/redis.rb +74 -15
  7. data/lib/picky/backends/redis/list.rb +1 -1
  8. data/lib/picky/backends/sqlite.rb +0 -27
  9. data/lib/picky/bundle.rb +2 -2
  10. data/lib/picky/bundle_indexed.rb +1 -1
  11. data/lib/picky/bundle_indexing.rb +1 -1
  12. data/lib/picky/categories_indexed.rb +1 -11
  13. data/lib/picky/category.rb +4 -4
  14. data/lib/picky/category/location.rb +25 -0
  15. data/lib/picky/category_realtime.rb +4 -3
  16. data/lib/picky/console.rb +1 -1
  17. data/lib/picky/constants.rb +1 -1
  18. data/lib/picky/ext/maybe_compile.rb +2 -2
  19. data/lib/picky/extensions/object.rb +3 -2
  20. data/lib/picky/generators/aliases.rb +7 -2
  21. data/lib/picky/generators/partial/default.rb +1 -0
  22. data/lib/picky/generators/similarity/default.rb +1 -0
  23. data/lib/picky/generators/similarity/phonetic.rb +13 -2
  24. data/lib/picky/generators/strategy.rb +0 -2
  25. data/lib/picky/generators/weights/constant.rb +1 -2
  26. data/lib/picky/generators/weights/default.rb +1 -0
  27. data/lib/picky/generators/weights/dynamic.rb +1 -1
  28. data/lib/picky/generators/weights/logarithmic.rb +1 -1
  29. data/lib/picky/generators/weights/{runtime.rb → stub.rb} +1 -3
  30. data/lib/picky/index.rb +3 -3
  31. data/lib/picky/index_indexing.rb +0 -2
  32. data/lib/picky/index_realtime.rb +1 -1
  33. data/lib/picky/indexers/base.rb +7 -0
  34. data/lib/picky/indexers/parallel.rb +2 -4
  35. data/lib/picky/indexers/serial.rb +2 -0
  36. data/lib/picky/indexes_indexing.rb +1 -1
  37. data/lib/picky/interfaces/live_parameters/master_child.rb +175 -0
  38. data/lib/picky/interfaces/live_parameters/unicorn.rb +37 -0
  39. data/lib/picky/loader.rb +238 -259
  40. data/lib/picky/query/allocation.rb +19 -10
  41. data/lib/picky/query/combination.rb +7 -1
  42. data/lib/picky/query/combinations.rb +1 -6
  43. data/lib/picky/query/token.rb +26 -36
  44. data/lib/picky/results.rb +18 -17
  45. data/lib/picky/scheduler.rb +2 -1
  46. data/lib/picky/search.rb +1 -1
  47. data/lib/picky/sinatra.rb +6 -6
  48. data/lib/picky/statistics.rb +2 -0
  49. data/lib/picky/tokenizer.rb +8 -8
  50. data/lib/picky/wrappers/bundle/calculation.rb +4 -4
  51. data/lib/picky/wrappers/bundle/location.rb +1 -2
  52. data/lib/tasks/framework.rake +1 -1
  53. data/lib/tasks/statistics.rake +1 -1
  54. data/lib/tasks/try.rake +1 -1
  55. data/lib/tasks/try.rb +1 -1
  56. data/spec/aux/picky/cli_spec.rb +12 -12
  57. data/spec/ext/performant_spec.rb +16 -16
  58. data/spec/functional/backends/file_spec.rb +78 -7
  59. data/spec/functional/backends/memory_spec.rb +78 -7
  60. data/spec/functional/backends/redis_spec.rb +73 -13
  61. data/spec/functional/dynamic_weights_spec.rb +3 -4
  62. data/spec/functional/realtime_spec.rb +2 -2
  63. data/spec/functional/speed_spec.rb +2 -2
  64. data/spec/functional/terminate_early_spec.rb +3 -3
  65. data/spec/lib/analytics_spec.rb +1 -1
  66. data/spec/lib/analyzer_spec.rb +5 -3
  67. data/spec/lib/categories_indexed_spec.rb +38 -20
  68. data/spec/lib/category/location_spec.rb +30 -0
  69. data/spec/lib/character_substituters/west_european_spec.rb +1 -0
  70. data/spec/lib/extensions/hash_spec.rb +6 -5
  71. data/spec/lib/extensions/module_spec.rb +6 -6
  72. data/spec/lib/extensions/object_spec.rb +9 -8
  73. data/spec/lib/extensions/string_spec.rb +1 -1
  74. data/spec/lib/generators/similarity/phonetic_spec.rb +11 -0
  75. data/spec/lib/index_realtime_spec.rb +5 -5
  76. data/spec/lib/interfaces/{live_parameters_spec.rb → live_parameters/master_child_spec.rb} +26 -26
  77. data/spec/lib/interfaces/live_parameters/unicorn_spec.rb +160 -0
  78. data/spec/lib/loader_spec.rb +65 -25
  79. data/spec/lib/query/allocation_spec.rb +25 -22
  80. data/spec/lib/query/combinations_spec.rb +13 -36
  81. data/spec/lib/query/token_spec.rb +144 -131
  82. data/spec/lib/query/tokens_spec.rb +14 -0
  83. data/spec/lib/results_spec.rb +14 -8
  84. data/spec/lib/search_spec.rb +1 -1
  85. data/spec/lib/sinatra_spec.rb +8 -8
  86. metadata +28 -91
  87. data/lib/picky/adapters/rack.rb +0 -34
  88. data/lib/picky/adapters/rack/base.rb +0 -27
  89. data/lib/picky/adapters/rack/live_parameters.rb +0 -37
  90. data/lib/picky/adapters/rack/search.rb +0 -67
  91. data/lib/picky/application.rb +0 -268
  92. data/lib/picky/frontend_adapters/rack.rb +0 -161
  93. data/lib/picky/interfaces/live_parameters.rb +0 -187
  94. data/lib/picky/sources/base.rb +0 -92
  95. data/lib/picky/sources/couch.rb +0 -76
  96. data/lib/picky/sources/csv.rb +0 -83
  97. data/lib/picky/sources/db.rb +0 -189
  98. data/lib/picky/sources/delicious.rb +0 -63
  99. data/lib/picky/sources/mongo.rb +0 -80
  100. data/lib/picky/wrappers/category/location.rb +0 -38
  101. data/lib/tasks/routes.rake +0 -8
  102. data/spec/lib/adapters/rack/base_spec.rb +0 -24
  103. data/spec/lib/adapters/rack/live_parameters_spec.rb +0 -26
  104. data/spec/lib/adapters/rack/query_spec.rb +0 -39
  105. data/spec/lib/application_spec.rb +0 -155
  106. data/spec/lib/frontend_adapters/rack_spec.rb +0 -294
  107. data/spec/lib/sources/base_spec.rb +0 -53
  108. data/spec/lib/sources/couch_spec.rb +0 -114
  109. data/spec/lib/sources/csv_spec.rb +0 -89
  110. data/spec/lib/sources/db_spec.rb +0 -125
  111. data/spec/lib/sources/delicious_spec.rb +0 -94
  112. data/spec/lib/sources/mongo_spec.rb +0 -50
@@ -1,67 +0,0 @@
1
- module Picky
2
-
3
- module Adapters
4
- # This is an adapter that is plugged into a Rack outlet.
5
- #
6
- # It looks at what is given to it and generates an appropriate
7
- # adapter for it.
8
- #
9
- # For example, if you give it a query, it will extract the query param etc.
10
- # and call search on it if it is called by Rack.
11
- #
12
- module Rack
13
-
14
- class Search < Base
15
-
16
- @@defaults = {
17
- query_key: 'query'.freeze,
18
- ids_key: 'ids'.freeze,
19
- offset_key: 'offset'.freeze,
20
- content_type: 'application/json'.freeze
21
- }
22
-
23
- def initialize query
24
- @query = query
25
- @defaults = @@defaults.dup
26
- end
27
-
28
- def to_app options = {}
29
- # For capturing in the lambda.
30
- #
31
- query = @query
32
- query_key = options[:query_key] || @defaults[:query_key]
33
- content_type = options[:content_type] || @defaults[:content_type]
34
-
35
- lambda do |env|
36
- params = ::Rack::Request.new(env).params
37
-
38
- results = query.search *extracted(params)
39
-
40
- Picky.logger && Picky.logger.info(results)
41
-
42
- respond_with results.to_json, content_type
43
- end
44
- end
45
-
46
- # Helper method to extract the params
47
- #
48
- # Defaults are 20 ids, offset 0.
49
- #
50
- UTF8_STRING = 'UTF-8'.freeze
51
- def extracted params
52
- [
53
- # query is encoded in ASCII
54
- #
55
- params[@defaults[:query_key]] && params[@defaults[:query_key]].force_encoding(UTF8_STRING),
56
- params[@defaults[:ids_key]] && params[@defaults[:ids_key]].to_i || 20,
57
- params[@defaults[:offset_key]] && params[@defaults[:offset_key]].to_i || 0
58
- ]
59
- end
60
-
61
- end
62
-
63
- end
64
-
65
- end
66
-
67
- end
@@ -1,268 +0,0 @@
1
- module Picky
2
-
3
- # = Picky Applications
4
- #
5
- # A Picky Application is where you configure the whole search engine.
6
- #
7
- # This is a step-by-step description on how to configure your Picky app.
8
- #
9
- # Start by subclassing Application:
10
- # class MyGreatSearch < Application
11
- # # Your configuration goes here.
12
- # end
13
- # The generator
14
- # $ picky generate unicorn_server project_name
15
- # will generate an example <tt>project_name/app/application.rb</tt> file for you
16
- # with some example code inside.
17
- #
18
- # == Index.new(name)
19
- #
20
- # Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Index.new</tt> method for that:
21
- # my_index = Index.new :some_index_name
22
- # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
23
- # class MyGreatSearch < Application
24
- #
25
- # books = Index.new :books do
26
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
27
- # end
28
- #
29
- # end
30
- # Now we have an index <tt>books</tt>.
31
- #
32
- # That in itself won't do much good.
33
- #
34
- # Note that a Redis index is also available: Index.new.
35
- #
36
- # == category(identifier, options = {})
37
- #
38
- # Picky needs us to define categories on the data.
39
- #
40
- # Categories help your user find data.
41
- # It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
42
- #
43
- # Let's go ahead and define a category:
44
- # class MyGreatSearch < Application
45
- #
46
- # books = Index.new :books do
47
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
48
- # category :title
49
- # end
50
- #
51
- # end
52
- # Now we could already run the indexer:
53
- # $ rake index
54
- #
55
- # (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
56
- #
57
- # So now we have indexed data (the title), but nobody to ask the index anything.
58
- #
59
- # == Search.new(*indexes, options = {})
60
- #
61
- # We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration):
62
- # books_search = Search.new books
63
- #
64
- # Now we have somebody we can ask about the index. But no external interface.
65
- #
66
- # == route(/regexp1/ => search1, /regexp2/ => search2, ...)
67
- #
68
- # Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
69
- # route %r{^/books$} => books_search
70
- # In full glory:
71
- # class MyGreatSearch < Application
72
- #
73
- # books = Index.new :books do
74
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
75
- # category :title
76
- # end
77
- #
78
- # route %r{^/books$} => Search.new(books)
79
- #
80
- # end
81
- # That's it!
82
- #
83
- # Now run the indexer and server:
84
- # $ rake index
85
- # $ rake start
86
- # Run your first query:
87
- # $ curl 'localhost:8080/books?query=hello server'
88
- #
89
- # Nice, right? Your first query!
90
- #
91
- # Maybe you don't find everything. We need to process the data before it goes into the index.
92
- #
93
- # == indexing(options = {})
94
- #
95
- # That's what the <tt>indexing</tt> method is for:
96
- # indexing options
97
- # Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
98
- #
99
- # Same thing with the search text – we need to process that as well.
100
- #
101
- # == searching(options = {})
102
- #
103
- # Analogous to the indexing method, we use the <tt>searching</tt> method.
104
- # searching options
105
- # Read more about the options here: http://github.com/floere/picky/wiki/Searching-Configuration
106
- #
107
- # And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
108
- #
109
- # == Wiki
110
- #
111
- # Read more in the Wiki: http://github.com/floere/picky/wiki
112
- #
113
- # Have fun!
114
- #
115
- # == Full example
116
- #
117
- # Our example, fully fleshed out with indexing, querying, and weights:
118
- # class MyGreatSearch < Application
119
- #
120
- # indexing removes_characters: /[^a-zA-Z0-9\.]/,
121
- # stopwords: /\b(and|or|in|on|is|has)\b/,
122
- # splits_text_on: /\s/,
123
- # removes_characters_after_splitting: /\./,
124
- # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
125
- # normalizes_words: [
126
- # [/(.*)hausen/, 'hn'],
127
- # [/\b(\w*)str(eet)?/, 'st']
128
- # ]
129
- #
130
- # searching removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
131
- # stopwords: /\b(and|the|of|it|in|for)\b/,
132
- # splits_text_on: /[\s\/\-\,\&]+/,
133
- # removes_characters_after_splitting: /\./,
134
- # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
135
- # maximum_tokens: 4
136
- #
137
- # books = Index.new :books do
138
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
139
- # category :title,
140
- # qualifiers: [:t, :title, :titre],
141
- # partial: Partial::Substring.new(:from => 1),
142
- # similarity: Similarity::DoubleMetaphone.new(2)
143
- # category :author,
144
- # partial: Partial::Substring.new(:from => -2)
145
- # category :isbn
146
- # end
147
- #
148
- # route %r{^/books$} => Search.new(books) do
149
- # boost [:title, :author] => +3, [:author, :title] => -1
150
- # end
151
- #
152
- # end
153
- # That's actually already a full-blown Picky App!
154
- #
155
- class Application
156
-
157
- class << self
158
-
159
- # API
160
- #
161
-
162
- # Returns a configured tokenizer that
163
- # is used for indexing by default.
164
- #
165
- def indexing options = {}
166
- Tokenizer.index_default = Tokenizer.new(options)
167
- end
168
-
169
- # Returns a configured tokenizer that
170
- # is used for querying by default.
171
- #
172
- def searching options = {}
173
- Tokenizer.query_default = Tokenizer.new(options)
174
- end
175
-
176
- # Routes.
177
- #
178
- def route options
179
- raise "Warning: block passed into #route method, not into Search.new!" if block_given?
180
- rack_adapter.route options
181
- end
182
-
183
- # A Picky application implements the Rack interface.
184
- #
185
- # Delegates to its routing to handle a request.
186
- #
187
- def call env
188
- rack_adapter.call env
189
- end
190
- def rack_adapter # :nodoc:
191
- @rack_adapter || reset_rack_adapter
192
- end
193
- def reset_rack_adapter
194
- @rack_adapter = FrontendAdapters::Rack.new
195
- end
196
-
197
- # Reloads & finalizes the apps.
198
- #
199
- def reload
200
- Loader.load_user 'app' # Sinatra appfile.
201
- Loader.load_user 'app/application' # Standard Picky appfile.
202
- finalize_apps
203
- exclaim "Loaded Picky application(s) with environment '#{PICKY_ENVIRONMENT}' in #{PICKY_ROOT} on Ruby #{RUBY_VERSION}."
204
- end
205
-
206
- # Finalize the subclass as soon as it
207
- # has finished loading.
208
- #
209
- attr_reader :apps # :nodoc:
210
- def initialize_apps # :nodoc:
211
- @apps ||= []
212
- end
213
- def inherited app # :nodoc:
214
- initialize_apps
215
- apps << app
216
- end
217
- def finalize_apps # :nodoc:
218
- initialize_apps
219
- apps.each &:finalize
220
- end
221
- # Finalizes the routes.
222
- #
223
- def finalize # :nodoc:
224
- check
225
- rack_adapter.finalize
226
- end
227
- # Checks app for missing things.
228
- #
229
- # Warns if something is missing.
230
- #
231
- def check # :nodoc:
232
- warnings = []
233
- warnings << check_external_interface
234
- warn "\n#{warnings.join(?\n)}\n\n" unless warnings.all? &:nil?
235
- end
236
- def check_external_interface
237
- "WARNING: No routes defined for application configuration in #{self.class}." if rack_adapter.empty?
238
- end
239
-
240
- def to_stats
241
- <<-APP
242
- \033[1mIndexing (default)\033[m:
243
- #{Tokenizer.index_default.indented_to_s}
244
-
245
- \033[1mQuerying (default)\033[m:
246
- #{Tokenizer.query_default.indented_to_s}
247
-
248
- \033[1mIndexes\033[m:
249
- #{Indexes.to_s.indented_to_s}
250
-
251
- \033[1mRoutes\033[m:
252
- #{to_routes.indented_to_s}
253
- APP
254
- end
255
-
256
- def to_routes
257
- rack_adapter.to_s
258
- end
259
-
260
- def to_s # :nodoc:
261
- self.name
262
- end
263
-
264
- end
265
-
266
- end
267
-
268
- end
@@ -1,161 +0,0 @@
1
- module Picky
2
-
3
- module FrontendAdapters
4
-
5
- # TODO Rename to Routing again. Push everything back into appropriate Adapters.
6
- #
7
- class Rack # :nodoc:all
8
-
9
- def initialize
10
- check_gem
11
- end
12
-
13
- # Tries to require the rack-mount gem.
14
- #
15
- def check_gem # :nodoc:
16
- require 'rack/mount'
17
- rescue LoadError
18
- warn_gem_missing 'rack-mount', 'the Picky routing'
19
- exit 1
20
- end
21
-
22
- #
23
- #
24
- def reset_routes
25
- @routes = ::Rack::Mount::RouteSet.new
26
- end
27
- def routes
28
- @routes || reset_routes
29
- end
30
- def finalize
31
- routes.freeze
32
- end
33
-
34
- # Routing simply delegates to the route set to handle a request.
35
- #
36
- def call env
37
- routes.call env
38
- end
39
-
40
- # API method.
41
- #
42
- def route options = {}
43
- mappings, route_options = split options
44
- mappings.each do |url, query|
45
- route_one url, query, route_options
46
- end
47
- end
48
- # Splits the route method options
49
- # into real options and route options (/regexp/ => thing or 'some/path' => thing).
50
- #
51
- def split options
52
- mappings = {}
53
- route_options = {}
54
- options.each_pair do |key, value|
55
- if Regexp === key or String === key
56
- mappings[key] = value
57
- else
58
- route_options[key] = value
59
- end
60
- end
61
- [mappings, route_options]
62
- end
63
- def route_one url, query, route_options = {}
64
- raise RouteTargetNilError.new(url) unless query
65
- routes.add_route Adapters::Rack.app_for(query, route_options), default_options(url, route_options), {}, query.to_s
66
- end
67
- class RouteTargetNilError < StandardError
68
- def initialize url
69
- @url = url
70
- end
71
- def to_s
72
- "Routing for #{@url.inspect} was defined with a nil target object, i.e. #{@url.inspect} => nil."
73
- end
74
- end
75
- #
76
- #
77
- def root status
78
- answer %r{^/$}, STATUSES[status]
79
- end
80
- #
81
- #
82
- def default status
83
- answer nil, STATUSES[status]
84
- end
85
-
86
-
87
-
88
- # TODO Can Rack handle this for me?
89
- #
90
- # Note: Rack-mount already handles the 404.
91
- #
92
- STATUSES = {
93
- 200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, [EMPTY_STRING]] },
94
- 404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, [EMPTY_STRING]] }
95
- }
96
-
97
- #
98
- #
99
- def default_options url, route_options = {}
100
- url = normalized url
101
-
102
- options = { request_method: 'GET' }.merge route_options
103
-
104
- options[:path_info] = url if url
105
-
106
- options.delete :content_type
107
-
108
- query_params = options.delete :query
109
- options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params
110
-
111
- options
112
- end
113
- #
114
- #
115
- def generate_query_string query_params
116
- raise "At least one query string condition is needed." if query_params.size.zero?
117
- raise "Too many query param conditions (only 1 allowed): #{query_params}" if query_params.size > 1
118
- k, v = query_params.first
119
- "#{k}=#{v}"
120
- end
121
-
122
- # Setup a route that answers using the given app.
123
- #
124
- def answer url = nil, app = nil
125
- routes.add_route (app || STATUSES[200]), default_options(url)
126
- end
127
-
128
- # Returns a regular expression for the url if it is given a String-like object.
129
- #
130
- def normalized url
131
- url.respond_to?(:to_str) ? %r{#{url}} : url
132
- end
133
-
134
- # Returns true if there are no routes defined.
135
- #
136
- def empty?
137
- routes.length.zero?
138
- end
139
-
140
- # TODO Beautify. Rewrite!
141
- #
142
- def to_s
143
- max_length = routes.instance_variable_get(:@routes).reduce(0) do |current_max, route|
144
- route_length = route.conditions[:path_info].source.to_s.size
145
- route_length > current_max ? route_length : current_max
146
- end
147
- "Note: Anchored (\u2713) regexps are faster, e.g. /\\A.*\\Z/ or /^.*$/.\n\n" +
148
- routes.instance_variable_get(:@routes).map do |route|
149
- path_info = route.conditions[:path_info]
150
- anchored = ::Rack::Mount::Utils.regexp_anchored?(path_info)
151
- anchored_ok = anchored ? "\u2713" : " "
152
- source = path_info.source
153
- "#{anchored_ok} #{source.ljust(max_length)} => #{route.name}"
154
- end.join("\n")
155
- end
156
-
157
- end
158
-
159
- end
160
-
161
- end