picky 4.0.0pre1 → 4.0.0pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112) hide show
  1. data/aux/picky/cli.rb +6 -2
  2. data/lib/picky.rb +10 -8
  3. data/lib/picky/backends/backend.rb +37 -0
  4. data/lib/picky/backends/file.rb +0 -20
  5. data/lib/picky/backends/memory.rb +0 -29
  6. data/lib/picky/backends/redis.rb +74 -15
  7. data/lib/picky/backends/redis/list.rb +1 -1
  8. data/lib/picky/backends/sqlite.rb +0 -27
  9. data/lib/picky/bundle.rb +2 -2
  10. data/lib/picky/bundle_indexed.rb +1 -1
  11. data/lib/picky/bundle_indexing.rb +1 -1
  12. data/lib/picky/categories_indexed.rb +1 -11
  13. data/lib/picky/category.rb +4 -4
  14. data/lib/picky/category/location.rb +25 -0
  15. data/lib/picky/category_realtime.rb +4 -3
  16. data/lib/picky/console.rb +1 -1
  17. data/lib/picky/constants.rb +1 -1
  18. data/lib/picky/ext/maybe_compile.rb +2 -2
  19. data/lib/picky/extensions/object.rb +3 -2
  20. data/lib/picky/generators/aliases.rb +7 -2
  21. data/lib/picky/generators/partial/default.rb +1 -0
  22. data/lib/picky/generators/similarity/default.rb +1 -0
  23. data/lib/picky/generators/similarity/phonetic.rb +13 -2
  24. data/lib/picky/generators/strategy.rb +0 -2
  25. data/lib/picky/generators/weights/constant.rb +1 -2
  26. data/lib/picky/generators/weights/default.rb +1 -0
  27. data/lib/picky/generators/weights/dynamic.rb +1 -1
  28. data/lib/picky/generators/weights/logarithmic.rb +1 -1
  29. data/lib/picky/generators/weights/{runtime.rb → stub.rb} +1 -3
  30. data/lib/picky/index.rb +3 -3
  31. data/lib/picky/index_indexing.rb +0 -2
  32. data/lib/picky/index_realtime.rb +1 -1
  33. data/lib/picky/indexers/base.rb +7 -0
  34. data/lib/picky/indexers/parallel.rb +2 -4
  35. data/lib/picky/indexers/serial.rb +2 -0
  36. data/lib/picky/indexes_indexing.rb +1 -1
  37. data/lib/picky/interfaces/live_parameters/master_child.rb +175 -0
  38. data/lib/picky/interfaces/live_parameters/unicorn.rb +37 -0
  39. data/lib/picky/loader.rb +238 -259
  40. data/lib/picky/query/allocation.rb +19 -10
  41. data/lib/picky/query/combination.rb +7 -1
  42. data/lib/picky/query/combinations.rb +1 -6
  43. data/lib/picky/query/token.rb +26 -36
  44. data/lib/picky/results.rb +18 -17
  45. data/lib/picky/scheduler.rb +2 -1
  46. data/lib/picky/search.rb +1 -1
  47. data/lib/picky/sinatra.rb +6 -6
  48. data/lib/picky/statistics.rb +2 -0
  49. data/lib/picky/tokenizer.rb +8 -8
  50. data/lib/picky/wrappers/bundle/calculation.rb +4 -4
  51. data/lib/picky/wrappers/bundle/location.rb +1 -2
  52. data/lib/tasks/framework.rake +1 -1
  53. data/lib/tasks/statistics.rake +1 -1
  54. data/lib/tasks/try.rake +1 -1
  55. data/lib/tasks/try.rb +1 -1
  56. data/spec/aux/picky/cli_spec.rb +12 -12
  57. data/spec/ext/performant_spec.rb +16 -16
  58. data/spec/functional/backends/file_spec.rb +78 -7
  59. data/spec/functional/backends/memory_spec.rb +78 -7
  60. data/spec/functional/backends/redis_spec.rb +73 -13
  61. data/spec/functional/dynamic_weights_spec.rb +3 -4
  62. data/spec/functional/realtime_spec.rb +2 -2
  63. data/spec/functional/speed_spec.rb +2 -2
  64. data/spec/functional/terminate_early_spec.rb +3 -3
  65. data/spec/lib/analytics_spec.rb +1 -1
  66. data/spec/lib/analyzer_spec.rb +5 -3
  67. data/spec/lib/categories_indexed_spec.rb +38 -20
  68. data/spec/lib/category/location_spec.rb +30 -0
  69. data/spec/lib/character_substituters/west_european_spec.rb +1 -0
  70. data/spec/lib/extensions/hash_spec.rb +6 -5
  71. data/spec/lib/extensions/module_spec.rb +6 -6
  72. data/spec/lib/extensions/object_spec.rb +9 -8
  73. data/spec/lib/extensions/string_spec.rb +1 -1
  74. data/spec/lib/generators/similarity/phonetic_spec.rb +11 -0
  75. data/spec/lib/index_realtime_spec.rb +5 -5
  76. data/spec/lib/interfaces/{live_parameters_spec.rb → live_parameters/master_child_spec.rb} +26 -26
  77. data/spec/lib/interfaces/live_parameters/unicorn_spec.rb +160 -0
  78. data/spec/lib/loader_spec.rb +65 -25
  79. data/spec/lib/query/allocation_spec.rb +25 -22
  80. data/spec/lib/query/combinations_spec.rb +13 -36
  81. data/spec/lib/query/token_spec.rb +144 -131
  82. data/spec/lib/query/tokens_spec.rb +14 -0
  83. data/spec/lib/results_spec.rb +14 -8
  84. data/spec/lib/search_spec.rb +1 -1
  85. data/spec/lib/sinatra_spec.rb +8 -8
  86. metadata +28 -91
  87. data/lib/picky/adapters/rack.rb +0 -34
  88. data/lib/picky/adapters/rack/base.rb +0 -27
  89. data/lib/picky/adapters/rack/live_parameters.rb +0 -37
  90. data/lib/picky/adapters/rack/search.rb +0 -67
  91. data/lib/picky/application.rb +0 -268
  92. data/lib/picky/frontend_adapters/rack.rb +0 -161
  93. data/lib/picky/interfaces/live_parameters.rb +0 -187
  94. data/lib/picky/sources/base.rb +0 -92
  95. data/lib/picky/sources/couch.rb +0 -76
  96. data/lib/picky/sources/csv.rb +0 -83
  97. data/lib/picky/sources/db.rb +0 -189
  98. data/lib/picky/sources/delicious.rb +0 -63
  99. data/lib/picky/sources/mongo.rb +0 -80
  100. data/lib/picky/wrappers/category/location.rb +0 -38
  101. data/lib/tasks/routes.rake +0 -8
  102. data/spec/lib/adapters/rack/base_spec.rb +0 -24
  103. data/spec/lib/adapters/rack/live_parameters_spec.rb +0 -26
  104. data/spec/lib/adapters/rack/query_spec.rb +0 -39
  105. data/spec/lib/application_spec.rb +0 -155
  106. data/spec/lib/frontend_adapters/rack_spec.rb +0 -294
  107. data/spec/lib/sources/base_spec.rb +0 -53
  108. data/spec/lib/sources/couch_spec.rb +0 -114
  109. data/spec/lib/sources/csv_spec.rb +0 -89
  110. data/spec/lib/sources/db_spec.rb +0 -125
  111. data/spec/lib/sources/delicious_spec.rb +0 -94
  112. data/spec/lib/sources/mongo_spec.rb +0 -50
@@ -1,67 +0,0 @@
1
- module Picky
2
-
3
- module Adapters
4
- # This is an adapter that is plugged into a Rack outlet.
5
- #
6
- # It looks at what is given to it and generates an appropriate
7
- # adapter for it.
8
- #
9
- # For example, if you give it a query, it will extract the query param etc.
10
- # and call search on it if it is called by Rack.
11
- #
12
- module Rack
13
-
14
- class Search < Base
15
-
16
- @@defaults = {
17
- query_key: 'query'.freeze,
18
- ids_key: 'ids'.freeze,
19
- offset_key: 'offset'.freeze,
20
- content_type: 'application/json'.freeze
21
- }
22
-
23
- def initialize query
24
- @query = query
25
- @defaults = @@defaults.dup
26
- end
27
-
28
- def to_app options = {}
29
- # For capturing in the lambda.
30
- #
31
- query = @query
32
- query_key = options[:query_key] || @defaults[:query_key]
33
- content_type = options[:content_type] || @defaults[:content_type]
34
-
35
- lambda do |env|
36
- params = ::Rack::Request.new(env).params
37
-
38
- results = query.search *extracted(params)
39
-
40
- Picky.logger && Picky.logger.info(results)
41
-
42
- respond_with results.to_json, content_type
43
- end
44
- end
45
-
46
- # Helper method to extract the params
47
- #
48
- # Defaults are 20 ids, offset 0.
49
- #
50
- UTF8_STRING = 'UTF-8'.freeze
51
- def extracted params
52
- [
53
- # query is encoded in ASCII
54
- #
55
- params[@defaults[:query_key]] && params[@defaults[:query_key]].force_encoding(UTF8_STRING),
56
- params[@defaults[:ids_key]] && params[@defaults[:ids_key]].to_i || 20,
57
- params[@defaults[:offset_key]] && params[@defaults[:offset_key]].to_i || 0
58
- ]
59
- end
60
-
61
- end
62
-
63
- end
64
-
65
- end
66
-
67
- end
@@ -1,268 +0,0 @@
1
- module Picky
2
-
3
- # = Picky Applications
4
- #
5
- # A Picky Application is where you configure the whole search engine.
6
- #
7
- # This is a step-by-step description on how to configure your Picky app.
8
- #
9
- # Start by subclassing Application:
10
- # class MyGreatSearch < Application
11
- # # Your configuration goes here.
12
- # end
13
- # The generator
14
- # $ picky generate unicorn_server project_name
15
- # will generate an example <tt>project_name/app/application.rb</tt> file for you
16
- # with some example code inside.
17
- #
18
- # == Index.new(name)
19
- #
20
- # Next, define where your data comes from, creating an <tt>Index</tt>. You use the <tt>Index.new</tt> method for that:
21
- # my_index = Index.new :some_index_name
22
- # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
23
- # class MyGreatSearch < Application
24
- #
25
- # books = Index.new :books do
26
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
27
- # end
28
- #
29
- # end
30
- # Now we have an index <tt>books</tt>.
31
- #
32
- # That in itself won't do much good.
33
- #
34
- # Note that a Redis index is also available: Index.new.
35
- #
36
- # == category(identifier, options = {})
37
- #
38
- # Picky needs us to define categories on the data.
39
- #
40
- # Categories help your user find data.
41
- # It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
42
- #
43
- # Let's go ahead and define a category:
44
- # class MyGreatSearch < Application
45
- #
46
- # books = Index.new :books do
47
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
48
- # category :title
49
- # end
50
- #
51
- # end
52
- # Now we could already run the indexer:
53
- # $ rake index
54
- #
55
- # (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
56
- #
57
- # So now we have indexed data (the title), but nobody to ask the index anything.
58
- #
59
- # == Search.new(*indexes, options = {})
60
- #
61
- # We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration):
62
- # books_search = Search.new books
63
- #
64
- # Now we have somebody we can ask about the index. But no external interface.
65
- #
66
- # == route(/regexp1/ => search1, /regexp2/ => search2, ...)
67
- #
68
- # Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
69
- # route %r{^/books$} => books_search
70
- # In full glory:
71
- # class MyGreatSearch < Application
72
- #
73
- # books = Index.new :books do
74
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
75
- # category :title
76
- # end
77
- #
78
- # route %r{^/books$} => Search.new(books)
79
- #
80
- # end
81
- # That's it!
82
- #
83
- # Now run the indexer and server:
84
- # $ rake index
85
- # $ rake start
86
- # Run your first query:
87
- # $ curl 'localhost:8080/books?query=hello server'
88
- #
89
- # Nice, right? Your first query!
90
- #
91
- # Maybe you don't find everything. We need to process the data before it goes into the index.
92
- #
93
- # == indexing(options = {})
94
- #
95
- # That's what the <tt>indexing</tt> method is for:
96
- # indexing options
97
- # Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
98
- #
99
- # Same thing with the search text – we need to process that as well.
100
- #
101
- # == searching(options = {})
102
- #
103
- # Analogous to the indexing method, we use the <tt>searching</tt> method.
104
- # searching options
105
- # Read more about the options here: http://github.com/floere/picky/wiki/Searching-Configuration
106
- #
107
- # And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
108
- #
109
- # == Wiki
110
- #
111
- # Read more in the Wiki: http://github.com/floere/picky/wiki
112
- #
113
- # Have fun!
114
- #
115
- # == Full example
116
- #
117
- # Our example, fully fleshed out with indexing, querying, and weights:
118
- # class MyGreatSearch < Application
119
- #
120
- # indexing removes_characters: /[^a-zA-Z0-9\.]/,
121
- # stopwords: /\b(and|or|in|on|is|has)\b/,
122
- # splits_text_on: /\s/,
123
- # removes_characters_after_splitting: /\./,
124
- # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
125
- # normalizes_words: [
126
- # [/(.*)hausen/, 'hn'],
127
- # [/\b(\w*)str(eet)?/, 'st']
128
- # ]
129
- #
130
- # searching removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
131
- # stopwords: /\b(and|the|of|it|in|for)\b/,
132
- # splits_text_on: /[\s\/\-\,\&]+/,
133
- # removes_characters_after_splitting: /\./,
134
- # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
135
- # maximum_tokens: 4
136
- #
137
- # books = Index.new :books do
138
- # source Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
139
- # category :title,
140
- # qualifiers: [:t, :title, :titre],
141
- # partial: Partial::Substring.new(:from => 1),
142
- # similarity: Similarity::DoubleMetaphone.new(2)
143
- # category :author,
144
- # partial: Partial::Substring.new(:from => -2)
145
- # category :isbn
146
- # end
147
- #
148
- # route %r{^/books$} => Search.new(books) do
149
- # boost [:title, :author] => +3, [:author, :title] => -1
150
- # end
151
- #
152
- # end
153
- # That's actually already a full-blown Picky App!
154
- #
155
- class Application
156
-
157
- class << self
158
-
159
- # API
160
- #
161
-
162
- # Returns a configured tokenizer that
163
- # is used for indexing by default.
164
- #
165
- def indexing options = {}
166
- Tokenizer.index_default = Tokenizer.new(options)
167
- end
168
-
169
- # Returns a configured tokenizer that
170
- # is used for querying by default.
171
- #
172
- def searching options = {}
173
- Tokenizer.query_default = Tokenizer.new(options)
174
- end
175
-
176
- # Routes.
177
- #
178
- def route options
179
- raise "Warning: block passed into #route method, not into Search.new!" if block_given?
180
- rack_adapter.route options
181
- end
182
-
183
- # A Picky application implements the Rack interface.
184
- #
185
- # Delegates to its routing to handle a request.
186
- #
187
- def call env
188
- rack_adapter.call env
189
- end
190
- def rack_adapter # :nodoc:
191
- @rack_adapter || reset_rack_adapter
192
- end
193
- def reset_rack_adapter
194
- @rack_adapter = FrontendAdapters::Rack.new
195
- end
196
-
197
- # Reloads & finalizes the apps.
198
- #
199
- def reload
200
- Loader.load_user 'app' # Sinatra appfile.
201
- Loader.load_user 'app/application' # Standard Picky appfile.
202
- finalize_apps
203
- exclaim "Loaded Picky application(s) with environment '#{PICKY_ENVIRONMENT}' in #{PICKY_ROOT} on Ruby #{RUBY_VERSION}."
204
- end
205
-
206
- # Finalize the subclass as soon as it
207
- # has finished loading.
208
- #
209
- attr_reader :apps # :nodoc:
210
- def initialize_apps # :nodoc:
211
- @apps ||= []
212
- end
213
- def inherited app # :nodoc:
214
- initialize_apps
215
- apps << app
216
- end
217
- def finalize_apps # :nodoc:
218
- initialize_apps
219
- apps.each &:finalize
220
- end
221
- # Finalizes the routes.
222
- #
223
- def finalize # :nodoc:
224
- check
225
- rack_adapter.finalize
226
- end
227
- # Checks app for missing things.
228
- #
229
- # Warns if something is missing.
230
- #
231
- def check # :nodoc:
232
- warnings = []
233
- warnings << check_external_interface
234
- warn "\n#{warnings.join(?\n)}\n\n" unless warnings.all? &:nil?
235
- end
236
- def check_external_interface
237
- "WARNING: No routes defined for application configuration in #{self.class}." if rack_adapter.empty?
238
- end
239
-
240
- def to_stats
241
- <<-APP
242
- \033[1mIndexing (default)\033[m:
243
- #{Tokenizer.index_default.indented_to_s}
244
-
245
- \033[1mQuerying (default)\033[m:
246
- #{Tokenizer.query_default.indented_to_s}
247
-
248
- \033[1mIndexes\033[m:
249
- #{Indexes.to_s.indented_to_s}
250
-
251
- \033[1mRoutes\033[m:
252
- #{to_routes.indented_to_s}
253
- APP
254
- end
255
-
256
- def to_routes
257
- rack_adapter.to_s
258
- end
259
-
260
- def to_s # :nodoc:
261
- self.name
262
- end
263
-
264
- end
265
-
266
- end
267
-
268
- end
@@ -1,161 +0,0 @@
1
- module Picky
2
-
3
- module FrontendAdapters
4
-
5
- # TODO Rename to Routing again. Push everything back into appropriate Adapters.
6
- #
7
- class Rack # :nodoc:all
8
-
9
- def initialize
10
- check_gem
11
- end
12
-
13
- # Tries to require the rack-mount gem.
14
- #
15
- def check_gem # :nodoc:
16
- require 'rack/mount'
17
- rescue LoadError
18
- warn_gem_missing 'rack-mount', 'the Picky routing'
19
- exit 1
20
- end
21
-
22
- #
23
- #
24
- def reset_routes
25
- @routes = ::Rack::Mount::RouteSet.new
26
- end
27
- def routes
28
- @routes || reset_routes
29
- end
30
- def finalize
31
- routes.freeze
32
- end
33
-
34
- # Routing simply delegates to the route set to handle a request.
35
- #
36
- def call env
37
- routes.call env
38
- end
39
-
40
- # API method.
41
- #
42
- def route options = {}
43
- mappings, route_options = split options
44
- mappings.each do |url, query|
45
- route_one url, query, route_options
46
- end
47
- end
48
- # Splits the route method options
49
- # into real options and route options (/regexp/ => thing or 'some/path' => thing).
50
- #
51
- def split options
52
- mappings = {}
53
- route_options = {}
54
- options.each_pair do |key, value|
55
- if Regexp === key or String === key
56
- mappings[key] = value
57
- else
58
- route_options[key] = value
59
- end
60
- end
61
- [mappings, route_options]
62
- end
63
- def route_one url, query, route_options = {}
64
- raise RouteTargetNilError.new(url) unless query
65
- routes.add_route Adapters::Rack.app_for(query, route_options), default_options(url, route_options), {}, query.to_s
66
- end
67
- class RouteTargetNilError < StandardError
68
- def initialize url
69
- @url = url
70
- end
71
- def to_s
72
- "Routing for #{@url.inspect} was defined with a nil target object, i.e. #{@url.inspect} => nil."
73
- end
74
- end
75
- #
76
- #
77
- def root status
78
- answer %r{^/$}, STATUSES[status]
79
- end
80
- #
81
- #
82
- def default status
83
- answer nil, STATUSES[status]
84
- end
85
-
86
-
87
-
88
- # TODO Can Rack handle this for me?
89
- #
90
- # Note: Rack-mount already handles the 404.
91
- #
92
- STATUSES = {
93
- 200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, [EMPTY_STRING]] },
94
- 404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, [EMPTY_STRING]] }
95
- }
96
-
97
- #
98
- #
99
- def default_options url, route_options = {}
100
- url = normalized url
101
-
102
- options = { request_method: 'GET' }.merge route_options
103
-
104
- options[:path_info] = url if url
105
-
106
- options.delete :content_type
107
-
108
- query_params = options.delete :query
109
- options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params
110
-
111
- options
112
- end
113
- #
114
- #
115
- def generate_query_string query_params
116
- raise "At least one query string condition is needed." if query_params.size.zero?
117
- raise "Too many query param conditions (only 1 allowed): #{query_params}" if query_params.size > 1
118
- k, v = query_params.first
119
- "#{k}=#{v}"
120
- end
121
-
122
- # Setup a route that answers using the given app.
123
- #
124
- def answer url = nil, app = nil
125
- routes.add_route (app || STATUSES[200]), default_options(url)
126
- end
127
-
128
- # Returns a regular expression for the url if it is given a String-like object.
129
- #
130
- def normalized url
131
- url.respond_to?(:to_str) ? %r{#{url}} : url
132
- end
133
-
134
- # Returns true if there are no routes defined.
135
- #
136
- def empty?
137
- routes.length.zero?
138
- end
139
-
140
- # TODO Beautify. Rewrite!
141
- #
142
- def to_s
143
- max_length = routes.instance_variable_get(:@routes).reduce(0) do |current_max, route|
144
- route_length = route.conditions[:path_info].source.to_s.size
145
- route_length > current_max ? route_length : current_max
146
- end
147
- "Note: Anchored (\u2713) regexps are faster, e.g. /\\A.*\\Z/ or /^.*$/.\n\n" +
148
- routes.instance_variable_get(:@routes).map do |route|
149
- path_info = route.conditions[:path_info]
150
- anchored = ::Rack::Mount::Utils.regexp_anchored?(path_info)
151
- anchored_ok = anchored ? "\u2713" : " "
152
- source = path_info.source
153
- "#{anchored_ok} #{source.ljust(max_length)} => #{route.name}"
154
- end.join("\n")
155
- end
156
-
157
- end
158
-
159
- end
160
-
161
- end