picky 1.2.4 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37) hide show
  1. data/lib/picky/adapters/rack/base.rb +23 -0
  2. data/lib/picky/adapters/rack/live_parameters.rb +33 -0
  3. data/lib/picky/adapters/rack/query.rb +59 -0
  4. data/lib/picky/adapters/rack.rb +28 -0
  5. data/lib/picky/alias_instances.rb +2 -0
  6. data/lib/picky/application.rb +9 -8
  7. data/lib/picky/cli.rb +25 -3
  8. data/lib/picky/frontend_adapters/rack.rb +150 -0
  9. data/lib/picky/helpers/measuring.rb +0 -2
  10. data/lib/picky/index_api.rb +1 -1
  11. data/lib/picky/indexed/categories.rb +51 -14
  12. data/lib/picky/indexers/solr.rb +1 -5
  13. data/lib/picky/indexing/indexes.rb +6 -0
  14. data/lib/picky/interfaces/live_parameters.rb +165 -0
  15. data/lib/picky/loader.rb +13 -2
  16. data/lib/picky/query/base.rb +15 -18
  17. data/lib/picky/query/combination.rb +2 -2
  18. data/lib/picky/query/solr.rb +0 -17
  19. data/lib/picky/query/token.rb +14 -27
  20. data/lib/picky/query/weights.rb +13 -1
  21. data/lib/picky/results/base.rb +9 -2
  22. data/spec/lib/adapters/rack/base_spec.rb +24 -0
  23. data/spec/lib/adapters/rack/live_parameters_spec.rb +21 -0
  24. data/spec/lib/adapters/rack/query_spec.rb +33 -0
  25. data/spec/lib/application_spec.rb +27 -8
  26. data/spec/lib/cli_spec.rb +9 -0
  27. data/spec/lib/extensions/symbol_spec.rb +1 -3
  28. data/spec/lib/{routing_spec.rb → frontend_adapters/rack_spec.rb} +69 -66
  29. data/spec/lib/indexed/categories_spec.rb +24 -0
  30. data/spec/lib/interfaces/live_parameters_spec.rb +138 -0
  31. data/spec/lib/query/base_spec.rb +10 -14
  32. data/spec/lib/query/live_spec.rb +1 -30
  33. data/spec/lib/query/token_spec.rb +72 -5
  34. data/spec/lib/query/weights_spec.rb +59 -36
  35. data/spec/lib/results/base_spec.rb +13 -1
  36. metadata +20 -7
  37. data/lib/picky/routing.rb +0 -171
@@ -0,0 +1,23 @@
1
+ module Adapters
2
+ # Adapter that is plugged into a Rack outlet.
3
+ #
4
+ module Rack
5
+
6
+ # Subclasses of this class should respond to
7
+ # * to_app(options)
8
+ #
9
+ class Base
10
+
11
+ # Puts together an appropriately structured Rack response.
12
+ #
13
+ # Note: Bytesize is needed to have special characters not trip up Rack.
14
+ #
15
+ def respond_with response, content_type = 'application/json'
16
+ [200, { 'Content-Type' => content_type, 'Content-Length' => response.bytesize.to_s }, [response]]
17
+ end
18
+
19
+ end
20
+
21
+ end
22
+
23
+ end
@@ -0,0 +1,33 @@
1
+ module Adapters
2
+
3
+ #
4
+ #
5
+ module Rack
6
+
7
+ class LiveParameters < Base
8
+
9
+ def initialize live_parameters
10
+ @live_parameters = live_parameters
11
+ end
12
+
13
+ #
14
+ #
15
+ def to_app options = {}
16
+ # For capturing by the lambda block.
17
+ #
18
+ live_parameters = @live_parameters
19
+
20
+ lambda do |env|
21
+ params = ::Rack::Request.new(env).params
22
+
23
+ results = live_parameters.parameters params
24
+
25
+ respond_with results.to_json
26
+ end
27
+ end
28
+
29
+ end
30
+
31
+ end
32
+
33
+ end
@@ -0,0 +1,59 @@
1
+ module Adapters
2
+ # This is an adapter that is plugged into a Rack outlet.
3
+ #
4
+ # It looks at what is given to it and generates an appropriate
5
+ # adapter for it.
6
+ #
7
+ # For example, if you give it a query, it will extract the query param etc.
8
+ # and call search_with_text on it if it is called by Rack.
9
+ #
10
+ module Rack
11
+
12
+ class Query < Base
13
+
14
+ @@defaults = {
15
+ query_key: 'query'.freeze,
16
+ offset_key: 'offset'.freeze,
17
+ content_type: 'application/json'.freeze
18
+ }
19
+
20
+ def initialize query
21
+ @query = query
22
+ @defaults = @@defaults.dup
23
+ end
24
+
25
+ def to_app options = {}
26
+ # For capturing in the lambda.
27
+ #
28
+ query = @query
29
+ query_key = options[:query_key] || @defaults[:query_key]
30
+ content_type = options[:content_type] || @defaults[:content_type]
31
+
32
+ lambda do |env|
33
+ params = ::Rack::Request.new(env).params
34
+
35
+ results = query.search_with_text *extracted(params)
36
+
37
+ PickyLog.log results.to_log(params[query_key])
38
+
39
+ respond_with results.to_response, content_type
40
+ end
41
+ end
42
+
43
+ # Helper method to extract the params
44
+ #
45
+ UTF8_STRING = 'UTF-8'.freeze
46
+ def extracted params
47
+ [
48
+ # query is encoded in ASCII
49
+ #
50
+ params[@defaults[:query_key]] && params[@defaults[:query_key]].force_encoding(UTF8_STRING),
51
+ params[@defaults[:offset_key]] && params[@defaults[:offset_key]].to_i || 0
52
+ ]
53
+ end
54
+
55
+ end
56
+
57
+ end
58
+
59
+ end
@@ -0,0 +1,28 @@
1
+ module Adapters
2
+ # This is an adapter that is plugged into a Rack outlet.
3
+ #
4
+ # It looks at what is given to it and generates an appropriate
5
+ # adapter for it.
6
+ #
7
+ # For example, if you give it a query, it will extract the query param etc.
8
+ # and call search_with_text on it if it is called by Rack.
9
+ #
10
+ # Usage:
11
+ # Adapters::Rack.app_for(thing, options)
12
+ #
13
+ module Rack
14
+
15
+ # Generates the appropriate app for Rack.
16
+ #
17
+ @@mapping = {
18
+ :search_with_text => Query,
19
+ :parameters => LiveParameters
20
+ }
21
+ def self.app_for thing, options = {}
22
+ @@mapping.each_pair do |method, adapter|
23
+ return adapter.new(thing).to_app(options) if thing.respond_to?(method)
24
+ end
25
+ end
26
+
27
+ end
28
+ end
@@ -1,4 +1,6 @@
1
1
  # This provides a nice accessor for users
2
2
  # who want to use the index API.
3
3
  #
4
+ # TODO Rename to API::Indexes?
5
+ #
4
6
  Indexes = IndexesAPI.new
@@ -176,7 +176,7 @@ class Application
176
176
  # - to identify an index (e.g. by you in Rake tasks).
177
177
  # - in the frontend to describe which index a result came from.
178
178
  # - index directory naming (index/development/the_identifier/<lots of indexes>)
179
- # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
179
+ # * source: The source the data comes from. See Sources::Base.
180
180
  #
181
181
  # Options:
182
182
  # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
@@ -187,7 +187,7 @@ class Application
187
187
 
188
188
  # Routes.
189
189
  #
190
- delegate :route, :root, :to => :routing
190
+ delegate :route, :root, :to => :rack_adapter
191
191
 
192
192
  #
193
193
  # API
@@ -198,10 +198,10 @@ class Application
198
198
  # Delegates to its routing to handle a request.
199
199
  #
200
200
  def call env
201
- routing.call env
201
+ rack_adapter.call env
202
202
  end
203
- def routing # :nodoc:
204
- @routing ||= Routing.new
203
+ def rack_adapter # :nodoc:
204
+ @rack_adapter ||= FrontendAdapters::Rack.new
205
205
  end
206
206
 
207
207
  # Finalize the subclass as soon as it
@@ -223,7 +223,7 @@ class Application
223
223
  #
224
224
  def finalize # :nodoc:
225
225
  check
226
- routing.freeze
226
+ rack_adapter.finalize
227
227
  end
228
228
  # Checks app for missing things.
229
229
  #
@@ -237,14 +237,15 @@ class Application
237
237
  puts "\n#{warnings.join(?\n)}\n\n" unless warnings.all? &:nil?
238
238
  end
239
239
  def check_external_interface
240
- "WARNING: No routes defined for application configuration in #{self.class}." if routing.empty?
240
+ "WARNING: No routes defined for application configuration in #{self.class}." if rack_adapter.empty?
241
241
  end
242
242
 
243
243
  # TODO Add more info if possible.
244
244
  #
245
245
  def to_s # :nodoc:
246
- "#{self.name}:\n#{routing}"
246
+ "#{self.name}:\n#{rack_adapter}"
247
247
  end
248
248
 
249
249
  end
250
+
250
251
  end
data/lib/picky/cli.rb CHANGED
@@ -27,8 +27,8 @@ module Picky
27
27
  end
28
28
  class Statistics < Base
29
29
  def execute name, args, params
30
- relative_log_file = args.shift
31
- port = args.shift
30
+ relative_log_file = args.shift
31
+ port = args.shift
32
32
 
33
33
  usage(name, params) || exit(1) unless relative_log_file
34
34
 
@@ -46,6 +46,27 @@ module Picky
46
46
  require 'picky-statistics/application/app'
47
47
  end
48
48
  end
49
+ class Live < Base
50
+ def execute name, args, params
51
+ url = args.shift
52
+ port = args.shift
53
+
54
+ usage(name, params) || exit(1) unless args.empty?
55
+
56
+ ENV['PICKY_LIVE_URL'] = url
57
+ ENV['PICKY_LIVE_PORT'] = port
58
+
59
+ begin
60
+ require 'picky-live'
61
+ rescue LoadError => e
62
+ require 'picky/extensions/object'
63
+ puts_gem_missing 'picky-live', 'the Picky Live Interface'
64
+ exit 1
65
+ end
66
+
67
+ require 'picky-live/application/app'
68
+ end
69
+ end
49
70
  class Generate < Base
50
71
  def execute name, args, params
51
72
  system "picky-generate #{args.join(' ')}"
@@ -69,7 +90,8 @@ module Picky
69
90
  @@mapping = {
70
91
  :generate => [Generate, 'sinatra_client | unicorn_server | empty_unicorn_server', 'app_directory_name (optional)'],
71
92
  :help => [Help],
72
- :stats => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)']
93
+ :stats => [Statistics, 'logfile, e.g. log/search.log', 'port (optional)'],
94
+ :live => [Live, 'host:port/path (optional, default: localhost:8080/admin)', 'port (optional)']
73
95
  }
74
96
  def self.mapping
75
97
  @@mapping
@@ -0,0 +1,150 @@
1
+ require 'rack/mount'
2
+
3
+ module FrontendAdapters
4
+
5
+ # TODO Rename to Routing again. Push everything back into appropriate Adapters.
6
+ #
7
+ class Rack # :nodoc:all
8
+
9
+ @@defaults = {
10
+ query_key: 'query'.freeze,
11
+ offset_key: 'offset'.freeze,
12
+ content_type: 'application/octet-stream'.freeze # TODO Wrong.
13
+ }
14
+
15
+ def initialize
16
+ @defaults = @@defaults.dup
17
+ end
18
+
19
+ #
20
+ #
21
+ def reset_routes
22
+ @routes = ::Rack::Mount::RouteSet.new
23
+ end
24
+ def routes
25
+ @routes || reset_routes
26
+ end
27
+ def finalize
28
+ routes.freeze
29
+ end
30
+
31
+ # Routing simply delegates to the route set to handle a request.
32
+ #
33
+ def call env
34
+ routes.call env
35
+ end
36
+
37
+ # API method.
38
+ #
39
+ def route options = {}
40
+ mappings, route_options = split options
41
+ mappings.each do |url, query|
42
+ route_one url, query, route_options
43
+ end
44
+ end
45
+ # Splits the route method options
46
+ # into mappings (/regexp/ => thing or 'some/path' => thing) and the remaining route options.
47
+ #
48
+ def split options
49
+ mappings = {}
50
+ route_options = {}
51
+ options.each_pair do |key, value|
52
+ if Regexp === key or String === key
53
+ mappings[key] = value
54
+ else
55
+ route_options[key] = value
56
+ end
57
+ end
58
+ [mappings, route_options]
59
+ end
60
+ def route_one url, query, route_options = {}
61
+ raise RouteTargetNilError.new(url) unless query
62
+ routes.add_route Adapters::Rack.app_for(query, route_options), default_options(url, route_options)
63
+ end
64
+ class RouteTargetNilError < StandardError
65
+ def initialize url
66
+ @url = url
67
+ end
68
+ def to_s
69
+ "Routing for #{@url.inspect} was defined with a nil target object, i.e. #{@url.inspect} => nil."
70
+ end
71
+ end
72
+ #
73
+ #
74
+ def root status
75
+ answer %r{^/$}, STATUSES[status]
76
+ end
77
+ #
78
+ #
79
+ def default status
80
+ answer nil, STATUSES[status]
81
+ end
82
+
83
+
84
+
85
+ # TODO Can Rack handle this for me?
86
+ #
87
+ # Note: Rack-mount already handles the 404.
88
+ #
89
+ STATUSES = {
90
+ 200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] },
91
+ 404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] }
92
+ }
93
+
94
+ #
95
+ #
96
+ def default_options url, route_options = {}
97
+ url = normalized url
98
+
99
+ options = { request_method: 'GET' }.merge route_options
100
+
101
+ options[:path_info] = url if url
102
+
103
+ options.delete :content_type
104
+
105
+ query_params = options.delete :query
106
+ options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params
107
+
108
+ options
109
+ end
110
+ #
111
+ #
112
+ def generate_query_string query_params
113
+ raise "At least one query string condition is needed." if query_params.size.zero?
114
+ raise "Too many query param conditions (only 1 allowed): #{query_params}" if query_params.size > 1
115
+ k, v = query_params.first
116
+ "#{k}=#{v}"
117
+ end
118
+
119
+ # Setup a route that answers using the given app.
120
+ #
121
+ def answer url = nil, app = nil
122
+ routes.add_route (app || STATUSES[200]), default_options(url)
123
+ end
124
+
125
+ # Returns a regular expression for the url if it is given a String-like object.
126
+ #
127
+ def normalized url
128
+ url.respond_to?(:to_str) ? %r{#{url}} : url
129
+ end
130
+
131
+ # Returns true if there are no routes defined.
132
+ #
133
+ def empty?
134
+ routes.length.zero?
135
+ end
136
+
137
+ # TODO Beautify.
138
+ #
139
+ def to_s
140
+ routes.instance_variable_get(:@routes).map do |route|
141
+ path_info = route.conditions[:path_info]
142
+ anchored = ::Rack::Mount::Utils.regexp_anchored?(path_info)
143
+ anchored_ok = anchored ? "\u2713" : " "
144
+ "#{anchored_ok} #{path_info.source}"
145
+ end.join "\n"
146
+ end
147
+
148
+ end
149
+
150
+ end
@@ -1,7 +1,5 @@
1
1
  # Helper methods for measuring, benchmarking, logging.
2
2
  #
3
- # TODO Not used anymore? Remove.
4
- #
5
3
  module Helpers
6
4
  module Measuring
7
5
 
@@ -157,7 +157,7 @@ class IndexAPI
157
157
  # * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
158
158
  # * from: The data category to take the data for this category from.
159
159
  #
160
- # TODO Redo. Probably extract into define_latitude_category, define_longitude_category.
160
+ # TODO Redo. Will have to write a wrapper that combines two categories that are indexed simultaneously.
161
161
  #
162
162
  def define_map_location name, radius, options = {} # :nodoc:
163
163
  # The radius is given as if all the locations were on the equator.
@@ -7,29 +7,57 @@ module Indexed
7
7
  each_delegate :load_from_cache,
8
8
  :to => :categories
9
9
 
10
+ # A list of indexed categories.
11
+ #
12
+ # Options:
13
+ # * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
14
+ # The default behaviour is that if a token does not match to
15
+ # any category, the query will not return anything (since a
16
+ # single token cannot be matched). If you set this option to
17
+ # true, any token that cannot be matched to a category will be
18
+ # simply ignored.
19
+ # Use this if only a few matched words are important, e.g. if
20
+ # in the query "Jonathan Myers 86455 Las Cucarachas"
21
+ # you only want to match the zipcode, to have the search engine
22
+ # display advertisements on the side for the zipcode.
23
+ # Nifty! :)
24
+ #
10
25
  def initialize options = {}
11
26
  clear
12
27
 
13
28
  @ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
14
29
  end
15
30
 
31
+ # Clears both the array of categories and the hash of categories.
32
+ #
16
33
  def clear
17
34
  @categories = []
18
35
  @category_hash = {}
19
36
  end
20
37
 
38
+ # Add the given category to the list of categories.
39
+ #
21
40
  def << category
22
41
  categories << category
23
- category_hash[category.name] = [category] # TODO Why an Array?
42
+ # Note: [category] is an optimization, since I need an array
43
+ # of categories.
44
+ # It's faster to just package it in an array on loading
45
+ # Picky than doing it over and over with each query.
46
+ #
47
+ category_hash[category.name] = [category]
24
48
  end
25
49
 
50
+ # Return all possible combinations for the given token.
26
51
  #
52
+ # This checks if it needs to also search through similar
53
+ # tokens, if for example, the token is one with ~.
54
+ # If yes, it puts together all solutions.
27
55
  #
28
56
  def possible_combinations_for token
29
57
  token.similar? ? similar_possible_for(token) : possible_for(token)
30
58
  end
31
-
32
- #
59
+ # Gets all similar tokens and puts together the possible combinations
60
+ # for each found similar token.
33
61
  #
34
62
  def similar_possible_for token
35
63
  # Get as many similar tokens as necessary
@@ -43,9 +71,12 @@ module Indexed
43
71
  text = token.text
44
72
  categories.inject([]) do |result, category|
45
73
  next_token = token
46
- # TODO adjust either this or the amount of similar in index
74
+ # Note: We could also break off here if not all the available
75
+ # similars are needed.
76
+ # Wait for a concrete case that needs this before taking
77
+ # action.
47
78
  #
48
- while next_token = next_token.next(category)
79
+ while next_token = next_token.next_similar_token(category)
49
80
  result << next_token if next_token && next_token.text != text
50
81
  end
51
82
  result
@@ -60,13 +91,12 @@ module Indexed
60
91
 
61
92
  # Returns possible Combinations for the token.
62
93
  #
63
- # The preselected_categories param is an optimization.
64
- #
65
- # TODO Return [RemovedCategory(token, nil)]
66
- # If the search is ...
94
+ # Note: The preselected_categories param is an optimization.
67
95
  #
68
- # TODO Return [] if not ok, nil if needs to be removed?
69
- # Somehow unnice, but…
96
+ # Note: Returns [] if no categories matched (will produce no result).
97
+ # Returns nil if this token needs to be removed from the query.
98
+ # (Also none of the categories matched, but the ignore unassigned
99
+ # tokens option is true)
70
100
  #
71
101
  def possible_for token, preselected_categories = nil
72
102
  possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
@@ -76,14 +106,21 @@ module Indexed
76
106
  return if ignore_unassigned_tokens && possible.empty?
77
107
  possible # wrap in combinations
78
108
  end
109
+ # This returns the possible categories for this token.
110
+ # If the user has already preselected a category for this token,
111
+ # like "artist:moby", return that category; if not, just return all categories,
112
+ # since all are possible.
79
113
  #
80
- #
81
- # TODO too many calls?
114
+ # Note: Once I thought this was called too often. But it is not (18.01.2011).
82
115
  #
83
116
  def possible_categories token
84
117
  user_defined_categories(token) || categories
85
118
  end
86
- # Returns nil if there is no user defined category, the category else.
119
+ # This returns the array of categories if the user has defined
120
+ # an existing category.
121
+ #
122
+ # Note: Returns nil if the user did not define one
123
+ # or if he/she has defined a non-existing one.
87
124
  #
88
125
  def user_defined_categories token
89
126
  category_hash[token.user_defined_category_name]
@@ -2,7 +2,7 @@
2
2
  #
3
3
  require 'rsolr'
4
4
  module Indexers
5
- # TODO Totally deprecated. This should be a source.
5
+ # TODO Deprecated. This should be handled in a special bundle which goes through Solr.
6
6
  #
7
7
  class Solr
8
8
 
@@ -16,14 +16,10 @@ module Indexers
16
16
  @solr = RSolr.connect
17
17
  end
18
18
 
19
- # TODO Rewrite such that it works in batches.
20
- #
21
19
  def index
22
20
  timed_exclaim "Indexing solr for #{type.name}:#{fields.join(', ')}"
23
21
  statement = "SELECT indexed_id, #{fields.join(',')} FROM #{type.snapshot_table_name}"
24
22
 
25
- # TODO Rewrite.
26
- #
27
23
  DB.connect
28
24
  results = DB.connection.execute statement
29
25
 
@@ -39,8 +39,14 @@ module Indexing
39
39
  # Run in parallel.
40
40
  #
41
41
  timed_exclaim "INDEXING USING #{Cores.max_processors} PROCESSORS, IN #{randomly ? 'RANDOM' : 'GIVEN'} ORDER."
42
+
43
+ # TODO Think about having serial work units.
44
+ #
42
45
  Cores.forked self.indexes, { randomly: randomly } do |an_index|
43
46
  an_index.index
47
+ # TODO
48
+ # end
49
+ # Cores.forked self.indexes, { randomly: randomly } do |an_index|
44
50
  an_index.cache
45
51
  end
46
52
  timed_exclaim "INDEXING FINISHED."