picky 1.5.4 → 2.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
@@ -50,26 +50,24 @@
50
50
  #
51
51
  # So now we have indexed data (the title), but nobody to ask the index anything.
52
52
  #
53
- # == Query::Full.new(*indexes, options = {})
53
+ # == Search.new(*indexes, options = {})
54
54
  #
55
55
  # We need somebody who asks the index (a Search object, also see http://github.com/floere/picky/wiki/Queries-Configuration). That works like this:
56
- # full_books_query = Query::Full.new books
57
- # Full just means that the ids are returned with the results.
58
- # Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
56
+ # books_search = Search.new books
59
57
  #
60
58
  # Now we have somebody we can ask about the index. But no external interface.
61
59
  #
62
- # == route(/regexp1/ => query1, /regexp2/ => query2, ...)
60
+ # == route(/regexp1/ => search1, /regexp2/ => search2, ...)
63
61
  #
64
62
  # Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
65
- # route %r{^/books/full$} => full_books_query
63
+ # route %r{^/books$} => books_search
66
64
  # In full glory:
67
65
  # class MyGreatSearch < Application
68
66
  #
69
67
  # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
70
68
  # books.define_category :title
71
69
  #
72
- # route %r{^/books/full$} => Query::Full.new(books)
70
+ # route %r{^/books$} => Search.new(books)
73
71
  #
74
72
  # end
75
73
  # That's it!
@@ -137,10 +135,9 @@
137
135
  # partial: Partial::Substring.new(:from => -2)
138
136
  # books.define_category :isbn
139
137
  #
140
- # query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
138
+ # options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
141
139
  #
142
- # route %r{^/books/full$} => Query::Full.new(books, query_options)
143
- # route %r{^/books/live$} => Query::Live.new(books, query_options)
140
+ # route %r{^/books$} => Search.new(books, options)
144
141
  #
145
142
  # end
146
143
  # That's actually already a full-blown Picky App!
@@ -166,25 +163,6 @@ class Application
166
163
  Internals::Tokenizers::Query.default = Internals::Tokenizers::Query.new(options)
167
164
  end
168
165
 
169
- # Create a new index for indexing and for querying.
170
- #
171
- # Parameters:
172
- # * name: The identifier of the index. Used:
173
- # - to identify an index (e.g. by you in Rake tasks).
174
- # - in the frontend to describe which index a result came from.
175
- # - index directory naming (index/development/the_identifier/<lots of indexes>)
176
- # * source: The source the data comes from. See Sources::Base.
177
- #
178
- # Options:
179
- # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
180
- #
181
- # TODO Remove in 1.6.
182
- #
183
- def index name, source, options = {}
184
- raise "the Picky application method #index is deprecated, please use Index::Memory.new instead."
185
- Index::Memory.new name, source, options
186
- end
187
-
188
166
  # Routes.
189
167
  #
190
168
  delegate :route, :root, :to => :rack_adapter
@@ -10,11 +10,12 @@ module Internals
10
10
  # and call search_with_text on it if it is called by Rack.
11
11
  #
12
12
  module Rack
13
-
13
+
14
14
  class Query < Base
15
-
15
+
16
16
  @@defaults = {
17
17
  query_key: 'query'.freeze,
18
+ ids_key: 'ids'.freeze,
18
19
  offset_key: 'offset'.freeze,
19
20
  content_type: 'application/json'.freeze
20
21
  }
@@ -23,7 +24,7 @@ module Internals
23
24
  @query = query
24
25
  @defaults = @@defaults.dup
25
26
  end
26
-
27
+
27
28
  def to_app options = {}
28
29
  # For capturing in the lambda.
29
30
  #
@@ -41,23 +42,26 @@ module Internals
41
42
  respond_with results.to_response, content_type
42
43
  end
43
44
  end
44
-
45
+
45
46
  # Helper method to extract the params
46
47
  #
48
+ # Defaults are 20 ids, offset 0.
49
+ #
47
50
  UTF8_STRING = 'UTF-8'.freeze
48
51
  def extracted params
49
52
  [
50
53
  # query is encoded in ASCII
51
54
  #
52
55
  params[@defaults[:query_key]] && params[@defaults[:query_key]].force_encoding(UTF8_STRING),
56
+ params[@defaults[:ids_key]] && params[@defaults[:ids_key]].to_i || 20,
53
57
  params[@defaults[:offset_key]] && params[@defaults[:offset_key]].to_i || 0
54
58
  ]
55
59
  end
56
-
60
+
57
61
  end
58
-
62
+
59
63
  end
60
-
64
+
61
65
  end
62
-
66
+
63
67
  end
@@ -1,20 +1,20 @@
1
1
  module Internals
2
2
 
3
3
  module Query
4
-
4
+
5
5
  # The query indexes class bundles indexes given to a query.
6
6
  #
7
7
  # Example:
8
8
  # # If you call
9
- # Query::Full.new dvd_index, mp3_index, video_index
10
- #
9
+ # Search.new dvd_index, mp3_index, video_index
10
+ #
11
11
  # # What it does is take the three given (API-) indexes and
12
12
  # # bundle them in an index bundle.
13
13
  #
14
14
  class Indexes
15
-
15
+
16
16
  attr_reader :indexes
17
-
17
+
18
18
  # Creates a new Query::Indexes.
19
19
  #
20
20
  # Its job is to generate all possible combinations, but also
@@ -33,22 +33,22 @@ module Internals
33
33
  # Expand the combinations.
34
34
  #
35
35
  possible_combinations = tokens.possible_combinations_in index
36
-
36
+
37
37
  # Optimization for ignoring tokens that allocate to nothing and
38
38
  # can be ignored.
39
39
  # For example in a special search, where "florian" is not
40
40
  # mapped to any category.
41
41
  #
42
42
  possible_combinations.compact!
43
-
43
+
44
44
  # Generate all possible combinations.
45
45
  #
46
46
  expanded_combinations = expand_combinations_from possible_combinations
47
-
47
+
48
48
  # If there are none, try the next allocation.
49
49
  #
50
50
  next previous_allocations unless expanded_combinations
51
-
51
+
52
52
  # Add the wrapped possible allocations to the ones we already have.
53
53
  #
54
54
  previous_allocations + expanded_combinations.map! do |expanded_combination|
@@ -56,7 +56,7 @@ module Internals
56
56
  end
57
57
  end)
58
58
  end
59
-
59
+
60
60
  # This is the core of the search engine.
61
61
  #
62
62
  # Gets an array of
@@ -118,7 +118,7 @@ module Internals
118
118
  # tokens could not be allocated.
119
119
  #
120
120
  return if possible_combinations.any?(&:empty?)
121
-
121
+
122
122
  # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
123
123
  #
124
124
  single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
@@ -131,7 +131,7 @@ module Internals
131
131
  # for later combination in allocations.
132
132
  #
133
133
  possible_combinations.collect! do |combinations|
134
-
134
+
135
135
  # Get the size of the combinations of the first token.
136
136
  #
137
137
  combinations_size = combinations.size
@@ -143,7 +143,7 @@ module Internals
143
143
  # by the number of combinations.
144
144
  #
145
145
  single_mult /= combinations_size unless combinations_size.zero?
146
-
146
+
147
147
  # Expand each combination by the single mult:
148
148
  # [a,b,c]
149
149
  # [a,a,a, b,b,b, c,c,c]
@@ -153,25 +153,25 @@ module Internals
153
153
  combinations = combinations.inject([]) do |total, combination|
154
154
  total + Array.new(single_mult, combination)
155
155
  end * group_mult
156
-
156
+
157
157
  # Multiply the group mult by the combinations size,
158
158
  # since the next combinations' single mult is smaller
159
159
  # and we need to adjust for that.
160
160
  #
161
161
  group_mult = group_mult * combinations_size
162
-
162
+
163
163
  # Return the combinations.
164
164
  #
165
165
  combinations
166
166
  end
167
-
167
+
168
168
  return if possible_combinations.empty?
169
-
169
+
170
170
  possible_combinations.shift.zip *possible_combinations
171
171
  end
172
-
172
+
173
173
  end
174
-
174
+
175
175
  end
176
-
176
+
177
177
  end
data/lib/picky/loader.rb CHANGED
@@ -214,12 +214,6 @@ module Loader # :nodoc:all
214
214
 
215
215
  load_internals 'query/indexes'
216
216
 
217
- # Results.
218
- #
219
- load_internals 'results/base'
220
- load_internals 'results/full'
221
- load_internals 'results/live'
222
-
223
217
  # Configuration.
224
218
  #
225
219
  load_internals 'configuration/index'
@@ -274,11 +268,13 @@ module Loader # :nodoc:all
274
268
  load_relative 'index_bundle'
275
269
  load_relative 'aliases'
276
270
 
277
- # Query.
271
+ # Results.
272
+ #
273
+ load_relative 'results'
274
+
275
+ # Search.
278
276
  #
279
- load_relative 'query/base'
280
- load_relative 'query/live'
281
- load_relative 'query/full'
277
+ load_relative 'search'
282
278
  #
283
279
  # load_relative 'query/solr'
284
280
 
@@ -0,0 +1,93 @@
1
+ module Internals
2
+
3
+ # This is the internal results object. Usually, to_marshal, or to_json
4
+ # is called on it to get a string for the answer.
5
+ #
6
+ class Results
7
+
8
+ # Duration is set externally by the query.
9
+ #
10
+ attr_writer :duration
11
+ attr_reader :allocations, :offset, :amount
12
+
13
+ # Takes the amount of ids, the offset, and an instance of Query::Allocations as params.
14
+ #
15
+ def initialize amount = 0, offset = 0, allocations = Query::Allocations.new
16
+ @offset = offset
17
+ @amount = amount
18
+ @allocations = allocations
19
+ end
20
+ # Create new results and calculate the ids.
21
+ #
22
+ def self.from amount, offset, allocations
23
+ results = new amount, offset, allocations
24
+ results.prepare!
25
+ results
26
+ end
27
+
28
+ # Returns a hash with the allocations, offset, duration and total.
29
+ #
30
+ def serialize
31
+ { allocations: allocations.to_result,
32
+ offset: offset,
33
+ duration: duration,
34
+ total: total }
35
+ end
36
+ # The default format is json.
37
+ #
38
+ def to_response options = {}
39
+ to_json options
40
+ end
41
+ # Convert to json format.
42
+ #
43
+ def to_json options = {}
44
+ serialize.to_json options
45
+ end
46
+
47
+ # This starts the actual processing.
48
+ #
49
+ # Without this, the allocations are not processed,
50
+ # and no ids are calculated.
51
+ #
52
+ def prepare!
53
+ allocations.process! amount, offset
54
+ end
55
+
56
+ # Duration default is 0.
57
+ #
58
+ def duration
59
+ @duration || 0
60
+ end
61
+ # The total results. Delegates to the allocations.
62
+ #
63
+ # Caches.
64
+ #
65
+ def total
66
+ @total || @total = allocations.total || 0
67
+ end
68
+
69
+ # Convenience methods.
70
+ #
71
+
72
+ # Delegates to allocations.
73
+ #
74
+ def ids amount = 20
75
+ allocations.ids amount
76
+ end
77
+
78
+ # Human readable log.
79
+ #
80
+ def to_log query
81
+ "#{log_type}|#{Time.now.to_s(:db)}|#{'%8f' % duration}|#{'%-50s' % query}|#{'%8d' % total}|#{'%4d' % offset}|#{'%2d' % allocations.size}|"
82
+ end
83
+ # The first character in the log designates what type of query it is.
84
+ #
85
+ # No calculated ids means: No results.
86
+ #
87
+ def log_type
88
+ amount.zero?? :'.' : :'>'
89
+ end
90
+
91
+ end
92
+
93
+ end
@@ -0,0 +1,180 @@
1
+ # = Picky Queries
2
+ #
3
+ # A Picky Search is an object which:
4
+ # * holds one or more indexes
5
+ # * offers an interface to query these indexes.
6
+ #
7
+ # You connect URL paths to indexes via a Search.
8
+ #
9
+ # We recommend not using this directly; instead, connect it to a URL and query through that URL
10
+ # (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal.)
11
+ #
12
+ class Search
13
+
14
+ include Helpers::Measuring
15
+
16
+ attr_reader :indexes
17
+ attr_writer :tokenizer, :identifiers_to_remove
18
+ attr_accessor :reduce_to_amount, :weights
19
+
20
+ # Takes:
21
+ # * A number of indexes
22
+ # * Options hash (optional) with:
23
+ # * tokenizer: Tokenizers::Query.default by default.
24
+ # * weights: A hash of weights, or a Query::Weights object.
25
+ #
26
+ def initialize *index_definitions
27
+ options = Hash === index_definitions.last ? index_definitions.pop : {}
28
+
29
+ @indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
30
+ @tokenizer = options[:tokenizer] || Internals::Tokenizers::Query.default
31
+ weights = options[:weights] || Query::Weights.new
32
+ @weights = Hash === weights ? Query::Weights.new(weights) : weights
33
+ end
34
+
35
+ # Returns the right combinations strategy for
36
+ # a number of query indexes.
37
+ #
38
+ # Currently it isn't possible to use Memory and Redis etc.
39
+ # indexes in the same query index group.
40
+ #
41
+ # Picky will raise a Search::DifferentTypesError.
42
+ #
43
+ @@mapping = {
44
+ Index::Memory => Internals::Query::Combinations::Memory,
45
+ Index::Redis => Internals::Query::Combinations::Redis
46
+ }
47
+ def combinations_type_for index_definitions_ary
48
+ index_types = index_definitions_ary.map(&:class)
49
+ index_types.uniq!
50
+ raise_different(index_types) if index_types.size > 1
51
+ !index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
52
+ end
53
+ # Currently it isn't possible to use Memory and Redis etc.
54
+ # indexes in the same query index group.
55
+ #
56
+ class DifferentTypesError < StandardError
57
+ def initialize types
58
+ @types = types
59
+ end
60
+ def to_s
61
+ "Currently it isn't possible to mix #{@types.join(" and ")} Indexes in the same Search instance."
62
+ end
63
+ end
64
+ def raise_different index_types
65
+ raise DifferentTypesError.new(index_types)
66
+ end
67
+
68
+ # This is the main entry point for a query.
69
+ # Use this in specs and also for running queries.
70
+ #
71
+ # Parameters:
72
+ # * text: The search text.
73
+ # * ids = 20: _optional_ The amount of ids to calculate (with offset).
74
+ # * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
75
+ #
76
+ # Note: The Rack adapter calls this method after unravelling the HTTP request.
77
+ #
78
+ def search_with_text text, ids = 20, offset = 0
79
+ search tokenized(text), ids, offset
80
+ end
81
+
82
+ # Runs the actual search using Query::Tokens.
83
+ #
84
+ # Note: Internal method, use #search_with_text.
85
+ #
86
+ def search tokens, ids = 20, offset = 0
87
+ results = nil
88
+
89
+ duration = timed do
90
+ results = execute tokens, ids, offset
91
+ end
92
+ results.duration = duration.round 6
93
+
94
+ results
95
+ end
96
+
97
+ # Execute a search using Query::Tokens.
98
+ #
99
+ # Note: Internal method, use #search_with_text.
100
+ #
101
+ def execute tokens, ids, offset
102
+ Internals::Results.from ids, offset, sorted_allocations(tokens)
103
+ end
104
+
105
+ # Delegates the tokenizing to the query tokenizer.
106
+ #
107
+ # Parameters:
108
+ # * text: The text to tokenize.
109
+ #
110
+ def tokenized text
111
+ @tokenizer.tokenize text
112
+ end
113
+
114
+ # Gets sorted allocations for the tokens.
115
+ #
116
+ # This generates the possible allocations, sorted.
117
+ #
118
+ # TODO Smallify.
119
+ #
120
+ # TODO Rename: allocations
121
+ #
122
+ def sorted_allocations tokens # :nodoc:
123
+ # Get the allocations.
124
+ #
125
+ # TODO Pass in reduce_to_amount (aka max_allocations)
126
+ #
127
+ # TODO uniq, score, sort in there
128
+ #
129
+ allocations = @indexes.allocations_for tokens
130
+
131
+ # Callbacks.
132
+ #
133
+ # TODO Reduce before sort?
134
+ #
135
+ reduce allocations
136
+ remove_from allocations
137
+
138
+ # Remove double allocations.
139
+ #
140
+ allocations.uniq
141
+
142
+ # Score the allocations using weights as bias.
143
+ #
144
+ allocations.calculate_score weights
145
+
146
+ # Sort the allocations.
147
+ # (allocations are sorted according to score, highest to lowest)
148
+ #
149
+ allocations.sort
150
+
151
+ # Return the allocations.
152
+ #
153
+ allocations
154
+ end
155
+ def reduce allocations # :nodoc:
156
+ allocations.reduce_to reduce_to_amount if reduce_to_amount
157
+ end
158
+
159
+ #
160
+ #
161
+ def remove_from allocations # :nodoc:
162
+ allocations.remove identifiers_to_remove
163
+ end
164
+ #
165
+ #
166
+ def identifiers_to_remove # :nodoc:
167
+ @identifiers_to_remove ||= []
168
+ end
169
+
170
+ # Display some nice information for the user.
171
+ #
172
+ def to_s
173
+ s = "#{self.class}("
174
+ s << @indexes.indexes.map(&:name).join(', ')
175
+ s << ", weights: #{@weights}" unless @weights.empty?
176
+ s << ")"
177
+ s
178
+ end
179
+
180
+ end
@@ -48,9 +48,11 @@ module Sources
48
48
  # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
49
49
  # for the given type symbol and category symbol.
50
50
  #
51
- def harvest index, category # :yields: id, text_for_id
52
- # This concrete implementation yields "nothing", override in subclasses.
53
- end
51
+ # Note: Since harvest needs to be implemented, it has no default implementation.
52
+ #
53
+ # def harvest index, category # :yields: id, text_for_id
54
+ #
55
+ # end
54
56
 
55
57
  # Used to take a snapshot of your data if it is fast changing.
56
58
  #
@@ -0,0 +1,8 @@
1
+ desc "Finds where Picky still needs input from you."
2
+ task :todo do
3
+ if system "grep -e 'TODO.*' -n --color=always -R *"
4
+ puts "Picky needs a bit of input from you there. Thanks."
5
+ else
6
+ puts "Picky seems to be fine (no TODOs found)."
7
+ end
8
+ end
@@ -11,13 +11,9 @@ describe Application do
11
11
  books = Index::Memory.new :books, Sources::DB.new('SELECT id, title FROM books', :file => 'app/db.yml')
12
12
  books.define_category :title
13
13
 
14
- full = Query::Full.new books
15
- live = Query::Live.new books
16
-
17
14
  rack_adapter.stub! :exclaim # Stopping it from exclaiming.
18
15
 
19
- route %r{^/books/full} => full
20
- route %r{^/books/live} => live
16
+ route %r{^/books} => Search.new(books)
21
17
  end
22
18
  Internals::Tokenizers::Index.default.tokenize 'some text'
23
19
  Internals::Tokenizers::Query.default.tokenize 'some text'
@@ -56,13 +52,9 @@ describe Application do
56
52
  geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
57
53
  .define_ranged_category(:east1, 1, precision: 3, from: :east)
58
54
 
59
- full = Query::Full.new books_index
60
- live = Query::Live.new books_index
61
-
62
55
  rack_adapter.stub! :exclaim # Stopping it from exclaiming.
63
56
 
64
- route %r{^/books/full} => full
65
- route %r{^/books/live} => live
57
+ route %r{^/books} => Search.new(books_index)
66
58
  end
67
59
  }.should_not raise_error
68
60
  end
@@ -19,6 +19,20 @@ describe CharacterSubstituters do
19
19
  @substituter.substitute(special_character).should == special_character
20
20
  end
21
21
  end
22
+
23
+ # Speed spec at the top since the order of the describes made the
24
+ # speed spec trip. And not on mushrooms either.
25
+ #
26
+ describe "speed" do
27
+ it "is fast" do
28
+ result = performance_of { @substituter.substitute('ä') }
29
+ result.should < 0.00009
30
+ end
31
+ it "is fast" do
32
+ result = performance_of { @substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
33
+ result.should < 0.00015
34
+ end
35
+ end
22
36
 
23
37
  describe "normal characters" do
24
38
  it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
@@ -91,16 +105,5 @@ describe CharacterSubstituters do
91
105
  describe "diacritic" do
92
106
  it_should_substitute 'ñ', 'n'
93
107
  end
94
-
95
- describe "speed" do
96
- it "is fast" do
97
- result = performance_of { @substituter.substitute('ä') }
98
- result.should < 0.00009
99
- end
100
- it "is fast" do
101
- result = performance_of { @substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
102
- result.should < 0.00015
103
- end
104
- end
105
108
 
106
109
  end
@@ -22,11 +22,17 @@ describe Internals::Adapters::Rack::Query do
22
22
  it 'extracts the query' do
23
23
  @adapter.extracted('query' => 'some_query')[0].should == 'some_query'
24
24
  end
25
+ it 'extracts the default ids amount' do
26
+ @adapter.extracted('query' => 'some_query')[1].should == 20
27
+ end
25
28
  it 'extracts the default offset' do
26
- @adapter.extracted('query' => 'some_query')[1].should == 0
29
+ @adapter.extracted('query' => 'some_query')[2].should == 0
30
+ end
31
+ it 'extracts a given ids amount' do
32
+ @adapter.extracted('query' => 'some_query', 'ids' => '123')[1].should == 123
27
33
  end
28
34
  it 'extracts a given offset' do
29
- @adapter.extracted('query' => 'some_query', 'offset' => '123')[1].should == 123
35
+ @adapter.extracted('query' => 'some_query', 'offset' => '123')[2].should == 123
30
36
  end
31
37
  end
32
38