picky 1.5.4 → 2.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -50,26 +50,24 @@
50
50
  #
51
51
  # So now we have indexed data (the title), but nobody to ask the index anything.
52
52
  #
53
- # == Query::Full.new(*indexes, options = {})
53
+ # == Search.new(*indexes, options = {})
54
54
  #
55
55
  # We need somebody who asks the index (a Search object, also see http://github.com/floere/picky/wiki/Queries-Configuration). That works like this:
56
- # full_books_query = Query::Full.new books
57
- # Full just means that the ids are returned with the results.
58
- # Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
56
+ # books_search = Search.new books
59
57
  #
60
58
  # Now we have somebody we can ask about the index. But no external interface.
61
59
  #
62
- # == route(/regexp1/ => query1, /regexp2/ => query2, ...)
60
+ # == route(/regexp1/ => search1, /regexp2/ => search2, ...)
63
61
  #
64
62
  # Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
65
- # route %r{^/books/full$} => full_books_query
63
+ # route %r{^/books$} => books_search
66
64
  # In full glory:
67
65
  # class MyGreatSearch < Application
68
66
  #
69
67
  # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
70
68
  # books.define_category :title
71
69
  #
72
- # route %r{^/books/full$} => Query::Full.new(books)
70
+ # route %r{^/books$} => Search.new(books)
73
71
  #
74
72
  # end
75
73
  # That's it!
@@ -137,10 +135,9 @@
137
135
  # partial: Partial::Substring.new(:from => -2)
138
136
  # books.define_category :isbn
139
137
  #
140
- # query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
138
+ # options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
141
139
  #
142
- # route %r{^/books/full$} => Query::Full.new(books, query_options)
143
- # route %r{^/books/live$} => Query::Live.new(books, query_options)
140
+ # route %r{^/books$} => Search.new(books, options)
144
141
  #
145
142
  # end
146
143
  # That's actually already a full-blown Picky App!
@@ -166,25 +163,6 @@ class Application
166
163
  Internals::Tokenizers::Query.default = Internals::Tokenizers::Query.new(options)
167
164
  end
168
165
 
169
- # Create a new index for indexing and for querying.
170
- #
171
- # Parameters:
172
- # * name: The identifier of the index. Used:
173
- # - to identify an index (e.g. by you in Rake tasks).
174
- # - in the frontend to describe which index a result came from.
175
- # - index directory naming (index/development/the_identifier/<lots of indexes>)
176
- # * source: The source the data comes from. See Sources::Base.
177
- #
178
- # Options:
179
- # * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
180
- #
181
- # TODO Remove in 1.6.
182
- #
183
- def index name, source, options = {}
184
- raise "the Picky application method #index is deprecated, please use Index::Memory.new instead."
185
- Index::Memory.new name, source, options
186
- end
187
-
188
166
  # Routes.
189
167
  #
190
168
  delegate :route, :root, :to => :rack_adapter
@@ -10,11 +10,12 @@ module Internals
10
10
  # and call search_with_text on it if it is called by Rack.
11
11
  #
12
12
  module Rack
13
-
13
+
14
14
  class Query < Base
15
-
15
+
16
16
  @@defaults = {
17
17
  query_key: 'query'.freeze,
18
+ ids_key: 'ids'.freeze,
18
19
  offset_key: 'offset'.freeze,
19
20
  content_type: 'application/json'.freeze
20
21
  }
@@ -23,7 +24,7 @@ module Internals
23
24
  @query = query
24
25
  @defaults = @@defaults.dup
25
26
  end
26
-
27
+
27
28
  def to_app options = {}
28
29
  # For capturing in the lambda.
29
30
  #
@@ -41,23 +42,26 @@ module Internals
41
42
  respond_with results.to_response, content_type
42
43
  end
43
44
  end
44
-
45
+
45
46
  # Helper method to extract the params
46
47
  #
48
+ # Defaults are 20 ids, offset 0.
49
+ #
47
50
  UTF8_STRING = 'UTF-8'.freeze
48
51
  def extracted params
49
52
  [
50
53
  # query is encoded in ASCII
51
54
  #
52
55
  params[@defaults[:query_key]] && params[@defaults[:query_key]].force_encoding(UTF8_STRING),
56
+ params[@defaults[:ids_key]] && params[@defaults[:ids_key]].to_i || 20,
53
57
  params[@defaults[:offset_key]] && params[@defaults[:offset_key]].to_i || 0
54
58
  ]
55
59
  end
56
-
60
+
57
61
  end
58
-
62
+
59
63
  end
60
-
64
+
61
65
  end
62
-
66
+
63
67
  end
@@ -1,20 +1,20 @@
1
1
  module Internals
2
2
 
3
3
  module Query
4
-
4
+
5
5
  # The query indexes class bundles indexes given to a query.
6
6
  #
7
7
  # Example:
8
8
  # # If you call
9
- # Query::Full.new dvd_index, mp3_index, video_index
10
- #
9
+ # Search.new dvd_index, mp3_index, video_index
10
+ #
11
11
  # # What it does is take the three given (API-) indexes and
12
12
  # # bundle them in an index bundle.
13
13
  #
14
14
  class Indexes
15
-
15
+
16
16
  attr_reader :indexes
17
-
17
+
18
18
  # Creates a new Query::Indexes.
19
19
  #
20
20
  # Its job is to generate all possible combinations, but also
@@ -33,22 +33,22 @@ module Internals
33
33
  # Expand the combinations.
34
34
  #
35
35
  possible_combinations = tokens.possible_combinations_in index
36
-
36
+
37
37
  # Optimization for ignoring tokens that allocate to nothing and
38
38
  # can be ignored.
39
39
  # For example in a special search, where "florian" is not
40
40
  # mapped to any category.
41
41
  #
42
42
  possible_combinations.compact!
43
-
43
+
44
44
  # Generate all possible combinations.
45
45
  #
46
46
  expanded_combinations = expand_combinations_from possible_combinations
47
-
47
+
48
48
  # If there are none, try the next allocation.
49
49
  #
50
50
  next previous_allocations unless expanded_combinations
51
-
51
+
52
52
  # Add the wrapped possible allocations to the ones we already have.
53
53
  #
54
54
  previous_allocations + expanded_combinations.map! do |expanded_combination|
@@ -56,7 +56,7 @@ module Internals
56
56
  end
57
57
  end)
58
58
  end
59
-
59
+
60
60
  # This is the core of the search engine.
61
61
  #
62
62
  # Gets an array of
@@ -118,7 +118,7 @@ module Internals
118
118
  # tokens could not be allocated.
119
119
  #
120
120
  return if possible_combinations.any?(&:empty?)
121
-
121
+
122
122
  # Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
123
123
  #
124
124
  single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
@@ -131,7 +131,7 @@ module Internals
131
131
  # for later combination in allocations.
132
132
  #
133
133
  possible_combinations.collect! do |combinations|
134
-
134
+
135
135
  # Get the size of the combinations of the first token.
136
136
  #
137
137
  combinations_size = combinations.size
@@ -143,7 +143,7 @@ module Internals
143
143
  # by the number of combinations.
144
144
  #
145
145
  single_mult /= combinations_size unless combinations_size.zero?
146
-
146
+
147
147
  # Expand each combination by the single mult:
148
148
  # [a,b,c]
149
149
  # [a,a,a, b,b,b, c,c,c]
@@ -153,25 +153,25 @@ module Internals
153
153
  combinations = combinations.inject([]) do |total, combination|
154
154
  total + Array.new(single_mult, combination)
155
155
  end * group_mult
156
-
156
+
157
157
  # Multiply the group mult by the combinations size,
158
158
  # since the next combinations' single mult is smaller
159
159
  # and we need to adjust for that.
160
160
  #
161
161
  group_mult = group_mult * combinations_size
162
-
162
+
163
163
  # Return the combinations.
164
164
  #
165
165
  combinations
166
166
  end
167
-
167
+
168
168
  return if possible_combinations.empty?
169
-
169
+
170
170
  possible_combinations.shift.zip *possible_combinations
171
171
  end
172
-
172
+
173
173
  end
174
-
174
+
175
175
  end
176
-
176
+
177
177
  end
data/lib/picky/loader.rb CHANGED
@@ -214,12 +214,6 @@ module Loader # :nodoc:all
214
214
 
215
215
  load_internals 'query/indexes'
216
216
 
217
- # Results.
218
- #
219
- load_internals 'results/base'
220
- load_internals 'results/full'
221
- load_internals 'results/live'
222
-
223
217
  # Configuration.
224
218
  #
225
219
  load_internals 'configuration/index'
@@ -274,11 +268,13 @@ module Loader # :nodoc:all
274
268
  load_relative 'index_bundle'
275
269
  load_relative 'aliases'
276
270
 
277
- # Query.
271
+ # Results.
272
+ #
273
+ load_relative 'results'
274
+
275
+ # Search.
278
276
  #
279
- load_relative 'query/base'
280
- load_relative 'query/live'
281
- load_relative 'query/full'
277
+ load_relative 'search'
282
278
  #
283
279
  # load_relative 'query/solr'
284
280
 
@@ -0,0 +1,93 @@
1
+ module Internals
2
+
3
+ # This is the internal results object. Usually, to_marshal, or to_json
4
+ # is called on it to get a string for the answer.
5
+ #
6
+ class Results
7
+
8
+ # Duration is set externally by the query.
9
+ #
10
+ attr_writer :duration
11
+ attr_reader :allocations, :offset, :amount
12
+
13
+ # Takes instances of Query::Allocations as param.
14
+ #
15
+ def initialize amount = 0, offset = 0, allocations = Query::Allocations.new
16
+ @offset = offset
17
+ @amount = amount
18
+ @allocations = allocations
19
+ end
20
+ # Create new results and calculate the ids.
21
+ #
22
+ def self.from amount, offset, allocations
23
+ results = new amount, offset, allocations
24
+ results.prepare!
25
+ results
26
+ end
27
+
28
+ # Returns a hash with the allocations, offset, duration and total.
29
+ #
30
+ def serialize
31
+ { allocations: allocations.to_result,
32
+ offset: offset,
33
+ duration: duration,
34
+ total: total }
35
+ end
36
+ # The default format is json.
37
+ #
38
+ def to_response options = {}
39
+ to_json options
40
+ end
41
+ # Convert to json format.
42
+ #
43
+ def to_json options = {}
44
+ serialize.to_json options
45
+ end
46
+
47
+ # This starts the actual processing.
48
+ #
49
+ # Without this, the allocations are not processed,
50
+ # and no ids are calculated.
51
+ #
52
+ def prepare!
53
+ allocations.process! amount, offset
54
+ end
55
+
56
+ # Duration default is 0.
57
+ #
58
+ def duration
59
+ @duration || 0
60
+ end
61
+ # The total results. Delegates to the allocations.
62
+ #
63
+ # Caches.
64
+ #
65
+ def total
66
+ @total || @total = allocations.total || 0
67
+ end
68
+
69
+ # Convenience methods.
70
+ #
71
+
72
+ # Delegates to allocations.
73
+ #
74
+ def ids amount = 20
75
+ allocations.ids amount
76
+ end
77
+
78
+ # Human readable log.
79
+ #
80
+ def to_log query
81
+ "#{log_type}|#{Time.now.to_s(:db)}|#{'%8f' % duration}|#{'%-50s' % query}|#{'%8d' % total}|#{'%4d' % offset}|#{'%2d' % allocations.size}|"
82
+ end
83
+ # The first character in the log designates what type of query it is.
84
+ #
85
+ # No calculated ids means: No results.
86
+ #
87
+ def log_type
88
+ amount.zero?? :'.' : :'>'
89
+ end
90
+
91
+ end
92
+
93
+ end
@@ -0,0 +1,180 @@
1
+ # = Picky Queries
2
+ #
3
+ # A Picky Search is an object which:
4
+ # * holds one or more indexes
5
+ # * offers an interface to query these indexes.
6
+ #
7
+ # You connect URL paths to indexes via a Search.
8
+ #
9
+ # We recommend not to use this directly, but connect it to an URL and query through one of these
10
+ # (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal.)
11
+ #
12
+ class Search
13
+
14
+ include Helpers::Measuring
15
+
16
+ attr_reader :indexes
17
+ attr_writer :tokenizer, :identifiers_to_remove
18
+ attr_accessor :reduce_to_amount, :weights
19
+
20
+ # Takes:
21
+ # * A number of indexes
22
+ # * Options hash (optional) with:
23
+ # * tokenizer: Tokenizers::Query.default by default.
24
+ # * weights: A hash of weights, or a Query::Weights object.
25
+ #
26
+ def initialize *index_definitions
27
+ options = Hash === index_definitions.last ? index_definitions.pop : {}
28
+
29
+ @indexes = Internals::Query::Indexes.new *index_definitions, combinations_type_for(index_definitions)
30
+ @tokenizer = options[:tokenizer] || Internals::Tokenizers::Query.default
31
+ weights = options[:weights] || Query::Weights.new
32
+ @weights = Hash === weights ? Query::Weights.new(weights) : weights
33
+ end
34
+
35
+ # Returns the right combinations strategy for
36
+ # a number of query indexes.
37
+ #
38
+ # Currently it isn't possible to use Memory and Redis etc.
39
+ # indexes in the same query index group.
40
+ #
41
+ # Picky will raise a Search::DifferentTypesError.
42
+ #
43
+ @@mapping = {
44
+ Index::Memory => Internals::Query::Combinations::Memory,
45
+ Index::Redis => Internals::Query::Combinations::Redis
46
+ }
47
+ def combinations_type_for index_definitions_ary
48
+ index_types = index_definitions_ary.map(&:class)
49
+ index_types.uniq!
50
+ raise_different(index_types) if index_types.size > 1
51
+ !index_types.empty? && @@mapping[*index_types] || Internals::Query::Combinations::Memory
52
+ end
53
+ # Currently it isn't possible to use Memory and Redis etc.
54
+ # indexes in the same query index group.
55
+ #
56
+ class DifferentTypesError < StandardError
57
+ def initialize types
58
+ @types = types
59
+ end
60
+ def to_s
61
+ "Currently it isn't possible to mix #{@types.join(" and ")} Indexes in the same Search instance."
62
+ end
63
+ end
64
+ def raise_different index_types
65
+ raise DifferentTypesError.new(index_types)
66
+ end
67
+
68
+ # This is the main entry point for a query.
69
+ # Use this in specs and also for running queries.
70
+ #
71
+ # Parameters:
72
+ # * text: The search text.
73
+ # * ids = 20: _optional_ The amount of ids to calculate (with offset).
74
+ # * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
75
+ #
76
+ # Note: The Rack adapter calls this method after unravelling the HTTP request.
77
+ #
78
+ def search_with_text text, ids = 20, offset = 0
79
+ search tokenized(text), ids, offset
80
+ end
81
+
82
+ # Runs the actual search using Query::Tokens.
83
+ #
84
+ # Note: Internal method, use #search_with_text.
85
+ #
86
+ def search tokens, ids = 20, offset = 0
87
+ results = nil
88
+
89
+ duration = timed do
90
+ results = execute tokens, ids, offset
91
+ end
92
+ results.duration = duration.round 6
93
+
94
+ results
95
+ end
96
+
97
+ # Execute a search using Query::Tokens.
98
+ #
99
+ # Note: Internal method, use #search_with_text.
100
+ #
101
+ def execute tokens, ids, offset
102
+ Internals::Results.from ids, offset, sorted_allocations(tokens)
103
+ end
104
+
105
+ # Delegates the tokenizing to the query tokenizer.
106
+ #
107
+ # Parameters:
108
+ # * text: The text to tokenize.
109
+ #
110
+ def tokenized text
111
+ @tokenizer.tokenize text
112
+ end
113
+
114
+ # Gets sorted allocations for the tokens.
115
+ #
116
+ # This generates the possible allocations, sorted.
117
+ #
118
+ # TODO Smallify.
119
+ #
120
+ # TODO Rename: allocations
121
+ #
122
+ def sorted_allocations tokens # :nodoc:
123
+ # Get the allocations.
124
+ #
125
+ # TODO Pass in reduce_to_amount (aka max_allocations)
126
+ #
127
+ # TODO uniq, score, sort in there
128
+ #
129
+ allocations = @indexes.allocations_for tokens
130
+
131
+ # Callbacks.
132
+ #
133
+ # TODO Reduce before sort?
134
+ #
135
+ reduce allocations
136
+ remove_from allocations
137
+
138
+ # Remove double allocations.
139
+ #
140
+ allocations.uniq
141
+
142
+ # Score the allocations using weights as bias.
143
+ #
144
+ allocations.calculate_score weights
145
+
146
+ # Sort the allocations.
147
+ # (allocations are sorted according to score, highest to lowest)
148
+ #
149
+ allocations.sort
150
+
151
+ # Return the allocations.
152
+ #
153
+ allocations
154
+ end
155
+ def reduce allocations # :nodoc:
156
+ allocations.reduce_to reduce_to_amount if reduce_to_amount
157
+ end
158
+
159
+ #
160
+ #
161
+ def remove_from allocations # :nodoc:
162
+ allocations.remove identifiers_to_remove
163
+ end
164
+ #
165
+ #
166
+ def identifiers_to_remove # :nodoc:
167
+ @identifiers_to_remove ||= []
168
+ end
169
+
170
+ # Display some nice information for the user.
171
+ #
172
+ def to_s
173
+ s = "#{self.class}("
174
+ s << @indexes.indexes.map(&:name).join(', ')
175
+ s << ", weights: #{@weights}" unless @weights.empty?
176
+ s << ")"
177
+ s
178
+ end
179
+
180
+ end
@@ -48,9 +48,11 @@ module Sources
48
48
  # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
49
49
  # for the given type symbol and category symbol.
50
50
  #
51
- def harvest index, category # :yields: id, text_for_id
52
- # This concrete implementation yields "nothing", override in subclasses.
53
- end
51
+ # Note: Since harvest needs to be implemented, it has no default implementation.
52
+ #
53
+ # def harvest index, category # :yields: id, text_for_id
54
+ #
55
+ # end
54
56
 
55
57
  # Used to take a snapshot of your data if it is fast changing.
56
58
  #
@@ -0,0 +1,8 @@
1
+ desc "Finds where Picky still needs input from you."
2
+ task :todo do
3
+ if system "grep -e 'TODO.*' -n --color=always -R *"
4
+ puts "Picky needs a bit of input from you there. Thanks."
5
+ else
6
+ puts "Picky seems to be fine (no TODOs found)."
7
+ end
8
+ end
@@ -11,13 +11,9 @@ describe Application do
11
11
  books = Index::Memory.new :books, Sources::DB.new('SELECT id, title FROM books', :file => 'app/db.yml')
12
12
  books.define_category :title
13
13
 
14
- full = Query::Full.new books
15
- live = Query::Live.new books
16
-
17
14
  rack_adapter.stub! :exclaim # Stopping it from exclaiming.
18
15
 
19
- route %r{^/books/full} => full
20
- route %r{^/books/live} => live
16
+ route %r{^/books} => Search.new(books)
21
17
  end
22
18
  Internals::Tokenizers::Index.default.tokenize 'some text'
23
19
  Internals::Tokenizers::Query.default.tokenize 'some text'
@@ -56,13 +52,9 @@ describe Application do
56
52
  geo_index.define_ranged_category(:north1, 1, precision: 3, from: :north)
57
53
  .define_ranged_category(:east1, 1, precision: 3, from: :east)
58
54
 
59
- full = Query::Full.new books_index
60
- live = Query::Live.new books_index
61
-
62
55
  rack_adapter.stub! :exclaim # Stopping it from exclaiming.
63
56
 
64
- route %r{^/books/full} => full
65
- route %r{^/books/live} => live
57
+ route %r{^/books} => Search.new(books_index)
66
58
  end
67
59
  }.should_not raise_error
68
60
  end
@@ -19,6 +19,20 @@ describe CharacterSubstituters do
19
19
  @substituter.substitute(special_character).should == special_character
20
20
  end
21
21
  end
22
+
23
+ # Speed spec at the top since the order of the describes made the
24
+ # speed spec trip. And not on mushrooms either.
25
+ #
26
+ describe "speed" do
27
+ it "is fast" do
28
+ result = performance_of { @substituter.substitute('ä') }
29
+ result.should < 0.00009
30
+ end
31
+ it "is fast" do
32
+ result = performance_of { @substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
33
+ result.should < 0.00015
34
+ end
35
+ end
22
36
 
23
37
  describe "normal characters" do
24
38
  it_should_not_substitute('abcdefghijklmnopqrstuvwxyz1234567890')
@@ -91,16 +105,5 @@ describe CharacterSubstituters do
91
105
  describe "diacritic" do
92
106
  it_should_substitute 'ñ', 'n'
93
107
  end
94
-
95
- describe "speed" do
96
- it "is fast" do
97
- result = performance_of { @substituter.substitute('ä') }
98
- result.should < 0.00009
99
- end
100
- it "is fast" do
101
- result = performance_of { @substituter.substitute('abcdefghijklmnopqrstuvwxyz1234567890') }
102
- result.should < 0.00015
103
- end
104
- end
105
108
 
106
109
  end
@@ -22,11 +22,17 @@ describe Internals::Adapters::Rack::Query do
22
22
  it 'extracts the query' do
23
23
  @adapter.extracted('query' => 'some_query')[0].should == 'some_query'
24
24
  end
25
+ it 'extracts the default ids amount' do
26
+ @adapter.extracted('query' => 'some_query')[1].should == 20
27
+ end
25
28
  it 'extracts the default offset' do
26
- @adapter.extracted('query' => 'some_query')[1].should == 0
29
+ @adapter.extracted('query' => 'some_query')[2].should == 0
30
+ end
31
+ it 'extracts a given ids amount' do
32
+ @adapter.extracted('query' => 'some_query', 'ids' => '123')[1].should == 123
27
33
  end
28
34
  it 'extracts a given offset' do
29
- @adapter.extracted('query' => 'some_query', 'offset' => '123')[1].should == 123
35
+ @adapter.extracted('query' => 'some_query', 'offset' => '123')[2].should == 123
30
36
  end
31
37
  end
32
38