picky 0.12.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/lib/deployment.rb +2 -2
  2. data/lib/picky/application.rb +172 -12
  3. data/lib/picky/cacher/generator.rb +1 -1
  4. data/lib/picky/calculations/location.rb +9 -1
  5. data/lib/picky/character_substituters/west_european.rb +1 -1
  6. data/lib/picky/configuration/index.rb +1 -1
  7. data/lib/picky/cores.rb +1 -1
  8. data/lib/picky/extensions/array.rb +1 -1
  9. data/lib/picky/extensions/hash.rb +1 -1
  10. data/lib/picky/extensions/module.rb +1 -1
  11. data/lib/picky/extensions/object.rb +1 -1
  12. data/lib/picky/extensions/symbol.rb +1 -1
  13. data/lib/picky/generator.rb +2 -2
  14. data/lib/picky/helpers/cache.rb +7 -5
  15. data/lib/picky/helpers/gc.rb +2 -0
  16. data/lib/picky/helpers/measuring.rb +2 -0
  17. data/lib/picky/index/bundle.rb +1 -1
  18. data/lib/picky/index_api.rb +33 -15
  19. data/lib/picky/indexed/bundle.rb +1 -1
  20. data/lib/picky/indexed/index.rb +1 -1
  21. data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
  22. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  23. data/lib/picky/indexes_api.rb +1 -1
  24. data/lib/picky/indexing/bundle.rb +1 -1
  25. data/lib/picky/indexing/index.rb +1 -1
  26. data/lib/picky/loader.rb +1 -1
  27. data/lib/picky/loggers/search.rb +1 -1
  28. data/lib/picky/performant.rb +3 -0
  29. data/lib/picky/query/allocation.rb +1 -1
  30. data/lib/picky/query/allocations.rb +1 -1
  31. data/lib/picky/query/base.rb +48 -16
  32. data/lib/picky/query/combination.rb +1 -1
  33. data/lib/picky/query/combinations.rb +1 -1
  34. data/lib/picky/query/full.rb +7 -2
  35. data/lib/picky/query/live.rb +9 -7
  36. data/lib/picky/query/qualifiers.rb +6 -2
  37. data/lib/picky/query/solr.rb +1 -1
  38. data/lib/picky/query/token.rb +2 -1
  39. data/lib/picky/query/tokens.rb +4 -1
  40. data/lib/picky/query/weigher.rb +1 -1
  41. data/lib/picky/query/weights.rb +1 -1
  42. data/lib/picky/rack/harakiri.rb +14 -5
  43. data/lib/picky/results/base.rb +1 -1
  44. data/lib/picky/routing.rb +1 -1
  45. data/lib/picky/solr/schema_generator.rb +2 -1
  46. data/lib/picky/sources/base.rb +39 -25
  47. data/lib/picky/sources/couch.rb +22 -8
  48. data/lib/picky/sources/csv.rb +29 -6
  49. data/lib/picky/sources/db.rb +46 -30
  50. data/lib/picky/sources/delicious.rb +12 -2
  51. data/lib/picky/sources/wrappers/base.rb +3 -1
  52. data/lib/picky/tokenizers/base.rb +1 -1
  53. data/project_prototype/Gemfile +1 -1
  54. data/project_prototype/app/README +0 -1
  55. data/spec/lib/calculations/location_spec.rb +28 -16
  56. data/spec/lib/index_api_spec.rb +64 -0
  57. data/spec/lib/indexed/index_spec.rb +2 -2
  58. data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
  59. data/spec/lib/indexing/index_spec.rb +2 -2
  60. data/spec/lib/rack/harakiri_spec.rb +22 -10
  61. metadata +7 -4
data/lib/deployment.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require File.expand_path '../constants', __FILE__
2
2
 
3
- module Picky
4
- module Capistrano
3
+ module Picky # :nodoc:
4
+ module Capistrano # :nodoc:all
5
5
 
6
6
  # Include all
7
7
  #
@@ -1,4 +1,152 @@
1
- # The Picky application wherein the indexing and querying is defined.
1
+ # = Picky Applications
2
+ #
3
+ # A Picky Application is where you configure the whole search engine.
4
+ #
5
+ # This is a step-by-step description on how to configure your Picky app.
6
+ #
7
+ # Start by subclassing Application:
8
+ # class MyGreatSearch < Application
9
+ # # Your configuration goes here.
10
+ # end
11
+ # The generator
12
+ # $ picky project project_name
13
+ # will generate an example <tt>project_name/app/application.rb</tt> file for you
14
+ # with some example code inside.
15
+ #
16
+ # == index(name, source)
17
+ #
18
+ # Next, define where your data comes from. You use the <tt>index</tt> method for that:
19
+ # my_index = index :some_index_name, some_source
20
+ # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
21
+ # class MyGreatSearch < Application
22
+ #
23
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
24
+ #
25
+ # end
26
+ # Now we have an index <tt>books</tt>.
27
+ #
28
+ # That in itself won't do much good.
29
+ #
30
+ # == index.define_category(identifier, options = {})
31
+ #
32
+ # Picky needs us to define categories on the data.
33
+ #
34
+ # Categories help your user find data.
35
+ # It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
36
+ #
37
+ # Let's go ahead and define a category:
38
+ # class MyGreatSearch < Application
39
+ #
40
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
41
+ # books.define_category :title
42
+ #
43
+ # end
44
+ # Now we could already run the indexer:
45
+ # $ rake index
46
+ #
47
+ # (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
48
+ #
49
+ # So now we have indexed data (the title), but nobody to ask the index anything.
50
+ #
51
+ # == Query::Full.new(*indexes, options = {})
52
+ #
53
+ # We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration). That works like this:
54
+ # full_books_query = Query::Full.new books
55
+ # Full just means that the ids are returned with the results.
56
+ # Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
57
+ #
58
+ # Now we have somebody we can ask about the index. But no external interface.
59
+ #
60
+ # == route(/regexp1/ => query1, /regexp2/ => query2, ...)
61
+ #
62
+ # Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
63
+ # route %r{^/books/full$} => full_books_query
64
+ # In full glory:
65
+ # class MyGreatSearch < Application
66
+ #
67
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
68
+ # books.define_category :title
69
+ #
70
+ # full_books_query = Query::Full.new books
71
+ #
72
+ # route %r{^/books/full$} => full_books_query
73
+ #
74
+ # end
75
+ # That's it!
76
+ #
77
+ # Now run the indexer and server:
78
+ # $ rake index
79
+ # $ rake start
80
+ # Run your first query:
81
+ # $ curl 'localhost:8080/books/full?query=hello server'
82
+ #
83
+ # Nice, right? Your first query!
84
+ #
85
+ # Maybe you don't find everything. We need to process the data before it goes into the index.
86
+ #
87
+ # == default_indexing(options = {})
88
+ #
89
+ # That's what the <tt>default_indexing</tt> method is for:
90
+ # default_indexing options
91
+ # Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
92
+ #
93
+ # Same thing with the search text – we need to process that as well.
94
+ #
95
+ # == default_querying(options = {})
96
+ #
97
+ # Analogous to the default_indexing method, we use the <tt>default_querying</tt> method.
98
+ # default_querying options
99
+ # Read more about the options here: http://github.com/floere/picky/wiki/Querying-Configuration
100
+ #
101
+ # And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
102
+ #
103
+ # == Wiki
104
+ #
105
+ # Read more in the Wiki: http://github.com/floere/picky/wiki
106
+ #
107
+ # Have fun!
108
+ #
109
+ # == Full example
110
+ #
111
+ # Our example, fully fleshed out with indexing, querying, and weights:
112
+ # class MyGreatSearch < Application
113
+ #
114
+ # default_indexing removes_characters: /[^a-zA-Z0-9\.]/,
115
+ # stopwords: /\b(and|or|in|on|is|has)\b/,
116
+ # splits_text_on: /\s/,
117
+ # removes_characters_after_splitting: /\./,
118
+ # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
119
+ # normalizes_words: [
120
+ # [/(.*)hausen/, 'hn'],
121
+ # [/\b(\w*)str(eet)?/, 'st']
122
+ # ]
123
+ #
124
+ # default_querying removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
125
+ # stopwords: /\b(and|the|of|it|in|for)\b/,
126
+ # splits_text_on: /[\s\/\-\,\&]+/,
127
+ # removes_characters_after_splitting: /\./,
128
+ # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
129
+ # maximum_tokens: 4
130
+ #
131
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
132
+ # books.define_category :title,
133
+ # qualifiers: [:t, :title, :titre],
134
+ # partial: Partial::Substring.new(:from => 1),
135
+ # similarity: Similarity::Phonetic.new(2)
136
+ # books.define_category :author,
137
+ # partial: Partial::Substring.new(:from => -2)
138
+ # books.define_category :isbn
139
+ #
140
+ # query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
141
+ #
142
+ # full_books_query = Query::Full.new books, query_options
143
+ # live_books_query = Query::Live.new books, query_options
144
+ #
145
+ # route %r{^/books/full$} => full_books_query
146
+ # route %r{^/books/live$} => live_books_query
147
+ #
148
+ # end
149
+ # That's actually already a full-blown Picky App!
2
150
  #
3
151
  class Application
4
152
 
@@ -21,10 +169,20 @@ class Application
21
169
  Tokenizers::Query.default = Tokenizers::Query.new(options)
22
170
  end
23
171
 
24
- # Returns a new index frontend for configuring the index.
172
+ # Create a new index for indexing and for querying.
173
+ #
174
+ # Parameters:
175
+ # * name: The identifier of the index. Used:
176
+ # - to identify an index (e.g. by you in Rake tasks).
177
+ # - in the frontend to describe which index a result came from.
178
+ # - index directory naming (index/development/the_identifier/<lots of indexes>)
179
+ # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
25
180
  #
26
- def index *args
27
- IndexAPI.new *args
181
+ # Options:
182
+ # * result_type: # TODO Rename.
183
+ #
184
+ def index name, source, options = {}
185
+ IndexAPI.new name, source, options
28
186
  end
29
187
 
30
188
  # Routes.
@@ -35,39 +193,41 @@ class Application
35
193
  # API
36
194
 
37
195
 
38
- # An application simply delegates to the routing to handle a request.
196
+ # A Picky application implements the Rack interface.
197
+ #
198
+ # Delegates to its routing to handle a request.
39
199
  #
40
200
  def call env
41
201
  routing.call env
42
202
  end
43
- def routing
203
+ def routing # :nodoc:
44
204
  @routing ||= Routing.new
45
205
  end
46
206
 
47
207
  # Finalize the subclass as soon as it
48
208
  # has finished loading.
49
209
  #
50
- attr_reader :apps
51
- def initialize_apps
210
+ attr_reader :apps # :nodoc:
211
+ def initialize_apps # :nodoc:
52
212
  @apps ||= []
53
213
  end
54
- def inherited app
214
+ def inherited app # :nodoc:
55
215
  initialize_apps
56
216
  apps << app
57
217
  end
58
- def finalize_apps
218
+ def finalize_apps # :nodoc:
59
219
  initialize_apps
60
220
  apps.each &:finalize
61
221
  end
62
222
  # Finalizes the routes.
63
223
  #
64
- def finalize
224
+ def finalize # :nodoc:
65
225
  routing.freeze
66
226
  end
67
227
 
68
228
  # TODO Add more info if possible.
69
229
  #
70
- def to_s
230
+ def to_s # :nodoc:
71
231
  "#{self.name}:\n#{routing}"
72
232
  end
73
233
 
@@ -1,4 +1,4 @@
1
- module Cacher
1
+ module Cacher # :nodoc:all
2
2
 
3
3
  # A cache generator holds an index.
4
4
  #
@@ -1,4 +1,4 @@
1
- module Calculations
1
+ module Calculations # :nodoc:all
2
2
 
3
3
  # A location calculation recalculates a 1-d location
4
4
  # to the Picky internal 1-d "grid".
@@ -18,7 +18,15 @@ module Calculations
18
18
  end
19
19
 
20
20
  def minimum= minimum
21
+ # Add a margin of 1 user grid.
22
+ #
21
23
  minimum -= @user_grid
24
+
25
+ # Add one more grid of margin so that the index key never falls on 0.
26
+ # Why? to_i maps by default to 0.
27
+ #
28
+ minimum -= @grid
29
+
22
30
  @minimum = minimum
23
31
  end
24
32
 
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module CharacterSubstituters
3
+ module CharacterSubstituters # :nodoc:all
4
4
  # Substitutes Umlauts like
5
5
  # ä, ö, ü => ae, oe, ue.
6
6
  # (and more, see specs)
@@ -1,4 +1,4 @@
1
- module Configuration
1
+ module Configuration # :nodoc:all
2
2
 
3
3
  # Holds the configuration for a
4
4
  # index/category combination.
data/lib/picky/cores.rb CHANGED
@@ -2,7 +2,7 @@ Infinity = 1.0/0
2
2
 
3
3
  # Handles processing over multiple cores.
4
4
  #
5
- class Cores
5
+ class Cores # :nodoc:all
6
6
 
7
7
  # Pass it an ary or generator.
8
8
  #
@@ -1,6 +1,6 @@
1
1
  # The Array class we all know and love.
2
2
  #
3
- class Array
3
+ class Array # :nodoc:all
4
4
 
5
5
  # Cluster-uniqs equal neighborly elements.
6
6
  #
@@ -1,6 +1,6 @@
1
1
  # Extensions for the Hash.
2
2
  #
3
- class Hash
3
+ class Hash # :nodoc:all
4
4
 
5
5
  # Dumps jsonized self to the path given. Minus extension.
6
6
  #
@@ -1,6 +1,6 @@
1
1
  # The original Module class.
2
2
  #
3
- class Module
3
+ class Module # :nodoc:all
4
4
 
5
5
  def each_delegate *methods
6
6
  options = methods.pop
@@ -1,4 +1,4 @@
1
- class Object
1
+ class Object # :nodoc:all
2
2
 
3
3
  # Puts a text in the form:
4
4
  # 12:34:56: text here
@@ -1,6 +1,6 @@
1
1
  # Extending the Symbol class.
2
2
  #
3
- class Symbol
3
+ class Symbol # :nodoc:all
4
4
 
5
5
  # :keys.each_subtoken # => yields each of [:keys, :key, :ke, :k]
6
6
  # :keys.each_subtoken(2) # => yields each of [:keys, :key, :ke]
@@ -8,7 +8,7 @@ module Picky
8
8
  # picky <command> <options>
9
9
  # is found.
10
10
  #
11
- class NoGeneratorError < StandardError
11
+ class NoGeneratorError < StandardError # :nodoc:all
12
12
 
13
13
  def initialize generator
14
14
  super usage + possible_commands(generator.types)
@@ -38,7 +38,7 @@ module Picky
38
38
  #
39
39
  # Basically copies a prototype project into a newly generated directory.
40
40
  #
41
- class Generator
41
+ class Generator # :nodoc:all
42
42
 
43
43
  attr_reader :types
44
44
 
@@ -1,6 +1,7 @@
1
+ # TODO Not used anymore? Remove.
1
2
  #
2
- #
3
- module Helpers
3
+ module Helpers # :nodoc:all
4
+
4
5
  module Cache
5
6
  # This is a simple cache.
6
7
  # The store needs to be able to answer to [] and []=.
@@ -10,14 +11,15 @@ module Helpers
10
11
  #
11
12
  results = store[key]
12
13
  return results if results
13
-
14
+
14
15
  results = lambda(&block).call
15
-
16
+
16
17
  # Store results
17
18
  #
18
19
  store[key] = results
19
-
20
+
20
21
  results
21
22
  end
22
23
  end
24
+
23
25
  end
@@ -1,3 +1,5 @@
1
+ # TODO Not used anymore? Remove.
2
+ #
1
3
  module Helpers
2
4
  module GC
3
5
  def gc_disabled &block
@@ -1,5 +1,7 @@
1
1
  # Helper methods for measuring, benchmarking, logging.
2
2
  #
3
+ # TODO Not used anymore? Remove.
4
+ #
3
5
  module Helpers
4
6
  module Measuring
5
7
 
@@ -1,4 +1,4 @@
1
- module Index
1
+ module Index # :nodoc:all
2
2
  # A Bundle is a number of indexes
3
3
  # per [index, category] combination.
4
4
  #
@@ -1,14 +1,14 @@
1
- # This class defines the indexing and index API.
1
+ # This class defines the indexing and index API that is exposed to the user.
2
+ # It provides a single front for both indexing and index options.
2
3
  #
3
4
  # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
4
5
  #
5
- class IndexAPI
6
-
7
- # TODO Delegation.
8
- #
6
+ class IndexAPI # :nodoc:all
9
7
 
10
8
  attr_reader :name, :indexing, :indexed
11
9
 
10
+ # TODO Doc.
11
+ #
12
12
  def initialize name, source, options = {}
13
13
  @name = name
14
14
  @indexing = Indexing::Index.new name, source, options
@@ -19,15 +19,13 @@ class IndexAPI
19
19
  Indexes.register self
20
20
  end
21
21
 
22
- # API.
23
- #
24
- # TODO Spec! Doc!
22
+ # TODO Doc.
25
23
  #
26
24
  def define_category category_name, options = {}
27
25
  category_name = category_name.to_sym
28
26
 
29
- indexing_category = indexing.add_category category_name, options
30
- indexed_category = indexed.add_category category_name, options
27
+ indexing_category = indexing.define_category category_name, options
28
+ indexed_category = indexed.define_category category_name, options
31
29
 
32
30
  yield indexing_category, indexed_category if block_given?
33
31
 
@@ -35,22 +33,42 @@ class IndexAPI
35
33
  end
36
34
  alias category define_category
37
35
 
38
- # TODO Rewrite wrap_exact, wrap_source ?
36
+ #
39
37
  #
40
38
  def define_location name, options = {}
41
- grid = options[:grid] || raise("Grid size needs to be given to a location")
39
+ grid = options[:radius] || raise("Option :radius needs to be set on define_location, it defines the search radius.")
42
40
  precision = options[:precision]
43
41
 
42
+ options = { partial: Partial::None.new }.merge options
43
+
44
44
  define_category name, options do |indexing, indexed|
45
45
  indexing.source = Sources::Wrappers::Location.new indexing, grid: grid, precision: precision
46
46
  indexing.tokenizer = Tokenizers::Index.new
47
- # indexing.partial = Partial::None.new
48
47
 
49
- exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid
48
+ exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid, precision: precision
50
49
  indexed.exact = exact_bundle
51
- indexed.partial = exact_bundle
50
+ indexed.partial = exact_bundle # A partial token also uses the exact index.
52
51
  end
53
52
  end
54
53
  alias location define_location
55
54
 
55
+ # Options
56
+ # * radius (in km).
57
+ #
58
+ def define_map_location name, options = {}
59
+ radius = options[:radius] || raise("Option :radius needs to be set on define_map_location, it defines the search radius.")
60
+
61
+ # The radius is given as if all the locations were on the equator.
62
+ #
63
+ # TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
64
+ #
65
+ # This calculates km -> longitude (degrees).
66
+ #
67
+ # A degree on the equator is equal to ~111,319.9 meters.
68
+ # So a km on the equator is equal to 0.00898312 degrees.
69
+ #
70
+ options[:radius] = radius * 0.00898312
71
+
72
+ define_location name, options
73
+ end
56
74
  end
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexed
3
+ module Indexed # :nodoc:all
4
4
 
5
5
  # This is the _actual_ index.
6
6
  #
@@ -20,7 +20,7 @@ module Indexed
20
20
 
21
21
  # TODO Spec. Doc.
22
22
  #
23
- def add_category category_name, options = {}
23
+ def define_category category_name, options = {}
24
24
  new_category = Category.new category_name, self, options
25
25
  categories << new_category
26
26
  new_category
@@ -28,7 +28,7 @@ module Indexed
28
28
  # Load first the bundle, then extract the config.
29
29
  #
30
30
  bundle.load
31
- minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing.")
31
+ minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
32
32
  @calculation.minimum = minimum
33
33
  end
34
34
 
@@ -1,4 +1,4 @@
1
- module Indexers
1
+ module Indexers # :nodoc:all
2
2
 
3
3
  # Raised if no source is available on a category.
4
4
  #
@@ -1,6 +1,6 @@
1
1
  # Comfortable API convenience class, splits methods to indexes.
2
2
  #
3
- class IndexesAPI
3
+ class IndexesAPI # :nodoc:all
4
4
 
5
5
  attr_reader :indexes, :index_mapping
6
6
 
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexing
3
+ module Indexing # :nodoc:all
4
4
 
5
5
  # This is the indexing bundle.
6
6
  # It does all menial tasks that have nothing to do
@@ -30,7 +30,7 @@ module Indexing
30
30
 
31
31
  # TODO Spec. Doc.
32
32
  #
33
- def add_category category_name, options = {}
33
+ def define_category category_name, options = {}
34
34
  options = default_category_options.merge options
35
35
 
36
36
  new_category = Category.new category_name, self, options
data/lib/picky/loader.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # Loads the search engine and itself.
2
2
  #
3
- module Loader
3
+ module Loader # :nodoc:all
4
4
 
5
5
  # Reloads the whole app.
6
6
  # First itself, then the app.
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Loggers
3
+ module Loggers # :nodoc:all
4
4
  # Log Proxy
5
5
  #
6
6
  class Search
@@ -0,0 +1,3 @@
1
+ module Performant # :nodoc:all
2
+ # C Code here.
3
+ end
@@ -2,7 +2,7 @@ module Query
2
2
  # An allocation has a number of combinations:
3
3
  # [token, index] [other_token, other_index], ...
4
4
  #
5
- class Allocation
5
+ class Allocation # :nodoc:all
6
6
 
7
7
  attr_reader :count, :ids, :score, :combinations, :result_type
8
8
 
@@ -1,7 +1,7 @@
1
1
  module Query
2
2
  # Container class for allocations.
3
3
  #
4
- class Allocations
4
+ class Allocations # :nodoc:all
5
5
 
6
6
  # TODO Remove size
7
7
  #