picky 0.12.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/lib/deployment.rb +2 -2
  2. data/lib/picky/application.rb +172 -12
  3. data/lib/picky/cacher/generator.rb +1 -1
  4. data/lib/picky/calculations/location.rb +9 -1
  5. data/lib/picky/character_substituters/west_european.rb +1 -1
  6. data/lib/picky/configuration/index.rb +1 -1
  7. data/lib/picky/cores.rb +1 -1
  8. data/lib/picky/extensions/array.rb +1 -1
  9. data/lib/picky/extensions/hash.rb +1 -1
  10. data/lib/picky/extensions/module.rb +1 -1
  11. data/lib/picky/extensions/object.rb +1 -1
  12. data/lib/picky/extensions/symbol.rb +1 -1
  13. data/lib/picky/generator.rb +2 -2
  14. data/lib/picky/helpers/cache.rb +7 -5
  15. data/lib/picky/helpers/gc.rb +2 -0
  16. data/lib/picky/helpers/measuring.rb +2 -0
  17. data/lib/picky/index/bundle.rb +1 -1
  18. data/lib/picky/index_api.rb +33 -15
  19. data/lib/picky/indexed/bundle.rb +1 -1
  20. data/lib/picky/indexed/index.rb +1 -1
  21. data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
  22. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  23. data/lib/picky/indexes_api.rb +1 -1
  24. data/lib/picky/indexing/bundle.rb +1 -1
  25. data/lib/picky/indexing/index.rb +1 -1
  26. data/lib/picky/loader.rb +1 -1
  27. data/lib/picky/loggers/search.rb +1 -1
  28. data/lib/picky/performant.rb +3 -0
  29. data/lib/picky/query/allocation.rb +1 -1
  30. data/lib/picky/query/allocations.rb +1 -1
  31. data/lib/picky/query/base.rb +48 -16
  32. data/lib/picky/query/combination.rb +1 -1
  33. data/lib/picky/query/combinations.rb +1 -1
  34. data/lib/picky/query/full.rb +7 -2
  35. data/lib/picky/query/live.rb +9 -7
  36. data/lib/picky/query/qualifiers.rb +6 -2
  37. data/lib/picky/query/solr.rb +1 -1
  38. data/lib/picky/query/token.rb +2 -1
  39. data/lib/picky/query/tokens.rb +4 -1
  40. data/lib/picky/query/weigher.rb +1 -1
  41. data/lib/picky/query/weights.rb +1 -1
  42. data/lib/picky/rack/harakiri.rb +14 -5
  43. data/lib/picky/results/base.rb +1 -1
  44. data/lib/picky/routing.rb +1 -1
  45. data/lib/picky/solr/schema_generator.rb +2 -1
  46. data/lib/picky/sources/base.rb +39 -25
  47. data/lib/picky/sources/couch.rb +22 -8
  48. data/lib/picky/sources/csv.rb +29 -6
  49. data/lib/picky/sources/db.rb +46 -30
  50. data/lib/picky/sources/delicious.rb +12 -2
  51. data/lib/picky/sources/wrappers/base.rb +3 -1
  52. data/lib/picky/tokenizers/base.rb +1 -1
  53. data/project_prototype/Gemfile +1 -1
  54. data/project_prototype/app/README +0 -1
  55. data/spec/lib/calculations/location_spec.rb +28 -16
  56. data/spec/lib/index_api_spec.rb +64 -0
  57. data/spec/lib/indexed/index_spec.rb +2 -2
  58. data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
  59. data/spec/lib/indexing/index_spec.rb +2 -2
  60. data/spec/lib/rack/harakiri_spec.rb +22 -10
  61. metadata +7 -4
data/lib/deployment.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require File.expand_path '../constants', __FILE__
2
2
 
3
- module Picky
4
- module Capistrano
3
+ module Picky # :nodoc:
4
+ module Capistrano # :nodoc:all
5
5
 
6
6
  # Include all
7
7
  #
@@ -1,4 +1,152 @@
1
- # The Picky application wherein the indexing and querying is defined.
1
+ # = Picky Applications
2
+ #
3
+ # A Picky Application is where you configure the whole search engine.
4
+ #
5
+ # This is a step-by-step description on how to configure your Picky app.
6
+ #
7
+ # Start by subclassing Application:
8
+ # class MyGreatSearch < Application
9
+ # # Your configuration goes here.
10
+ # end
11
+ # The generator
12
+ # $ picky project project_name
13
+ # will generate an example <tt>project_name/app/application.rb</tt> file for you
14
+ # with some example code inside.
15
+ #
16
+ # == index(name, source)
17
+ #
18
+ # Next, define where your data comes from. You use the <tt>index</tt> method for that:
19
+ # my_index = index :some_index_name, some_source
20
+ # You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
21
+ # class MyGreatSearch < Application
22
+ #
23
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
24
+ #
25
+ # end
26
+ # Now we have an index <tt>books</tt>.
27
+ #
28
+ # That in itself won't do much good.
29
+ #
30
+ # == index.define_category(identifier, options = {})
31
+ #
32
+ # Picky needs us to define categories on the data.
33
+ #
34
+ # Categories help your user find data.
35
+ # It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
36
+ #
37
+ # Let's go ahead and define a category:
38
+ # class MyGreatSearch < Application
39
+ #
40
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
41
+ # books.define_category :title
42
+ #
43
+ # end
44
+ # Now we could already run the indexer:
45
+ # $ rake index
46
+ #
47
+ # (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
48
+ #
49
+ # So now we have indexed data (the title), but nobody to ask the index anything.
50
+ #
51
+ # == Query::Full.new(*indexes, options = {})
52
+ #
53
+ # We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration). That works like this:
54
+ # full_books_query = Query::Full.new books
55
+ # Full just means that the ids are returned with the results.
56
+ # Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
57
+ #
58
+ # Now we have somebody we can ask about the index. But no external interface.
59
+ #
60
+ # == route(/regexp1/ => query1, /regexp2/ => query2, ...)
61
+ #
62
+ # Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
63
+ # route %r{^/books/full$} => full_books_query
64
+ # In full glory:
65
+ # class MyGreatSearch < Application
66
+ #
67
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
68
+ # books.define_category :title
69
+ #
70
+ # full_books_query = Query::Full.new books
71
+ #
72
+ # route %r{^/books/full$} => full_books_query
73
+ #
74
+ # end
75
+ # That's it!
76
+ #
77
+ # Now run the indexer and server:
78
+ # $ rake index
79
+ # $ rake start
80
+ # Run your first query:
81
+ # $ curl 'localhost:8080/books/full?query=hello server'
82
+ #
83
+ # Nice, right? Your first query!
84
+ #
85
+ # Maybe you don't find everything. We need to process the data before it goes into the index.
86
+ #
87
+ # == default_indexing(options = {})
88
+ #
89
+ # That's what the <tt>default_indexing</tt> method is for:
90
+ # default_indexing options
91
+ # Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
92
+ #
93
+ # Same thing with the search text – we need to process that as well.
94
+ #
95
+ # == default_querying(options = {})
96
+ #
97
+ # Analogous to the default_indexing method, we use the <tt>default_querying</tt> method.
98
+ # default_querying options
99
+ # Read more about the options here: http://github.com/floere/picky/wiki/Querying-Configuration
100
+ #
101
+ # And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
102
+ #
103
+ # == Wiki
104
+ #
105
+ # Read more in the Wiki: http://github.com/floere/picky/wiki
106
+ #
107
+ # Have fun!
108
+ #
109
+ # == Full example
110
+ #
111
+ # Our example, fully fleshed out with indexing, querying, and weights:
112
+ # class MyGreatSearch < Application
113
+ #
114
+ # default_indexing removes_characters: /[^a-zA-Z0-9\.]/,
115
+ # stopwords: /\b(and|or|in|on|is|has)\b/,
116
+ # splits_text_on: /\s/,
117
+ # removes_characters_after_splitting: /\./,
118
+ # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
119
+ # normalizes_words: [
120
+ # [/(.*)hausen/, 'hn'],
121
+ # [/\b(\w*)str(eet)?/, 'st']
122
+ # ]
123
+ #
124
+ # default_querying removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
125
+ # stopwords: /\b(and|the|of|it|in|for)\b/,
126
+ # splits_text_on: /[\s\/\-\,\&]+/,
127
+ # removes_characters_after_splitting: /\./,
128
+ # substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
129
+ # maximum_tokens: 4
130
+ #
131
+ # books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
132
+ # books.define_category :title,
133
+ # qualifiers: [:t, :title, :titre],
134
+ # partial: Partial::Substring.new(:from => 1),
135
+ # similarity: Similarity::Phonetic.new(2)
136
+ # books.define_category :author,
137
+ # partial: Partial::Substring.new(:from => -2)
138
+ # books.define_category :isbn
139
+ #
140
+ # query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
141
+ #
142
+ # full_books_query = Query::Full.new books, query_options
143
+ # live_books_query = Query::Live.new books, query_options
144
+ #
145
+ # route %r{^/books/full$} => full_books_query
146
+ # route %r{^/books/live$} => live_books_query
147
+ #
148
+ # end
149
+ # That's actually already a full-blown Picky App!
2
150
  #
3
151
  class Application
4
152
 
@@ -21,10 +169,20 @@ class Application
21
169
  Tokenizers::Query.default = Tokenizers::Query.new(options)
22
170
  end
23
171
 
24
- # Returns a new index frontend for configuring the index.
172
+ # Create a new index for indexing and for querying.
173
+ #
174
+ # Parameters:
175
+ # * name: The identifier of the index. Used:
176
+ # - to identify an index (e.g. by you in Rake tasks).
177
+ # - in the frontend to describe which index a result came from.
178
+ # - index directory naming (index/development/the_identifier/<lots of indexes>)
179
+ # * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
25
180
  #
26
- def index *args
27
- IndexAPI.new *args
181
+ # Options:
182
+ # * result_type: # TODO Rename.
183
+ #
184
+ def index name, source, options = {}
185
+ IndexAPI.new name, source, options
28
186
  end
29
187
 
30
188
  # Routes.
@@ -35,39 +193,41 @@ class Application
35
193
  # API
36
194
 
37
195
 
38
- # An application simply delegates to the routing to handle a request.
196
+ # A Picky application implements the Rack interface.
197
+ #
198
+ # Delegates to its routing to handle a request.
39
199
  #
40
200
  def call env
41
201
  routing.call env
42
202
  end
43
- def routing
203
+ def routing # :nodoc:
44
204
  @routing ||= Routing.new
45
205
  end
46
206
 
47
207
  # Finalize the subclass as soon as it
48
208
  # has finished loading.
49
209
  #
50
- attr_reader :apps
51
- def initialize_apps
210
+ attr_reader :apps # :nodoc:
211
+ def initialize_apps # :nodoc:
52
212
  @apps ||= []
53
213
  end
54
- def inherited app
214
+ def inherited app # :nodoc:
55
215
  initialize_apps
56
216
  apps << app
57
217
  end
58
- def finalize_apps
218
+ def finalize_apps # :nodoc:
59
219
  initialize_apps
60
220
  apps.each &:finalize
61
221
  end
62
222
  # Finalizes the routes.
63
223
  #
64
- def finalize
224
+ def finalize # :nodoc:
65
225
  routing.freeze
66
226
  end
67
227
 
68
228
  # TODO Add more info if possible.
69
229
  #
70
- def to_s
230
+ def to_s # :nodoc:
71
231
  "#{self.name}:\n#{routing}"
72
232
  end
73
233
 
@@ -1,4 +1,4 @@
1
- module Cacher
1
+ module Cacher # :nodoc:all
2
2
 
3
3
  # A cache generator holds an index.
4
4
  #
@@ -1,4 +1,4 @@
1
- module Calculations
1
+ module Calculations # :nodoc:all
2
2
 
3
3
  # A location calculation recalculates a 1-d location
4
4
  # to the Picky internal 1-d "grid".
@@ -18,7 +18,15 @@ module Calculations
18
18
  end
19
19
 
20
20
  def minimum= minimum
21
+ # Add a margin of 1 user grid.
22
+ #
21
23
  minimum -= @user_grid
24
+
25
+ # Add one more grid of margin so that the index key never falls on 0.
26
+ # Why? to_i maps by default to 0.
27
+ #
28
+ minimum -= @grid
29
+
22
30
  @minimum = minimum
23
31
  end
24
32
 
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module CharacterSubstituters
3
+ module CharacterSubstituters # :nodoc:all
4
4
  # Substitutes Umlauts like
5
5
  # ä, ö, ü => ae, oe, ue.
6
6
  # (and more, see specs)
@@ -1,4 +1,4 @@
1
- module Configuration
1
+ module Configuration # :nodoc:all
2
2
 
3
3
  # Holds the configuration for a
4
4
  # index/category combination.
data/lib/picky/cores.rb CHANGED
@@ -2,7 +2,7 @@ Infinity = 1.0/0
2
2
 
3
3
  # Handles processing over multiple cores.
4
4
  #
5
- class Cores
5
+ class Cores # :nodoc:all
6
6
 
7
7
  # Pass it an ary or generator.
8
8
  #
@@ -1,6 +1,6 @@
1
1
  # The Array class we all know and love.
2
2
  #
3
- class Array
3
+ class Array # :nodoc:all
4
4
 
5
5
  # Cluster-uniqs equal neighborly elements.
6
6
  #
@@ -1,6 +1,6 @@
1
1
  # Extensions for the Hash.
2
2
  #
3
- class Hash
3
+ class Hash # :nodoc:all
4
4
 
5
5
  # Dumps jsonized self to the path given. Minus extension.
6
6
  #
@@ -1,6 +1,6 @@
1
1
  # The original Module class.
2
2
  #
3
- class Module
3
+ class Module # :nodoc:all
4
4
 
5
5
  def each_delegate *methods
6
6
  options = methods.pop
@@ -1,4 +1,4 @@
1
- class Object
1
+ class Object # :nodoc:all
2
2
 
3
3
  # Puts a text in the form:
4
4
  # 12:34:56: text here
@@ -1,6 +1,6 @@
1
1
  # Extending the Symbol class.
2
2
  #
3
- class Symbol
3
+ class Symbol # :nodoc:all
4
4
 
5
5
  # :keys.each_subtoken # => yields each of [:keys, :key, :ke, :k]
6
6
  # :keys.each_subtoken(2) # => yields each of [:keys, :key, :ke]
@@ -8,7 +8,7 @@ module Picky
8
8
  # picky <command> <options>
9
9
  # is found.
10
10
  #
11
- class NoGeneratorError < StandardError
11
+ class NoGeneratorError < StandardError # :nodoc:all
12
12
 
13
13
  def initialize generator
14
14
  super usage + possible_commands(generator.types)
@@ -38,7 +38,7 @@ module Picky
38
38
  #
39
39
  # Basically copies a prototype project into a newly generated directory.
40
40
  #
41
- class Generator
41
+ class Generator # :nodoc:all
42
42
 
43
43
  attr_reader :types
44
44
 
@@ -1,6 +1,7 @@
1
+ # TODO Not used anymore? Remove.
1
2
  #
2
- #
3
- module Helpers
3
+ module Helpers # :nodoc:all
4
+
4
5
  module Cache
5
6
  # This is a simple cache.
6
7
  # The store needs to be able to answer to [] and []=.
@@ -10,14 +11,15 @@ module Helpers
10
11
  #
11
12
  results = store[key]
12
13
  return results if results
13
-
14
+
14
15
  results = lambda(&block).call
15
-
16
+
16
17
  # Store results
17
18
  #
18
19
  store[key] = results
19
-
20
+
20
21
  results
21
22
  end
22
23
  end
24
+
23
25
  end
@@ -1,3 +1,5 @@
1
+ # TODO Not used anymore? Remove.
2
+ #
1
3
  module Helpers
2
4
  module GC
3
5
  def gc_disabled &block
@@ -1,5 +1,7 @@
1
1
  # Helper methods for measuring, benchmarking, logging.
2
2
  #
3
+ # TODO Not used anymore? Remove.
4
+ #
3
5
  module Helpers
4
6
  module Measuring
5
7
 
@@ -1,4 +1,4 @@
1
- module Index
1
+ module Index # :nodoc:all
2
2
  # A Bundle is a number of indexes
3
3
  # per [index, category] combination.
4
4
  #
@@ -1,14 +1,14 @@
1
- # This class defines the indexing and index API.
1
+ # This class defines the indexing and index API that is exposed to the user.
2
+ # It provides a single front for both indexing and index options.
2
3
  #
3
4
  # Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
4
5
  #
5
- class IndexAPI
6
-
7
- # TODO Delegation.
8
- #
6
+ class IndexAPI # :nodoc:all
9
7
 
10
8
  attr_reader :name, :indexing, :indexed
11
9
 
10
+ # TODO Doc.
11
+ #
12
12
  def initialize name, source, options = {}
13
13
  @name = name
14
14
  @indexing = Indexing::Index.new name, source, options
@@ -19,15 +19,13 @@ class IndexAPI
19
19
  Indexes.register self
20
20
  end
21
21
 
22
- # API.
23
- #
24
- # TODO Spec! Doc!
22
+ # TODO Doc.
25
23
  #
26
24
  def define_category category_name, options = {}
27
25
  category_name = category_name.to_sym
28
26
 
29
- indexing_category = indexing.add_category category_name, options
30
- indexed_category = indexed.add_category category_name, options
27
+ indexing_category = indexing.define_category category_name, options
28
+ indexed_category = indexed.define_category category_name, options
31
29
 
32
30
  yield indexing_category, indexed_category if block_given?
33
31
 
@@ -35,22 +33,42 @@ class IndexAPI
35
33
  end
36
34
  alias category define_category
37
35
 
38
- # TODO Rewrite wrap_exact, wrap_source ?
36
+ #
39
37
  #
40
38
  def define_location name, options = {}
41
- grid = options[:grid] || raise("Grid size needs to be given to a location")
39
+ grid = options[:radius] || raise("Option :radius needs to be set on define_location, it defines the search radius.")
42
40
  precision = options[:precision]
43
41
 
42
+ options = { partial: Partial::None.new }.merge options
43
+
44
44
  define_category name, options do |indexing, indexed|
45
45
  indexing.source = Sources::Wrappers::Location.new indexing, grid: grid, precision: precision
46
46
  indexing.tokenizer = Tokenizers::Index.new
47
- # indexing.partial = Partial::None.new
48
47
 
49
- exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid
48
+ exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid, precision: precision
50
49
  indexed.exact = exact_bundle
51
- indexed.partial = exact_bundle
50
+ indexed.partial = exact_bundle # A partial token also uses the exact index.
52
51
  end
53
52
  end
54
53
  alias location define_location
55
54
 
55
+ # Options
56
+ # * radius (in km).
57
+ #
58
+ def define_map_location name, options = {}
59
+ radius = options[:radius] || raise("Option :radius needs to be set on define_map_location, it defines the search radius.")
60
+
61
+ # The radius is given as if all the locations were on the equator.
62
+ #
63
+ # TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
64
+ #
65
+ # This calculates km -> longitude (degrees).
66
+ #
67
+ # A degree on the equator is equal to ~111,319.9 meters.
68
+ # So a km on the equator is equal to 0.00898312 degrees.
69
+ #
70
+ options[:radius] = radius * 0.00898312
71
+
72
+ define_location name, options
73
+ end
56
74
  end
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexed
3
+ module Indexed # :nodoc:all
4
4
 
5
5
  # This is the _actual_ index.
6
6
  #
@@ -20,7 +20,7 @@ module Indexed
20
20
 
21
21
  # TODO Spec. Doc.
22
22
  #
23
- def add_category category_name, options = {}
23
+ def define_category category_name, options = {}
24
24
  new_category = Category.new category_name, self, options
25
25
  categories << new_category
26
26
  new_category
@@ -28,7 +28,7 @@ module Indexed
28
28
  # Load first the bundle, then extract the config.
29
29
  #
30
30
  bundle.load
31
- minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing.")
31
+ minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
32
32
  @calculation.minimum = minimum
33
33
  end
34
34
 
@@ -1,4 +1,4 @@
1
- module Indexers
1
+ module Indexers # :nodoc:all
2
2
 
3
3
  # Raised if no source is available on a category.
4
4
  #
@@ -1,6 +1,6 @@
1
1
  # Comfortable API convenience class, splits methods to indexes.
2
2
  #
3
- class IndexesAPI
3
+ class IndexesAPI # :nodoc:all
4
4
 
5
5
  attr_reader :indexes, :index_mapping
6
6
 
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Indexing
3
+ module Indexing # :nodoc:all
4
4
 
5
5
  # This is the indexing bundle.
6
6
  # It does all menial tasks that have nothing to do
@@ -30,7 +30,7 @@ module Indexing
30
30
 
31
31
  # TODO Spec. Doc.
32
32
  #
33
- def add_category category_name, options = {}
33
+ def define_category category_name, options = {}
34
34
  options = default_category_options.merge options
35
35
 
36
36
  new_category = Category.new category_name, self, options
data/lib/picky/loader.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  # Loads the search engine and itself.
2
2
  #
3
- module Loader
3
+ module Loader # :nodoc:all
4
4
 
5
5
  # Reloads the whole app.
6
6
  # First itself, then the app.
@@ -1,6 +1,6 @@
1
1
  # encoding: utf-8
2
2
  #
3
- module Loggers
3
+ module Loggers # :nodoc:all
4
4
  # Log Proxy
5
5
  #
6
6
  class Search
@@ -0,0 +1,3 @@
1
+ module Performant # :nodoc:all
2
+ # C Code here.
3
+ end
@@ -2,7 +2,7 @@ module Query
2
2
  # An allocation has a number of combinations:
3
3
  # [token, index] [other_token, other_index], ...
4
4
  #
5
- class Allocation
5
+ class Allocation # :nodoc:all
6
6
 
7
7
  attr_reader :count, :ids, :score, :combinations, :result_type
8
8
 
@@ -1,7 +1,7 @@
1
1
  module Query
2
2
  # Container class for allocations.
3
3
  #
4
- class Allocations
4
+ class Allocations # :nodoc:all
5
5
 
6
6
  # TODO Remove size
7
7
  #