picky 0.12.3 → 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (61) hide show
  1. data/lib/deployment.rb +2 -2
  2. data/lib/picky/application.rb +172 -12
  3. data/lib/picky/cacher/generator.rb +1 -1
  4. data/lib/picky/calculations/location.rb +9 -1
  5. data/lib/picky/character_substituters/west_european.rb +1 -1
  6. data/lib/picky/configuration/index.rb +1 -1
  7. data/lib/picky/cores.rb +1 -1
  8. data/lib/picky/extensions/array.rb +1 -1
  9. data/lib/picky/extensions/hash.rb +1 -1
  10. data/lib/picky/extensions/module.rb +1 -1
  11. data/lib/picky/extensions/object.rb +1 -1
  12. data/lib/picky/extensions/symbol.rb +1 -1
  13. data/lib/picky/generator.rb +2 -2
  14. data/lib/picky/helpers/cache.rb +7 -5
  15. data/lib/picky/helpers/gc.rb +2 -0
  16. data/lib/picky/helpers/measuring.rb +2 -0
  17. data/lib/picky/index/bundle.rb +1 -1
  18. data/lib/picky/index_api.rb +33 -15
  19. data/lib/picky/indexed/bundle.rb +1 -1
  20. data/lib/picky/indexed/index.rb +1 -1
  21. data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
  22. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  23. data/lib/picky/indexes_api.rb +1 -1
  24. data/lib/picky/indexing/bundle.rb +1 -1
  25. data/lib/picky/indexing/index.rb +1 -1
  26. data/lib/picky/loader.rb +1 -1
  27. data/lib/picky/loggers/search.rb +1 -1
  28. data/lib/picky/performant.rb +3 -0
  29. data/lib/picky/query/allocation.rb +1 -1
  30. data/lib/picky/query/allocations.rb +1 -1
  31. data/lib/picky/query/base.rb +48 -16
  32. data/lib/picky/query/combination.rb +1 -1
  33. data/lib/picky/query/combinations.rb +1 -1
  34. data/lib/picky/query/full.rb +7 -2
  35. data/lib/picky/query/live.rb +9 -7
  36. data/lib/picky/query/qualifiers.rb +6 -2
  37. data/lib/picky/query/solr.rb +1 -1
  38. data/lib/picky/query/token.rb +2 -1
  39. data/lib/picky/query/tokens.rb +4 -1
  40. data/lib/picky/query/weigher.rb +1 -1
  41. data/lib/picky/query/weights.rb +1 -1
  42. data/lib/picky/rack/harakiri.rb +14 -5
  43. data/lib/picky/results/base.rb +1 -1
  44. data/lib/picky/routing.rb +1 -1
  45. data/lib/picky/solr/schema_generator.rb +2 -1
  46. data/lib/picky/sources/base.rb +39 -25
  47. data/lib/picky/sources/couch.rb +22 -8
  48. data/lib/picky/sources/csv.rb +29 -6
  49. data/lib/picky/sources/db.rb +46 -30
  50. data/lib/picky/sources/delicious.rb +12 -2
  51. data/lib/picky/sources/wrappers/base.rb +3 -1
  52. data/lib/picky/tokenizers/base.rb +1 -1
  53. data/project_prototype/Gemfile +1 -1
  54. data/project_prototype/app/README +0 -1
  55. data/spec/lib/calculations/location_spec.rb +28 -16
  56. data/spec/lib/index_api_spec.rb +64 -0
  57. data/spec/lib/indexed/index_spec.rb +2 -2
  58. data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
  59. data/spec/lib/indexing/index_spec.rb +2 -2
  60. data/spec/lib/rack/harakiri_spec.rb +22 -10
  61. metadata +7 -4
@@ -1,7 +1,23 @@
1
+ # = Picky Queries
2
+ #
3
+ # A Picky Query is an object which:
4
+ # * holds one or more indexes
5
+ # * offers an interface to query these indexes.
6
+ #
7
+ # You connect URL paths to indexes via a Query.
8
+ #
9
+ # We recommend not using this directly, but connecting it to a URL and querying through that URL
10
+ # (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal.)
11
+ #
12
+ # There are two flavors of queries:
13
+ # * Query::Full (Full results with all infos)
14
+ # * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
15
+ #
1
16
  module Query
2
- # Base query class.
17
+
18
+ # The base query class.
3
19
  #
4
- # Initialized with the index types it should search on.
20
+ # Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
5
21
  #
6
22
  class Base
7
23
 
@@ -11,11 +27,11 @@ module Query
11
27
  attr_accessor :reduce_to_amount, :weights
12
28
 
13
29
  # Takes:
14
- # * A number of indexes
15
- # * Options hash (optional) with:
16
- # * weigher: A weigher. Query::Weigher by default.
17
- # * tokenizer: Tokenizers::Query.default by default.
18
- # * weights: A hash of weights, or a Query::Weights object.
30
+ # * A number of indexes
31
+ # * Options hash (optional) with:
32
+ # * weigher: A weigher. Query::Weigher by default.
33
+ # * tokenizer: Tokenizers::Query.default by default.
34
+ # * weights: A hash of weights, or a Query::Weights object.
19
35
  #
20
36
  def initialize *index_type_definitions
21
37
  options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
@@ -27,13 +43,21 @@ module Query
27
43
  @weights = Hash === weights ? Weights.new(weights) : weights
28
44
  end
29
45
 
30
- # Convenience method.
46
+ # Search through this method.
47
+ #
48
+ # Parameters:
49
+ # * text: The search text.
50
+ # * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
51
+ #
52
+ # Note: The Routing uses this method after unravelling the HTTP request.
31
53
  #
32
54
  def search_with_text text, offset = 0
33
55
  search tokenized(text), offset
34
56
  end
35
57
 
36
- # This runs the actual search.
58
+ # Runs the actual search using Query::Tokens.
59
+ #
60
+ # Note: Internal method, use #search_with_text.
37
61
  #
38
62
  def search tokens, offset = 0
39
63
  results = nil
@@ -46,7 +70,9 @@ module Query
46
70
  results
47
71
  end
48
72
 
49
- # Return nil if no results have been found.
73
+ # Execute a search using Query::Tokens.
74
+ #
75
+ # Note: Internal method, use #search_with_text.
50
76
  #
51
77
  def execute tokens, offset
52
78
  results_from offset, sorted_allocations(tokens)
@@ -54,12 +80,18 @@ module Query
54
80
 
55
81
  # Returns an empty result with default values.
56
82
  #
83
+ # Parameters:
84
+ # * offset = 0: _optional_ The offset to use for the empty results.
85
+ #
57
86
  def empty_results offset = 0
58
87
  result_type.new offset
59
88
  end
60
89
 
61
90
  # Delegates the tokenizing to the query tokenizer.
62
91
  #
92
+ # Parameters:
93
+ # * text: The text to tokenize.
94
+ #
63
95
  def tokenized text
64
96
  @tokenizer.tokenize text
65
97
  end
@@ -72,7 +104,7 @@ module Query
72
104
  #
73
105
  # TODO Rename: allocations
74
106
  #
75
- def sorted_allocations tokens
107
+ def sorted_allocations tokens # :nodoc:
76
108
  # Get the allocations.
77
109
  #
78
110
  # TODO Pass in reduce_to_amount (aka max_allocations)
@@ -105,18 +137,18 @@ module Query
105
137
  #
106
138
  allocations
107
139
  end
108
- def reduce allocations
140
+ def reduce allocations # :nodoc:
109
141
  allocations.reduce_to reduce_to_amount if reduce_to_amount
110
142
  end
111
- def remove_identifiers?
143
+ def remove_identifiers? # :nodoc:
112
144
  identifiers_to_remove.present?
113
145
  end
114
- def remove_from allocations
146
+ def remove_from allocations # :nodoc:
115
147
  allocations.remove(identifiers_to_remove) if remove_identifiers?
116
148
  end
117
149
  # Override. TODO No, redesign.
118
150
  #
119
- def identifiers_to_remove
151
+ def identifiers_to_remove # :nodoc:
120
152
  @identifiers_to_remove ||= []
121
153
  end
122
154
 
@@ -126,7 +158,7 @@ module Query
126
158
  #
127
159
  # TODO Move to results. result_type.from allocations, offset
128
160
  #
129
- def results_from offset = 0, allocations = nil
161
+ def results_from offset = 0, allocations = nil # :nodoc:
130
162
  results = result_type.new offset, allocations
131
163
  results.prepare!
132
164
  results
@@ -7,7 +7,7 @@ module Query
7
7
  #
8
8
  # An allocation consists of a number of combinations.
9
9
  #
10
- class Combination
10
+ class Combination # :nodoc:all
11
11
 
12
12
  attr_reader :token, :bundle, :category_name
13
13
 
@@ -4,7 +4,7 @@ module Query
4
4
  #
5
5
  # They are the core of an allocation.
6
6
  #
7
- class Combinations
7
+ class Combinations # :nodoc:all
8
8
 
9
9
  attr_reader :combinations
10
10
 
@@ -1,10 +1,15 @@
1
1
  module Query
2
2
 
3
- # This is the query class for performing full fledged queries.
3
+ # This Query class performs full queries.
4
+ #
5
+ # It includes in its results:
6
+ # * A count of results.
7
+ # * All possible combinations with their weights.
8
+ # * The top X result ids.
4
9
  #
5
10
  class Full < Base
6
11
 
7
- # Generates full results.
12
+ # Returns Results::Full as its result type.
8
13
  #
9
14
  def result_type
10
15
  Results::Full
@@ -1,17 +1,19 @@
1
1
  module Query
2
2
 
3
- # This is the query class for live queries.
3
+ # This Query class performs live queries.
4
4
  #
5
- # It does:
6
- # * Return a count of results.
5
+ # It is useful for updating counters, or any job where you don't need the result ids.
7
6
  #
8
- # It does NOT:
9
- # * Sort results geographically.
10
- # * Do any postprocessing.
7
+ # It includes in its results:
8
+ # * A count of results.
9
+ # * All possible combinations with their weights.
10
+ #
11
+ # But not:
12
+ # * The top X result ids.
11
13
  #
12
14
  class Live < Base
13
15
 
14
- # Generates results from allocations.
16
+ # Returns Results::Live as its result type.
15
17
  #
16
18
  def result_type
17
19
  Results::Live
@@ -1,9 +1,13 @@
1
1
  # coding: utf-8
2
+ #
3
+
4
+ #
5
+ #
2
6
  module Query
3
7
 
4
8
  # A single qualifier.
5
9
  #
6
- class Qualifier
10
+ class Qualifier # :nodoc:all
7
11
 
8
12
  attr_reader :normalized_qualifier, :codes
9
13
 
@@ -28,7 +32,7 @@ module Query
28
32
 
29
33
  # Collection class for qualifiers.
30
34
  #
31
- class Qualifiers
35
+ class Qualifiers # :nodoc:all
32
36
 
33
37
  include Singleton
34
38
 
@@ -4,7 +4,7 @@ module Query
4
4
 
5
5
  #
6
6
  #
7
- class Solr < Base
7
+ class Solr < Base # :nodoc:all
8
8
 
9
9
  attr_reader :server, :index_types
10
10
 
@@ -1,4 +1,5 @@
1
1
  module Query
2
+
2
3
  # This is a query token. Together with other tokens it makes up a query.
3
4
  #
4
5
  # It remembers the original form, and a normalized form.
@@ -7,7 +8,7 @@ module Query
7
8
  #
8
9
  # TODO Make partial / similarity char configurable.
9
10
  #
10
- class Token
11
+ class Token # :nodoc:all
11
12
 
12
13
  attr_reader :text, :original
13
14
  attr_writer :similar
@@ -1,10 +1,13 @@
1
1
  # encoding: utf-8
2
2
  #
3
+
4
+ #
5
+ #
3
6
  module Query
4
7
 
5
8
  # This class primarily handles switching through similar token constellations.
6
9
  #
7
- class Tokens
10
+ class Tokens # :nodoc:all
8
11
 
9
12
  # Basically delegates to its internal tokens array.
10
13
  #
@@ -2,7 +2,7 @@ module Query
2
2
 
3
3
  # Weighs the given tokens, generates Allocations -> Allocation -> Combinations.
4
4
  #
5
- class Weigher
5
+ class Weigher # :nodoc:all
6
6
 
7
7
  attr_reader :indexes
8
8
 
@@ -2,7 +2,7 @@ module Query
2
2
 
3
3
  # Calculates weights for certain combinations.
4
4
  #
5
- class Weights
5
+ class Weights # :nodoc:all
6
6
 
7
7
  #
8
8
  #
@@ -1,4 +1,4 @@
1
- module Rack
1
+ module Rack # :nodoc:
2
2
 
3
3
  # Simple Rack Middleware to kill Unicorns after X requests.
4
4
  #
@@ -16,7 +16,6 @@ module Rack
16
16
  # Set the amount of requests before the Unicorn commits Harakiri.
17
17
  #
18
18
  cattr_accessor :after
19
- attr_reader :quit_after_requests
20
19
 
21
20
  def initialize app
22
21
  @app = app
@@ -25,8 +24,10 @@ module Rack
25
24
  @quit_after_requests = self.class.after || 50
26
25
  end
27
26
 
28
- # Harakiri is a middleware, so it passes the call on after checking if it
29
- # is time to honorably retire.
27
+ # #call interface method.
28
+ #
29
+ # Harakiri is a middleware, so it delegates to the app or
30
+ # the next middleware after checking if it is time to honorably retire.
30
31
  #
31
32
  def call env
32
33
  harakiri
@@ -37,9 +38,17 @@ module Rack
37
38
  #
38
39
  # If yes, kills itself (Unicorn will answer the request, honorably).
39
40
  #
41
+ # Note: Sends its process a QUIT signal if it is time.
42
+ #
40
43
  def harakiri
41
44
  @requests = @requests + 1
42
- Process.kill(:QUIT, Process.pid) if @requests >= @quit_after_requests
45
+ Process.kill(:QUIT, Process.pid) if harakiri?
46
+ end
47
+
48
+ # Is it time to honorably retire?
49
+ #
50
+ def harakiri?
51
+ @requests >= @quit_after_requests
43
52
  end
44
53
 
45
54
  end
@@ -1,4 +1,4 @@
1
- module Results
1
+ module Results # :nodoc:all
2
2
 
3
3
  # This is the internal results object. Usually, to_marshal, or to_json
4
4
  # is called on it to get a string for the answer.
data/lib/picky/routing.rb CHANGED
@@ -2,7 +2,7 @@ require 'rack/mount'
2
2
 
3
3
  #
4
4
  #
5
- class Routing
5
+ class Routing # :nodoc:all
6
6
 
7
7
  @@defaults = {
8
8
  query_key: 'query'.freeze,
@@ -1,4 +1,5 @@
1
- module Solr
1
+ module Solr # :nodoc:all
2
+
2
3
  class SchemaGenerator
3
4
 
4
5
  attr_reader :types
@@ -1,50 +1,64 @@
1
+ # = Data Sources
2
+ #
3
+ # Currently, Picky offers the following Sources:
4
+ # * CSV (comma – or other – separated file)
5
+ # * Couch (CouchDB, key-value store)
6
+ # * DB (Databases, foremost MySQL)
7
+ # * Delicious (http://del.icio.us, online bookmarking service)
8
+ # See also:
9
+ # http://github.com/floere/picky/wiki/Sources-Configuration
10
+ #
11
+ # Don't worry if your source isn't here. Adding your own is easy:
12
+ # http://github.com/floere/picky/wiki/Contributing-sources
13
+ #
1
14
  module Sources
2
15
 
3
16
  # Sources are where your data comes from.
4
17
  #
5
- # Basically, a source has 1-3 methods.
6
- # * harvest: Used by the indexer to gather data.
7
- # Yields an indexed_id (string or integer) and a string value.
18
+ # A source has 1 mandatory and 2 optional methods:
19
+ # * connect_backend (_optional_): called once for each type/category pair.
20
+ # * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
21
+ # * take_snapshot (_optional_): called once for each type.
22
+ #
23
+ # This base class "implements" all these methods, but they don't do anything.
24
+ # Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
8
25
  #
9
- # * connect_backend: Optional, called once for each type/category pair.
10
- # * take_snapshot: Optional, called once for each type.
11
26
  class Base
12
27
 
13
- # Note: Default methods do nothing.
14
- #
15
-
16
28
  # Connect to the backend.
17
29
  #
18
- # Note: Called once per index/category combination
19
- # before harvesting.
30
+ # Called once per index/category combination before harvesting.
20
31
  #
21
- # For example, the db backend connects the db adapter.
32
+ # Examples:
33
+ # * The DB backend connects the DB adapter.
34
+ # * We open a connection to a key value store.
35
+ # * We open a file with data.
22
36
  #
23
37
  def connect_backend
24
38
 
25
39
  end
26
40
 
27
- # Used to take a snapshot of your data if it is fast changing.
28
- # e.g. in a database, a table based on the source's select
29
- # statement is created.
41
+ # Called by the indexer when gathering data.
30
42
  #
31
- # Note: Called before harvesting.
43
+ # Yields the data (id, text for id) for the given type and category.
32
44
  #
33
- def take_snapshot type
34
-
45
+ # When implementing or overriding your own,
46
+ # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
47
+ # for the given type symbol and category symbol.
48
+ #
49
+ def harvest index, category # :yields: id, text_for_id
50
+ # This concrete implementation yields "nothing", override in subclasses.
35
51
  end
36
52
 
37
- # Called by the indexer when gathering data.
53
+ # Used to take a snapshot of your data if it is fast changing.
38
54
  #
39
- # Yields the data (id, text for id) for the given type and category.
55
+ # Called once for each type before harvesting.
40
56
  #
41
- # When implementing or overriding your own,
42
- # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
43
- # and a corresponding text for the given type symbol and
44
- # category symbol.
57
+ # Example:
58
+ # * In a DB source, a table based on the source's select statement is created.
45
59
  #
46
- def harvest type, category
47
- # yields nothing
60
+ def take_snapshot index
61
+
48
62
  end
49
63
 
50
64
  end
@@ -1,11 +1,23 @@
1
1
  module Sources
2
2
 
3
- # Describes a Couch database
4
- # Give it a databse url and optionally username and password
3
+ # Raised when a Couch source is instantiated without a database url.
4
+ #
5
+ # Example:
6
+ # Sources::Couch.new(:column1, :column2) # without url option
5
7
  #
6
-
7
8
  class NoCouchDBGiven < StandardError; end
8
-
9
+
10
+ # A Couch database source.
11
+ #
12
+ # Options:
13
+ # * url
14
+ # and all the options of a <tt>RestClient::Resource</tt>.
15
+ # See http://github.com/archiloque/rest-client.
16
+ #
17
+ # Examples:
18
+ # Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
19
+ # Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
20
+ #
9
21
  class Couch < Base
10
22
 
11
23
  def initialize *category_names, options
@@ -14,7 +26,9 @@ module Sources
14
26
  @db = RestClient::Resource.new options.delete(:url), options
15
27
  end
16
28
 
17
- def check_gem
29
+ # Tries to require the rest_client gem.
30
+ #
31
+ def check_gem # :nodoc:
18
32
  require 'rest_client'
19
33
  rescue LoadError
20
34
  puts "Rest-client gem missing!\nTo use the CouchDB source, you need to:\n 1. Add the following line to Gemfile:\n gem 'rest-client'\n 2. Then, run:\n bundle update\n"
@@ -29,15 +43,15 @@ module Sources
29
43
  yield doc['_id'].to_i, doc[category_name] || next
30
44
  end
31
45
  end
32
-
33
- def get_data &block
46
+
47
+ def get_data &block # :nodoc:
34
48
  resp = @db['_all_docs?include_docs=true'].get
35
49
  JSON.parse(resp)['rows'].
36
50
  map{|row| row['doc']}.
37
51
  each &block
38
52
  end
39
53
 
40
- def raise_no_db_given category_names
54
+ def raise_no_db_given category_names # :nodoc:
41
55
  raise NoCouchDBGiven.new(category_names.join(', '))
42
56
  end
43
57
  end
@@ -1,13 +1,36 @@
1
1
  module Sources
2
2
 
3
- # Describes a CSV source, a file with csv in it.
4
- # Give it a sequence of category names and a file option with the filename.
3
+ # Raised when a CSV source is instantiated without a file.
4
+ #
5
+ # Example:
6
+ # Sources::CSV.new(:column1, :column2) # without file option
5
7
  #
6
8
  class NoCSVFileGiven < StandardError; end
7
9
 
10
+ # Describes a CSV source, a file with comma separated values in it.
11
+ #
12
+ # The first column is implicitly assumed to be the id column.
13
+ #
14
+ # It takes the same options as the Ruby 1.9 CSV class.
15
+ #
16
+ # Examples:
17
+ # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
18
+ # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
19
+ # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
20
+ #
8
21
  class CSV < Base
9
22
 
10
- attr_reader :file_name, :csv_options, :category_names
23
+ # The CSV file's path, relative to PICKY_ROOT.
24
+ #
25
+ attr_reader :file_name
26
+
27
+ # The options that were passed into #new.
28
+ #
29
+ attr_reader :csv_options
30
+
31
+ # The data category names.
32
+ #
33
+ attr_reader :category_names
11
34
 
12
35
  def initialize *category_names, options
13
36
  require 'csv'
@@ -17,9 +40,9 @@ module Sources
17
40
  @file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
18
41
  end
19
42
 
43
+ # Raises a NoCSVFileGiven exception.
20
44
  #
21
- #
22
- def raise_no_file_given category_names
45
+ def raise_no_file_given category_names # :nodoc:
23
46
  raise NoCSVFileGiven.new(category_names.join(', '))
24
47
  end
25
48
 
@@ -38,7 +61,7 @@ module Sources
38
61
 
39
62
  #
40
63
  #
41
- def get_data &block
64
+ def get_data &block # :nodoc:
42
65
  ::CSV.foreach file_name, csv_options, &block
43
66
  end
44
67