picky 0.12.3 → 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (61) hide show
  1. data/lib/deployment.rb +2 -2
  2. data/lib/picky/application.rb +172 -12
  3. data/lib/picky/cacher/generator.rb +1 -1
  4. data/lib/picky/calculations/location.rb +9 -1
  5. data/lib/picky/character_substituters/west_european.rb +1 -1
  6. data/lib/picky/configuration/index.rb +1 -1
  7. data/lib/picky/cores.rb +1 -1
  8. data/lib/picky/extensions/array.rb +1 -1
  9. data/lib/picky/extensions/hash.rb +1 -1
  10. data/lib/picky/extensions/module.rb +1 -1
  11. data/lib/picky/extensions/object.rb +1 -1
  12. data/lib/picky/extensions/symbol.rb +1 -1
  13. data/lib/picky/generator.rb +2 -2
  14. data/lib/picky/helpers/cache.rb +7 -5
  15. data/lib/picky/helpers/gc.rb +2 -0
  16. data/lib/picky/helpers/measuring.rb +2 -0
  17. data/lib/picky/index/bundle.rb +1 -1
  18. data/lib/picky/index_api.rb +33 -15
  19. data/lib/picky/indexed/bundle.rb +1 -1
  20. data/lib/picky/indexed/index.rb +1 -1
  21. data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
  22. data/lib/picky/indexers/no_source_specified_error.rb +1 -1
  23. data/lib/picky/indexes_api.rb +1 -1
  24. data/lib/picky/indexing/bundle.rb +1 -1
  25. data/lib/picky/indexing/index.rb +1 -1
  26. data/lib/picky/loader.rb +1 -1
  27. data/lib/picky/loggers/search.rb +1 -1
  28. data/lib/picky/performant.rb +3 -0
  29. data/lib/picky/query/allocation.rb +1 -1
  30. data/lib/picky/query/allocations.rb +1 -1
  31. data/lib/picky/query/base.rb +48 -16
  32. data/lib/picky/query/combination.rb +1 -1
  33. data/lib/picky/query/combinations.rb +1 -1
  34. data/lib/picky/query/full.rb +7 -2
  35. data/lib/picky/query/live.rb +9 -7
  36. data/lib/picky/query/qualifiers.rb +6 -2
  37. data/lib/picky/query/solr.rb +1 -1
  38. data/lib/picky/query/token.rb +2 -1
  39. data/lib/picky/query/tokens.rb +4 -1
  40. data/lib/picky/query/weigher.rb +1 -1
  41. data/lib/picky/query/weights.rb +1 -1
  42. data/lib/picky/rack/harakiri.rb +14 -5
  43. data/lib/picky/results/base.rb +1 -1
  44. data/lib/picky/routing.rb +1 -1
  45. data/lib/picky/solr/schema_generator.rb +2 -1
  46. data/lib/picky/sources/base.rb +39 -25
  47. data/lib/picky/sources/couch.rb +22 -8
  48. data/lib/picky/sources/csv.rb +29 -6
  49. data/lib/picky/sources/db.rb +46 -30
  50. data/lib/picky/sources/delicious.rb +12 -2
  51. data/lib/picky/sources/wrappers/base.rb +3 -1
  52. data/lib/picky/tokenizers/base.rb +1 -1
  53. data/project_prototype/Gemfile +1 -1
  54. data/project_prototype/app/README +0 -1
  55. data/spec/lib/calculations/location_spec.rb +28 -16
  56. data/spec/lib/index_api_spec.rb +64 -0
  57. data/spec/lib/indexed/index_spec.rb +2 -2
  58. data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
  59. data/spec/lib/indexing/index_spec.rb +2 -2
  60. data/spec/lib/rack/harakiri_spec.rb +22 -10
  61. metadata +7 -4
@@ -1,7 +1,23 @@
1
+ # = Picky Queries
2
+ #
3
+ # A Picky Query is an object which:
4
+ # * holds one or more indexes
5
+ # * offers an interface to query these indexes.
6
+ #
7
+ # You connect URL paths to indexes via a Query.
8
+ #
9
+ # We recommend not using this directly; instead, connect it to a URL and query through that URL
10
+ # (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal.)
11
+ #
12
+ # There are two flavors of queries:
13
+ # * Query::Full (Full results with all infos)
14
+ # * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
15
+ #
1
16
  module Query
2
- # Base query class.
17
+
18
+ # The base query class.
3
19
  #
4
- # Initialized with the index types it should search on.
20
+ # Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
5
21
  #
6
22
  class Base
7
23
 
@@ -11,11 +27,11 @@ module Query
11
27
  attr_accessor :reduce_to_amount, :weights
12
28
 
13
29
  # Takes:
14
- # * A number of indexes
15
- # * Options hash (optional) with:
16
- # * weigher: A weigher. Query::Weigher by default.
17
- # * tokenizer: Tokenizers::Query.default by default.
18
- # * weights: A hash of weights, or a Query::Weights object.
30
+ # * A number of indexes
31
+ # * Options hash (optional) with:
32
+ # * weigher: A weigher. Query::Weigher by default.
33
+ # * tokenizer: Tokenizers::Query.default by default.
34
+ # * weights: A hash of weights, or a Query::Weights object.
19
35
  #
20
36
  def initialize *index_type_definitions
21
37
  options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
@@ -27,13 +43,21 @@ module Query
27
43
  @weights = Hash === weights ? Weights.new(weights) : weights
28
44
  end
29
45
 
30
- # Convenience method.
46
+ # Search through this method.
47
+ #
48
+ # Parameters:
49
+ # * text: The search text.
50
+ # * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
51
+ #
52
+ # Note: The Routing uses this method after unravelling the HTTP request.
31
53
  #
32
54
  def search_with_text text, offset = 0
33
55
  search tokenized(text), offset
34
56
  end
35
57
 
36
- # This runs the actual search.
58
+ # Runs the actual search using Query::Tokens.
59
+ #
60
+ # Note: Internal method, use #search_with_text.
37
61
  #
38
62
  def search tokens, offset = 0
39
63
  results = nil
@@ -46,7 +70,9 @@ module Query
46
70
  results
47
71
  end
48
72
 
49
- # Return nil if no results have been found.
73
+ # Execute a search using Query::Tokens.
74
+ #
75
+ # Note: Internal method, use #search_with_text.
50
76
  #
51
77
  def execute tokens, offset
52
78
  results_from offset, sorted_allocations(tokens)
@@ -54,12 +80,18 @@ module Query
54
80
 
55
81
  # Returns an empty result with default values.
56
82
  #
83
+ # Parameters:
84
+ # * offset = 0: _optional_ The offset to use for the empty results.
85
+ #
57
86
  def empty_results offset = 0
58
87
  result_type.new offset
59
88
  end
60
89
 
61
90
  # Delegates the tokenizing to the query tokenizer.
62
91
  #
92
+ # Parameters:
93
+ # * text: The text to tokenize.
94
+ #
63
95
  def tokenized text
64
96
  @tokenizer.tokenize text
65
97
  end
@@ -72,7 +104,7 @@ module Query
72
104
  #
73
105
  # TODO Rename: allocations
74
106
  #
75
- def sorted_allocations tokens
107
+ def sorted_allocations tokens # :nodoc:
76
108
  # Get the allocations.
77
109
  #
78
110
  # TODO Pass in reduce_to_amount (aka max_allocations)
@@ -105,18 +137,18 @@ module Query
105
137
  #
106
138
  allocations
107
139
  end
108
- def reduce allocations
140
+ def reduce allocations # :nodoc:
109
141
  allocations.reduce_to reduce_to_amount if reduce_to_amount
110
142
  end
111
- def remove_identifiers?
143
+ def remove_identifiers? # :nodoc:
112
144
  identifiers_to_remove.present?
113
145
  end
114
- def remove_from allocations
146
+ def remove_from allocations # :nodoc:
115
147
  allocations.remove(identifiers_to_remove) if remove_identifiers?
116
148
  end
117
149
  # Override. TODO No, redesign.
118
150
  #
119
- def identifiers_to_remove
151
+ def identifiers_to_remove # :nodoc:
120
152
  @identifiers_to_remove ||= []
121
153
  end
122
154
 
@@ -126,7 +158,7 @@ module Query
126
158
  #
127
159
  # TODO Move to results. result_type.from allocations, offset
128
160
  #
129
- def results_from offset = 0, allocations = nil
161
+ def results_from offset = 0, allocations = nil # :nodoc:
130
162
  results = result_type.new offset, allocations
131
163
  results.prepare!
132
164
  results
@@ -7,7 +7,7 @@ module Query
7
7
  #
8
8
  # An allocation consists of a number of combinations.
9
9
  #
10
- class Combination
10
+ class Combination # :nodoc:all
11
11
 
12
12
  attr_reader :token, :bundle, :category_name
13
13
 
@@ -4,7 +4,7 @@ module Query
4
4
  #
5
5
  # They are the core of an allocation.
6
6
  #
7
- class Combinations
7
+ class Combinations # :nodoc:all
8
8
 
9
9
  attr_reader :combinations
10
10
 
@@ -1,10 +1,15 @@
1
1
  module Query
2
2
 
3
- # This is the query class for performing full fledged queries.
3
+ # This Query class performs full queries.
4
+ #
5
+ # It includes in its results:
6
+ # * A count of results.
7
+ # * All possible combinations with their weights.
8
+ # * The top X result ids.
4
9
  #
5
10
  class Full < Base
6
11
 
7
- # Generates full results.
12
+ # Returns Results::Full as its result type.
8
13
  #
9
14
  def result_type
10
15
  Results::Full
@@ -1,17 +1,19 @@
1
1
  module Query
2
2
 
3
- # This is the query class for live queries.
3
+ # This Query class performs live queries.
4
4
  #
5
- # It does:
6
- # * Return a count of results.
5
+ # It is useful for updating counters, or any job where you don't need the result ids.
7
6
  #
8
- # It does NOT:
9
- # * Sort results geographically.
10
- # * Do any postprocessing.
7
+ # It includes in its results:
8
+ # * A count of results.
9
+ # * All possible combinations with their weights.
10
+ #
11
+ # But not:
12
+ # * The top X result ids.
11
13
  #
12
14
  class Live < Base
13
15
 
14
- # Generates results from allocations.
16
+ # Returns Results::Live as its result type.
15
17
  #
16
18
  def result_type
17
19
  Results::Live
@@ -1,9 +1,13 @@
1
1
  # coding: utf-8
2
+ #
3
+
4
+ #
5
+ #
2
6
  module Query
3
7
 
4
8
  # A single qualifier.
5
9
  #
6
- class Qualifier
10
+ class Qualifier # :nodoc:all
7
11
 
8
12
  attr_reader :normalized_qualifier, :codes
9
13
 
@@ -28,7 +32,7 @@ module Query
28
32
 
29
33
  # Collection class for qualifiers.
30
34
  #
31
- class Qualifiers
35
+ class Qualifiers # :nodoc:all
32
36
 
33
37
  include Singleton
34
38
 
@@ -4,7 +4,7 @@ module Query
4
4
 
5
5
  #
6
6
  #
7
- class Solr < Base
7
+ class Solr < Base # :nodoc:all
8
8
 
9
9
  attr_reader :server, :index_types
10
10
 
@@ -1,4 +1,5 @@
1
1
  module Query
2
+
2
3
  # This is a query token. Together with other tokens it makes up a query.
3
4
  #
4
5
  # It remembers the original form, and a normalized form.
@@ -7,7 +8,7 @@ module Query
7
8
  #
8
9
  # TODO Make partial / similarity char configurable.
9
10
  #
10
- class Token
11
+ class Token # :nodoc:all
11
12
 
12
13
  attr_reader :text, :original
13
14
  attr_writer :similar
@@ -1,10 +1,13 @@
1
1
  # encoding: utf-8
2
2
  #
3
+
4
+ #
5
+ #
3
6
  module Query
4
7
 
5
8
  # This class primarily handles switching through similar token constellations.
6
9
  #
7
- class Tokens
10
+ class Tokens # :nodoc:all
8
11
 
9
12
  # Basically delegates to its internal tokens array.
10
13
  #
@@ -2,7 +2,7 @@ module Query
2
2
 
3
3
  # Weighs the given tokens, generates Allocations -> Allocation -> Combinations.
4
4
  #
5
- class Weigher
5
+ class Weigher # :nodoc:all
6
6
 
7
7
  attr_reader :indexes
8
8
 
@@ -2,7 +2,7 @@ module Query
2
2
 
3
3
  # Calculates weights for certain combinations.
4
4
  #
5
- class Weights
5
+ class Weights # :nodoc:all
6
6
 
7
7
  #
8
8
  #
@@ -1,4 +1,4 @@
1
- module Rack
1
+ module Rack # :nodoc:
2
2
 
3
3
  # Simple Rack Middleware to kill Unicorns after X requests.
4
4
  #
@@ -16,7 +16,6 @@ module Rack
16
16
  # Set the amount of requests before the Unicorn commits Harakiri.
17
17
  #
18
18
  cattr_accessor :after
19
- attr_reader :quit_after_requests
20
19
 
21
20
  def initialize app
22
21
  @app = app
@@ -25,8 +24,10 @@ module Rack
25
24
  @quit_after_requests = self.class.after || 50
26
25
  end
27
26
 
28
- # Harakiri is a middleware, so it passes the call on after checking if it
29
- # is time to honorably retire.
27
+ # #call interface method.
28
+ #
29
+ # Harakiri is a middleware, so it delegates to the app or
30
+ # the next middleware after checking if it is time to honorably retire.
30
31
  #
31
32
  def call env
32
33
  harakiri
@@ -37,9 +38,17 @@ module Rack
37
38
  #
38
39
  # If yes, kills itself (Unicorn will answer the request, honorably).
39
40
  #
41
+ # Note: Sends its process a QUIT signal if it is time.
42
+ #
40
43
  def harakiri
41
44
  @requests = @requests + 1
42
- Process.kill(:QUIT, Process.pid) if @requests >= @quit_after_requests
45
+ Process.kill(:QUIT, Process.pid) if harakiri?
46
+ end
47
+
48
+ # Is it time to honorably retire?
49
+ #
50
+ def harakiri?
51
+ @requests >= @quit_after_requests
43
52
  end
44
53
 
45
54
  end
@@ -1,4 +1,4 @@
1
- module Results
1
+ module Results # :nodoc:all
2
2
 
3
3
  # This is the internal results object. Usually, to_marshal, or to_json
4
4
  # is called on it to get a string for the answer.
data/lib/picky/routing.rb CHANGED
@@ -2,7 +2,7 @@ require 'rack/mount'
2
2
 
3
3
  #
4
4
  #
5
- class Routing
5
+ class Routing # :nodoc:all
6
6
 
7
7
  @@defaults = {
8
8
  query_key: 'query'.freeze,
@@ -1,4 +1,5 @@
1
- module Solr
1
+ module Solr # :nodoc:all
2
+
2
3
  class SchemaGenerator
3
4
 
4
5
  attr_reader :types
@@ -1,50 +1,64 @@
1
+ # = Data Sources
2
+ #
3
+ # Currently, Picky offers the following Sources:
4
+ # * CSV (comma – or other – separated file)
5
+ # * Couch (CouchDB, key-value store)
6
+ # * DB (Databases, foremost MySQL)
7
+ # * Delicious (http://del.icio.us, online bookmarking service)
8
+ # See also:
9
+ # http://github.com/floere/picky/wiki/Sources-Configuration
10
+ #
11
+ # Don't worry if your source isn't here. Adding your own is easy:
12
+ # http://github.com/floere/picky/wiki/Contributing-sources
13
+ #
1
14
  module Sources
2
15
 
3
16
  # Sources are where your data comes from.
4
17
  #
5
- # Basically, a source has 1-3 methods.
6
- # * harvest: Used by the indexer to gather data.
7
- # Yields an indexed_id (string or integer) and a string value.
18
+ # A source has 1 mandatory and 2 optional methods:
19
+ # * connect_backend (_optional_): called once for each type/category pair.
20
+ # * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
21
+ # * take_snapshot (_optional_): called once for each type.
22
+ #
23
+ # This base class "implements" all these methods, but they don't do anything.
24
+ # Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
8
25
  #
9
- # * connect_backend: Optional, called once for each type/category pair.
10
- # * take_snapshot: Optional, called once for each type.
11
26
  class Base
12
27
 
13
- # Note: Default methods do nothing.
14
- #
15
-
16
28
  # Connect to the backend.
17
29
  #
18
- # Note: Called once per index/category combination
19
- # before harvesting.
30
+ # Called once per index/category combination before harvesting.
20
31
  #
21
- # For example, the db backend connects the db adapter.
32
+ # Examples:
33
+ # * The DB backend connects the DB adapter.
34
+ # * We open a connection to a key value store.
35
+ # * We open a file with data.
22
36
  #
23
37
  def connect_backend
24
38
 
25
39
  end
26
40
 
27
- # Used to take a snapshot of your data if it is fast changing.
28
- # e.g. in a database, a table based on the source's select
29
- # statement is created.
41
+ # Called by the indexer when gathering data.
30
42
  #
31
- # Note: Called before harvesting.
43
+ # Yields the data (id, text for id) for the given type and category.
32
44
  #
33
- def take_snapshot type
34
-
45
+ # When implementing or overriding your own,
46
+ # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
47
+ # for the given type symbol and category symbol.
48
+ #
49
+ def harvest index, category # :yields: id, text_for_id
50
+ # This concrete implementation yields "nothing", override in subclasses.
35
51
  end
36
52
 
37
- # Called by the indexer when gathering data.
53
+ # Used to take a snapshot of your data if it is fast changing.
38
54
  #
39
- # Yields the data (id, text for id) for the given type and category.
55
+ # Called once for each type before harvesting.
40
56
  #
41
- # When implementing or overriding your own,
42
- # be sure to <tt>yield</tt> (or <tt>block.call</tt>) an id (as string or integer)
43
- # and a corresponding text for the given type symbol and
44
- # category symbol.
57
+ # Example:
58
+ # * In a DB source, a table based on the source's select statement is created.
45
59
  #
46
- def harvest type, category
47
- # yields nothing
60
+ def take_snapshot index
61
+
48
62
  end
49
63
 
50
64
  end
@@ -1,11 +1,23 @@
1
1
  module Sources
2
2
 
3
- # Describes a Couch database
4
- # Give it a databse url and optionally username and password
3
+ # Raised when a Couch source is instantiated without a url.
4
+ #
5
+ # Example:
6
+ # Sources::Couch.new(:column1, :column2) # without url option
5
7
  #
6
-
7
8
  class NoCouchDBGiven < StandardError; end
8
-
9
+
10
+ # A Couch database source.
11
+ #
12
+ # Options:
13
+ # * url
14
+ # and all the options of a <tt>RestClient::Resource</tt>.
15
+ # See http://github.com/archiloque/rest-client.
16
+ #
17
+ # Examples:
18
+ # Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
19
+ # Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
20
+ #
9
21
  class Couch < Base
10
22
 
11
23
  def initialize *category_names, options
@@ -14,7 +26,9 @@ module Sources
14
26
  @db = RestClient::Resource.new options.delete(:url), options
15
27
  end
16
28
 
17
- def check_gem
29
+ # Tries to require the rest_client gem.
30
+ #
31
+ def check_gem # :nodoc:
18
32
  require 'rest_client'
19
33
  rescue LoadError
20
34
  puts "Rest-client gem missing!\nTo use the CouchDB source, you need to:\n 1. Add the following line to Gemfile:\n gem 'rest-client'\n 2. Then, run:\n bundle update\n"
@@ -29,15 +43,15 @@ module Sources
29
43
  yield doc['_id'].to_i, doc[category_name] || next
30
44
  end
31
45
  end
32
-
33
- def get_data &block
46
+
47
+ def get_data &block # :nodoc:
34
48
  resp = @db['_all_docs?include_docs=true'].get
35
49
  JSON.parse(resp)['rows'].
36
50
  map{|row| row['doc']}.
37
51
  each &block
38
52
  end
39
53
 
40
- def raise_no_db_given category_names
54
+ def raise_no_db_given category_names # :nodoc:
41
55
  raise NoCouchDBGiven.new(category_names.join(', '))
42
56
  end
43
57
  end
@@ -1,13 +1,36 @@
1
1
  module Sources
2
2
 
3
- # Describes a CSV source, a file with csv in it.
4
- # Give it a sequence of category names and a file option with the filename.
3
+ # Raised when a CSV source is instantiated without a file.
4
+ #
5
+ # Example:
6
+ # Sources::CSV.new(:column1, :column2) # without file option
5
7
  #
6
8
  class NoCSVFileGiven < StandardError; end
7
9
 
10
+ # Describes a CSV source, a file with comma separated values in it.
11
+ #
12
+ # The first column is implicitly assumed to be the id column.
13
+ #
14
+ # It takes the same options as the Ruby 1.9 CSV class.
15
+ #
16
+ # Examples:
17
+ # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
18
+ # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
19
+ # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
20
+ #
8
21
  class CSV < Base
9
22
 
10
- attr_reader :file_name, :csv_options, :category_names
23
+ # The CSV file's path, relative to PICKY_ROOT.
24
+ #
25
+ attr_reader :file_name
26
+
27
+ # The options that were passed into #new.
28
+ #
29
+ attr_reader :csv_options
30
+
31
+ # The data category names.
32
+ #
33
+ attr_reader :category_names
11
34
 
12
35
  def initialize *category_names, options
13
36
  require 'csv'
@@ -17,9 +40,9 @@ module Sources
17
40
  @file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
18
41
  end
19
42
 
43
+ # Raises a NoCSVFileGiven exception.
20
44
  #
21
- #
22
- def raise_no_file_given category_names
45
+ def raise_no_file_given category_names # :nodoc:
23
46
  raise NoCSVFileGiven.new(category_names.join(', '))
24
47
  end
25
48
 
@@ -38,7 +61,7 @@ module Sources
38
61
 
39
62
  #
40
63
  #
41
- def get_data &block
64
+ def get_data &block # :nodoc:
42
65
  ::CSV.foreach file_name, csv_options, &block
43
66
  end
44
67