picky 0.12.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/deployment.rb +2 -2
- data/lib/picky/application.rb +172 -12
- data/lib/picky/cacher/generator.rb +1 -1
- data/lib/picky/calculations/location.rb +9 -1
- data/lib/picky/character_substituters/west_european.rb +1 -1
- data/lib/picky/configuration/index.rb +1 -1
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/extensions/array.rb +1 -1
- data/lib/picky/extensions/hash.rb +1 -1
- data/lib/picky/extensions/module.rb +1 -1
- data/lib/picky/extensions/object.rb +1 -1
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generator.rb +2 -2
- data/lib/picky/helpers/cache.rb +7 -5
- data/lib/picky/helpers/gc.rb +2 -0
- data/lib/picky/helpers/measuring.rb +2 -0
- data/lib/picky/index/bundle.rb +1 -1
- data/lib/picky/index_api.rb +33 -15
- data/lib/picky/indexed/bundle.rb +1 -1
- data/lib/picky/indexed/index.rb +1 -1
- data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexes_api.rb +1 -1
- data/lib/picky/indexing/bundle.rb +1 -1
- data/lib/picky/indexing/index.rb +1 -1
- data/lib/picky/loader.rb +1 -1
- data/lib/picky/loggers/search.rb +1 -1
- data/lib/picky/performant.rb +3 -0
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +1 -1
- data/lib/picky/query/base.rb +48 -16
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/combinations.rb +1 -1
- data/lib/picky/query/full.rb +7 -2
- data/lib/picky/query/live.rb +9 -7
- data/lib/picky/query/qualifiers.rb +6 -2
- data/lib/picky/query/solr.rb +1 -1
- data/lib/picky/query/token.rb +2 -1
- data/lib/picky/query/tokens.rb +4 -1
- data/lib/picky/query/weigher.rb +1 -1
- data/lib/picky/query/weights.rb +1 -1
- data/lib/picky/rack/harakiri.rb +14 -5
- data/lib/picky/results/base.rb +1 -1
- data/lib/picky/routing.rb +1 -1
- data/lib/picky/solr/schema_generator.rb +2 -1
- data/lib/picky/sources/base.rb +39 -25
- data/lib/picky/sources/couch.rb +22 -8
- data/lib/picky/sources/csv.rb +29 -6
- data/lib/picky/sources/db.rb +46 -30
- data/lib/picky/sources/delicious.rb +12 -2
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/tokenizers/base.rb +1 -1
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/README +0 -1
- data/spec/lib/calculations/location_spec.rb +28 -16
- data/spec/lib/index_api_spec.rb +64 -0
- data/spec/lib/indexed/index_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
- data/spec/lib/indexing/index_spec.rb +2 -2
- data/spec/lib/rack/harakiri_spec.rb +22 -10
- metadata +7 -4
data/lib/picky/query/base.rb
CHANGED
@@ -1,7 +1,23 @@
|
|
1
|
+
# = Picky Queries
|
2
|
+
#
|
3
|
+
# A Picky Query is an object which:
|
4
|
+
# * holds one or more indexes
|
5
|
+
# * offers an interface to query these indexes.
|
6
|
+
#
|
7
|
+
# You connect URL paths to indexes via a Query.
|
8
|
+
#
|
9
|
+
# We recommend not using this directly, but connecting it to a URL and querying through that
|
10
|
+
# (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal.)
|
11
|
+
#
|
12
|
+
# There are two flavors of queries:
|
13
|
+
# * Query::Full (Full results with all infos)
|
14
|
+
# * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
|
15
|
+
#
|
1
16
|
module Query
|
2
|
-
|
17
|
+
|
18
|
+
# The base query class.
|
3
19
|
#
|
4
|
-
#
|
20
|
+
# Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
|
5
21
|
#
|
6
22
|
class Base
|
7
23
|
|
@@ -11,11 +27,11 @@ module Query
|
|
11
27
|
attr_accessor :reduce_to_amount, :weights
|
12
28
|
|
13
29
|
# Takes:
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
30
|
+
# * A number of indexes
|
31
|
+
# * Options hash (optional) with:
|
32
|
+
# * weigher: A weigher. Query::Weigher by default.
|
33
|
+
# * tokenizer: Tokenizers::Query.default by default.
|
34
|
+
# * weights: A hash of weights, or a Query::Weights object.
|
19
35
|
#
|
20
36
|
def initialize *index_type_definitions
|
21
37
|
options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
|
@@ -27,13 +43,21 @@ module Query
|
|
27
43
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
28
44
|
end
|
29
45
|
|
30
|
-
#
|
46
|
+
# Search through this method.
|
47
|
+
#
|
48
|
+
# Parameters:
|
49
|
+
# * text: The search text.
|
50
|
+
# * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
|
51
|
+
#
|
52
|
+
# Note: The Routing uses this method after unravelling the HTTP request.
|
31
53
|
#
|
32
54
|
def search_with_text text, offset = 0
|
33
55
|
search tokenized(text), offset
|
34
56
|
end
|
35
57
|
|
36
|
-
#
|
58
|
+
# Runs the actual search using Query::Tokens.
|
59
|
+
#
|
60
|
+
# Note: Internal method, use #search_with_text.
|
37
61
|
#
|
38
62
|
def search tokens, offset = 0
|
39
63
|
results = nil
|
@@ -46,7 +70,9 @@ module Query
|
|
46
70
|
results
|
47
71
|
end
|
48
72
|
|
49
|
-
#
|
73
|
+
# Execute a search using Query::Tokens.
|
74
|
+
#
|
75
|
+
# Note: Internal method, use #search_with_text.
|
50
76
|
#
|
51
77
|
def execute tokens, offset
|
52
78
|
results_from offset, sorted_allocations(tokens)
|
@@ -54,12 +80,18 @@ module Query
|
|
54
80
|
|
55
81
|
# Returns an empty result with default values.
|
56
82
|
#
|
83
|
+
# Parameters:
|
84
|
+
# * offset = 0: _optional_ The offset to use for the empty results.
|
85
|
+
#
|
57
86
|
def empty_results offset = 0
|
58
87
|
result_type.new offset
|
59
88
|
end
|
60
89
|
|
61
90
|
# Delegates the tokenizing to the query tokenizer.
|
62
91
|
#
|
92
|
+
# Parameters:
|
93
|
+
# * text: The text to tokenize.
|
94
|
+
#
|
63
95
|
def tokenized text
|
64
96
|
@tokenizer.tokenize text
|
65
97
|
end
|
@@ -72,7 +104,7 @@ module Query
|
|
72
104
|
#
|
73
105
|
# TODO Rename: allocations
|
74
106
|
#
|
75
|
-
def sorted_allocations tokens
|
107
|
+
def sorted_allocations tokens # :nodoc:
|
76
108
|
# Get the allocations.
|
77
109
|
#
|
78
110
|
# TODO Pass in reduce_to_amount (aka max_allocations)
|
@@ -105,18 +137,18 @@ module Query
|
|
105
137
|
#
|
106
138
|
allocations
|
107
139
|
end
|
108
|
-
def reduce allocations
|
140
|
+
def reduce allocations # :nodoc:
|
109
141
|
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
110
142
|
end
|
111
|
-
def remove_identifiers?
|
143
|
+
def remove_identifiers? # :nodoc:
|
112
144
|
identifiers_to_remove.present?
|
113
145
|
end
|
114
|
-
def remove_from allocations
|
146
|
+
def remove_from allocations # :nodoc:
|
115
147
|
allocations.remove(identifiers_to_remove) if remove_identifiers?
|
116
148
|
end
|
117
149
|
# Override. TODO No, redesign.
|
118
150
|
#
|
119
|
-
def identifiers_to_remove
|
151
|
+
def identifiers_to_remove # :nodoc:
|
120
152
|
@identifiers_to_remove ||= []
|
121
153
|
end
|
122
154
|
|
@@ -126,7 +158,7 @@ module Query
|
|
126
158
|
#
|
127
159
|
# TODO Move to results. result_type.from allocations, offset
|
128
160
|
#
|
129
|
-
def results_from offset = 0, allocations = nil
|
161
|
+
def results_from offset = 0, allocations = nil # :nodoc:
|
130
162
|
results = result_type.new offset, allocations
|
131
163
|
results.prepare!
|
132
164
|
results
|
data/lib/picky/query/full.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
module Query
|
2
2
|
|
3
|
-
# This
|
3
|
+
# This Query class performs full queries.
|
4
|
+
#
|
5
|
+
# It includes in its results:
|
6
|
+
# * A count of results.
|
7
|
+
# * All possible combinations with their weights.
|
8
|
+
# * The top X result ids.
|
4
9
|
#
|
5
10
|
class Full < Base
|
6
11
|
|
7
|
-
#
|
12
|
+
# Returns Results::Full as its result type.
|
8
13
|
#
|
9
14
|
def result_type
|
10
15
|
Results::Full
|
data/lib/picky/query/live.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
module Query
|
2
2
|
|
3
|
-
# This
|
3
|
+
# This Query class performs live queries.
|
4
4
|
#
|
5
|
-
# It
|
6
|
-
# * Return a count of results.
|
5
|
+
# It is useful for updating counters, or any job where you don't need the result ids.
|
7
6
|
#
|
8
|
-
# It
|
9
|
-
#
|
10
|
-
#
|
7
|
+
# It includes in its results:
|
8
|
+
# * A count of results.
|
9
|
+
# * All possible combinations with their weights.
|
10
|
+
#
|
11
|
+
# But not:
|
12
|
+
# * The top X result ids.
|
11
13
|
#
|
12
14
|
class Live < Base
|
13
15
|
|
14
|
-
#
|
16
|
+
# Returns Results::Live as its result type.
|
15
17
|
#
|
16
18
|
def result_type
|
17
19
|
Results::Live
|
data/lib/picky/query/qualifiers.rb
CHANGED
@@ -1,9 +1,13 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
#
|
3
|
+
|
4
|
+
#
|
5
|
+
#
|
2
6
|
module Query
|
3
7
|
|
4
8
|
# A single qualifier.
|
5
9
|
#
|
6
|
-
class Qualifier
|
10
|
+
class Qualifier # :nodoc:all
|
7
11
|
|
8
12
|
attr_reader :normalized_qualifier, :codes
|
9
13
|
|
@@ -28,7 +32,7 @@ module Query
|
|
28
32
|
|
29
33
|
# Collection class for qualifiers.
|
30
34
|
#
|
31
|
-
class Qualifiers
|
35
|
+
class Qualifiers # :nodoc:all
|
32
36
|
|
33
37
|
include Singleton
|
34
38
|
|
data/lib/picky/query/solr.rb
CHANGED
data/lib/picky/query/token.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Query
|
2
|
+
|
2
3
|
# This is a query token. Together with other tokens it makes up a query.
|
3
4
|
#
|
4
5
|
# It remembers the original form, and a normalized form.
|
@@ -7,7 +8,7 @@ module Query
|
|
7
8
|
#
|
8
9
|
# TODO Make partial / similarity char configurable.
|
9
10
|
#
|
10
|
-
class Token
|
11
|
+
class Token # :nodoc:all
|
11
12
|
|
12
13
|
attr_reader :text, :original
|
13
14
|
attr_writer :similar
|
data/lib/picky/query/tokens.rb
CHANGED
data/lib/picky/query/weigher.rb
CHANGED
data/lib/picky/query/weights.rb
CHANGED
data/lib/picky/rack/harakiri.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module Rack
|
1
|
+
module Rack # :nodoc:
|
2
2
|
|
3
3
|
# Simple Rack Middleware to kill Unicorns after X requests.
|
4
4
|
#
|
@@ -16,7 +16,6 @@ module Rack
|
|
16
16
|
# Set the amount of requests before the Unicorn commits Harakiri.
|
17
17
|
#
|
18
18
|
cattr_accessor :after
|
19
|
-
attr_reader :quit_after_requests
|
20
19
|
|
21
20
|
def initialize app
|
22
21
|
@app = app
|
@@ -25,8 +24,10 @@ module Rack
|
|
25
24
|
@quit_after_requests = self.class.after || 50
|
26
25
|
end
|
27
26
|
|
28
|
-
#
|
29
|
-
#
|
27
|
+
# #call interface method.
|
28
|
+
#
|
29
|
+
# Harakiri is a middleware, so it delegates to the app or
|
30
|
+
# the next middleware after checking if it is time to honorably retire.
|
30
31
|
#
|
31
32
|
def call env
|
32
33
|
harakiri
|
@@ -37,9 +38,17 @@ module Rack
|
|
37
38
|
#
|
38
39
|
# If yes, kills itself (Unicorn will answer the request, honorably).
|
39
40
|
#
|
41
|
+
# Note: Sends its process a QUIT signal if it is time.
|
42
|
+
#
|
40
43
|
def harakiri
|
41
44
|
@requests = @requests + 1
|
42
|
-
Process.kill(:QUIT, Process.pid) if
|
45
|
+
Process.kill(:QUIT, Process.pid) if harakiri?
|
46
|
+
end
|
47
|
+
|
48
|
+
# Is it time to honorably retire?
|
49
|
+
#
|
50
|
+
def harakiri?
|
51
|
+
@requests >= @quit_after_requests
|
43
52
|
end
|
44
53
|
|
45
54
|
end
|
data/lib/picky/results/base.rb
CHANGED
data/lib/picky/routing.rb
CHANGED
data/lib/picky/sources/base.rb
CHANGED
@@ -1,50 +1,64 @@
|
|
1
|
+
# = Data Sources
|
2
|
+
#
|
3
|
+
# Currently, Picky offers the following Sources:
|
4
|
+
# * CSV (comma – or other – separated file)
|
5
|
+
# * Couch (CouchDB, key-value store)
|
6
|
+
# * DB (Databases, foremost MySQL)
|
7
|
+
# * Delicious (http://del.icio.us, online bookmarking service)
|
8
|
+
# See also:
|
9
|
+
# http://github.com/floere/picky/wiki/Sources-Configuration
|
10
|
+
#
|
11
|
+
# Don't worry if your source isn't here. Adding your own is easy:
|
12
|
+
# http://github.com/floere/picky/wiki/Contributing-sources
|
13
|
+
#
|
1
14
|
module Sources
|
2
15
|
|
3
16
|
# Sources are where your data comes from.
|
4
17
|
#
|
5
|
-
#
|
6
|
-
# *
|
7
|
-
#
|
18
|
+
# A source has 1 mandatory and 2 optional methods:
|
19
|
+
# * connect_backend (_optional_): called once for each type/category pair.
|
20
|
+
# * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
|
21
|
+
# * take_snapshot (_optional_): called once for each type.
|
22
|
+
#
|
23
|
+
# This base class "implements" all these methods, but they don't do anything.
|
24
|
+
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
8
25
|
#
|
9
|
-
# * connect_backend: Optional, called once for each type/category pair.
|
10
|
-
# * take_snapshot: Optional, called once for each type.
|
11
26
|
class Base
|
12
27
|
|
13
|
-
# Note: Default methods do nothing.
|
14
|
-
#
|
15
|
-
|
16
28
|
# Connect to the backend.
|
17
29
|
#
|
18
|
-
#
|
19
|
-
# before harvesting.
|
30
|
+
# Called once per index/category combination before harvesting.
|
20
31
|
#
|
21
|
-
#
|
32
|
+
# Examples:
|
33
|
+
# * The DB backend connects the DB adapter.
|
34
|
+
# * We open a connection to a key value store.
|
35
|
+
# * We open a file with data.
|
22
36
|
#
|
23
37
|
def connect_backend
|
24
38
|
|
25
39
|
end
|
26
40
|
|
27
|
-
#
|
28
|
-
# e.g. in a database, a table based on the source's select
|
29
|
-
# statement is created.
|
41
|
+
# Called by the indexer when gathering data.
|
30
42
|
#
|
31
|
-
#
|
43
|
+
# Yields the data (id, text for id) for the given type and category.
|
32
44
|
#
|
33
|
-
|
34
|
-
|
45
|
+
# When implementing or overriding your own,
|
46
|
+
# be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
|
47
|
+
# for the given type symbol and category symbol.
|
48
|
+
#
|
49
|
+
def harvest index, category # :yields: id, text_for_id
|
50
|
+
# This concrete implementation yields "nothing", override in subclasses.
|
35
51
|
end
|
36
52
|
|
37
|
-
#
|
53
|
+
# Used to take a snapshot of your data if it is fast changing.
|
38
54
|
#
|
39
|
-
#
|
55
|
+
# Called once for each type before harvesting.
|
40
56
|
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
# and a corresponding text for the given type symbol and
|
44
|
-
# category symbol.
|
57
|
+
# Example:
|
58
|
+
# * In a DB source, a table based on the source's select statement is created.
|
45
59
|
#
|
46
|
-
def
|
47
|
-
|
60
|
+
def take_snapshot index
|
61
|
+
|
48
62
|
end
|
49
63
|
|
50
64
|
end
|
data/lib/picky/sources/couch.rb
CHANGED
@@ -1,11 +1,23 @@
|
|
1
1
|
module Sources
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
3
|
+
# Raised when a Couch source is instantiated without a url.
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
# Sources::Couch.new(:column1, :column2) # without url option
|
5
7
|
#
|
6
|
-
|
7
8
|
class NoCouchDBGiven < StandardError; end
|
8
|
-
|
9
|
+
|
10
|
+
# A Couch database source.
|
11
|
+
#
|
12
|
+
# Options:
|
13
|
+
# * url
|
14
|
+
# and all the options of a <tt>RestClient::Resource</tt>.
|
15
|
+
# See http://github.com/archiloque/rest-client.
|
16
|
+
#
|
17
|
+
# Examples:
|
18
|
+
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
|
19
|
+
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
20
|
+
#
|
9
21
|
class Couch < Base
|
10
22
|
|
11
23
|
def initialize *category_names, options
|
@@ -14,7 +26,9 @@ module Sources
|
|
14
26
|
@db = RestClient::Resource.new options.delete(:url), options
|
15
27
|
end
|
16
28
|
|
17
|
-
|
29
|
+
# Tries to require the rest_client gem.
|
30
|
+
#
|
31
|
+
def check_gem # :nodoc:
|
18
32
|
require 'rest_client'
|
19
33
|
rescue LoadError
|
20
34
|
puts "Rest-client gem missing!\nTo use the CouchDB source, you need to:\n 1. Add the following line to Gemfile:\n gem 'rest-client'\n 2. Then, run:\n bundle update\n"
|
@@ -29,15 +43,15 @@ module Sources
|
|
29
43
|
yield doc['_id'].to_i, doc[category_name] || next
|
30
44
|
end
|
31
45
|
end
|
32
|
-
|
33
|
-
def get_data &block
|
46
|
+
|
47
|
+
def get_data &block # :nodoc:
|
34
48
|
resp = @db['_all_docs?include_docs=true'].get
|
35
49
|
JSON.parse(resp)['rows'].
|
36
50
|
map{|row| row['doc']}.
|
37
51
|
each &block
|
38
52
|
end
|
39
53
|
|
40
|
-
def raise_no_db_given category_names
|
54
|
+
def raise_no_db_given category_names # :nodoc:
|
41
55
|
raise NoCouchDBGiven.new(category_names.join(', '))
|
42
56
|
end
|
43
57
|
end
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,13 +1,36 @@
|
|
1
1
|
module Sources
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
3
|
+
# Raised when a CSV source is instantiated without a file.
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
# Sources::CSV.new(:column1, :column2) # without file option
|
5
7
|
#
|
6
8
|
class NoCSVFileGiven < StandardError; end
|
7
9
|
|
10
|
+
# Describes a CSV source, a file with comma separated values in it.
|
11
|
+
#
|
12
|
+
# The first column is implicitly assumed to be the id column.
|
13
|
+
#
|
14
|
+
# It takes the same options as the Ruby 1.9 CSV class.
|
15
|
+
#
|
16
|
+
# Examples:
|
17
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
|
18
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
|
19
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
20
|
+
#
|
8
21
|
class CSV < Base
|
9
22
|
|
10
|
-
|
23
|
+
# The CSV file's path, relative to PICKY_ROOT.
|
24
|
+
#
|
25
|
+
attr_reader :file_name
|
26
|
+
|
27
|
+
# The options that were passed into #new.
|
28
|
+
#
|
29
|
+
attr_reader :csv_options
|
30
|
+
|
31
|
+
# The data category names.
|
32
|
+
#
|
33
|
+
attr_reader :category_names
|
11
34
|
|
12
35
|
def initialize *category_names, options
|
13
36
|
require 'csv'
|
@@ -17,9 +40,9 @@ module Sources
|
|
17
40
|
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
18
41
|
end
|
19
42
|
|
43
|
+
# Raises a NoCSVFileGiven exception.
|
20
44
|
#
|
21
|
-
#
|
22
|
-
def raise_no_file_given category_names
|
45
|
+
def raise_no_file_given category_names # :nodoc:
|
23
46
|
raise NoCSVFileGiven.new(category_names.join(', '))
|
24
47
|
end
|
25
48
|
|
@@ -38,7 +61,7 @@ module Sources
|
|
38
61
|
|
39
62
|
#
|
40
63
|
#
|
41
|
-
def get_data &block
|
64
|
+
def get_data &block # :nodoc:
|
42
65
|
::CSV.foreach file_name, csv_options, &block
|
43
66
|
end
|
44
67
|
|