picky 0.12.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/deployment.rb +2 -2
- data/lib/picky/application.rb +172 -12
- data/lib/picky/cacher/generator.rb +1 -1
- data/lib/picky/calculations/location.rb +9 -1
- data/lib/picky/character_substituters/west_european.rb +1 -1
- data/lib/picky/configuration/index.rb +1 -1
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/extensions/array.rb +1 -1
- data/lib/picky/extensions/hash.rb +1 -1
- data/lib/picky/extensions/module.rb +1 -1
- data/lib/picky/extensions/object.rb +1 -1
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generator.rb +2 -2
- data/lib/picky/helpers/cache.rb +7 -5
- data/lib/picky/helpers/gc.rb +2 -0
- data/lib/picky/helpers/measuring.rb +2 -0
- data/lib/picky/index/bundle.rb +1 -1
- data/lib/picky/index_api.rb +33 -15
- data/lib/picky/indexed/bundle.rb +1 -1
- data/lib/picky/indexed/index.rb +1 -1
- data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexes_api.rb +1 -1
- data/lib/picky/indexing/bundle.rb +1 -1
- data/lib/picky/indexing/index.rb +1 -1
- data/lib/picky/loader.rb +1 -1
- data/lib/picky/loggers/search.rb +1 -1
- data/lib/picky/performant.rb +3 -0
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +1 -1
- data/lib/picky/query/base.rb +48 -16
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/combinations.rb +1 -1
- data/lib/picky/query/full.rb +7 -2
- data/lib/picky/query/live.rb +9 -7
- data/lib/picky/query/qualifiers.rb +6 -2
- data/lib/picky/query/solr.rb +1 -1
- data/lib/picky/query/token.rb +2 -1
- data/lib/picky/query/tokens.rb +4 -1
- data/lib/picky/query/weigher.rb +1 -1
- data/lib/picky/query/weights.rb +1 -1
- data/lib/picky/rack/harakiri.rb +14 -5
- data/lib/picky/results/base.rb +1 -1
- data/lib/picky/routing.rb +1 -1
- data/lib/picky/solr/schema_generator.rb +2 -1
- data/lib/picky/sources/base.rb +39 -25
- data/lib/picky/sources/couch.rb +22 -8
- data/lib/picky/sources/csv.rb +29 -6
- data/lib/picky/sources/db.rb +46 -30
- data/lib/picky/sources/delicious.rb +12 -2
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/tokenizers/base.rb +1 -1
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/README +0 -1
- data/spec/lib/calculations/location_spec.rb +28 -16
- data/spec/lib/index_api_spec.rb +64 -0
- data/spec/lib/indexed/index_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
- data/spec/lib/indexing/index_spec.rb +2 -2
- data/spec/lib/rack/harakiri_spec.rb +22 -10
- metadata +7 -4
data/lib/picky/query/base.rb
CHANGED
@@ -1,7 +1,23 @@
|
|
1
|
+
# = Picky Queries
|
2
|
+
#
|
3
|
+
# A Picky Query is an object which:
|
4
|
+
# * holds one or more indexes
|
5
|
+
# * offers an interface to query these indexes.
|
6
|
+
#
|
7
|
+
# You connect URL paths to indexes via a Query.
|
8
|
+
#
|
9
|
+
# We recommend not to use this directly, but connect it to a URL and query through one of these
|
10
|
+
# (Protip: Use "curl 'localhost:8080/query/path?query=exampletext'" in a Terminal.)
|
11
|
+
#
|
12
|
+
# There are two flavors of queries:
|
13
|
+
# * Query::Full (Full results with all infos)
|
14
|
+
# * Query::Live (Same as the Full results without result ids. Useful for query result counters.)
|
15
|
+
#
|
1
16
|
module Query
|
2
|
-
|
17
|
+
|
18
|
+
# The base query class.
|
3
19
|
#
|
4
|
-
#
|
20
|
+
# Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
|
5
21
|
#
|
6
22
|
class Base
|
7
23
|
|
@@ -11,11 +27,11 @@ module Query
|
|
11
27
|
attr_accessor :reduce_to_amount, :weights
|
12
28
|
|
13
29
|
# Takes:
|
14
|
-
#
|
15
|
-
#
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
30
|
+
# * A number of indexes
|
31
|
+
# * Options hash (optional) with:
|
32
|
+
# * weigher: A weigher. Query::Weigher by default.
|
33
|
+
# * tokenizer: Tokenizers::Query.default by default.
|
34
|
+
# * weights: A hash of weights, or a Query::Weights object.
|
19
35
|
#
|
20
36
|
def initialize *index_type_definitions
|
21
37
|
options = Hash === index_type_definitions.last ? index_type_definitions.pop : {}
|
@@ -27,13 +43,21 @@ module Query
|
|
27
43
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
28
44
|
end
|
29
45
|
|
30
|
-
#
|
46
|
+
# Search through this method.
|
47
|
+
#
|
48
|
+
# Parameters:
|
49
|
+
# * text: The search text.
|
50
|
+
# * offset = 0: _optional_ The offset from which position to return the ids. Useful for pagination.
|
51
|
+
#
|
52
|
+
# Note: The Routing uses this method after unravelling the HTTP request.
|
31
53
|
#
|
32
54
|
def search_with_text text, offset = 0
|
33
55
|
search tokenized(text), offset
|
34
56
|
end
|
35
57
|
|
36
|
-
#
|
58
|
+
# Runs the actual search using Query::Tokens.
|
59
|
+
#
|
60
|
+
# Note: Internal method, use #search_with_text.
|
37
61
|
#
|
38
62
|
def search tokens, offset = 0
|
39
63
|
results = nil
|
@@ -46,7 +70,9 @@ module Query
|
|
46
70
|
results
|
47
71
|
end
|
48
72
|
|
49
|
-
#
|
73
|
+
# Execute a search using Query::Tokens.
|
74
|
+
#
|
75
|
+
# Note: Internal method, use #search_with_text.
|
50
76
|
#
|
51
77
|
def execute tokens, offset
|
52
78
|
results_from offset, sorted_allocations(tokens)
|
@@ -54,12 +80,18 @@ module Query
|
|
54
80
|
|
55
81
|
# Returns an empty result with default values.
|
56
82
|
#
|
83
|
+
# Parameters:
|
84
|
+
# * offset = 0: _optional_ The offset to use for the empty results.
|
85
|
+
#
|
57
86
|
def empty_results offset = 0
|
58
87
|
result_type.new offset
|
59
88
|
end
|
60
89
|
|
61
90
|
# Delegates the tokenizing to the query tokenizer.
|
62
91
|
#
|
92
|
+
# Parameters:
|
93
|
+
# * text: The text to tokenize.
|
94
|
+
#
|
63
95
|
def tokenized text
|
64
96
|
@tokenizer.tokenize text
|
65
97
|
end
|
@@ -72,7 +104,7 @@ module Query
|
|
72
104
|
#
|
73
105
|
# TODO Rename: allocations
|
74
106
|
#
|
75
|
-
def sorted_allocations tokens
|
107
|
+
def sorted_allocations tokens # :nodoc:
|
76
108
|
# Get the allocations.
|
77
109
|
#
|
78
110
|
# TODO Pass in reduce_to_amount (aka max_allocations)
|
@@ -105,18 +137,18 @@ module Query
|
|
105
137
|
#
|
106
138
|
allocations
|
107
139
|
end
|
108
|
-
def reduce allocations
|
140
|
+
def reduce allocations # :nodoc:
|
109
141
|
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
110
142
|
end
|
111
|
-
def remove_identifiers?
|
143
|
+
def remove_identifiers? # :nodoc:
|
112
144
|
identifiers_to_remove.present?
|
113
145
|
end
|
114
|
-
def remove_from allocations
|
146
|
+
def remove_from allocations # :nodoc:
|
115
147
|
allocations.remove(identifiers_to_remove) if remove_identifiers?
|
116
148
|
end
|
117
149
|
# Override. TODO No, redesign.
|
118
150
|
#
|
119
|
-
def identifiers_to_remove
|
151
|
+
def identifiers_to_remove # :nodoc:
|
120
152
|
@identifiers_to_remove ||= []
|
121
153
|
end
|
122
154
|
|
@@ -126,7 +158,7 @@ module Query
|
|
126
158
|
#
|
127
159
|
# TODO Move to results. result_type.from allocations, offset
|
128
160
|
#
|
129
|
-
def results_from offset = 0, allocations = nil
|
161
|
+
def results_from offset = 0, allocations = nil # :nodoc:
|
130
162
|
results = result_type.new offset, allocations
|
131
163
|
results.prepare!
|
132
164
|
results
|
data/lib/picky/query/full.rb
CHANGED
@@ -1,10 +1,15 @@
|
|
1
1
|
module Query
|
2
2
|
|
3
|
-
# This
|
3
|
+
# This Query class performs full queries.
|
4
|
+
#
|
5
|
+
# It includes in its results:
|
6
|
+
# * A count of results.
|
7
|
+
# * All possible combinations with its weights.
|
8
|
+
# * The top X result ids.
|
4
9
|
#
|
5
10
|
class Full < Base
|
6
11
|
|
7
|
-
#
|
12
|
+
# Returns Results::Full as its result type.
|
8
13
|
#
|
9
14
|
def result_type
|
10
15
|
Results::Full
|
data/lib/picky/query/live.rb
CHANGED
@@ -1,17 +1,19 @@
|
|
1
1
|
module Query
|
2
2
|
|
3
|
-
# This
|
3
|
+
# This Query class performs live queries.
|
4
4
|
#
|
5
|
-
# It
|
6
|
-
# * Return a count of results.
|
5
|
+
# It is useful for updating counters, or any job where you don't need the result ids.
|
7
6
|
#
|
8
|
-
# It
|
9
|
-
#
|
10
|
-
#
|
7
|
+
# It includes in its results:
|
8
|
+
# * A count of results.
|
9
|
+
# * All possible combinations with its weights.
|
10
|
+
#
|
11
|
+
# But not:
|
12
|
+
# * The top X result ids.
|
11
13
|
#
|
12
14
|
class Live < Base
|
13
15
|
|
14
|
-
#
|
16
|
+
# Returns Results::Live as its result type.
|
15
17
|
#
|
16
18
|
def result_type
|
17
19
|
Results::Live
|
@@ -1,9 +1,13 @@
|
|
1
1
|
# coding: utf-8
|
2
|
+
#
|
3
|
+
|
4
|
+
#
|
5
|
+
#
|
2
6
|
module Query
|
3
7
|
|
4
8
|
# A single qualifier.
|
5
9
|
#
|
6
|
-
class Qualifier
|
10
|
+
class Qualifier # :nodoc:all
|
7
11
|
|
8
12
|
attr_reader :normalized_qualifier, :codes
|
9
13
|
|
@@ -28,7 +32,7 @@ module Query
|
|
28
32
|
|
29
33
|
# Collection class for qualifiers.
|
30
34
|
#
|
31
|
-
class Qualifiers
|
35
|
+
class Qualifiers # :nodoc:all
|
32
36
|
|
33
37
|
include Singleton
|
34
38
|
|
data/lib/picky/query/solr.rb
CHANGED
data/lib/picky/query/token.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
module Query
|
2
|
+
|
2
3
|
# This is a query token. Together with other tokens it makes up a query.
|
3
4
|
#
|
4
5
|
# It remembers the original form, and a normalized form.
|
@@ -7,7 +8,7 @@ module Query
|
|
7
8
|
#
|
8
9
|
# TODO Make partial / similarity char configurable.
|
9
10
|
#
|
10
|
-
class Token
|
11
|
+
class Token # :nodoc:all
|
11
12
|
|
12
13
|
attr_reader :text, :original
|
13
14
|
attr_writer :similar
|
data/lib/picky/query/tokens.rb
CHANGED
data/lib/picky/query/weigher.rb
CHANGED
data/lib/picky/query/weights.rb
CHANGED
data/lib/picky/rack/harakiri.rb
CHANGED
@@ -1,4 +1,4 @@
|
|
1
|
-
module Rack
|
1
|
+
module Rack # :nodoc:
|
2
2
|
|
3
3
|
# Simple Rack Middleware to kill Unicorns after X requests.
|
4
4
|
#
|
@@ -16,7 +16,6 @@ module Rack
|
|
16
16
|
# Set the amount of requests before the Unicorn commits Harakiri.
|
17
17
|
#
|
18
18
|
cattr_accessor :after
|
19
|
-
attr_reader :quit_after_requests
|
20
19
|
|
21
20
|
def initialize app
|
22
21
|
@app = app
|
@@ -25,8 +24,10 @@ module Rack
|
|
25
24
|
@quit_after_requests = self.class.after || 50
|
26
25
|
end
|
27
26
|
|
28
|
-
#
|
29
|
-
#
|
27
|
+
# #call interface method.
|
28
|
+
#
|
29
|
+
# Harakiri is a middleware, so it delegates to the app or
|
30
|
+
# the next middleware after checking if it is time to honorably retire.
|
30
31
|
#
|
31
32
|
def call env
|
32
33
|
harakiri
|
@@ -37,9 +38,17 @@ module Rack
|
|
37
38
|
#
|
38
39
|
# If yes, kills itself (Unicorn will answer the request, honorably).
|
39
40
|
#
|
41
|
+
# Note: Sends its process a QUIT signal if it is time.
|
42
|
+
#
|
40
43
|
def harakiri
|
41
44
|
@requests = @requests + 1
|
42
|
-
Process.kill(:QUIT, Process.pid) if
|
45
|
+
Process.kill(:QUIT, Process.pid) if harakiri?
|
46
|
+
end
|
47
|
+
|
48
|
+
# Is it time to honorably retire?
|
49
|
+
#
|
50
|
+
def harakiri?
|
51
|
+
@requests >= @quit_after_requests
|
43
52
|
end
|
44
53
|
|
45
54
|
end
|
data/lib/picky/results/base.rb
CHANGED
data/lib/picky/routing.rb
CHANGED
data/lib/picky/sources/base.rb
CHANGED
@@ -1,50 +1,64 @@
|
|
1
|
+
# = Data Sources
|
2
|
+
#
|
3
|
+
# Currently, Picky offers the following Sources:
|
4
|
+
# * CSV (comma – or other – separated file)
|
5
|
+
# * Couch (CouchDB, key-value store)
|
6
|
+
# * DB (Databases, foremost MySQL)
|
7
|
+
# * Delicious (http://del.icio.us, online bookmarking service)
|
8
|
+
# See also:
|
9
|
+
# http://github.com/floere/picky/wiki/Sources-Configuration
|
10
|
+
#
|
11
|
+
# Don't worry if your source isn't here. Adding your own is easy:
|
12
|
+
# http://github.com/floere/picky/wiki/Contributing-sources
|
13
|
+
#
|
1
14
|
module Sources
|
2
15
|
|
3
16
|
# Sources are where your data comes from.
|
4
17
|
#
|
5
|
-
#
|
6
|
-
# *
|
7
|
-
#
|
18
|
+
# A source has 1 mandatory and 2 optional methods:
|
19
|
+
# * connect_backend (_optional_): called once for each type/category pair.
|
20
|
+
# * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
|
21
|
+
# * take_snapshot (_optional_): called once for each type.
|
22
|
+
#
|
23
|
+
# This base class "implements" all these methods, but they don't do anything.
|
24
|
+
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
8
25
|
#
|
9
|
-
# * connect_backend: Optional, called once for each type/category pair.
|
10
|
-
# * take_snapshot: Optional, called once for each type.
|
11
26
|
class Base
|
12
27
|
|
13
|
-
# Note: Default methods do nothing.
|
14
|
-
#
|
15
|
-
|
16
28
|
# Connect to the backend.
|
17
29
|
#
|
18
|
-
#
|
19
|
-
# before harvesting.
|
30
|
+
# Called once per index/category combination before harvesting.
|
20
31
|
#
|
21
|
-
#
|
32
|
+
# Examples:
|
33
|
+
# * The DB backend connects the DB adapter.
|
34
|
+
# * We open a connection to a key value store.
|
35
|
+
# * We open a file with data.
|
22
36
|
#
|
23
37
|
def connect_backend
|
24
38
|
|
25
39
|
end
|
26
40
|
|
27
|
-
#
|
28
|
-
# e.g. in a database, a table based on the source's select
|
29
|
-
# statement is created.
|
41
|
+
# Called by the indexer when gathering data.
|
30
42
|
#
|
31
|
-
#
|
43
|
+
# Yields the data (id, text for id) for the given type and category.
|
32
44
|
#
|
33
|
-
|
34
|
-
|
45
|
+
# When implementing or overriding your own,
|
46
|
+
# be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
|
47
|
+
# for the given type symbol and category symbol.
|
48
|
+
#
|
49
|
+
def harvest index, category # :yields: id, text_for_id
|
50
|
+
# This concrete implementation yields "nothing", override in subclasses.
|
35
51
|
end
|
36
52
|
|
37
|
-
#
|
53
|
+
# Used to take a snapshot of your data if it is fast changing.
|
38
54
|
#
|
39
|
-
#
|
55
|
+
# Called once for each type before harvesting.
|
40
56
|
#
|
41
|
-
#
|
42
|
-
#
|
43
|
-
# and a corresponding text for the given type symbol and
|
44
|
-
# category symbol.
|
57
|
+
# Example:
|
58
|
+
# * In a DB source, a table based on the source's select statement is created.
|
45
59
|
#
|
46
|
-
def
|
47
|
-
|
60
|
+
def take_snapshot index
|
61
|
+
|
48
62
|
end
|
49
63
|
|
50
64
|
end
|
data/lib/picky/sources/couch.rb
CHANGED
@@ -1,11 +1,23 @@
|
|
1
1
|
module Sources
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
3
|
+
# Raised when a Couch source is instantiated without a url.
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
# Sources::Couch.new(:column1, :column2) # without url option
|
5
7
|
#
|
6
|
-
|
7
8
|
class NoCouchDBGiven < StandardError; end
|
8
|
-
|
9
|
+
|
10
|
+
# A Couch database source.
|
11
|
+
#
|
12
|
+
# Options:
|
13
|
+
# * url
|
14
|
+
# and all the options of a <tt>RestClient::Resource</tt>.
|
15
|
+
# See http://github.com/archiloque/rest-client.
|
16
|
+
#
|
17
|
+
# Examples:
|
18
|
+
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
|
19
|
+
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
20
|
+
#
|
9
21
|
class Couch < Base
|
10
22
|
|
11
23
|
def initialize *category_names, options
|
@@ -14,7 +26,9 @@ module Sources
|
|
14
26
|
@db = RestClient::Resource.new options.delete(:url), options
|
15
27
|
end
|
16
28
|
|
17
|
-
|
29
|
+
# Tries to require the rest_client gem.
|
30
|
+
#
|
31
|
+
def check_gem # :nodoc:
|
18
32
|
require 'rest_client'
|
19
33
|
rescue LoadError
|
20
34
|
puts "Rest-client gem missing!\nTo use the CouchDB source, you need to:\n 1. Add the following line to Gemfile:\n gem 'rest-client'\n 2. Then, run:\n bundle update\n"
|
@@ -29,15 +43,15 @@ module Sources
|
|
29
43
|
yield doc['_id'].to_i, doc[category_name] || next
|
30
44
|
end
|
31
45
|
end
|
32
|
-
|
33
|
-
def get_data &block
|
46
|
+
|
47
|
+
def get_data &block # :nodoc:
|
34
48
|
resp = @db['_all_docs?include_docs=true'].get
|
35
49
|
JSON.parse(resp)['rows'].
|
36
50
|
map{|row| row['doc']}.
|
37
51
|
each &block
|
38
52
|
end
|
39
53
|
|
40
|
-
def raise_no_db_given category_names
|
54
|
+
def raise_no_db_given category_names # :nodoc:
|
41
55
|
raise NoCouchDBGiven.new(category_names.join(', '))
|
42
56
|
end
|
43
57
|
end
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,13 +1,36 @@
|
|
1
1
|
module Sources
|
2
2
|
|
3
|
-
#
|
4
|
-
#
|
3
|
+
# Raised when a CSV source is instantiated without a file.
|
4
|
+
#
|
5
|
+
# Example:
|
6
|
+
# Sources::CSV.new(:column1, :column2) # without file option
|
5
7
|
#
|
6
8
|
class NoCSVFileGiven < StandardError; end
|
7
9
|
|
10
|
+
# Describes a CSV source, a file with comma separated values in it.
|
11
|
+
#
|
12
|
+
# The first column is implicitly assumed to be the id column.
|
13
|
+
#
|
14
|
+
# It takes the same options as the Ruby 1.9 CSV class.
|
15
|
+
#
|
16
|
+
# Examples:
|
17
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
|
18
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
|
19
|
+
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
20
|
+
#
|
8
21
|
class CSV < Base
|
9
22
|
|
10
|
-
|
23
|
+
# The CSV file's path, relative to PICKY_ROOT.
|
24
|
+
#
|
25
|
+
attr_reader :file_name
|
26
|
+
|
27
|
+
# The options that were passed into #new.
|
28
|
+
#
|
29
|
+
attr_reader :csv_options
|
30
|
+
|
31
|
+
# The data category names.
|
32
|
+
#
|
33
|
+
attr_reader :category_names
|
11
34
|
|
12
35
|
def initialize *category_names, options
|
13
36
|
require 'csv'
|
@@ -17,9 +40,9 @@ module Sources
|
|
17
40
|
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
18
41
|
end
|
19
42
|
|
43
|
+
# Raises a NoCSVFileGiven exception.
|
20
44
|
#
|
21
|
-
#
|
22
|
-
def raise_no_file_given category_names
|
45
|
+
def raise_no_file_given category_names # :nodoc:
|
23
46
|
raise NoCSVFileGiven.new(category_names.join(', '))
|
24
47
|
end
|
25
48
|
|
@@ -38,7 +61,7 @@ module Sources
|
|
38
61
|
|
39
62
|
#
|
40
63
|
#
|
41
|
-
def get_data &block
|
64
|
+
def get_data &block # :nodoc:
|
42
65
|
::CSV.foreach file_name, csv_options, &block
|
43
66
|
end
|
44
67
|
|