picky 0.12.3 → 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/deployment.rb +2 -2
- data/lib/picky/application.rb +172 -12
- data/lib/picky/cacher/generator.rb +1 -1
- data/lib/picky/calculations/location.rb +9 -1
- data/lib/picky/character_substituters/west_european.rb +1 -1
- data/lib/picky/configuration/index.rb +1 -1
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/extensions/array.rb +1 -1
- data/lib/picky/extensions/hash.rb +1 -1
- data/lib/picky/extensions/module.rb +1 -1
- data/lib/picky/extensions/object.rb +1 -1
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generator.rb +2 -2
- data/lib/picky/helpers/cache.rb +7 -5
- data/lib/picky/helpers/gc.rb +2 -0
- data/lib/picky/helpers/measuring.rb +2 -0
- data/lib/picky/index/bundle.rb +1 -1
- data/lib/picky/index_api.rb +33 -15
- data/lib/picky/indexed/bundle.rb +1 -1
- data/lib/picky/indexed/index.rb +1 -1
- data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexes_api.rb +1 -1
- data/lib/picky/indexing/bundle.rb +1 -1
- data/lib/picky/indexing/index.rb +1 -1
- data/lib/picky/loader.rb +1 -1
- data/lib/picky/loggers/search.rb +1 -1
- data/lib/picky/performant.rb +3 -0
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +1 -1
- data/lib/picky/query/base.rb +48 -16
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/combinations.rb +1 -1
- data/lib/picky/query/full.rb +7 -2
- data/lib/picky/query/live.rb +9 -7
- data/lib/picky/query/qualifiers.rb +6 -2
- data/lib/picky/query/solr.rb +1 -1
- data/lib/picky/query/token.rb +2 -1
- data/lib/picky/query/tokens.rb +4 -1
- data/lib/picky/query/weigher.rb +1 -1
- data/lib/picky/query/weights.rb +1 -1
- data/lib/picky/rack/harakiri.rb +14 -5
- data/lib/picky/results/base.rb +1 -1
- data/lib/picky/routing.rb +1 -1
- data/lib/picky/solr/schema_generator.rb +2 -1
- data/lib/picky/sources/base.rb +39 -25
- data/lib/picky/sources/couch.rb +22 -8
- data/lib/picky/sources/csv.rb +29 -6
- data/lib/picky/sources/db.rb +46 -30
- data/lib/picky/sources/delicious.rb +12 -2
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/tokenizers/base.rb +1 -1
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/README +0 -1
- data/spec/lib/calculations/location_spec.rb +28 -16
- data/spec/lib/index_api_spec.rb +64 -0
- data/spec/lib/indexed/index_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
- data/spec/lib/indexing/index_spec.rb +2 -2
- data/spec/lib/rack/harakiri_spec.rb +22 -10
- metadata +7 -4
data/lib/deployment.rb
CHANGED
data/lib/picky/application.rb
CHANGED
@@ -1,4 +1,152 @@
|
|
1
|
-
#
|
1
|
+
# = Picky Applications
|
2
|
+
#
|
3
|
+
# A Picky Application is where you configure the whole search engine.
|
4
|
+
#
|
5
|
+
# This is a step-by-step description of how to configure your Picky app.
|
6
|
+
#
|
7
|
+
# Start by subclassing Application:
|
8
|
+
# class MyGreatSearch < Application
|
9
|
+
# # Your configuration goes here.
|
10
|
+
# end
|
11
|
+
# The generator
|
12
|
+
# $ picky project project_name
|
13
|
+
# will generate an example <tt>project_name/app/application.rb</tt> file for you
|
14
|
+
# with some example code inside.
|
15
|
+
#
|
16
|
+
# == index(name, source)
|
17
|
+
#
|
18
|
+
# Next, define where your data comes from. You use the <tt>index</tt> method for that:
|
19
|
+
# my_index = index :some_index_name, some_source
|
20
|
+
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
21
|
+
# class MyGreatSearch < Application
|
22
|
+
#
|
23
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
24
|
+
#
|
25
|
+
# end
|
26
|
+
# Now we have an index <tt>books</tt>.
|
27
|
+
#
|
28
|
+
# That in itself won't do much good.
|
29
|
+
#
|
30
|
+
# == index.define_category(identifier, options = {})
|
31
|
+
#
|
32
|
+
# Picky needs us to define categories on the data.
|
33
|
+
#
|
34
|
+
# Categories help your user find data.
|
35
|
+
# It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
|
36
|
+
#
|
37
|
+
# Let's go ahead and define a category:
|
38
|
+
# class MyGreatSearch < Application
|
39
|
+
#
|
40
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
41
|
+
# books.define_category :title
|
42
|
+
#
|
43
|
+
# end
|
44
|
+
# Now we could already run the indexer:
|
45
|
+
# $ rake index
|
46
|
+
#
|
47
|
+
# (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
|
48
|
+
#
|
49
|
+
# So now we have indexed data (the title), but nobody to ask the index anything.
|
50
|
+
#
|
51
|
+
# == Query::Full.new(*indexes, options = {})
|
52
|
+
#
|
53
|
+
# We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration). That works like this:
|
54
|
+
# full_books_query = Query::Full.new books
|
55
|
+
# Full just means that the ids are returned with the results.
|
56
|
+
# Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
|
57
|
+
#
|
58
|
+
# Now we have somebody we can ask about the index. But no external interface.
|
59
|
+
#
|
60
|
+
# == route(/regexp1/ => query1, /regexp2/ => query2, ...)
|
61
|
+
#
|
62
|
+
# Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
|
63
|
+
# route %r{^/books/full$} => full_books_query
|
64
|
+
# In full glory:
|
65
|
+
# class MyGreatSearch < Application
|
66
|
+
#
|
67
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
68
|
+
# books.define_category :title
|
69
|
+
#
|
70
|
+
# full_books_query = Query::Full.new books
|
71
|
+
#
|
72
|
+
# route %r{^/books/full$} => full_books_query
|
73
|
+
#
|
74
|
+
# end
|
75
|
+
# That's it!
|
76
|
+
#
|
77
|
+
# Now run the indexer and server:
|
78
|
+
# $ rake index
|
79
|
+
# $ rake start
|
80
|
+
# Run your first query:
|
81
|
+
# $ curl 'localhost:8080/books/full?query=hello server'
|
82
|
+
#
|
83
|
+
# Nice, right? Your first query!
|
84
|
+
#
|
85
|
+
# Maybe you don't find everything. We need to process the data before it goes into the index.
|
86
|
+
#
|
87
|
+
# == default_indexing(options = {})
|
88
|
+
#
|
89
|
+
# That's what the <tt>default_indexing</tt> method is for:
|
90
|
+
# default_indexing options
|
91
|
+
# Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
|
92
|
+
#
|
93
|
+
# Same thing with the search text – we need to process that as well.
|
94
|
+
#
|
95
|
+
# == default_querying(options = {})
|
96
|
+
#
|
97
|
+
# Analogous to the default_indexing method, we use the <tt>default_querying</tt> method.
|
98
|
+
# default_querying options
|
99
|
+
# Read more about the options here: http://github.com/floere/picky/wiki/Querying-Configuration
|
100
|
+
#
|
101
|
+
# And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
|
102
|
+
#
|
103
|
+
# == Wiki
|
104
|
+
#
|
105
|
+
# Read more in the Wiki: http://github.com/floere/picky/wiki
|
106
|
+
#
|
107
|
+
# Have fun!
|
108
|
+
#
|
109
|
+
# == Full example
|
110
|
+
#
|
111
|
+
# Our example, fully fleshed out with indexing, querying, and weights:
|
112
|
+
# class MyGreatSearch < Application
|
113
|
+
#
|
114
|
+
# default_indexing removes_characters: /[^a-zA-Z0-9\.]/,
|
115
|
+
# stopwords: /\b(and|or|in|on|is|has)\b/,
|
116
|
+
# splits_text_on: /\s/,
|
117
|
+
# removes_characters_after_splitting: /\./,
|
118
|
+
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
119
|
+
# normalizes_words: [
|
120
|
+
# [/(.*)hausen/, 'hn'],
|
121
|
+
# [/\b(\w*)str(eet)?/, 'st']
|
122
|
+
# ]
|
123
|
+
#
|
124
|
+
# default_querying removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
|
125
|
+
# stopwords: /\b(and|the|of|it|in|for)\b/,
|
126
|
+
# splits_text_on: /[\s\/\-\,\&]+/,
|
127
|
+
# removes_characters_after_splitting: /\./,
|
128
|
+
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
129
|
+
# maximum_tokens: 4
|
130
|
+
#
|
131
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
132
|
+
# books.define_category :title,
|
133
|
+
# qualifiers: [:t, :title, :titre],
|
134
|
+
# partial: Partial::Substring.new(:from => 1),
|
135
|
+
# similarity: Similarity::Phonetic.new(2)
|
136
|
+
# books.define_category :author,
|
137
|
+
# partial: Partial::Substring.new(:from => -2)
|
138
|
+
# books.define_category :isbn
|
139
|
+
#
|
140
|
+
# query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
|
141
|
+
#
|
142
|
+
# full_books_query = Query::Full.new books, query_options
|
143
|
+
# live_books_query = Query::Live.new books, query_options
|
144
|
+
#
|
145
|
+
# route %r{^/books/full$} => full_books_query
|
146
|
+
# route %r{^/books/live$} => live_books_query
|
147
|
+
#
|
148
|
+
# end
|
149
|
+
# That's actually already a full-blown Picky App!
|
2
150
|
#
|
3
151
|
class Application
|
4
152
|
|
@@ -21,10 +169,20 @@ class Application
|
|
21
169
|
Tokenizers::Query.default = Tokenizers::Query.new(options)
|
22
170
|
end
|
23
171
|
|
24
|
-
#
|
172
|
+
# Create a new index for indexing and for querying.
|
173
|
+
#
|
174
|
+
# Parameters:
|
175
|
+
# * name: The identifier of the index. Used:
|
176
|
+
# - to identify an index (e.g. by you in Rake tasks).
|
177
|
+
# - in the frontend to describe which index a result came from.
|
178
|
+
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
179
|
+
# * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
|
25
180
|
#
|
26
|
-
|
27
|
-
|
181
|
+
# Options:
|
182
|
+
# * result_type: # TODO Rename.
|
183
|
+
#
|
184
|
+
def index name, source, options = {}
|
185
|
+
IndexAPI.new name, source, options
|
28
186
|
end
|
29
187
|
|
30
188
|
# Routes.
|
@@ -35,39 +193,41 @@ class Application
|
|
35
193
|
# API
|
36
194
|
|
37
195
|
|
38
|
-
#
|
196
|
+
# A Picky application implements the Rack interface.
|
197
|
+
#
|
198
|
+
# Delegates to its routing to handle a request.
|
39
199
|
#
|
40
200
|
def call env
|
41
201
|
routing.call env
|
42
202
|
end
|
43
|
-
def routing
|
203
|
+
def routing # :nodoc:
|
44
204
|
@routing ||= Routing.new
|
45
205
|
end
|
46
206
|
|
47
207
|
# Finalize the subclass as soon as it
|
48
208
|
# has finished loading.
|
49
209
|
#
|
50
|
-
attr_reader :apps
|
51
|
-
def initialize_apps
|
210
|
+
attr_reader :apps # :nodoc:
|
211
|
+
def initialize_apps # :nodoc:
|
52
212
|
@apps ||= []
|
53
213
|
end
|
54
|
-
def inherited app
|
214
|
+
def inherited app # :nodoc:
|
55
215
|
initialize_apps
|
56
216
|
apps << app
|
57
217
|
end
|
58
|
-
def finalize_apps
|
218
|
+
def finalize_apps # :nodoc:
|
59
219
|
initialize_apps
|
60
220
|
apps.each &:finalize
|
61
221
|
end
|
62
222
|
# Finalizes the routes.
|
63
223
|
#
|
64
|
-
def finalize
|
224
|
+
def finalize # :nodoc:
|
65
225
|
routing.freeze
|
66
226
|
end
|
67
227
|
|
68
228
|
# TODO Add more info if possible.
|
69
229
|
#
|
70
|
-
def to_s
|
230
|
+
def to_s # :nodoc:
|
71
231
|
"#{self.name}:\n#{routing}"
|
72
232
|
end
|
73
233
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
module Calculations
|
1
|
+
module Calculations # :nodoc:all
|
2
2
|
|
3
3
|
# A location calculation recalculates a 1-d location
|
4
4
|
# to the Picky internal 1-d "grid".
|
@@ -18,7 +18,15 @@ module Calculations
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def minimum= minimum
|
21
|
+
# Add a margin of 1 user grid.
|
22
|
+
#
|
21
23
|
minimum -= @user_grid
|
24
|
+
|
25
|
+
# Add one more grid so that the index key never falls on 0.
|
26
|
+
# Why? to_i maps by default to 0.
|
27
|
+
#
|
28
|
+
minimum -= @grid
|
29
|
+
|
22
30
|
@minimum = minimum
|
23
31
|
end
|
24
32
|
|
data/lib/picky/cores.rb
CHANGED
data/lib/picky/generator.rb
CHANGED
@@ -8,7 +8,7 @@ module Picky
|
|
8
8
|
# picky <command> <options>
|
9
9
|
# is found.
|
10
10
|
#
|
11
|
-
class NoGeneratorError < StandardError
|
11
|
+
class NoGeneratorError < StandardError # :nodoc:all
|
12
12
|
|
13
13
|
def initialize generator
|
14
14
|
super usage + possible_commands(generator.types)
|
@@ -38,7 +38,7 @@ module Picky
|
|
38
38
|
#
|
39
39
|
# Basically copies a prototype project into a newly generated directory.
|
40
40
|
#
|
41
|
-
class Generator
|
41
|
+
class Generator # :nodoc:all
|
42
42
|
|
43
43
|
attr_reader :types
|
44
44
|
|
data/lib/picky/helpers/cache.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
# TODO Not used anymore? Remove.
|
1
2
|
#
|
2
|
-
#
|
3
|
-
|
3
|
+
module Helpers # :nodoc:all
|
4
|
+
|
4
5
|
module Cache
|
5
6
|
# This is a simple cache.
|
6
7
|
# The store needs to be able to answer to [] and []=.
|
@@ -10,14 +11,15 @@ module Helpers
|
|
10
11
|
#
|
11
12
|
results = store[key]
|
12
13
|
return results if results
|
13
|
-
|
14
|
+
|
14
15
|
results = lambda(&block).call
|
15
|
-
|
16
|
+
|
16
17
|
# Store results
|
17
18
|
#
|
18
19
|
store[key] = results
|
19
|
-
|
20
|
+
|
20
21
|
results
|
21
22
|
end
|
22
23
|
end
|
24
|
+
|
23
25
|
end
|
data/lib/picky/helpers/gc.rb
CHANGED
data/lib/picky/index/bundle.rb
CHANGED
data/lib/picky/index_api.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
# This class defines the indexing and index API.
|
1
|
+
# This class defines the indexing and index API that is exposed to the user.
|
2
|
+
# It provides a single front for both indexing and index options.
|
2
3
|
#
|
3
4
|
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Type*.
|
4
5
|
#
|
5
|
-
class IndexAPI
|
6
|
-
|
7
|
-
# TODO Delegation.
|
8
|
-
#
|
6
|
+
class IndexAPI # :nodoc:all
|
9
7
|
|
10
8
|
attr_reader :name, :indexing, :indexed
|
11
9
|
|
10
|
+
# TODO Doc.
|
11
|
+
#
|
12
12
|
def initialize name, source, options = {}
|
13
13
|
@name = name
|
14
14
|
@indexing = Indexing::Index.new name, source, options
|
@@ -19,15 +19,13 @@ class IndexAPI
|
|
19
19
|
Indexes.register self
|
20
20
|
end
|
21
21
|
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# TODO Spec! Doc!
|
22
|
+
# TODO Doc.
|
25
23
|
#
|
26
24
|
def define_category category_name, options = {}
|
27
25
|
category_name = category_name.to_sym
|
28
26
|
|
29
|
-
indexing_category = indexing.
|
30
|
-
indexed_category = indexed.
|
27
|
+
indexing_category = indexing.define_category category_name, options
|
28
|
+
indexed_category = indexed.define_category category_name, options
|
31
29
|
|
32
30
|
yield indexing_category, indexed_category if block_given?
|
33
31
|
|
@@ -35,22 +33,42 @@ class IndexAPI
|
|
35
33
|
end
|
36
34
|
alias category define_category
|
37
35
|
|
38
|
-
#
|
36
|
+
#
|
39
37
|
#
|
40
38
|
def define_location name, options = {}
|
41
|
-
grid = options[:
|
39
|
+
grid = options[:radius] || raise("Option :radius needs to be set on define_location, it defines the search radius.")
|
42
40
|
precision = options[:precision]
|
43
41
|
|
42
|
+
options = { partial: Partial::None.new }.merge options
|
43
|
+
|
44
44
|
define_category name, options do |indexing, indexed|
|
45
45
|
indexing.source = Sources::Wrappers::Location.new indexing, grid: grid, precision: precision
|
46
46
|
indexing.tokenizer = Tokenizers::Index.new
|
47
|
-
# indexing.partial = Partial::None.new
|
48
47
|
|
49
|
-
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid
|
48
|
+
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid, precision: precision
|
50
49
|
indexed.exact = exact_bundle
|
51
|
-
indexed.partial = exact_bundle
|
50
|
+
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
52
51
|
end
|
53
52
|
end
|
54
53
|
alias location define_location
|
55
54
|
|
55
|
+
# Options
|
56
|
+
# * radius (in km).
|
57
|
+
#
|
58
|
+
def define_map_location name, options = {}
|
59
|
+
radius = options[:radius] || raise("Option :radius needs to be set on define_map_location, it defines the search radius.")
|
60
|
+
|
61
|
+
# The radius is given as if all the locations were on the equator.
|
62
|
+
#
|
63
|
+
# TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
|
64
|
+
#
|
65
|
+
# This calculates km -> longitude (degrees).
|
66
|
+
#
|
67
|
+
# A degree on the equator is equal to ~111,319.9 meters.
|
68
|
+
# So a km on the equator is equal to 0.00898312 degrees.
|
69
|
+
#
|
70
|
+
options[:radius] = radius * 0.00898312
|
71
|
+
|
72
|
+
define_location name, options
|
73
|
+
end
|
56
74
|
end
|
data/lib/picky/indexed/bundle.rb
CHANGED
data/lib/picky/indexed/index.rb
CHANGED
@@ -28,7 +28,7 @@ module Indexed
|
|
28
28
|
# First load the bundle, then extract the config.
|
29
29
|
#
|
30
30
|
bundle.load
|
31
|
-
minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing.")
|
31
|
+
minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
|
32
32
|
@calculation.minimum = minimum
|
33
33
|
end
|
34
34
|
|
data/lib/picky/indexes_api.rb
CHANGED
data/lib/picky/indexing/index.rb
CHANGED
data/lib/picky/loader.rb
CHANGED
data/lib/picky/loggers/search.rb
CHANGED