picky 0.12.3 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/deployment.rb +2 -2
- data/lib/picky/application.rb +172 -12
- data/lib/picky/cacher/generator.rb +1 -1
- data/lib/picky/calculations/location.rb +9 -1
- data/lib/picky/character_substituters/west_european.rb +1 -1
- data/lib/picky/configuration/index.rb +1 -1
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/extensions/array.rb +1 -1
- data/lib/picky/extensions/hash.rb +1 -1
- data/lib/picky/extensions/module.rb +1 -1
- data/lib/picky/extensions/object.rb +1 -1
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generator.rb +2 -2
- data/lib/picky/helpers/cache.rb +7 -5
- data/lib/picky/helpers/gc.rb +2 -0
- data/lib/picky/helpers/measuring.rb +2 -0
- data/lib/picky/index/bundle.rb +1 -1
- data/lib/picky/index_api.rb +33 -15
- data/lib/picky/indexed/bundle.rb +1 -1
- data/lib/picky/indexed/index.rb +1 -1
- data/lib/picky/indexed/wrappers/bundle/location.rb +1 -1
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexes_api.rb +1 -1
- data/lib/picky/indexing/bundle.rb +1 -1
- data/lib/picky/indexing/index.rb +1 -1
- data/lib/picky/loader.rb +1 -1
- data/lib/picky/loggers/search.rb +1 -1
- data/lib/picky/performant.rb +3 -0
- data/lib/picky/query/allocation.rb +1 -1
- data/lib/picky/query/allocations.rb +1 -1
- data/lib/picky/query/base.rb +48 -16
- data/lib/picky/query/combination.rb +1 -1
- data/lib/picky/query/combinations.rb +1 -1
- data/lib/picky/query/full.rb +7 -2
- data/lib/picky/query/live.rb +9 -7
- data/lib/picky/query/qualifiers.rb +6 -2
- data/lib/picky/query/solr.rb +1 -1
- data/lib/picky/query/token.rb +2 -1
- data/lib/picky/query/tokens.rb +4 -1
- data/lib/picky/query/weigher.rb +1 -1
- data/lib/picky/query/weights.rb +1 -1
- data/lib/picky/rack/harakiri.rb +14 -5
- data/lib/picky/results/base.rb +1 -1
- data/lib/picky/routing.rb +1 -1
- data/lib/picky/solr/schema_generator.rb +2 -1
- data/lib/picky/sources/base.rb +39 -25
- data/lib/picky/sources/couch.rb +22 -8
- data/lib/picky/sources/csv.rb +29 -6
- data/lib/picky/sources/db.rb +46 -30
- data/lib/picky/sources/delicious.rb +12 -2
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/tokenizers/base.rb +1 -1
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/README +0 -1
- data/spec/lib/calculations/location_spec.rb +28 -16
- data/spec/lib/index_api_spec.rb +64 -0
- data/spec/lib/indexed/index_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +2 -2
- data/spec/lib/indexing/index_spec.rb +2 -2
- data/spec/lib/rack/harakiri_spec.rb +22 -10
- metadata +7 -4
data/lib/deployment.rb
CHANGED
data/lib/picky/application.rb
CHANGED
@@ -1,4 +1,152 @@
|
|
1
|
-
#
|
1
|
+
# = Picky Applications
|
2
|
+
#
|
3
|
+
# A Picky Application is where you configure the whole search engine.
|
4
|
+
#
|
5
|
+
# This is a step-by-step description of how to configure your Picky app.
|
6
|
+
#
|
7
|
+
# Start by subclassing Application:
|
8
|
+
# class MyGreatSearch < Application
|
9
|
+
# # Your configuration goes here.
|
10
|
+
# end
|
11
|
+
# The generator
|
12
|
+
# $ picky project project_name
|
13
|
+
# will generate an example <tt>project_name/app/application.rb</tt> file for you
|
14
|
+
# with some example code inside.
|
15
|
+
#
|
16
|
+
# == index(name, source)
|
17
|
+
#
|
18
|
+
# Next, define where your data comes from. You use the <tt>index</tt> method for that:
|
19
|
+
# my_index = index :some_index_name, some_source
|
20
|
+
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
21
|
+
# class MyGreatSearch < Application
|
22
|
+
#
|
23
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
24
|
+
#
|
25
|
+
# end
|
26
|
+
# Now we have an index <tt>books</tt>.
|
27
|
+
#
|
28
|
+
# That in itself won't do much good.
|
29
|
+
#
|
30
|
+
# == index.define_category(identifier, options = {})
|
31
|
+
#
|
32
|
+
# Picky needs us to define categories on the data.
|
33
|
+
#
|
34
|
+
# Categories help your user find data.
|
35
|
+
# It's best if you look at an example yourself: http://floere.github.com/picky/examples.html
|
36
|
+
#
|
37
|
+
# Let's go ahead and define a category:
|
38
|
+
# class MyGreatSearch < Application
|
39
|
+
#
|
40
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
41
|
+
# books.define_category :title
|
42
|
+
#
|
43
|
+
# end
|
44
|
+
# Now we could already run the indexer:
|
45
|
+
# $ rake index
|
46
|
+
#
|
47
|
+
# (You can define similarity or partial search capabilities on a category, see http://github.com/floere/picky/wiki/Categories-configuration for info)
|
48
|
+
#
|
49
|
+
# So now we have indexed data (the title), but nobody to ask the index anything.
|
50
|
+
#
|
51
|
+
# == Query::Full.new(*indexes, options = {})
|
52
|
+
#
|
53
|
+
# We need somebody who asks the index (a Query object, also see http://github.com/floere/picky/wiki/Queries-Configuration). That works like this:
|
54
|
+
# full_books_query = Query::Full.new books
|
55
|
+
# Full just means that the ids are returned with the results.
|
56
|
+
# Picky also offers a Query that returns live results, Query::Live. But that's not important right now.
|
57
|
+
#
|
58
|
+
# Now we have somebody we can ask about the index. But no external interface.
|
59
|
+
#
|
60
|
+
# == route(/regexp1/ => query1, /regexp2/ => query2, ...)
|
61
|
+
#
|
62
|
+
# Let's add a URL path (a Route, see http://github.com/floere/picky/wiki/Routing-configuration) to which we can send our queries. We do that with the route method:
|
63
|
+
# route %r{^/books/full$} => full_books_query
|
64
|
+
# In full glory:
|
65
|
+
# class MyGreatSearch < Application
|
66
|
+
#
|
67
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
68
|
+
# books.define_category :title
|
69
|
+
#
|
70
|
+
# full_books_query = Query::Full.new books
|
71
|
+
#
|
72
|
+
# route %r{^/books/full$} => full_books_query
|
73
|
+
#
|
74
|
+
# end
|
75
|
+
# That's it!
|
76
|
+
#
|
77
|
+
# Now run the indexer and server:
|
78
|
+
# $ rake index
|
79
|
+
# $ rake start
|
80
|
+
# Run your first query:
|
81
|
+
# $ curl 'localhost:8080/books/full?query=hello server'
|
82
|
+
#
|
83
|
+
# Nice, right? Your first query!
|
84
|
+
#
|
85
|
+
# Maybe you don't find everything. We need to process the data before it goes into the index.
|
86
|
+
#
|
87
|
+
# == default_indexing(options = {})
|
88
|
+
#
|
89
|
+
# That's what the <tt>default_indexing</tt> method is for:
|
90
|
+
# default_indexing options
|
91
|
+
# Read more about the options here: http://github.com/floere/picky/wiki/Indexing-configuration
|
92
|
+
#
|
93
|
+
# Same thing with the search text – we need to process that as well.
|
94
|
+
#
|
95
|
+
# == default_querying(options = {})
|
96
|
+
#
|
97
|
+
# Analogous to the default_indexing method, we use the <tt>default_querying</tt> method.
|
98
|
+
# default_querying options
|
99
|
+
# Read more about the options here: http://github.com/floere/picky/wiki/Querying-Configuration
|
100
|
+
#
|
101
|
+
# And that's all there is. It's incredibly powerful though, as you can combine, weigh, refine to the max.
|
102
|
+
#
|
103
|
+
# == Wiki
|
104
|
+
#
|
105
|
+
# Read more in the Wiki: http://github.com/floere/picky/wiki
|
106
|
+
#
|
107
|
+
# Have fun!
|
108
|
+
#
|
109
|
+
# == Full example
|
110
|
+
#
|
111
|
+
# Our example, fully fleshed out with indexing, querying, and weights:
|
112
|
+
# class MyGreatSearch < Application
|
113
|
+
#
|
114
|
+
# default_indexing removes_characters: /[^a-zA-Z0-9\.]/,
|
115
|
+
# stopwords: /\b(and|or|in|on|is|has)\b/,
|
116
|
+
# splits_text_on: /\s/,
|
117
|
+
# removes_characters_after_splitting: /\./,
|
118
|
+
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
119
|
+
# normalizes_words: [
|
120
|
+
# [/(.*)hausen/, 'hn'],
|
121
|
+
# [/\b(\w*)str(eet)?/, 'st']
|
122
|
+
# ]
|
123
|
+
#
|
124
|
+
# default_querying removes_characters: /[^a-zA-Z0-9\s\/\-\,\&\"\~\*\:]/,
|
125
|
+
# stopwords: /\b(and|the|of|it|in|for)\b/,
|
126
|
+
# splits_text_on: /[\s\/\-\,\&]+/,
|
127
|
+
# removes_characters_after_splitting: /\./,
|
128
|
+
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
129
|
+
# maximum_tokens: 4
|
130
|
+
#
|
131
|
+
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
132
|
+
# books.define_category :title,
|
133
|
+
# qualifiers: [:t, :title, :titre],
|
134
|
+
# partial: Partial::Substring.new(:from => 1),
|
135
|
+
# similarity: Similarity::Phonetic.new(2)
|
136
|
+
# books.define_category :author,
|
137
|
+
# partial: Partial::Substring.new(:from => -2)
|
138
|
+
# books.define_category :isbn
|
139
|
+
#
|
140
|
+
# query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
|
141
|
+
#
|
142
|
+
# full_books_query = Query::Full.new books, query_options
|
143
|
+
# live_books_query = Query::Live.new books, query_options
|
144
|
+
#
|
145
|
+
# route %r{^/books/full$} => full_books_query
|
146
|
+
# route %r{^/books/live$} => live_books_query
|
147
|
+
#
|
148
|
+
# end
|
149
|
+
# That's actually already a full-blown Picky App!
|
2
150
|
#
|
3
151
|
class Application
|
4
152
|
|
@@ -21,10 +169,20 @@ class Application
|
|
21
169
|
Tokenizers::Query.default = Tokenizers::Query.new(options)
|
22
170
|
end
|
23
171
|
|
24
|
-
#
|
172
|
+
# Create a new index for indexing and for querying.
|
173
|
+
#
|
174
|
+
# Parameters:
|
175
|
+
# * name: The identifier of the index. Used:
|
176
|
+
# - to identify an index (e.g. by you in Rake tasks).
|
177
|
+
# - in the frontend to describe which index a result came from.
|
178
|
+
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
179
|
+
# * source: The source the data comes from. See Sources::Base. # TODO Sources (all).
|
25
180
|
#
|
26
|
-
|
27
|
-
|
181
|
+
# Options:
|
182
|
+
# * result_type: # TODO Rename.
|
183
|
+
#
|
184
|
+
def index name, source, options = {}
|
185
|
+
IndexAPI.new name, source, options
|
28
186
|
end
|
29
187
|
|
30
188
|
# Routes.
|
@@ -35,39 +193,41 @@ class Application
|
|
35
193
|
# API
|
36
194
|
|
37
195
|
|
38
|
-
#
|
196
|
+
# A Picky application implements the Rack interface.
|
197
|
+
#
|
198
|
+
# Delegates to its routing to handle a request.
|
39
199
|
#
|
40
200
|
def call env
|
41
201
|
routing.call env
|
42
202
|
end
|
43
|
-
def routing
|
203
|
+
def routing # :nodoc:
|
44
204
|
@routing ||= Routing.new
|
45
205
|
end
|
46
206
|
|
47
207
|
# Finalize the subclass as soon as it
|
48
208
|
# has finished loading.
|
49
209
|
#
|
50
|
-
attr_reader :apps
|
51
|
-
def initialize_apps
|
210
|
+
attr_reader :apps # :nodoc:
|
211
|
+
def initialize_apps # :nodoc:
|
52
212
|
@apps ||= []
|
53
213
|
end
|
54
|
-
def inherited app
|
214
|
+
def inherited app # :nodoc:
|
55
215
|
initialize_apps
|
56
216
|
apps << app
|
57
217
|
end
|
58
|
-
def finalize_apps
|
218
|
+
def finalize_apps # :nodoc:
|
59
219
|
initialize_apps
|
60
220
|
apps.each &:finalize
|
61
221
|
end
|
62
222
|
# Finalizes the routes.
|
63
223
|
#
|
64
|
-
def finalize
|
224
|
+
def finalize # :nodoc:
|
65
225
|
routing.freeze
|
66
226
|
end
|
67
227
|
|
68
228
|
# TODO Add more info if possible.
|
69
229
|
#
|
70
|
-
def to_s
|
230
|
+
def to_s # :nodoc:
|
71
231
|
"#{self.name}:\n#{routing}"
|
72
232
|
end
|
73
233
|
|
@@ -1,4 +1,4 @@
|
|
1
|
-
module Calculations
|
1
|
+
module Calculations # :nodoc:all
|
2
2
|
|
3
3
|
# A location calculation recalculates a 1-d location
|
4
4
|
# to the Picky internal 1-d "grid".
|
@@ -18,7 +18,15 @@ module Calculations
|
|
18
18
|
end
|
19
19
|
|
20
20
|
def minimum= minimum
|
21
|
+
# Add a margin of 1 user grid.
|
22
|
+
#
|
21
23
|
minimum -= @user_grid
|
24
|
+
|
25
|
+
# Subtract one more grid from the minimum so that the index key never falls on 0.
|
26
|
+
# Why? to_i maps by default to 0.
|
27
|
+
#
|
28
|
+
minimum -= @grid
|
29
|
+
|
22
30
|
@minimum = minimum
|
23
31
|
end
|
24
32
|
|
data/lib/picky/cores.rb
CHANGED
data/lib/picky/generator.rb
CHANGED
@@ -8,7 +8,7 @@ module Picky
|
|
8
8
|
# picky <command> <options>
|
9
9
|
# is found.
|
10
10
|
#
|
11
|
-
class NoGeneratorError < StandardError
|
11
|
+
class NoGeneratorError < StandardError # :nodoc:all
|
12
12
|
|
13
13
|
def initialize generator
|
14
14
|
super usage + possible_commands(generator.types)
|
@@ -38,7 +38,7 @@ module Picky
|
|
38
38
|
#
|
39
39
|
# Basically copies a prototype project into a newly generated directory.
|
40
40
|
#
|
41
|
-
class Generator
|
41
|
+
class Generator # :nodoc:all
|
42
42
|
|
43
43
|
attr_reader :types
|
44
44
|
|
data/lib/picky/helpers/cache.rb
CHANGED
@@ -1,6 +1,7 @@
|
|
1
|
+
# TODO Not used anymore? Remove.
|
1
2
|
#
|
2
|
-
#
|
3
|
-
|
3
|
+
module Helpers # :nodoc:all
|
4
|
+
|
4
5
|
module Cache
|
5
6
|
# This is a simple cache.
|
6
7
|
# The store needs to be able to answer to [] and []=.
|
@@ -10,14 +11,15 @@ module Helpers
|
|
10
11
|
#
|
11
12
|
results = store[key]
|
12
13
|
return results if results
|
13
|
-
|
14
|
+
|
14
15
|
results = lambda(&block).call
|
15
|
-
|
16
|
+
|
16
17
|
# Store results
|
17
18
|
#
|
18
19
|
store[key] = results
|
19
|
-
|
20
|
+
|
20
21
|
results
|
21
22
|
end
|
22
23
|
end
|
24
|
+
|
23
25
|
end
|
data/lib/picky/helpers/gc.rb
CHANGED
data/lib/picky/index/bundle.rb
CHANGED
data/lib/picky/index_api.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
|
-
# This class defines the indexing and index API.
|
1
|
+
# This class defines the indexing and index API that is exposed to the user.
|
2
|
+
# It provides a single front for both indexing and index options.
|
2
3
|
#
|
3
4
|
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
4
5
|
#
|
5
|
-
class IndexAPI
|
6
|
-
|
7
|
-
# TODO Delegation.
|
8
|
-
#
|
6
|
+
class IndexAPI # :nodoc:all
|
9
7
|
|
10
8
|
attr_reader :name, :indexing, :indexed
|
11
9
|
|
10
|
+
# TODO Doc.
|
11
|
+
#
|
12
12
|
def initialize name, source, options = {}
|
13
13
|
@name = name
|
14
14
|
@indexing = Indexing::Index.new name, source, options
|
@@ -19,15 +19,13 @@ class IndexAPI
|
|
19
19
|
Indexes.register self
|
20
20
|
end
|
21
21
|
|
22
|
-
#
|
23
|
-
#
|
24
|
-
# TODO Spec! Doc!
|
22
|
+
# TODO Doc.
|
25
23
|
#
|
26
24
|
def define_category category_name, options = {}
|
27
25
|
category_name = category_name.to_sym
|
28
26
|
|
29
|
-
indexing_category = indexing.
|
30
|
-
indexed_category = indexed.
|
27
|
+
indexing_category = indexing.define_category category_name, options
|
28
|
+
indexed_category = indexed.define_category category_name, options
|
31
29
|
|
32
30
|
yield indexing_category, indexed_category if block_given?
|
33
31
|
|
@@ -35,22 +33,42 @@ class IndexAPI
|
|
35
33
|
end
|
36
34
|
alias category define_category
|
37
35
|
|
38
|
-
#
|
36
|
+
#
|
39
37
|
#
|
40
38
|
def define_location name, options = {}
|
41
|
-
grid = options[:
|
39
|
+
grid = options[:radius] || raise("Option :radius needs to be set on define_location, it defines the search radius.")
|
42
40
|
precision = options[:precision]
|
43
41
|
|
42
|
+
options = { partial: Partial::None.new }.merge options
|
43
|
+
|
44
44
|
define_category name, options do |indexing, indexed|
|
45
45
|
indexing.source = Sources::Wrappers::Location.new indexing, grid: grid, precision: precision
|
46
46
|
indexing.tokenizer = Tokenizers::Index.new
|
47
|
-
# indexing.partial = Partial::None.new
|
48
47
|
|
49
|
-
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid
|
48
|
+
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: grid, precision: precision
|
50
49
|
indexed.exact = exact_bundle
|
51
|
-
indexed.partial = exact_bundle
|
50
|
+
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
52
51
|
end
|
53
52
|
end
|
54
53
|
alias location define_location
|
55
54
|
|
55
|
+
# Options
|
56
|
+
# * radius (in km).
|
57
|
+
#
|
58
|
+
def define_map_location name, options = {}
|
59
|
+
radius = options[:radius] || raise("Option :radius needs to be set on define_map_location, it defines the search radius.")
|
60
|
+
|
61
|
+
# The radius is given as if all the locations were on the equator.
|
62
|
+
#
|
63
|
+
# TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
|
64
|
+
#
|
65
|
+
# This calculates km -> longitude (degrees).
|
66
|
+
#
|
67
|
+
# A degree on the equator is equal to ~111,319.9 meters.
|
68
|
+
# So a km on the equator is equal to 0.00898312 degrees.
|
69
|
+
#
|
70
|
+
options[:radius] = radius * 0.00898312
|
71
|
+
|
72
|
+
define_location name, options
|
73
|
+
end
|
56
74
|
end
|
data/lib/picky/indexed/bundle.rb
CHANGED
data/lib/picky/indexed/index.rb
CHANGED
@@ -28,7 +28,7 @@ module Indexed
|
|
28
28
|
# Load first the bundle, then extract the config.
|
29
29
|
#
|
30
30
|
bundle.load
|
31
|
-
minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing.")
|
31
|
+
minimum = bundle[:location_minimum] || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
|
32
32
|
@calculation.minimum = minimum
|
33
33
|
end
|
34
34
|
|
data/lib/picky/indexes_api.rb
CHANGED
data/lib/picky/indexing/index.rb
CHANGED
data/lib/picky/loader.rb
CHANGED
data/lib/picky/loggers/search.rb
CHANGED