picky 2.7.0 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -0,0 +1,400 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
# = Picky Indexes
|
4
|
+
#
|
5
|
+
# A Picky Index defines
|
6
|
+
# * where its data comes from (a data source).
|
7
|
+
# * how this data is indexed.
|
8
|
+
# * a number of categories that may or may not map directly to data categories.
|
9
|
+
#
|
10
|
+
# == Howto
|
11
|
+
#
|
12
|
+
# This is a step-by-step description on how to create an index.
|
13
|
+
#
|
14
|
+
# Start by choosing an <tt>Indexes::Memory</tt> or an <tt>Indexes::Redis</tt>.
|
15
|
+
# In the example, we will be using an in-memory index, <tt>Indexes::Memory</tt>.
|
16
|
+
#
|
17
|
+
# books = Indexes::Memory.new(:books)
|
18
|
+
#
|
19
|
+
# That in itself won't do much good, that's why we add a data source:
|
20
|
+
#
|
21
|
+
# books = Indexes::Memory.new(:books) do
|
22
|
+
# source Sources::CSV.new(:title, :author, file: 'data/books.csv')
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# In the example, we use an explicit <tt>Sources::CSV</tt> of Picky.
|
26
|
+
# However, anything that responds to <tt>#each</tt>, and returns an object that
|
27
|
+
# answers to <tt>#id</tt>, works.
|
28
|
+
#
|
29
|
+
# For example, a 3.0 ActiveRecord class:
|
30
|
+
#
|
31
|
+
# books = Indexes::Memory.new(:books) do
|
32
|
+
# source Book.order('isbn ASC')
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# Now we know where the data comes from, but not how to categorize it.
|
36
|
+
#
|
37
|
+
# Let's add a few categories:
|
38
|
+
#
|
39
|
+
# books = Indexes::Memory.new(:books) do
|
40
|
+
# source Book.order('isbn ASC')
|
41
|
+
# category :title
|
42
|
+
# category :author
|
43
|
+
# category :isbn
|
44
|
+
# end
|
45
|
+
#
|
46
|
+
# Categories offer quite a few options, see <tt>Indexes::Index#category</tt> for details.
|
47
|
+
#
|
48
|
+
# After adding more options, it might look like this:
|
49
|
+
#
|
50
|
+
# books = Indexes::Memory.new(:books) do
|
51
|
+
# source Book.order('isbn ASC')
|
52
|
+
# category :title,
|
53
|
+
# partial: Partial::Substring.new(from: 1),
|
54
|
+
# similarity: Similarity::DoubleMetaphone.new(3),
|
55
|
+
# qualifiers: [:t, :title, :titulo]
|
56
|
+
# category :author,
|
57
|
+
# similarity: Similarity::Metaphone.new(2)
|
58
|
+
# category :isbn,
|
59
|
+
# partial: Partial::None.new,
|
60
|
+
# from: :legacy_isbn_name
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# For this to work, a <tt>Book</tt> should support methods <tt>#title</tt>, <tt>#author</tt> and <tt>#legacy_isbn_name</tt>.
|
64
|
+
#
|
65
|
+
# If it uses <tt>String</tt> ids, use <tt>#key_format</tt> to define a formatting method:
|
66
|
+
#
|
67
|
+
# books = Indexes::Memory.new(:books) do
|
68
|
+
# key_format :to_s
|
69
|
+
# source Book.order('isbn ASC')
|
70
|
+
# category :title
|
71
|
+
# category :author
|
72
|
+
# category :isbn
|
73
|
+
# end
|
74
|
+
#
|
75
|
+
# Finally, use the index for a <tt>Search</tt>:
|
76
|
+
#
|
77
|
+
# route %r{^/media$} => Search.new(books, dvds, mp3s)
|
78
|
+
#
|
79
|
+
class Indexes
|
80
|
+
|
81
|
+
# This class defines the indexing and index API that is exposed to the user
|
82
|
+
# as the #index method inside the Application class.
|
83
|
+
#
|
84
|
+
# It provides a single front for both indexing and index options. We suggest to always use the index API.
|
85
|
+
#
|
86
|
+
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
87
|
+
#
|
88
|
+
class Index
|
89
|
+
|
90
|
+
attr_reader :name,
|
91
|
+
:categories
|
92
|
+
|
93
|
+
delegate :[],
|
94
|
+
:each_category,
|
95
|
+
:to => :categories
|
96
|
+
|
97
|
+
# Create a new index with a given source.
|
98
|
+
#
|
99
|
+
# === Parameters
|
100
|
+
# * name: A name that will be used for the index directory and in the Picky front end.
|
101
|
+
#
|
102
|
+
# === Options
|
103
|
+
# * source: Where the data comes from, e.g. Sources::CSV.new(...). Optional, can be defined in the block using #source.
|
104
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
105
|
+
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
106
|
+
# * tokenizer: The tokenizer to use for this index. Optional, can be defined in the block using #indexing.
|
107
|
+
# * key_format: The format the ids of this index are in. Optional, can be defined in the block using #key_format.
|
108
|
+
#
|
109
|
+
# Examples:
|
110
|
+
# my_index = Indexes::Memory.new(:my_index, source: some_source) do
|
111
|
+
# category :bla
|
112
|
+
# end
|
113
|
+
#
|
114
|
+
# my_index = Indexes::Memory.new(:my_index) do
|
115
|
+
# source Sources::CSV.new(file: 'data/index.csv')
|
116
|
+
# category :bla
|
117
|
+
# end
|
118
|
+
#
|
119
|
+
#
|
120
|
+
def initialize name, options = {}
|
121
|
+
check_name name
|
122
|
+
@name = name.to_sym
|
123
|
+
|
124
|
+
check_options options
|
125
|
+
|
126
|
+
@source = options[:source]
|
127
|
+
|
128
|
+
@after_indexing = options[:after_indexing]
|
129
|
+
@tokenizer = options[:tokenizer]
|
130
|
+
@key_format = options[:key_format]
|
131
|
+
|
132
|
+
# Indexed.
|
133
|
+
#
|
134
|
+
@result_identifier = options[:result_identifier] || name
|
135
|
+
|
136
|
+
# TODO Move ignore_unassigned_tokens to query, somehow.
|
137
|
+
#
|
138
|
+
@categories = Categories.new ignore_unassigned_tokens: (options[:ignore_unassigned_tokens] || false)
|
139
|
+
|
140
|
+
# Centralized registry.
|
141
|
+
#
|
142
|
+
Indexes.register self
|
143
|
+
|
144
|
+
#
|
145
|
+
#
|
146
|
+
instance_eval(&Proc.new) if block_given?
|
147
|
+
|
148
|
+
# Check if any source has been given in the block or the options.
|
149
|
+
#
|
150
|
+
check_source @source
|
151
|
+
end
|
152
|
+
|
153
|
+
# Default bundles.
|
154
|
+
#
|
155
|
+
def indexing_bundle_class
|
156
|
+
Indexing::Bundle::Memory
|
157
|
+
end
|
158
|
+
def indexed_bundle_class
|
159
|
+
Indexed::Bundle::Memory
|
160
|
+
end
|
161
|
+
|
162
|
+
# Defines a searchable category on the index.
|
163
|
+
#
|
164
|
+
# === Parameters
|
165
|
+
# * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
|
166
|
+
#
|
167
|
+
# === Options
|
168
|
+
# * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
|
169
|
+
# * similarity: Similarity::None.new or Similarity::DoubleMetaphone.new(similar_words_searched). Default is Similarity::None.new.
|
170
|
+
# * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
|
171
|
+
# * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
|
172
|
+
# * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
|
173
|
+
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
|
174
|
+
#
|
175
|
+
def category category_name, options = {}
|
176
|
+
new_category = Category.new category_name.to_sym, self, options
|
177
|
+
categories << new_category
|
178
|
+
|
179
|
+
new_category = yield new_category if block_given?
|
180
|
+
|
181
|
+
new_category
|
182
|
+
end
|
183
|
+
alias define_category category
|
184
|
+
|
185
|
+
# Make this category range searchable with a fixed range. If you need other
|
186
|
+
# ranges, define another category with a different range value.
|
187
|
+
#
|
188
|
+
# Example:
|
189
|
+
# You have data values inside 1..100, and you want to have Picky return
|
190
|
+
# not only the results for 47 if you search for 47, but also results for
|
191
|
+
# 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
|
192
|
+
#
|
193
|
+
# Then you use:
|
194
|
+
# ranged_category :values_inside_1_100, 2
|
195
|
+
#
|
196
|
+
# Optionally, you give it a precision value to reduce the error margin
|
197
|
+
# around 47 (Picky is a bit liberal).
|
198
|
+
# Indexes::Memory.new :range do
|
199
|
+
# ranged_category :values_inside_1_100, 2, precision: 5
|
200
|
+
# end
|
201
|
+
#
|
202
|
+
# This will force Picky to be wrong by at most 5% of the given range value
|
203
|
+
# (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
|
204
|
+
#
|
205
|
+
# We suggest not to use much more than 5 as a higher precision is more
|
206
|
+
# performance intensive for less and less precision gain.
|
207
|
+
#
|
208
|
+
# == Protip 1
|
209
|
+
#
|
210
|
+
# Create two ranged categories to make an area search:
|
211
|
+
# Indexes::Memory.new :area do
|
212
|
+
# ranged_category :x, 1
|
213
|
+
# ranged_category :y, 1
|
214
|
+
# end
|
215
|
+
#
|
216
|
+
# Search for it using for example:
|
217
|
+
# x:133, y:120
|
218
|
+
#
|
219
|
+
# This will search this square area (* = 133, 120: The "search" point entered):
|
220
|
+
#
|
221
|
+
# 132 134
|
222
|
+
# | |
|
223
|
+
# --|---------|-- 121
|
224
|
+
# | |
|
225
|
+
# | * |
|
226
|
+
# | |
|
227
|
+
# --|---------|-- 119
|
228
|
+
# | |
|
229
|
+
#
|
230
|
+
# Note: The area does not need to be square, but can be rectangular.
|
231
|
+
#
|
232
|
+
# == Protip 2
|
233
|
+
#
|
234
|
+
# Create three ranged categories to make a volume search.
|
235
|
+
#
|
236
|
+
# Or go crazy and use 4 ranged categories for a space/time search! ;)
|
237
|
+
#
|
238
|
+
# === Parameters
|
239
|
+
# * category_name: The category_name as used in #define_category.
|
240
|
+
# * range: The range (in the units of your data values) around the query point where we search for results.
|
241
|
+
#
|
242
|
+
# -----|<- range ->*------------|-----
|
243
|
+
#
|
244
|
+
# === Options
|
245
|
+
# * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
246
|
+
# * ... all options of #define_category.
|
247
|
+
#
|
248
|
+
def ranged_category category_name, range, options = {}
|
249
|
+
precision = options[:precision] || 1
|
250
|
+
|
251
|
+
options = { partial: Partial::None.new }.merge options
|
252
|
+
|
253
|
+
define_category category_name, options do |category|
|
254
|
+
Indexing::Wrappers::Category::Location.install_on category, range, precision
|
255
|
+
Indexed::Wrappers::Category::Location.install_on category, range, precision
|
256
|
+
end
|
257
|
+
end
|
258
|
+
alias define_ranged_category ranged_category
|
259
|
+
|
260
|
+
# HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
|
261
|
+
#
|
262
|
+
# Also a range search (see #ranged_category), but on the earth's surface.
|
263
|
+
#
|
264
|
+
# Parameters:
|
265
|
+
# * lat_name: The latitude's name as used in #define_category.
|
266
|
+
# * lng_name: The longitude's name as used in #define_category.
|
267
|
+
# * radius: The distance (in km) around the query point which we search for results.
|
268
|
+
#
|
269
|
+
# Note: Picky uses a square, not a circle. That should be ok for most usages.
|
270
|
+
#
|
271
|
+
# -----------------------------
|
272
|
+
# | |
|
273
|
+
# | |
|
274
|
+
# | |
|
275
|
+
# | |
|
276
|
+
# | |
|
277
|
+
# | *<- radius ->|
|
278
|
+
# | |
|
279
|
+
# | |
|
280
|
+
# | |
|
281
|
+
# | |
|
282
|
+
# | |
|
283
|
+
# -----------------------------
|
284
|
+
#
|
285
|
+
# Options
|
286
|
+
# * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
287
|
+
# * lat_from: The data category to take the data for the latitude from.
|
288
|
+
# * lng_from: The data category to take the data for the longitude from.
|
289
|
+
#
|
290
|
+
# TODO Will have to write a wrapper that combines two categories that are
|
291
|
+
# indexed simultaneously, since lat/lng are correlated.
|
292
|
+
#
|
293
|
+
def geo_categories lat_name, lng_name, radius, options = {} # :nodoc:
|
294
|
+
|
295
|
+
# Extract lat/lng specific options.
|
296
|
+
#
|
297
|
+
lat_from = options.delete :lat_from
|
298
|
+
lng_from = options.delete :lng_from
|
299
|
+
|
300
|
+
# One can be a normal ranged_category.
|
301
|
+
#
|
302
|
+
ranged_category lat_name, radius*0.00898312, options.merge(from: lat_from)
|
303
|
+
|
304
|
+
# The other needs to adapt the radius depending on the one.
|
305
|
+
#
|
306
|
+
# Depending on the latitude, the radius of the longitude
|
307
|
+
# needs to enlarge, the closer we get to the pole.
|
308
|
+
#
|
309
|
+
# In our simplified case, the radius is given as if all the
|
310
|
+
# locations were on the 45 degree line.
|
311
|
+
#
|
312
|
+
# This calculates km -> longitude (degrees).
|
313
|
+
#
|
314
|
+
# A degree on the 45 degree line is taken to be ~55.66 km.
|
315
|
+
# So a km on the 45 degree line is equal to 0.01796624 degrees.
|
316
|
+
#
|
317
|
+
ranged_category lng_name, radius*0.01796624, options.merge(from: lng_from)
|
318
|
+
|
319
|
+
end
|
320
|
+
alias define_geo_categories geo_categories
|
321
|
+
|
322
|
+
#
|
323
|
+
# Since this is an API, we fail hard quickly.
|
324
|
+
#
|
325
|
+
def check_name name # :nodoc:
|
326
|
+
raise ArgumentError.new(<<-NAME
|
327
|
+
|
328
|
+
|
329
|
+
The index identifier (you gave "#{name}") for Indexes::Memory/Indexes::Redis should be a Symbol/String,
|
330
|
+
Examples:
|
331
|
+
Indexes::Memory.new(:my_cool_index) # Recommended
|
332
|
+
Indexes::Redis.new("a-redis-index")
|
333
|
+
NAME
|
334
|
+
|
335
|
+
|
336
|
+
) unless name.respond_to?(:to_sym)
|
337
|
+
end
|
338
|
+
def check_options options # :nodoc:
|
339
|
+
raise ArgumentError.new(<<-OPTIONS
|
340
|
+
|
341
|
+
|
342
|
+
Sources are not passed in as second parameter for #{self.class.name} anymore, but either
|
343
|
+
* as :source option:
|
344
|
+
#{self.class.name}.new(#{name.inspect}, source: #{options})
|
345
|
+
or
|
346
|
+
* given to the #source method inside the config block:
|
347
|
+
#{self.class.name}.new(#{name.inspect}) do
|
348
|
+
source #{options}
|
349
|
+
end
|
350
|
+
|
351
|
+
Sorry about that breaking change (in 2.2.0), didn't want to go to 3.0.0 yet!
|
352
|
+
|
353
|
+
All the best
|
354
|
+
-- Picky
|
355
|
+
|
356
|
+
|
357
|
+
OPTIONS
|
358
|
+
) unless options.respond_to?(:[])
|
359
|
+
end
|
360
|
+
def check_source source # :nodoc:
|
361
|
+
raise ArgumentError.new(<<-SOURCE
|
362
|
+
|
363
|
+
|
364
|
+
The index "#{name}" should use a data source that responds to either the method #each, or the method #harvest, which yields(id, text), OR it can be a lambda/block, returning such a source.
|
365
|
+
Or it could use one of the built-in sources:
|
366
|
+
Sources::#{(Sources.constants - [:Base, :Wrappers, :NoCSVFileGiven, :NoCouchDBGiven]).join(',
|
367
|
+
Sources::')}
|
368
|
+
|
369
|
+
|
370
|
+
SOURCE
|
371
|
+
) unless source.respond_to?(:each) || source.respond_to?(:harvest) || source.respond_to?(:call)
|
372
|
+
end
|
373
|
+
|
374
|
+
def to_stats # :nodoc:
|
375
|
+
stats = <<-INDEX
|
376
|
+
#{name} (#{self.class}):
|
377
|
+
#{"source: #{source}".indented_to_s}
|
378
|
+
#{"categories: #{categories.map(&:name).join(', ')}".indented_to_s}
|
379
|
+
INDEX
|
380
|
+
stats << " result identifier: \"#{result_identifier}\"".indented_to_s unless result_identifier.to_s == name.to_s
|
381
|
+
stats
|
382
|
+
end
|
383
|
+
|
384
|
+
# Identifier used for technical output.
|
385
|
+
#
|
386
|
+
def identifier
|
387
|
+
"#{PICKY_ENVIRONMENT}:#{name}"
|
388
|
+
end
|
389
|
+
|
390
|
+
#
|
391
|
+
#
|
392
|
+
def to_s
|
393
|
+
"#{self.class}(#{name}, result_id: #{result_identifier}, source: #{source}, categories: #{categories})"
|
394
|
+
end
|
395
|
+
|
396
|
+
end
|
397
|
+
|
398
|
+
end
|
399
|
+
|
400
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Indexes
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
class Index
|
8
|
+
|
9
|
+
attr_reader :result_identifier,
|
10
|
+
:combinator
|
11
|
+
|
12
|
+
delegate :load_from_cache,
|
13
|
+
:analyze,
|
14
|
+
:reindex,
|
15
|
+
:possible_combinations,
|
16
|
+
:to => :categories
|
17
|
+
|
18
|
+
alias reload load_from_cache
|
19
|
+
|
20
|
+
end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -0,0 +1,138 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Indexes
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
class Index
|
8
|
+
|
9
|
+
attr_reader :after_indexing,
|
10
|
+
:bundle_class
|
11
|
+
|
12
|
+
# Delegators for indexing.
|
13
|
+
#
|
14
|
+
delegate :cache,
|
15
|
+
:check,
|
16
|
+
:clear,
|
17
|
+
:backup,
|
18
|
+
:restore,
|
19
|
+
:to => :categories
|
20
|
+
|
21
|
+
# Calling index on an index will call index
|
22
|
+
# on every category.
|
23
|
+
#
|
24
|
+
# Decides whether to use a parallel indexer or whether to
|
25
|
+
# delegate to each category to index themselves.
|
26
|
+
#
|
27
|
+
def index
|
28
|
+
if source.respond_to?(:each)
|
29
|
+
check_source_empty
|
30
|
+
index_in_parallel
|
31
|
+
else
|
32
|
+
with_data_snapshot do
|
33
|
+
categories.each &:index
|
34
|
+
end
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
# Check if the given enumerable source is empty.
|
39
|
+
#
|
40
|
+
# Note: Checking as early as possible to tell the
|
41
|
+
# user as early as possible.
|
42
|
+
#
|
43
|
+
def check_source_empty
|
44
|
+
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
45
|
+
end
|
46
|
+
|
47
|
+
# Note: Duplicated in category_indexing.rb.
|
48
|
+
#
|
49
|
+
# Take a data snapshot if the source offers it.
|
50
|
+
#
|
51
|
+
def with_data_snapshot
|
52
|
+
if source.respond_to? :with_snapshot
|
53
|
+
source.with_snapshot(self) do
|
54
|
+
yield
|
55
|
+
end
|
56
|
+
else
|
57
|
+
yield
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# Indexes the categories in parallel.
|
62
|
+
#
|
63
|
+
# Only use where the category does have a #each source defined.
|
64
|
+
#
|
65
|
+
def index_in_parallel
|
66
|
+
indexer = Indexers::Parallel.new self
|
67
|
+
indexer.index categories
|
68
|
+
categories.each &:cache
|
69
|
+
end
|
70
|
+
|
71
|
+
# Define an index tokenizer on the index.
|
72
|
+
#
|
73
|
+
# Parameters are the exact same as for indexing.
|
74
|
+
#
|
75
|
+
def indexing options = {}
|
76
|
+
@tokenizer = Tokenizers::Index.new options
|
77
|
+
end
|
78
|
+
alias define_indexing indexing
|
79
|
+
|
80
|
+
# Returns the installed tokenizer or the default.
|
81
|
+
#
|
82
|
+
def tokenizer
|
83
|
+
@tokenizer || Indexes.tokenizer
|
84
|
+
end
|
85
|
+
|
86
|
+
# Define a source on the index.
|
87
|
+
#
|
88
|
+
# Parameter is a source, either one of the standard sources or
|
89
|
+
# anything responding to #each and returning objects that
|
90
|
+
# respond to #id and the category names (or the name given in the category's from option).
|
91
|
+
#
|
92
|
+
def source some_source = nil, &block
|
93
|
+
some_source ||= block
|
94
|
+
some_source ? define_source(some_source) : (@source && extract_source || raise_no_source)
|
95
|
+
end
|
96
|
+
# Extract the actual source if it is wrapped in a time
|
97
|
+
# capsule, i.e. a block/lambda.
|
98
|
+
#
|
99
|
+
# TODO Extract into module.
|
100
|
+
#
|
101
|
+
def extract_source
|
102
|
+
@source = @source.respond_to?(:call) ? @source.call : @source
|
103
|
+
end
|
104
|
+
def define_source source
|
105
|
+
@source = source
|
106
|
+
end
|
107
|
+
def raise_no_source
|
108
|
+
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
109
|
+
|
110
|
+
|
111
|
+
No source given for index #{name}. An index needs a source.
|
112
|
+
Example:
|
113
|
+
Indexes::Memory.new(:with_source) do
|
114
|
+
source Sources::CSV.new(:title, file: 'data/books.csv')
|
115
|
+
category :title
|
116
|
+
category :author
|
117
|
+
end
|
118
|
+
|
119
|
+
NO_SOURCE
|
120
|
+
)
|
121
|
+
end
|
122
|
+
|
123
|
+
# Define a key_format on the index.
|
124
|
+
#
|
125
|
+
# Parameter is a method name to use on the key (e.g. :to_i, :to_s, :strip).
|
126
|
+
#
|
127
|
+
def key_format format = nil
|
128
|
+
format ? define_key_format(format) : @key_format
|
129
|
+
end
|
130
|
+
def define_key_format key_format
|
131
|
+
@key_format = key_format
|
132
|
+
end
|
133
|
+
|
134
|
+
end
|
135
|
+
|
136
|
+
end
|
137
|
+
|
138
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Indexes
|
4
|
+
|
5
|
+
# An index that is persisted in files, loaded at startup and kept in memory at runtime.
|
6
|
+
#
|
7
|
+
class Memory < Index
|
8
|
+
|
9
|
+
def indexing_bundle_class
|
10
|
+
Indexing::Bundle::Memory
|
11
|
+
end
|
12
|
+
def indexed_bundle_class
|
13
|
+
Indexed::Bundle::Memory
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
class Indexes
|
4
|
+
|
5
|
+
# An index that is persisted in Redis.
|
6
|
+
#
|
7
|
+
class Redis < Index
|
8
|
+
|
9
|
+
def indexing_bundle_class
|
10
|
+
Indexing::Bundle::Redis
|
11
|
+
end
|
12
|
+
def indexed_bundle_class
|
13
|
+
Indexed::Bundle::Redis
|
14
|
+
end
|
15
|
+
|
16
|
+
end
|
17
|
+
|
18
|
+
end
|
19
|
+
|
20
|
+
end
|