picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
data/lib/picky/application.rb
CHANGED
|
@@ -13,21 +13,23 @@
|
|
|
13
13
|
# will generate an example <tt>project_name/app/application.rb</tt> file for you
|
|
14
14
|
# with some example code inside.
|
|
15
15
|
#
|
|
16
|
-
# ==
|
|
16
|
+
# == Index::Memory.new(name, source)
|
|
17
17
|
#
|
|
18
|
-
# Next, define where your data comes from. You use the <tt>
|
|
19
|
-
# my_index =
|
|
18
|
+
# Next, define where your data comes from. You use the <tt>Index::Memory.new</tt> method for that:
|
|
19
|
+
# my_index = Index::Memory.new :some_index_name, some_source
|
|
20
20
|
# You give the index a name (or identifier), and a source (see Sources), where its data comes from. Let's do that:
|
|
21
21
|
# class MyGreatSearch < Application
|
|
22
22
|
#
|
|
23
|
-
# books =
|
|
23
|
+
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
24
24
|
#
|
|
25
25
|
# end
|
|
26
26
|
# Now we have an index <tt>books</tt>.
|
|
27
27
|
#
|
|
28
28
|
# That in itself won't do much good.
|
|
29
29
|
#
|
|
30
|
-
#
|
|
30
|
+
# Note that a Redis index is also available: Index::Redis.new.
|
|
31
|
+
#
|
|
32
|
+
# == index_instance.define_category(identifier, options = {})
|
|
31
33
|
#
|
|
32
34
|
# Picky needs us to define categories on the data.
|
|
33
35
|
#
|
|
@@ -37,7 +39,7 @@
|
|
|
37
39
|
# Let's go ahead and define a category:
|
|
38
40
|
# class MyGreatSearch < Application
|
|
39
41
|
#
|
|
40
|
-
# books =
|
|
42
|
+
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
41
43
|
# books.define_category :title
|
|
42
44
|
#
|
|
43
45
|
# end
|
|
@@ -67,9 +69,7 @@
|
|
|
67
69
|
# books = index :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
68
70
|
# books.define_category :title
|
|
69
71
|
#
|
|
70
|
-
#
|
|
71
|
-
#
|
|
72
|
-
# route %r{^/books/full$} => full_books_query
|
|
72
|
+
# route %r{^/books/full$} => Query::Full.new(books)
|
|
73
73
|
#
|
|
74
74
|
# end
|
|
75
75
|
# That's it!
|
|
@@ -128,7 +128,7 @@
|
|
|
128
128
|
# substitutes_characters_with: CharacterSubstituters::WestEuropean.new,
|
|
129
129
|
# maximum_tokens: 4
|
|
130
130
|
#
|
|
131
|
-
# books =
|
|
131
|
+
# books = Index::Memory.new :books, Sources::CSV.new(:title, :author, :isbn, file:'app/library.csv')
|
|
132
132
|
# books.define_category :title,
|
|
133
133
|
# qualifiers: [:t, :title, :titre],
|
|
134
134
|
# partial: Partial::Substring.new(:from => 1),
|
|
@@ -139,11 +139,8 @@
|
|
|
139
139
|
#
|
|
140
140
|
# query_options = { :weights => { [:title, :author] => +3, [:author, :title] => -1 } }
|
|
141
141
|
#
|
|
142
|
-
#
|
|
143
|
-
#
|
|
144
|
-
#
|
|
145
|
-
# route %r{^/books/full$} => full_books_query
|
|
146
|
-
# route %r{^/books/live$} => live_books_query
|
|
142
|
+
# route %r{^/books/full$} => Query::Full.new(books, query_options)
|
|
143
|
+
# route %r{^/books/live$} => Query::Live.new(books, query_options)
|
|
147
144
|
#
|
|
148
145
|
# end
|
|
149
146
|
# That's actually already a full-blown Picky App!
|
|
@@ -159,14 +156,14 @@ class Application
|
|
|
159
156
|
# is used for indexing by default.
|
|
160
157
|
#
|
|
161
158
|
def default_indexing options = {}
|
|
162
|
-
Tokenizers::Index.default = Tokenizers::Index.new(options)
|
|
159
|
+
Internals::Tokenizers::Index.default = Internals::Tokenizers::Index.new(options)
|
|
163
160
|
end
|
|
164
161
|
|
|
165
162
|
# Returns a configured tokenizer that
|
|
166
163
|
# is used for querying by default.
|
|
167
164
|
#
|
|
168
165
|
def default_querying options = {}
|
|
169
|
-
Tokenizers::Query.default = Tokenizers::Query.new(options)
|
|
166
|
+
Internals::Tokenizers::Query.default = Internals::Tokenizers::Query.new(options)
|
|
170
167
|
end
|
|
171
168
|
|
|
172
169
|
# Create a new index for indexing and for querying.
|
|
@@ -181,8 +178,10 @@ class Application
|
|
|
181
178
|
# Options:
|
|
182
179
|
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
|
183
180
|
#
|
|
181
|
+
# TODO Obsolete. Phase out.
|
|
182
|
+
#
|
|
184
183
|
def index name, source, options = {}
|
|
185
|
-
|
|
184
|
+
Index::Memory.new name, source, options
|
|
186
185
|
end
|
|
187
186
|
|
|
188
187
|
# Routes.
|
|
@@ -201,7 +200,7 @@ class Application
|
|
|
201
200
|
rack_adapter.call env
|
|
202
201
|
end
|
|
203
202
|
def rack_adapter # :nodoc:
|
|
204
|
-
@rack_adapter ||= FrontendAdapters::Rack.new
|
|
203
|
+
@rack_adapter ||= Internals::FrontendAdapters::Rack.new
|
|
205
204
|
end
|
|
206
205
|
|
|
207
206
|
# Finalize the subclass as soon as it
|
data/lib/picky/cores.rb
CHANGED
|
@@ -83,7 +83,7 @@ class Cores # :nodoc:all
|
|
|
83
83
|
# os_name => lambda_which_returns_a_number_of_cores
|
|
84
84
|
#
|
|
85
85
|
@@number_of_cores = {
|
|
86
|
-
'darwin' => lambda { `system_profiler SPHardwareDataType | grep 'Total Number Of Cores'`.gsub(/[^\d]/, '') },
|
|
86
|
+
'darwin' => lambda { `system_profiler SPHardwareDataType | grep -i 'Total Number Of Cores'`.gsub(/[^\d]/, '') },
|
|
87
87
|
'linux' => lambda { `grep -ci ^processor /proc/cpuinfo` }
|
|
88
88
|
}
|
|
89
89
|
def self.os_to_core_mapping
|
|
@@ -0,0 +1,179 @@
|
|
|
1
|
+
module Index
|
|
2
|
+
|
|
3
|
+
# This class defines the indexing and index API that is exposed to the user
|
|
4
|
+
# as the #index method inside the Application class.
|
|
5
|
+
#
|
|
6
|
+
# It provides a single front for both indexing and index options. We suggest always using the index API.
|
|
7
|
+
#
|
|
8
|
+
# Note: An Index holds both an *Indexed*::*Index* and an *Indexing*::*Index*.
|
|
9
|
+
#
|
|
10
|
+
class Base
|
|
11
|
+
|
|
12
|
+
attr_reader :name, :indexing, :indexed
|
|
13
|
+
|
|
14
|
+
# Create a new index with a given source.
|
|
15
|
+
#
|
|
16
|
+
# === Parameters
|
|
17
|
+
# * name: A name that will be used for the index directory and in the Picky front end.
|
|
18
|
+
# * source: Where the data comes from, e.g. Sources::CSV.new(...)
|
|
19
|
+
#
|
|
20
|
+
# === Options
|
|
21
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results than the name of the index.
|
|
22
|
+
# * after_indexing: As of this writing only used in the db source. Executes the given after_indexing as SQL after the indexing process.
|
|
23
|
+
#
|
|
24
|
+
def initialize name, source, options = {}
|
|
25
|
+
@name = name
|
|
26
|
+
@indexing = Internals::Indexing::Index.new name, source, options
|
|
27
|
+
@indexed = Internals::Indexed::Index.new name, options
|
|
28
|
+
|
|
29
|
+
# Centralized registry.
|
|
30
|
+
#
|
|
31
|
+
Indexes.register self
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
# Defines a searchable category on the index.
|
|
35
|
+
#
|
|
36
|
+
# === Parameters
|
|
37
|
+
# * category_name: This identifier is used in the front end, but also to categorize query text. For example, “title:hobbit” will narrow the hobbit query on categories with the identifier :title.
|
|
38
|
+
#
|
|
39
|
+
# === Options
|
|
40
|
+
# * partial: Partial::None.new or Partial::Substring.new(from: starting_char, to: ending_char). Default is Partial::Substring.new(from: -3, to: -1).
|
|
41
|
+
# * similarity: Similarity::None.new or Similarity::Phonetic.new(similar_words_searched). Default is Similarity::None.new.
|
|
42
|
+
# * qualifiers: An array of qualifiers with which you can define which category you’d like to search, for example “title:hobbit” will search for hobbit in just title categories. Example: qualifiers: [:t, :titre, :title] (use it for example with multiple languages). Default is the name of the category.
|
|
43
|
+
# * qualifier: Convenience option if you just need a single qualifier, see above. Example: qualifier: :title. Default is the name of the category.
|
|
44
|
+
# * source: Use a different source than the index uses. If you think you need that, there might be a better solution to your problem. Please post to the mailing list first with your application.rb :)
|
|
45
|
+
# * from: Take the data from the data category with this name. Example: You have a source Sources::CSV.new(:title, file:'some_file.csv') but you want the category to be called differently. Then you use from: define_category(:similar_title, :from => :title).
|
|
46
|
+
#
|
|
47
|
+
def define_category category_name, options = {}
|
|
48
|
+
category_name = category_name.to_sym
|
|
49
|
+
|
|
50
|
+
indexing_category = indexing.define_category category_name, options
|
|
51
|
+
indexed_category = indexed.define_category category_name, options
|
|
52
|
+
|
|
53
|
+
yield indexing_category, indexed_category if block_given?
|
|
54
|
+
|
|
55
|
+
self
|
|
56
|
+
end
|
|
57
|
+
alias category define_category
|
|
58
|
+
|
|
59
|
+
# HIGHLY EXPERIMENTAL Try if you feel "beta" ;)
|
|
60
|
+
#
|
|
61
|
+
# Make this category range searchable with a fixed range. If you need other ranges, define another category with a different range value.
|
|
62
|
+
#
|
|
63
|
+
# Example:
|
|
64
|
+
# You have data values inside 1..100, and you want to have Picky return
|
|
65
|
+
# not only the results for 47 if you search for 47, but also results for
|
|
66
|
+
# 45, 46, or 47.2, 48.9, in a range of 2 around 47, so (45..49).
|
|
67
|
+
#
|
|
68
|
+
# Then you use:
|
|
69
|
+
# my_index.define_ranged_category :values_inside_1_100, 2
|
|
70
|
+
#
|
|
71
|
+
# Optionally, you give it a precision value to reduce the error margin
|
|
72
|
+
# around 47 (Picky is a bit liberal).
|
|
73
|
+
# my_index.define_ranged_category :values_inside_1_100, 2, precision: 5
|
|
74
|
+
#
|
|
75
|
+
# This will force Picky to be wrong by at most 5% of the given range value
|
|
76
|
+
# (5% of 2 = 0.1) instead of the default 20% (20% of 2 = 0.4).
|
|
77
|
+
#
|
|
78
|
+
# We suggest not to use much more than 5 as a higher precision is more performance intensive for less and less precision gain.
|
|
79
|
+
#
|
|
80
|
+
# == Protip 1
|
|
81
|
+
#
|
|
82
|
+
# Create two ranged categories to make an area search:
|
|
83
|
+
# index.define_ranged_category :x, 1
|
|
84
|
+
# index.define_ranged_category :y, 1
|
|
85
|
+
#
|
|
86
|
+
# Search for it using for example:
|
|
87
|
+
# x:133, y:120
|
|
88
|
+
#
|
|
89
|
+
# This will search this square area (* = 133, 120: The "search" point entered):
|
|
90
|
+
#
|
|
91
|
+
# 132 134
|
|
92
|
+
# | |
|
|
93
|
+
# --|---------|-- 121
|
|
94
|
+
# | |
|
|
95
|
+
# | * |
|
|
96
|
+
# | |
|
|
97
|
+
# --|---------|-- 119
|
|
98
|
+
# | |
|
|
99
|
+
#
|
|
100
|
+
# Note: The area does not need to be square, but can be rectangular.
|
|
101
|
+
#
|
|
102
|
+
# == Protip 2
|
|
103
|
+
#
|
|
104
|
+
# Create three ranged categories to make a volume search.
|
|
105
|
+
#
|
|
106
|
+
# Or go crazy and use 4 ranged categories for a space/time search! ;)
|
|
107
|
+
#
|
|
108
|
+
# === Parameters
|
|
109
|
+
# * category_name: The category_name as used in #define_category.
|
|
110
|
+
# * range: The range (in the units of your data values) around the query point where we search for results.
|
|
111
|
+
#
|
|
112
|
+
# -----|<- range ->*------------|-----
|
|
113
|
+
#
|
|
114
|
+
# === Options
|
|
115
|
+
# * precision: Default is 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
|
116
|
+
# * ... all options of #define_category.
|
|
117
|
+
#
|
|
118
|
+
def define_ranged_category category_name, range, options = {}
|
|
119
|
+
precision = options[:precision]
|
|
120
|
+
|
|
121
|
+
options = { partial: Partial::None.new }.merge options
|
|
122
|
+
|
|
123
|
+
define_category category_name, options do |indexing, indexed|
|
|
124
|
+
indexing.source = Sources::Wrappers::Location.new indexing, grid: range, precision: precision
|
|
125
|
+
indexing.tokenizer = Internals::Tokenizers::Index.new
|
|
126
|
+
|
|
127
|
+
exact_bundle = Indexed::Wrappers::Bundle::Location.new indexed.exact, grid: range, precision: precision
|
|
128
|
+
indexed.exact = exact_bundle
|
|
129
|
+
indexed.partial = exact_bundle # A partial token also uses the exact index.
|
|
130
|
+
end
|
|
131
|
+
end
|
|
132
|
+
alias ranged_category define_ranged_category
|
|
133
|
+
|
|
134
|
+
# HIGHLY EXPERIMENTAL Not correctly working yet. Try it if you feel "beta".
|
|
135
|
+
#
|
|
136
|
+
# Also a range search (see #define_ranged_category), but on the earth's surface.
|
|
137
|
+
#
|
|
138
|
+
# Parameters:
|
|
139
|
+
# * name: The name as used in #define_category.
|
|
140
|
+
# * radius: The distance (in km) around the query point which we search for results.
|
|
141
|
+
#
|
|
142
|
+
# Note: Picky uses a square, not a circle. We hope that's ok for most usages.
|
|
143
|
+
#
|
|
144
|
+
# -----------------------------
|
|
145
|
+
# | |
|
|
146
|
+
# | |
|
|
147
|
+
# | |
|
|
148
|
+
# | |
|
|
149
|
+
# | |
|
|
150
|
+
# | *<- radius ->|
|
|
151
|
+
# | |
|
|
152
|
+
# | |
|
|
153
|
+
# | |
|
|
154
|
+
# | |
|
|
155
|
+
# | |
|
|
156
|
+
# -----------------------------
|
|
157
|
+
#
|
|
158
|
+
# Options
|
|
159
|
+
# * precision: Default 1 (20% error margin, very fast), up to 5 (5% error margin, slower) makes sense.
|
|
160
|
+
# * from: The data category to take the data for this category from.
|
|
161
|
+
#
|
|
162
|
+
# TODO Redo. Will have to write a wrapper that combines two categories that are indexed simultaneously.
|
|
163
|
+
#
|
|
164
|
+
def define_map_location name, radius, options = {} # :nodoc:
|
|
165
|
+
# The radius is given as if all the locations were on the equator.
|
|
166
|
+
#
|
|
167
|
+
# TODO Need to recalculate since not many locations are on the equator ;) This is just a prototype.
|
|
168
|
+
#
|
|
169
|
+
# This calculates km -> longitude (degrees).
|
|
170
|
+
#
|
|
171
|
+
# A degree on the equator is equal to ~111,319.9 meters.
|
|
172
|
+
# So a km on the equator is equal to 0.00898312 degrees.
|
|
173
|
+
#
|
|
174
|
+
define_ranged_category name, radius * 0.00898312, options
|
|
175
|
+
end
|
|
176
|
+
alias map_location define_map_location
|
|
177
|
+
end
|
|
178
|
+
|
|
179
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
module Index
|
|
2
|
+
|
|
3
|
+
# An index that is persisted in files, loaded at startup and kept in memory at runtime.
|
|
4
|
+
#
|
|
5
|
+
class Memory < Base
|
|
6
|
+
|
|
7
|
+
# Create a new memory index for indexing and for querying.
|
|
8
|
+
#
|
|
9
|
+
# Parameters:
|
|
10
|
+
# * name: The identifier of the index. Used:
|
|
11
|
+
# - to identify an index (e.g. by you in Rake tasks: Indexes[:the_identifier]).
|
|
12
|
+
# - in the frontend to describe which index a result came from.
|
|
13
|
+
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
|
14
|
+
# * source: The source the data comes from. See Sources::Base.
|
|
15
|
+
#
|
|
16
|
+
# Options:
|
|
17
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
|
18
|
+
#
|
|
19
|
+
def initialize name, source, options = {}
|
|
20
|
+
options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Memory
|
|
21
|
+
options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Memory
|
|
22
|
+
|
|
23
|
+
super name, source, options
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
end
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
module Index
|
|
2
|
+
|
|
3
|
+
# An index that is persisted in Redis.
|
|
4
|
+
#
|
|
5
|
+
class Redis < Base
|
|
6
|
+
|
|
7
|
+
# Create a new Redis index for indexing and for querying.
|
|
8
|
+
#
|
|
9
|
+
# Parameters:
|
|
10
|
+
# * name: The identifier of the index. Used:
|
|
11
|
+
# - to identify an index (e.g. by you in Rake tasks: Indexes[:the_identifier]).
|
|
12
|
+
# - in the frontend to describe which index a result came from.
|
|
13
|
+
# - index directory naming (index/development/the_identifier/<lots of indexes>)
|
|
14
|
+
# * source: The source the data comes from. See Sources::Base.
|
|
15
|
+
#
|
|
16
|
+
# Options:
|
|
17
|
+
# * result_identifier: Use if you'd like a different identifier/name in the results JSON than the name of the index.
|
|
18
|
+
#
|
|
19
|
+
def initialize name, source, options = {}
|
|
20
|
+
options[:indexing_bundle_class] ||= Internals::Indexing::Bundle::Redis
|
|
21
|
+
options[:indexed_bundle_class] ||= Internals::Indexed::Bundle::Redis
|
|
22
|
+
|
|
23
|
+
super name, source, options
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
end
|
|
@@ -1,41 +1,41 @@
|
|
|
1
1
|
# Comfortable API convenience class, splits methods to indexes.
|
|
2
2
|
#
|
|
3
|
-
class
|
|
4
|
-
|
|
5
|
-
attr_reader :indexes, :index_mapping
|
|
6
|
-
|
|
3
|
+
class IndexBundle # :nodoc:all
|
|
4
|
+
|
|
5
|
+
attr_reader :indexes, :index_mapping, :indexing, :indexed
|
|
6
|
+
|
|
7
7
|
delegate :reload,
|
|
8
8
|
:load_from_cache,
|
|
9
|
-
:to =>
|
|
10
|
-
|
|
9
|
+
:to => :indexed
|
|
10
|
+
|
|
11
11
|
delegate :check_caches,
|
|
12
12
|
:find,
|
|
13
13
|
:generate_cache_only,
|
|
14
14
|
:generate_index_only,
|
|
15
15
|
:index,
|
|
16
16
|
:index_for_tests,
|
|
17
|
-
:to =>
|
|
18
|
-
|
|
17
|
+
:to => :indexing
|
|
18
|
+
|
|
19
19
|
def initialize
|
|
20
20
|
@indexes = []
|
|
21
21
|
@index_mapping = {}
|
|
22
|
-
|
|
22
|
+
|
|
23
23
|
@indexed = Indexed::Indexes.new
|
|
24
24
|
@indexing = Indexing::Indexes.new
|
|
25
25
|
end
|
|
26
|
-
|
|
26
|
+
|
|
27
27
|
def register index
|
|
28
28
|
self.indexes << index
|
|
29
29
|
self.index_mapping[index.name] = index
|
|
30
|
-
|
|
31
|
-
@indexing.register index.indexing
|
|
32
|
-
@indexed.register index.indexed
|
|
33
|
-
end
|
|
34
30
|
|
|
31
|
+
indexing.register index.indexing
|
|
32
|
+
indexed.register index.indexed
|
|
33
|
+
end
|
|
34
|
+
|
|
35
35
|
def [] name
|
|
36
36
|
name = name.to_sym
|
|
37
|
-
|
|
37
|
+
|
|
38
38
|
self.index_mapping[name]
|
|
39
39
|
end
|
|
40
|
-
|
|
40
|
+
|
|
41
41
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
module Indexed
|
|
2
2
|
|
|
3
|
+
# Registers the indexes held at runtime, for queries.
|
|
4
|
+
#
|
|
3
5
|
class Indexes
|
|
4
6
|
|
|
5
7
|
attr_reader :indexes, :index_mapping
|
|
@@ -11,29 +13,31 @@ module Indexed
|
|
|
11
13
|
clear
|
|
12
14
|
end
|
|
13
15
|
|
|
14
|
-
#
|
|
16
|
+
# Clears the indexes and the mapping.
|
|
15
17
|
#
|
|
16
18
|
def clear
|
|
17
19
|
@indexes = []
|
|
18
20
|
@index_mapping = {}
|
|
19
21
|
end
|
|
20
22
|
|
|
21
|
-
#
|
|
23
|
+
# Reloads all indexes, one after another,
|
|
24
|
+
# in the order they were added.
|
|
22
25
|
#
|
|
23
26
|
def reload
|
|
24
27
|
load_from_cache
|
|
25
28
|
end
|
|
26
29
|
|
|
27
|
-
#
|
|
30
|
+
# Registers an index with the indexes.
|
|
28
31
|
#
|
|
29
32
|
def register index
|
|
30
33
|
self.indexes << index
|
|
31
34
|
self.index_mapping[index.name] = index
|
|
32
35
|
end
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
36
|
+
|
|
37
|
+
# Extracts an index, given its identifier.
|
|
38
|
+
#
|
|
39
|
+
def [] identifier
|
|
40
|
+
index_mapping[identifier.to_sym]
|
|
37
41
|
end
|
|
38
42
|
|
|
39
43
|
end
|
|
@@ -1,5 +1,7 @@
|
|
|
1
1
|
module Indexing
|
|
2
2
|
|
|
3
|
+
# Registers the indexes held at index time, for indexing.
|
|
4
|
+
#
|
|
3
5
|
class Indexes
|
|
4
6
|
|
|
5
7
|
attr_reader :indexes
|
|
@@ -17,13 +19,13 @@ module Indexing
|
|
|
17
19
|
clear
|
|
18
20
|
end
|
|
19
21
|
|
|
20
|
-
#
|
|
22
|
+
# Clears the array of indexes.
|
|
21
23
|
#
|
|
22
24
|
def clear
|
|
23
25
|
@indexes = []
|
|
24
26
|
end
|
|
25
27
|
|
|
26
|
-
#
|
|
28
|
+
# Registers an index with the indexes.
|
|
27
29
|
#
|
|
28
30
|
def register index
|
|
29
31
|
self.indexes << index
|
|
@@ -52,7 +54,7 @@ module Indexing
|
|
|
52
54
|
timed_exclaim "INDEXING FINISHED."
|
|
53
55
|
end
|
|
54
56
|
|
|
55
|
-
# For testing
|
|
57
|
+
# For integration testing – indexes for the tests without forking and shouting ;)
|
|
56
58
|
#
|
|
57
59
|
def index_for_tests
|
|
58
60
|
take_snapshot
|
|
@@ -63,25 +65,29 @@ module Indexing
|
|
|
63
65
|
end
|
|
64
66
|
end
|
|
65
67
|
|
|
66
|
-
#
|
|
68
|
+
# Generate only the index for the given index:category pair.
|
|
67
69
|
#
|
|
68
|
-
def generate_index_only index_name, category_name
|
|
70
|
+
def generate_index_only index_name, category_name = nil
|
|
69
71
|
found = find index_name, category_name
|
|
70
72
|
found.index if found
|
|
71
73
|
end
|
|
72
|
-
|
|
74
|
+
# Generate only the cache for the given index:category pair.
|
|
75
|
+
#
|
|
76
|
+
def generate_cache_only index_name, category_name = nil
|
|
73
77
|
found = find index_name, category_name
|
|
74
78
|
found.generate_caches if found
|
|
75
79
|
end
|
|
76
80
|
|
|
77
|
-
#
|
|
81
|
+
# Find a given index:category pair.
|
|
78
82
|
#
|
|
79
83
|
def find index_name, category_name
|
|
80
|
-
index_name
|
|
84
|
+
index_name = index_name.to_sym
|
|
81
85
|
|
|
82
86
|
indexes.each do |index|
|
|
83
87
|
next unless index.name == index_name
|
|
84
88
|
|
|
89
|
+
return index unless category_name
|
|
90
|
+
|
|
85
91
|
found = index.categories.find category_name
|
|
86
92
|
return found if found
|
|
87
93
|
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Adapters
|
|
4
|
+
# Adapter that is plugged into a Rack outlet.
|
|
5
|
+
#
|
|
6
|
+
module Rack
|
|
7
|
+
|
|
8
|
+
# Subclasses of this class should respond to
|
|
9
|
+
# * to_app(options)
|
|
10
|
+
#
|
|
11
|
+
class Base

  # Wraps the given body string in a Rack response triplet:
  # status 200, the headers, and a one-element body array.
  #
  # Content-Length is taken from the byte size rather than the
  # character count, so multibyte characters are accounted for.
  #
  def respond_with response, content_type = 'application/json'
    headers = {
      'Content-Type'   => content_type,
      'Content-Length' => response.bytesize.to_s
    }

    [200, headers, [response]]
  end

end
|
|
22
|
+
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
end
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Adapters
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
#
|
|
7
|
+
module Rack
|
|
8
|
+
|
|
9
|
+
class LiveParameters < Base

  # Keeps the object that is asked for the parameters on each request.
  #
  def initialize live_parameters
    @live_parameters = live_parameters
  end

  # Returns a Rack application (a lambda taking the Rack env).
  #
  # On each call it parses the request parameters, hands them to the
  # wrapped object's #parameters and responds with the result as JSON.
  #
  # The options are accepted but not used.
  #
  def to_app options = {}
    # Local reference, so that the lambda closes over it.
    #
    handler = @live_parameters

    lambda do |env|
      request = ::Rack::Request.new env
      updated = handler.parameters request.params

      respond_with updated.to_json
    end
  end

end
|
|
32
|
+
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
end
|
|
@@ -0,0 +1,63 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Adapters
|
|
4
|
+
# This is an adapter that is plugged into a Rack outlet.
|
|
5
|
+
#
|
|
6
|
+
# It looks at what is given to it and generates an appropriate
|
|
7
|
+
# adapter for it.
|
|
8
|
+
#
|
|
9
|
+
# For example, if you give it a query, it will extract the query param etc.
|
|
10
|
+
# and call search_with_text on it if it is called by Rack.
|
|
11
|
+
#
|
|
12
|
+
module Rack
|
|
13
|
+
|
|
14
|
+
class Query < Base

  # Default parameter names and content type; each can be
  # overridden through the options given to #to_app.
  #
  @@defaults = {
    query_key:    'query'.freeze,
    offset_key:   'offset'.freeze,
    content_type: 'application/json'.freeze
  }

  # Encoding that the incoming query text is tagged with.
  #
  UTF8_STRING = 'UTF-8'.freeze

  # Wraps the given query. It is called as
  # search_with_text(text, offset) on each request.
  #
  def initialize query
    @query    = query
    @defaults = @@defaults.dup
  end

  # Returns a Rack application (a lambda taking the Rack env) that
  # runs the wrapped query with the request's text and offset, logs
  # the results and responds with them.
  #
  # Options:
  #   * query_key:    Name of the request parameter holding the query text.
  #   * offset_key:   Name of the request parameter holding the offset.
  #   * content_type: Content-Type of the response.
  #
  # The keys resolved here are also used for extracting the parameters,
  # so that a custom query_key is honoured for the search itself and
  # not only for the log entry.
  #
  def to_app options = {}
    # For capturing in the lambda.
    #
    query        = @query
    query_key    = options[:query_key]    || @defaults[:query_key]
    offset_key   = options[:offset_key]   || @defaults[:offset_key]
    content_type = options[:content_type] || @defaults[:content_type]

    lambda do |env|
      params  = ::Rack::Request.new(env).params

      results = query.search_with_text(*extracted(params, query_key, offset_key))

      PickyLog.log results.to_log(params[query_key])

      respond_with results.to_response, content_type
    end
  end

  # Helper method to extract the query text and the offset from the params.
  #
  # Returns [text, offset]: text is nil if the query parameter is missing,
  # offset is 0 if the offset parameter is missing.
  #
  # The key arguments default to the configured default keys.
  #
  def extracted params, query_key = @defaults[:query_key], offset_key = @defaults[:offset_key]
    text   = params[query_key]
    offset = params[offset_key]

    [
      # The text is tagged as UTF-8 in place; its bytes are left untouched.
      #
      text && text.force_encoding(UTF8_STRING),
      offset ? offset.to_i : 0
    ]
  end

end
|
|
58
|
+
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end
|