picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
# TODO Merge into Base, extract common with Indexed::Base.
|
|
2
|
+
#
|
|
3
|
+
module Indexing # :nodoc:all
|
|
4
|
+
# A Bundle is a number of indexes
|
|
5
|
+
# per [index, category] combination.
|
|
6
|
+
#
|
|
7
|
+
# At most, there are three indexes:
|
|
8
|
+
# * *core* index (always used)
|
|
9
|
+
# * *weights* index (always used)
|
|
10
|
+
# * *similarity* index (used with similarity)
|
|
11
|
+
#
|
|
12
|
+
# In Picky, indexing is separated from the index
|
|
13
|
+
# handling itself through a parallel structure.
|
|
14
|
+
#
|
|
15
|
+
# Both use methods provided by this base class, but
|
|
16
|
+
# have very different goals:
|
|
17
|
+
#
|
|
18
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
|
19
|
+
# and providing helper functions to e.g. check the indexes.
|
|
20
|
+
#
|
|
21
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
|
22
|
+
# memory and looking up search data as fast as possible.
|
|
23
|
+
#
|
|
24
|
+
module Bundle
|
|
25
|
+
|
|
26
|
+
class SuperBase
|
|
27
|
+
|
|
28
|
+
attr_reader :identifier, :files
|
|
29
|
+
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
|
30
|
+
|
|
31
|
+
delegate :clear, :to => :index
|
|
32
|
+
delegate :[], :[]=, :to => :configuration
|
|
33
|
+
|
|
34
|
+
def initialize name, category, similarity_strategy
|
|
35
|
+
@identifier = "#{category.identifier}:#{name}"
|
|
36
|
+
@files = Backend::Files.new name, category
|
|
37
|
+
|
|
38
|
+
@index = {}
|
|
39
|
+
@weights = {}
|
|
40
|
+
@similarity = {}
|
|
41
|
+
@configuration = {} # A hash with config options.
|
|
42
|
+
|
|
43
|
+
@similarity_strategy = similarity_strategy
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Get a list of similar texts.
|
|
47
|
+
#
|
|
48
|
+
# Note: Does not return itself.
|
|
49
|
+
#
|
|
50
|
+
# Returns the texts stored under the same similarity code as the given
# text, without the given text itself. Returns an empty array if the
# strategy yields no code for the text, or nothing is stored for the code.
#
# Note: The stored list is never modified. Deleting the text in place
#       (as was done previously) permanently removed it from the
#       similarity index on every lookup.
#
def similar text
  code = similarity_strategy.encoded text
  similar_codes = code && @similarity[code]
  return [] unless similar_codes

  # Filter into a new array so the index entry stays intact.
  #
  similar_codes.reject { |similar_text| similar_text == text }
end
|
|
56
|
+
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
end
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
module Indexing
|
|
2
|
+
module Wrappers
|
|
3
|
+
module Category
|
|
4
|
+
|
|
5
|
+
module Location
|
|
6
|
+
|
|
7
|
+
def self.install_on category, grid, precision = 1
|
|
8
|
+
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
|
9
|
+
|
|
10
|
+
category.class_eval do
|
|
11
|
+
def tokenizer
|
|
12
|
+
@tokenizer ||= Tokenizers::Index.new
|
|
13
|
+
end
|
|
14
|
+
define_method :source do
|
|
15
|
+
new_source
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
end
|
|
24
|
+
end
|
|
25
|
+
end
|
|
@@ -141,29 +141,29 @@ module Interfaces
|
|
|
141
141
|
# TODO Move to Interface object.
|
|
142
142
|
#
|
|
143
143
|
def querying_removes_characters
|
|
144
|
-
regexp =
|
|
144
|
+
regexp = Tokenizers::Query.default.instance_variable_get :@removes_characters_regexp
|
|
145
145
|
regexp && regexp.source
|
|
146
146
|
end
|
|
147
147
|
def querying_removes_characters= new_value
|
|
148
|
-
|
|
148
|
+
Tokenizers::Query.default.instance_variable_set(:@removes_characters_regexp, %r{#{new_value}})
|
|
149
149
|
end
|
|
150
150
|
def querying_stopwords
|
|
151
|
-
regexp =
|
|
151
|
+
regexp = Tokenizers::Query.default.instance_variable_get :@remove_stopwords_regexp
|
|
152
152
|
regexp && regexp.source
|
|
153
153
|
end
|
|
154
154
|
def querying_stopwords= new_value
|
|
155
|
-
|
|
155
|
+
Tokenizers::Query.default.instance_variable_set(:@remove_stopwords_regexp, %r{#{new_value}})
|
|
156
156
|
end
|
|
157
157
|
def querying_splits_text_on
|
|
158
|
-
splits =
|
|
158
|
+
splits = Tokenizers::Query.default.instance_variable_get :@splits_text_on
|
|
159
159
|
splits && splits.respond_to?(:source) ? splits.source : splits
|
|
160
160
|
end
|
|
161
161
|
def querying_splits_text_on= new_value
|
|
162
|
-
splits =
|
|
162
|
+
splits = Tokenizers::Query.default.instance_variable_get :@splits_text_on
|
|
163
163
|
if splits.respond_to?(:source)
|
|
164
|
-
|
|
164
|
+
Tokenizers::Query.default.instance_variable_set(:@splits_text_on, %r{#{new_value}})
|
|
165
165
|
else
|
|
166
|
-
|
|
166
|
+
Tokenizers::Query.default.instance_variable_set(:@splits_text_on, new_value)
|
|
167
167
|
end
|
|
168
168
|
end
|
|
169
169
|
|
data/lib/picky/loader.rb
CHANGED
|
@@ -25,9 +25,6 @@ module Loader # :nodoc:all
|
|
|
25
25
|
def self.load_relative filename_without_rb
|
|
26
26
|
load File.join(File.dirname(__FILE__), "#{filename_without_rb}.rb")
|
|
27
27
|
end
|
|
28
|
-
def self.load_internals filename_without_rb
|
|
29
|
-
load File.join(File.dirname(__FILE__), "internals/#{filename_without_rb}.rb")
|
|
30
|
-
end
|
|
31
28
|
|
|
32
29
|
def self.load_user filename
|
|
33
30
|
load File.join(PICKY_ROOT, "#{filename}.rb")
|
|
@@ -76,7 +73,7 @@ module Loader # :nodoc:all
|
|
|
76
73
|
|
|
77
74
|
# TODO Rewrite
|
|
78
75
|
#
|
|
79
|
-
|
|
76
|
+
Query::Qualifiers.instance.prepare
|
|
80
77
|
|
|
81
78
|
exclaim "Application #{Application.apps.map(&:name).join(', ')} loaded."
|
|
82
79
|
end
|
|
@@ -85,143 +82,132 @@ module Loader # :nodoc:all
|
|
|
85
82
|
# (Not for the user)
|
|
86
83
|
#
|
|
87
84
|
def self.load_framework_internals
|
|
88
|
-
load_relative 'internals'
|
|
89
|
-
|
|
90
85
|
# Load compiled C code.
|
|
91
86
|
#
|
|
92
|
-
|
|
87
|
+
load_relative 'ext/maybe_compile'
|
|
93
88
|
|
|
94
89
|
# Load extensions.
|
|
95
90
|
#
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
99
|
-
|
|
100
|
-
|
|
91
|
+
load_relative 'extensions/object'
|
|
92
|
+
load_relative 'extensions/array'
|
|
93
|
+
load_relative 'extensions/symbol'
|
|
94
|
+
load_relative 'extensions/module'
|
|
95
|
+
load_relative 'extensions/class'
|
|
96
|
+
load_relative 'extensions/hash'
|
|
101
97
|
|
|
102
98
|
# Requiring Helpers
|
|
103
99
|
#
|
|
104
|
-
|
|
100
|
+
load_relative 'helpers/measuring'
|
|
105
101
|
|
|
106
102
|
# Calculations.
|
|
107
103
|
#
|
|
108
|
-
|
|
104
|
+
load_relative 'calculations/location'
|
|
109
105
|
|
|
110
106
|
# Index generation strategies.
|
|
111
107
|
#
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
108
|
+
load_relative 'indexers/base'
|
|
109
|
+
load_relative 'indexers/serial'
|
|
110
|
+
load_relative 'indexers/parallel'
|
|
115
111
|
|
|
116
112
|
# Generators.
|
|
117
113
|
#
|
|
118
|
-
|
|
114
|
+
load_relative 'generators/strategy'
|
|
119
115
|
|
|
120
116
|
# Partial index generation strategies.
|
|
121
117
|
#
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
118
|
+
load_relative 'generators/partial/strategy'
|
|
119
|
+
load_relative 'generators/partial/none'
|
|
120
|
+
load_relative 'generators/partial/substring'
|
|
121
|
+
load_relative 'generators/partial/default'
|
|
126
122
|
|
|
127
123
|
# Weight index generation strategies.
|
|
128
124
|
#
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
125
|
+
load_relative 'generators/weights/strategy'
|
|
126
|
+
load_relative 'generators/weights/logarithmic'
|
|
127
|
+
load_relative 'generators/weights/default'
|
|
132
128
|
|
|
133
129
|
# Similarity index generation strategies.
|
|
134
130
|
#
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
131
|
+
load_relative 'generators/similarity/strategy'
|
|
132
|
+
load_relative 'generators/similarity/none'
|
|
133
|
+
load_relative 'generators/similarity/phonetic'
|
|
134
|
+
load_relative 'generators/similarity/metaphone'
|
|
135
|
+
load_relative 'generators/similarity/double_metaphone'
|
|
136
|
+
load_relative 'generators/similarity/soundex'
|
|
137
|
+
load_relative 'generators/similarity/default'
|
|
142
138
|
|
|
143
139
|
# Index generators.
|
|
144
140
|
#
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
# Shared index elements.
|
|
151
|
-
#
|
|
152
|
-
load_internals 'shared/category'
|
|
141
|
+
load_relative 'generators/base'
|
|
142
|
+
load_relative 'generators/partial_generator'
|
|
143
|
+
load_relative 'generators/weights_generator'
|
|
144
|
+
load_relative 'generators/similarity_generator'
|
|
153
145
|
|
|
154
146
|
# Index store handling.
|
|
155
147
|
#
|
|
156
|
-
|
|
148
|
+
load_relative 'backend/backend'
|
|
157
149
|
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
150
|
+
load_relative 'backend/redis'
|
|
151
|
+
load_relative 'backend/redis/basic'
|
|
152
|
+
load_relative 'backend/redis/list_hash'
|
|
153
|
+
load_relative 'backend/redis/string_hash'
|
|
162
154
|
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
load_internals 'index/files'
|
|
155
|
+
load_relative 'backend/file/basic'
|
|
156
|
+
load_relative 'backend/file/text'
|
|
157
|
+
load_relative 'backend/file/marshal'
|
|
158
|
+
load_relative 'backend/file/json'
|
|
169
159
|
|
|
160
|
+
load_relative 'backend/files'
|
|
161
|
+
|
|
170
162
|
# Indexing and Indexed things.
|
|
171
163
|
#
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
load_internals 'indexing/category'
|
|
177
|
-
# load_internals 'indexing/categories'
|
|
178
|
-
load_internals 'indexing/index'
|
|
164
|
+
load_relative 'indexing/bundle/super_base' # TODO Remove.
|
|
165
|
+
load_relative 'indexing/bundle/base'
|
|
166
|
+
load_relative 'indexing/bundle/memory'
|
|
167
|
+
load_relative 'indexing/bundle/redis'
|
|
179
168
|
|
|
180
|
-
|
|
169
|
+
load_relative 'indexing/wrappers/category/location'
|
|
181
170
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
load_internals 'indexed/category'
|
|
186
|
-
load_internals 'indexed/categories'
|
|
187
|
-
load_internals 'indexed/index'
|
|
171
|
+
load_relative 'indexed/bundle/base'
|
|
172
|
+
load_relative 'indexed/bundle/memory'
|
|
173
|
+
load_relative 'indexed/bundle/redis'
|
|
188
174
|
|
|
189
|
-
|
|
175
|
+
load_relative 'indexed/wrappers/exact_first'
|
|
190
176
|
|
|
191
177
|
# Bundle Wrapper
|
|
192
178
|
#
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
179
|
+
load_relative 'indexed/wrappers/bundle/wrapper'
|
|
180
|
+
load_relative 'indexed/wrappers/bundle/calculation'
|
|
181
|
+
load_relative 'indexed/wrappers/bundle/location'
|
|
196
182
|
|
|
197
|
-
|
|
183
|
+
load_relative 'indexed/wrappers/category/location'
|
|
198
184
|
|
|
199
185
|
# Tokens.
|
|
200
186
|
#
|
|
201
|
-
|
|
202
|
-
|
|
187
|
+
load_relative 'query/token'
|
|
188
|
+
load_relative 'query/tokens'
|
|
203
189
|
|
|
204
190
|
# Tokenizers types.
|
|
205
191
|
#
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
192
|
+
load_relative 'tokenizers/base'
|
|
193
|
+
load_relative 'tokenizers/index'
|
|
194
|
+
load_relative 'tokenizers/query'
|
|
209
195
|
|
|
210
196
|
# Query combinations, qualifiers, weigher.
|
|
211
197
|
#
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
198
|
+
load_relative 'query/combination'
|
|
199
|
+
load_relative 'query/combinations/base'
|
|
200
|
+
load_relative 'query/combinations/memory'
|
|
201
|
+
load_relative 'query/combinations/redis'
|
|
216
202
|
|
|
217
|
-
|
|
218
|
-
|
|
203
|
+
load_relative 'query/allocation'
|
|
204
|
+
load_relative 'query/allocations'
|
|
219
205
|
|
|
220
|
-
|
|
206
|
+
load_relative 'query/qualifiers'
|
|
221
207
|
|
|
222
|
-
|
|
208
|
+
load_relative 'query/weights'
|
|
223
209
|
|
|
224
|
-
|
|
210
|
+
load_relative 'query/indexes'
|
|
225
211
|
|
|
226
212
|
# Configuration.
|
|
227
213
|
#
|
|
@@ -229,14 +215,14 @@ module Loader # :nodoc:all
|
|
|
229
215
|
|
|
230
216
|
# Adapters.
|
|
231
217
|
#
|
|
232
|
-
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
218
|
+
load_relative 'adapters/rack/base'
|
|
219
|
+
load_relative 'adapters/rack/query'
|
|
220
|
+
load_relative 'adapters/rack/live_parameters'
|
|
221
|
+
load_relative 'adapters/rack'
|
|
236
222
|
|
|
237
223
|
# Routing.
|
|
238
224
|
#
|
|
239
|
-
|
|
225
|
+
load_relative 'frontend_adapters/rack'
|
|
240
226
|
end
|
|
241
227
|
# Loads the user interface parts.
|
|
242
228
|
#
|
|
@@ -271,15 +257,23 @@ module Loader # :nodoc:all
|
|
|
271
257
|
|
|
272
258
|
# API.
|
|
273
259
|
#
|
|
260
|
+
load_relative 'category'
|
|
261
|
+
load_relative 'category_indexed'
|
|
262
|
+
load_relative 'category_indexing'
|
|
263
|
+
|
|
264
|
+
load_relative 'categories'
|
|
265
|
+
load_relative 'categories_indexed'
|
|
266
|
+
load_relative 'categories_indexing'
|
|
267
|
+
|
|
274
268
|
load_relative 'index/base'
|
|
269
|
+
load_relative 'index/base_indexed'
|
|
270
|
+
load_relative 'index/base_indexing'
|
|
275
271
|
load_relative 'index/memory'
|
|
276
272
|
load_relative 'index/redis'
|
|
277
|
-
|
|
278
|
-
load_relative '
|
|
279
|
-
load_relative '
|
|
280
|
-
|
|
281
|
-
load_relative 'index_bundle'
|
|
282
|
-
load_relative 'aliases'
|
|
273
|
+
|
|
274
|
+
load_relative 'indexes'
|
|
275
|
+
load_relative 'indexes_indexed'
|
|
276
|
+
load_relative 'indexes_indexing'
|
|
283
277
|
|
|
284
278
|
# Results.
|
|
285
279
|
#
|
|
File without changes
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# An allocation has a number of combinations:
|
|
4
|
+
# [token, index] [other_token, other_index], ...
|
|
5
|
+
#
|
|
6
|
+
class Allocation # :nodoc:all

  attr_reader :count, :ids, :score, :combinations, :result_identifier

  # Wraps the given combinations together with the identifier that is
  # reported as the first element of #to_result.
  #
  def initialize combinations, result_identifier
    @combinations      = combinations
    @result_identifier = result_identifier
  end

  # An allocation hashes like its combinations.
  #
  def hash
    @combinations.hash
  end
  # Note: Always true. Wherever Ruby consults eql? after finding equal
  #       hashes (e.g. Array#uniq), two allocations whose combinations
  #       hash equally are therefore treated as duplicates.
  #
  def eql? other_allocation
    true # FIXME
    # @combinations.eql? other_allocation.combinations
  end

  # Scores its combinations and caches the result.
  #
  # Note: Once a score is cached, later calls return it regardless of
  #       the weights passed in.
  #
  def calculate_score weights
    @score ||= @combinations.calculate_score(weights)
  end

  # Asks the combinations for the (intersected) ids.
  #
  def calculate_ids amount, offset
    @combinations.ids amount, offset # Calculate as many ids as are necessary.
  end

  # This starts the searching process.
  #
  # Remembers the number of ids and keeps only the requested window of them.
  # Returns the kept ids (an empty array if the offset lies beyond the ids).
  #
  def process! amount, offset
    ids    = calculate_ids amount, offset
    @count = ids.size                       # cache the count before throwing away the ids
    @ids   = ids.slice!(offset, amount) || [] # slice out the relevant part
  end

  # Tells the combinations to keep the given identifiers (categories).
  #
  def keep identifiers = [] # categories
    @combinations.keep identifiers
  end
  # Tells the combinations to remove the given identifiers (categories).
  #
  def remove identifiers = [] # categories
    @combinations.remove identifiers
  end

  # Sort highest score first.
  #
  def <=> other_allocation
    other_allocation.score <=> self.score
  end

  # Transform the allocation into result form:
  #   [result_identifier, score, count, combinations result, ids]
  #
  # Returns nil if the allocation has no ids, or if it has not
  # been processed yet (count is still nil).
  #
  def to_result
    return unless self.count && self.count > 0

    [self.result_identifier, self.score, self.count, @combinations.to_result, self.ids]
  end

  # Json representation of this allocation.
  #
  # Note: Delegates to to_result.
  #
  def to_json
    to_result.to_json
  end

  # String representation of this allocation.
  #
  # Note: to_result is nil for an empty or unprocessed allocation,
  #       in which case only the prefix is returned.
  #
  def to_s
    "Allocation: #{Array(to_result).join(', ')}"
  end

end
|
|
83
|
+
|
|
84
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
# Container class for allocations.
|
|
3
|
+
#
|
|
4
|
+
class Allocations # :nodoc:all
|
|
5
|
+
|
|
6
|
+
delegate :each, :inject, :empty?, :size, :to => :@allocations
|
|
7
|
+
attr_reader :total
|
|
8
|
+
|
|
9
|
+
def initialize allocations = []
|
|
10
|
+
@allocations = allocations
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
# Score each allocation.
|
|
14
|
+
#
|
|
15
|
+
def calculate_score weights
|
|
16
|
+
@allocations.each do |allocation|
|
|
17
|
+
allocation.calculate_score weights
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
# Sort the allocations.
|
|
21
|
+
#
|
|
22
|
+
def sort!
|
|
23
|
+
@allocations.sort!
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Reduces the amount of allocations to x.
|
|
27
|
+
#
|
|
28
|
+
def reduce_to amount
|
|
29
|
+
@allocations = @allocations.shift amount
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Keeps combinations.
|
|
33
|
+
#
|
|
34
|
+
# Only those passed in remain.
|
|
35
|
+
#
|
|
36
|
+
def keep identifiers = []
|
|
37
|
+
@allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
|
|
38
|
+
end
|
|
39
|
+
# Removes combinations.
|
|
40
|
+
#
|
|
41
|
+
# Only those passed in are removed.
|
|
42
|
+
#
|
|
43
|
+
def remove identifiers = []
  # Without identifiers nothing is removed; answers nil in that case.
  return if identifiers.empty?

  # Otherwise every allocation is told to remove the given identifiers.
  @allocations.each do |allocation|
    allocation.remove identifiers
  end
end
|
|
46
|
+
|
|
47
|
+
# Returns the top amount ids.
|
|
48
|
+
#
|
|
49
|
+
def ids amount = 20
  # Answers at most amount ids, taken from the allocations in order.
  #
  # amount - the maximum number of ids to return (default 20)
  #
  # Collecting stops as soon as enough ids are gathered. The final
  # first(amount) also trims the case where the last allocation looked at
  # pushes the list past amount.
  collected = []
  @allocations.each do |allocation|
    break if collected.size >= amount
    collected.concat allocation.ids
  end
  collected.first amount
end
|
|
54
|
+
|
|
55
|
+
# This is the main method of this class that will replace ids and count.
|
|
56
|
+
#
|
|
57
|
+
# What it does is calculate the ids and counts of its allocations
|
|
58
|
+
# for use in the results. It also calculates the total count of all allocations.
|
|
59
|
+
#
|
|
60
|
+
# Parameters:
|
|
61
|
+
# * amount: the amount of ids to calculate
|
|
62
|
+
# * offset: the offset from where in the result set to take the ids
|
|
63
|
+
#
|
|
64
|
+
# Note: With an amount of 0, an offset > 0 doesn't make much
|
|
65
|
+
# sense, as seen in the live search.
|
|
66
|
+
#
|
|
67
|
+
# Note: Each allocation caches its count, but not its ids (thrown away).
|
|
68
|
+
# The ids are cached in this class.
|
|
69
|
+
#
|
|
70
|
+
# Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
|
|
71
|
+
#
|
|
72
|
+
def process! amount, offset = 0
  # Walks the allocations in order, asks each one to process! itself with
  # the amount and offset that are still open, and sums up the counts of
  # all allocations in @total.
  #
  # amount - how many ids are still wanted
  # offset - how many results still have to be skipped (default 0)
  #
  # Answers the internal allocations array (the result of #each).
  @total = 0
  @allocations.each do |allocation|
    ids = allocation.process! amount, offset
    @total += allocation.count # the total mixed in
    if ids.empty?
      # No ids were taken here: a pending offset shrinks by this
      # allocation's count.
      offset -= allocation.count unless offset.zero?
    else
      amount -= ids.size # we need less results from the following allocation
      offset = 0         # we have already passed the offset
    end
  end
end
|
|
86
|
+
|
|
87
|
+
def uniq
  # Removes duplicates from the internal array in place.
  # Like Array#uniq!, answers nil when nothing had to be removed.
  @allocations.uniq!
end
|
|
90
|
+
|
|
91
|
+
def to_a
  # Hands out the internal array itself, not a copy.
  @allocations
end
|
|
94
|
+
|
|
95
|
+
# Simply inspects the internal allocations.
|
|
96
|
+
#
|
|
97
|
+
def to_s
|
|
98
|
+
@allocations.inspect
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
# Allocations for results are in the form:
|
|
102
|
+
# [
|
|
103
|
+
# allocation1.to_result,
|
|
104
|
+
# allocation2.to_result
|
|
105
|
+
# ...
|
|
106
|
+
# ]
|
|
107
|
+
#
|
|
108
|
+
def to_result
  # Collects the result form of every allocation, leaving out the ones
  # that answer nil.
  @allocations.each_with_object([]) do |allocation, results|
    result = allocation.to_result
    results << result unless result.nil?
  end
end
|
|
111
|
+
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
module Query

  # Describes the combination of a token (the text) and
  # the index (the bundle): [text, index_bundle]
  #
  # A combination is a single part of an allocation:
  # [..., [text2, index_bundle2], ...]
  #
  # An allocation consists of a number of combinations:
  # [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
  #
  class Combination # :nodoc:all

    attr_reader :token, :bundle, :category_name

    # token    - responds to text, to_s, identifier and to_result
    # category - responds to name and bundle_for(token)
    #
    def initialize token, category
      @token         = token
      @category_name = category.name
      @bundle        = category.bundle_for token
      @text          = @token.text # don't want to use reset_similar already
    end

    # Note: Required for uniq!
    #
    # Built from the token's string form and the bundle.
    #
    def hash
      [@token.to_s, @bundle].hash
    end

    # Note: Also required for uniq!
    #
    # Array#uniq (and Hash lookups) only treat two objects as duplicates
    # if their hashes match AND eql? holds, so eql? compares exactly the
    # parts that make up the hash.
    #
    def eql? other
      other.is_a?(Combination) &&
        @token.to_s.eql?(other.token.to_s) &&
        @bundle.eql?(other.bundle)
    end

    # Returns the weight of this combination.
    #
    # Note: Caching is most of the time useful.
    #
    def weight
      @weight ||= @bundle.weight(@text)
    end

    # Returns an array of ids for the given text.
    #
    # Note: Caching is most of the time useful.
    #
    def ids
      @ids ||= @bundle.ids(@text)
    end

    # The identifier for this combination:
    # the bundle's and the token's identifier, joined by a colon.
    #
    def identifier
      "#{bundle.identifier}:#{@token.identifier}"
    end

    # Is the identifier in the given identifiers?
    #
    def in? identifiers
      identifiers.include? identifier
    end

    # Combines the category names with the original names.
    # [
    #  [:title,    'Flarbl', :flarbl],
    #  [:category, 'Gnorf',  :gnorf]
    # ]
    #
    def to_result
      [@category_name, *@token.to_result]
    end

    # Example:
    #  "exact title:Peter*:peter"
    #
    def to_s
      "#{bundle.identifier} #{to_result.join(':')}"
    end

  end

end
|