picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,65 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
# TODO Merge into Base, extract common with Indexed::Base.
|
4
|
-
#
|
5
|
-
module Indexing # :nodoc:all
|
6
|
-
# A Bundle is a number of indexes
|
7
|
-
# per [index, category] combination.
|
8
|
-
#
|
9
|
-
# At most, there are three indexes:
|
10
|
-
# * *core* index (always used)
|
11
|
-
# * *weights* index (always used)
|
12
|
-
# * *similarity* index (used with similarity)
|
13
|
-
#
|
14
|
-
# In Picky, indexing is separated from the index
|
15
|
-
# handling itself through a parallel structure.
|
16
|
-
#
|
17
|
-
# Both use methods provided by this base class, but
|
18
|
-
# have very different goals:
|
19
|
-
#
|
20
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
21
|
-
# and providing helper functions to e.g. check the indexes.
|
22
|
-
#
|
23
|
-
# * *Index*::*Bundle* is concerned with loading these index files into
|
24
|
-
# memory and looking up search data as fast as possible.
|
25
|
-
#
|
26
|
-
module Bundle
|
27
|
-
|
28
|
-
class SuperBase
|
29
|
-
|
30
|
-
attr_reader :identifier, :files
|
31
|
-
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
32
|
-
|
33
|
-
delegate :clear, :to => :index
|
34
|
-
delegate :[], :[]=, :to => :configuration
|
35
|
-
|
36
|
-
def initialize name, category, similarity_strategy
|
37
|
-
@identifier = "#{category.identifier}:#{name}"
|
38
|
-
@files = Internals::Index::Files.new name, category
|
39
|
-
|
40
|
-
@index = {}
|
41
|
-
@weights = {}
|
42
|
-
@similarity = {}
|
43
|
-
@configuration = {} # A hash with config options.
|
44
|
-
|
45
|
-
@similarity_strategy = similarity_strategy
|
46
|
-
end
|
47
|
-
|
48
|
-
# Get a list of similar texts.
|
49
|
-
#
|
50
|
-
# Note: Does not return itself.
|
51
|
-
#
|
52
|
-
def similar text
|
53
|
-
code = similarity_strategy.encoded text
|
54
|
-
similar_codes = code && @similarity[code]
|
55
|
-
similar_codes.delete text if similar_codes
|
56
|
-
similar_codes || []
|
57
|
-
end
|
58
|
-
|
59
|
-
end
|
60
|
-
|
61
|
-
end
|
62
|
-
|
63
|
-
end
|
64
|
-
|
65
|
-
end
|
@@ -1,153 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Indexing
|
4
|
-
|
5
|
-
class Category
|
6
|
-
|
7
|
-
include Internals::Shared::Category
|
8
|
-
|
9
|
-
attr_reader :name, :index, :exact, :partial
|
10
|
-
|
11
|
-
# Mandatory params:
|
12
|
-
# * name: Category name to use as identifier and file names.
|
13
|
-
# * index: Index to which this category is attached to.
|
14
|
-
#
|
15
|
-
# Options:
|
16
|
-
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
17
|
-
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
18
|
-
# * from: The source category identifier to take the data from.
|
19
|
-
#
|
20
|
-
# Advanced Options:
|
21
|
-
# * source: Use if the category should use a different source.
|
22
|
-
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
23
|
-
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
24
|
-
# * key_format: What this category's keys are formatted with (default is :to_i)
|
25
|
-
#
|
26
|
-
def initialize name, index, options = {}
|
27
|
-
@name = name
|
28
|
-
@index = index
|
29
|
-
|
30
|
-
@source = options[:source]
|
31
|
-
@from = options[:from]
|
32
|
-
@tokenizer = options[:tokenizer]
|
33
|
-
@key_format = options[:key_format]
|
34
|
-
|
35
|
-
# TODO Push into Bundle. At least the weights.
|
36
|
-
#
|
37
|
-
partial = options[:partial] || Generators::Partial::Default
|
38
|
-
weights = options[:weights] || Generators::Weights::Default
|
39
|
-
similarity = options[:similarity] || Generators::Similarity::Default
|
40
|
-
|
41
|
-
bundle_class = index.bundle_class || Bundle::Memory
|
42
|
-
|
43
|
-
@exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
|
44
|
-
@partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
|
45
|
-
end
|
46
|
-
|
47
|
-
# Return an appropriate source.
|
48
|
-
#
|
49
|
-
def source
|
50
|
-
@source || @index.source
|
51
|
-
end
|
52
|
-
# Return the key format.
|
53
|
-
#
|
54
|
-
# If the source has no key format, then
|
55
|
-
# check for an explicit key format, and
|
56
|
-
# if none is defined, ask the index for
|
57
|
-
# one.
|
58
|
-
#
|
59
|
-
def key_format
|
60
|
-
source.respond_to?(:key_format) && source.key_format || @key_format || index.key_format
|
61
|
-
end
|
62
|
-
# The indexer is lazily generated and cached.
|
63
|
-
#
|
64
|
-
def indexer
|
65
|
-
@indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
|
66
|
-
end
|
67
|
-
# TODO This is a hack to get the parallel indexer working.
|
68
|
-
#
|
69
|
-
def categories
|
70
|
-
[self]
|
71
|
-
end
|
72
|
-
# Returns an appropriate tokenizer.
|
73
|
-
# If one isn't set on this category, will try the index,
|
74
|
-
# and finally the default index tokenizer.
|
75
|
-
#
|
76
|
-
def tokenizer
|
77
|
-
@tokenizer || @index.tokenizer || Tokenizers::Index.default
|
78
|
-
end
|
79
|
-
|
80
|
-
# Where the data is taken from.
|
81
|
-
#
|
82
|
-
def from
|
83
|
-
@from || name
|
84
|
-
end
|
85
|
-
|
86
|
-
def backup_caches
|
87
|
-
timed_exclaim "Backing up #{identifier}."
|
88
|
-
exact.backup
|
89
|
-
partial.backup
|
90
|
-
end
|
91
|
-
def restore_caches
|
92
|
-
timed_exclaim "Restoring #{identifier}."
|
93
|
-
exact.restore
|
94
|
-
partial.restore
|
95
|
-
end
|
96
|
-
def check_caches
|
97
|
-
timed_exclaim "Checking #{identifier}."
|
98
|
-
exact.raise_unless_cache_exists
|
99
|
-
partial.raise_unless_cache_exists
|
100
|
-
end
|
101
|
-
def clear_caches
|
102
|
-
timed_exclaim "Deleting #{identifier}."
|
103
|
-
exact.delete
|
104
|
-
partial.delete
|
105
|
-
end
|
106
|
-
|
107
|
-
# Indexes, creates the "prepared_..." file.
|
108
|
-
#
|
109
|
-
def index!
|
110
|
-
prepare_index_directory
|
111
|
-
indexer.index
|
112
|
-
end
|
113
|
-
|
114
|
-
# Generates all caches for this category.
|
115
|
-
#
|
116
|
-
def cache!
|
117
|
-
prepare_index_directory
|
118
|
-
generate_caches
|
119
|
-
end
|
120
|
-
# We need to set what formatting method should be used.
|
121
|
-
# Uses the one defined in the indexer.
|
122
|
-
#
|
123
|
-
def configure
|
124
|
-
exact[:key_format] = self.key_format
|
125
|
-
partial[:key_format] = self.key_format
|
126
|
-
end
|
127
|
-
def generate_caches
|
128
|
-
configure
|
129
|
-
generate_caches_from_source
|
130
|
-
generate_partial
|
131
|
-
generate_caches_from_memory
|
132
|
-
dump_caches
|
133
|
-
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
134
|
-
end
|
135
|
-
def generate_caches_from_source
|
136
|
-
exact.generate_caches_from_source
|
137
|
-
end
|
138
|
-
def generate_partial
|
139
|
-
partial.generate_partial_from exact.index
|
140
|
-
end
|
141
|
-
def generate_caches_from_memory
|
142
|
-
partial.generate_caches_from_memory
|
143
|
-
end
|
144
|
-
def dump_caches
|
145
|
-
exact.dump
|
146
|
-
partial.dump
|
147
|
-
end
|
148
|
-
|
149
|
-
end
|
150
|
-
|
151
|
-
end
|
152
|
-
|
153
|
-
end
|
@@ -1,142 +0,0 @@
|
|
1
|
-
# TODO Move to the API.
|
2
|
-
#
|
3
|
-
module Internals
|
4
|
-
|
5
|
-
module Indexing
|
6
|
-
|
7
|
-
class Index
|
8
|
-
|
9
|
-
attr_reader :name, :categories, :after_indexing, :bundle_class, :tokenizer
|
10
|
-
|
11
|
-
# Delegators for indexing.
|
12
|
-
#
|
13
|
-
delegate :connect_backend,
|
14
|
-
:to => :source
|
15
|
-
|
16
|
-
each_delegate :backup_caches,
|
17
|
-
:cache!,
|
18
|
-
:check_caches,
|
19
|
-
:clear_caches,
|
20
|
-
:create_directory_structure,
|
21
|
-
:generate_caches,
|
22
|
-
:restore_caches,
|
23
|
-
:to => :categories
|
24
|
-
|
25
|
-
def initialize name, options = {}
|
26
|
-
@name = name
|
27
|
-
@source = options[:source]
|
28
|
-
@after_indexing = options[:after_indexing]
|
29
|
-
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
30
|
-
@tokenizer = options[:tokenizer]
|
31
|
-
@key_format = options[:key_format]
|
32
|
-
|
33
|
-
@categories = []
|
34
|
-
end
|
35
|
-
|
36
|
-
# TODO Spec. Doc.
|
37
|
-
#
|
38
|
-
def define_category category_name, options = {}
|
39
|
-
new_category = Category.new category_name, self, options
|
40
|
-
new_category = yield new_category if block_given?
|
41
|
-
categories << new_category
|
42
|
-
new_category
|
43
|
-
end
|
44
|
-
|
45
|
-
# TODO Spec. Doc.
|
46
|
-
#
|
47
|
-
def define_indexing options = {}
|
48
|
-
@tokenizer = Internals::Tokenizers::Index.new options
|
49
|
-
end
|
50
|
-
|
51
|
-
#
|
52
|
-
#
|
53
|
-
def define_source source
|
54
|
-
@source = source
|
55
|
-
end
|
56
|
-
def source
|
57
|
-
@source || raise_no_source
|
58
|
-
end
|
59
|
-
def raise_no_source
|
60
|
-
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
61
|
-
|
62
|
-
|
63
|
-
No source given for index #{name}. An index needs a source.
|
64
|
-
Example:
|
65
|
-
Index::Memory.new(:with_source) do
|
66
|
-
source Sources::CSV.new(:title, file: 'data/books.csv')
|
67
|
-
category :title
|
68
|
-
category :author
|
69
|
-
end
|
70
|
-
|
71
|
-
NO_SOURCE
|
72
|
-
)
|
73
|
-
end
|
74
|
-
|
75
|
-
#
|
76
|
-
#
|
77
|
-
def define_key_format key_format
|
78
|
-
@key_format = key_format
|
79
|
-
end
|
80
|
-
def key_format
|
81
|
-
@key_format || :to_i
|
82
|
-
end
|
83
|
-
|
84
|
-
#
|
85
|
-
#
|
86
|
-
def find category_name
|
87
|
-
category_name = category_name.to_sym
|
88
|
-
|
89
|
-
categories.each do |category|
|
90
|
-
next unless category.name == category_name
|
91
|
-
return category
|
92
|
-
end
|
93
|
-
|
94
|
-
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
95
|
-
end
|
96
|
-
|
97
|
-
# Decides whether to use a parallel indexer or whether to
|
98
|
-
# delegate to each category to index themselves.
|
99
|
-
#
|
100
|
-
def index!
|
101
|
-
# TODO Duplicated in category.rb def indexer.
|
102
|
-
#
|
103
|
-
if source.respond_to?(:each)
|
104
|
-
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
105
|
-
index_parallel
|
106
|
-
else
|
107
|
-
categories.each &:index!
|
108
|
-
end
|
109
|
-
end
|
110
|
-
# Indexes the categories in parallel.
|
111
|
-
#
|
112
|
-
# Only use where the category does not have a non-#each source defined.
|
113
|
-
#
|
114
|
-
def index_parallel
|
115
|
-
indexer = Indexers::Parallel.new self
|
116
|
-
categories.first.prepare_index_directory # TODO Unnice.
|
117
|
-
indexer.index
|
118
|
-
end
|
119
|
-
|
120
|
-
# Indexing.
|
121
|
-
#
|
122
|
-
# Note: If it is an each source we do not take a snapshot.
|
123
|
-
#
|
124
|
-
def take_snapshot
|
125
|
-
source.take_snapshot self unless source.respond_to? :each
|
126
|
-
end
|
127
|
-
|
128
|
-
#
|
129
|
-
#
|
130
|
-
def to_s
|
131
|
-
<<-INDEX
|
132
|
-
Indexing(#{name}):
|
133
|
-
#{"source: #{source}".indented_to_s}
|
134
|
-
#{"Categories:\n#{categories.indented_to_s}".indented_to_s}
|
135
|
-
INDEX
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
|
140
|
-
end
|
141
|
-
|
142
|
-
end
|
@@ -1,27 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
module Indexing
|
3
|
-
module Wrappers
|
4
|
-
module Category
|
5
|
-
|
6
|
-
module Location
|
7
|
-
|
8
|
-
def self.install_on category, grid, precision = 1
|
9
|
-
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
10
|
-
|
11
|
-
category.class_eval do
|
12
|
-
def tokenizer
|
13
|
-
@tokenizer ||= Internals::Tokenizers::Index.new
|
14
|
-
end
|
15
|
-
define_method :source do
|
16
|
-
new_source
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
end
|
21
|
-
|
22
|
-
end
|
23
|
-
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
@@ -1,88 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Query
|
4
|
-
|
5
|
-
# An allocation has a number of combinations:
|
6
|
-
# [token, index] [other_token, other_index], ...
|
7
|
-
#
|
8
|
-
class Allocation # :nodoc:all
|
9
|
-
|
10
|
-
attr_reader :count, :ids, :score, :combinations, :result_identifier
|
11
|
-
|
12
|
-
#
|
13
|
-
#
|
14
|
-
def initialize combinations, result_identifier
|
15
|
-
@combinations = combinations
|
16
|
-
@result_identifier = result_identifier
|
17
|
-
end
|
18
|
-
|
19
|
-
def hash
|
20
|
-
@combinations.hash
|
21
|
-
end
|
22
|
-
def eql? other_allocation
|
23
|
-
true # FIXME
|
24
|
-
# @combinations.eql? other_allocation.combinations
|
25
|
-
end
|
26
|
-
|
27
|
-
# Scores its combinations and caches the result.
|
28
|
-
#
|
29
|
-
def calculate_score weights
|
30
|
-
@score ||= @combinations.calculate_score(weights)
|
31
|
-
end
|
32
|
-
|
33
|
-
# Asks the combinations for the (intersected) ids.
|
34
|
-
#
|
35
|
-
def calculate_ids amount, offset
|
36
|
-
@combinations.ids amount, offset # Calculate as many ids as are necessary.
|
37
|
-
end
|
38
|
-
|
39
|
-
# This starts the searching process.
|
40
|
-
#
|
41
|
-
def process! amount, offset
|
42
|
-
ids = calculate_ids amount, offset
|
43
|
-
@count = ids.size # cache the count before throwing away the ids
|
44
|
-
@ids = ids.slice!(offset, amount) || [] # slice out the relevant part
|
45
|
-
end
|
46
|
-
|
47
|
-
#
|
48
|
-
#
|
49
|
-
def keep identifiers = [] # categories
|
50
|
-
@combinations.keep identifiers
|
51
|
-
end
|
52
|
-
#
|
53
|
-
#
|
54
|
-
def remove identifiers = [] # categories
|
55
|
-
@combinations.remove identifiers
|
56
|
-
end
|
57
|
-
|
58
|
-
# Sort highest score first.
|
59
|
-
#
|
60
|
-
def <=> other_allocation
|
61
|
-
other_allocation.score <=> self.score
|
62
|
-
end
|
63
|
-
|
64
|
-
# Transform the allocation into result form.
|
65
|
-
#
|
66
|
-
def to_result
|
67
|
-
[self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
|
68
|
-
end
|
69
|
-
|
70
|
-
# Json representation of this allocation.
|
71
|
-
#
|
72
|
-
# Note: Delegates to to_result.
|
73
|
-
#
|
74
|
-
def to_json
|
75
|
-
to_result.to_json
|
76
|
-
end
|
77
|
-
|
78
|
-
#
|
79
|
-
#
|
80
|
-
def to_s
|
81
|
-
"Allocation: #{to_result.join(', ')}"
|
82
|
-
end
|
83
|
-
|
84
|
-
end
|
85
|
-
|
86
|
-
end
|
87
|
-
|
88
|
-
end
|
@@ -1,118 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Query
|
4
|
-
# Container class for allocations.
|
5
|
-
#
|
6
|
-
class Allocations # :nodoc:all
|
7
|
-
|
8
|
-
delegate :each, :inject, :empty?, :size, :to => :@allocations
|
9
|
-
attr_reader :total
|
10
|
-
|
11
|
-
def initialize allocations = []
|
12
|
-
@allocations = allocations
|
13
|
-
end
|
14
|
-
|
15
|
-
# Score each allocation.
|
16
|
-
#
|
17
|
-
def calculate_score weights
|
18
|
-
@allocations.each do |allocation|
|
19
|
-
allocation.calculate_score weights
|
20
|
-
end
|
21
|
-
end
|
22
|
-
# Sort the allocations.
|
23
|
-
#
|
24
|
-
def sort!
|
25
|
-
@allocations.sort!
|
26
|
-
end
|
27
|
-
|
28
|
-
# Reduces the amount of allocations to x.
|
29
|
-
#
|
30
|
-
def reduce_to amount
|
31
|
-
@allocations = @allocations.shift amount
|
32
|
-
end
|
33
|
-
|
34
|
-
# Keeps combinations.
|
35
|
-
#
|
36
|
-
# Only those passed in remain.
|
37
|
-
#
|
38
|
-
def keep identifiers = []
|
39
|
-
@allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
|
40
|
-
end
|
41
|
-
# Removes combinations.
|
42
|
-
#
|
43
|
-
# Only those passed in are removed.
|
44
|
-
#
|
45
|
-
def remove identifiers = []
|
46
|
-
@allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
|
47
|
-
end
|
48
|
-
|
49
|
-
# Returns the top amount ids.
|
50
|
-
#
|
51
|
-
def ids amount = 20
|
52
|
-
@allocations.inject([]) do |total, allocation|
|
53
|
-
total.size >= amount ? (return total.shift(amount)) : total + allocation.ids
|
54
|
-
end
|
55
|
-
end
|
56
|
-
|
57
|
-
# This is the main method of this class that will replace ids and count.
|
58
|
-
#
|
59
|
-
# What it does is calculate the ids and counts of its allocations
|
60
|
-
# for being used in the results. It also calculates the total
|
61
|
-
#
|
62
|
-
# Parameters:
|
63
|
-
# * amount: the amount of ids to calculate
|
64
|
-
# * offset: the offset from where in the result set to take the ids
|
65
|
-
#
|
66
|
-
# Note: With an amount of 0, an offset > 0 doesn't make much
|
67
|
-
# sense, as seen in the live search.
|
68
|
-
#
|
69
|
-
# Note: Each allocation caches its count, but not its ids (thrown away).
|
70
|
-
# The ids are cached in this class.
|
71
|
-
#
|
72
|
-
# Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
|
73
|
-
#
|
74
|
-
def process! amount, offset = 0
|
75
|
-
@total = 0
|
76
|
-
current_offset = 0
|
77
|
-
@allocations.each do |allocation|
|
78
|
-
ids = allocation.process! amount, offset
|
79
|
-
@total = @total + allocation.count # the total mixed in
|
80
|
-
if ids.empty?
|
81
|
-
offset = offset - allocation.count unless offset.zero?
|
82
|
-
else
|
83
|
-
amount = amount - ids.size # we need less results from the following allocation
|
84
|
-
offset = 0 # we have already passed the offset
|
85
|
-
end
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def uniq
|
90
|
-
@allocations.uniq!
|
91
|
-
end
|
92
|
-
|
93
|
-
def to_a
|
94
|
-
@allocations
|
95
|
-
end
|
96
|
-
|
97
|
-
# Simply inspects the internal allocations.
|
98
|
-
#
|
99
|
-
def to_s
|
100
|
-
@allocations.inspect
|
101
|
-
end
|
102
|
-
|
103
|
-
# Allocations for results are in the form:
|
104
|
-
# [
|
105
|
-
# allocation1.to_result,
|
106
|
-
# allocation2.to_result
|
107
|
-
# ...
|
108
|
-
# ]
|
109
|
-
#
|
110
|
-
def to_result
|
111
|
-
@allocations.map(&:to_result).compact
|
112
|
-
end
|
113
|
-
|
114
|
-
end
|
115
|
-
|
116
|
-
end
|
117
|
-
|
118
|
-
end
|
@@ -1,80 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Query
|
4
|
-
|
5
|
-
# Describes the combination of a token (the text) and
|
6
|
-
# the index (the bundle): [text, index_bundle]
|
7
|
-
#
|
8
|
-
# A combination is a single part of an allocation:
|
9
|
-
# [..., [text2, index_bundle2], ...]
|
10
|
-
#
|
11
|
-
# An allocation consists of a number of combinations:
|
12
|
-
# [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
|
13
|
-
#
|
14
|
-
class Combination # :nodoc:all
|
15
|
-
|
16
|
-
attr_reader :token, :bundle, :category_name
|
17
|
-
|
18
|
-
def initialize token, category
|
19
|
-
@token = token
|
20
|
-
@category_name = category.name
|
21
|
-
@bundle = category.bundle_for token
|
22
|
-
@text = @token.text # don't want to use reset_similar already
|
23
|
-
end
|
24
|
-
|
25
|
-
# Note: Required for uniq!
|
26
|
-
#
|
27
|
-
def hash
|
28
|
-
[@token.to_s, @bundle].hash
|
29
|
-
end
|
30
|
-
|
31
|
-
# Returns the weight of this combination.
|
32
|
-
#
|
33
|
-
# Note: Caching is most of the time useful.
|
34
|
-
#
|
35
|
-
def weight
|
36
|
-
@weight ||= @bundle.weight(@text)
|
37
|
-
end
|
38
|
-
|
39
|
-
# Returns an array of ids for the given text.
|
40
|
-
#
|
41
|
-
# Note: Caching is most of the time useful.
|
42
|
-
#
|
43
|
-
def ids
|
44
|
-
@ids ||= @bundle.ids(@text)
|
45
|
-
end
|
46
|
-
|
47
|
-
# The identifier for this combination.
|
48
|
-
#
|
49
|
-
def identifier
|
50
|
-
"#{bundle.identifier}:#{@token.identifier}"
|
51
|
-
end
|
52
|
-
|
53
|
-
# Is the identifier in the given identifiers?
|
54
|
-
#
|
55
|
-
def in? identifiers
|
56
|
-
identifiers.include? identifier
|
57
|
-
end
|
58
|
-
|
59
|
-
# Combines the category names with the original names.
|
60
|
-
# [
|
61
|
-
# [:title, 'Flarbl', :flarbl],
|
62
|
-
# [:category, 'Gnorf', :gnorf]
|
63
|
-
# ]
|
64
|
-
#
|
65
|
-
def to_result
|
66
|
-
[@category_name, *@token.to_result]
|
67
|
-
end
|
68
|
-
|
69
|
-
# Example:
|
70
|
-
# "exact title:Peter*:peter"
|
71
|
-
#
|
72
|
-
def to_s
|
73
|
-
"#{bundle.identifier} #{to_result.join(':')}"
|
74
|
-
end
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
end
|
79
|
-
|
80
|
-
end
|