picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
|
@@ -1,65 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
# TODO Merge into Base, extract common with Indexed::Base.
|
|
4
|
-
#
|
|
5
|
-
module Indexing # :nodoc:all
|
|
6
|
-
# A Bundle is a number of indexes
|
|
7
|
-
# per [index, category] combination.
|
|
8
|
-
#
|
|
9
|
-
# At most, there are three indexes:
|
|
10
|
-
# * *core* index (always used)
|
|
11
|
-
# * *weights* index (always used)
|
|
12
|
-
# * *similarity* index (used with similarity)
|
|
13
|
-
#
|
|
14
|
-
# In Picky, indexing is separated from the index
|
|
15
|
-
# handling itself through a parallel structure.
|
|
16
|
-
#
|
|
17
|
-
# Both use methods provided by this base class, but
|
|
18
|
-
# have very different goals:
|
|
19
|
-
#
|
|
20
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
|
21
|
-
# and providing helper functions to e.g. check the indexes.
|
|
22
|
-
#
|
|
23
|
-
# * *Index*::*Bundle* is concerned with loading these index files into
|
|
24
|
-
# memory and looking up search data as fast as possible.
|
|
25
|
-
#
|
|
26
|
-
module Bundle
|
|
27
|
-
|
|
28
|
-
class SuperBase
|
|
29
|
-
|
|
30
|
-
attr_reader :identifier, :files
|
|
31
|
-
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
|
32
|
-
|
|
33
|
-
delegate :clear, :to => :index
|
|
34
|
-
delegate :[], :[]=, :to => :configuration
|
|
35
|
-
|
|
36
|
-
def initialize name, category, similarity_strategy
|
|
37
|
-
@identifier = "#{category.identifier}:#{name}"
|
|
38
|
-
@files = Internals::Index::Files.new name, category
|
|
39
|
-
|
|
40
|
-
@index = {}
|
|
41
|
-
@weights = {}
|
|
42
|
-
@similarity = {}
|
|
43
|
-
@configuration = {} # A hash with config options.
|
|
44
|
-
|
|
45
|
-
@similarity_strategy = similarity_strategy
|
|
46
|
-
end
|
|
47
|
-
|
|
48
|
-
# Get a list of similar texts.
|
|
49
|
-
#
|
|
50
|
-
# Note: Does not return itself.
|
|
51
|
-
#
|
|
52
|
-
def similar text
|
|
53
|
-
code = similarity_strategy.encoded text
|
|
54
|
-
similar_codes = code && @similarity[code]
|
|
55
|
-
similar_codes.delete text if similar_codes
|
|
56
|
-
similar_codes || []
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
end
|
|
62
|
-
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
end
|
|
@@ -1,153 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Indexing
|
|
4
|
-
|
|
5
|
-
class Category
|
|
6
|
-
|
|
7
|
-
include Internals::Shared::Category
|
|
8
|
-
|
|
9
|
-
attr_reader :name, :index, :exact, :partial
|
|
10
|
-
|
|
11
|
-
# Mandatory params:
|
|
12
|
-
# * name: Category name to use as identifier and file names.
|
|
13
|
-
# * index: Index to which this category is attached to.
|
|
14
|
-
#
|
|
15
|
-
# Options:
|
|
16
|
-
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
|
17
|
-
# * similarity: Similarity::None.new (default), Similarity::DoubleMetaphone.new(amount_of_similarly_linked_words)
|
|
18
|
-
# * from: The source category identifier to take the data from.
|
|
19
|
-
#
|
|
20
|
-
# Advanced Options:
|
|
21
|
-
# * source: Use if the category should use a different source.
|
|
22
|
-
# * weights: Query::Weights.new( [:category1, :category2] => +2, ... )
|
|
23
|
-
# * tokenizer: Use a subclass of Tokenizers::Base that implements #tokens_for and #empty_tokens.
|
|
24
|
-
# * key_format: What this category's keys are formatted with (default is :to_i)
|
|
25
|
-
#
|
|
26
|
-
def initialize name, index, options = {}
|
|
27
|
-
@name = name
|
|
28
|
-
@index = index
|
|
29
|
-
|
|
30
|
-
@source = options[:source]
|
|
31
|
-
@from = options[:from]
|
|
32
|
-
@tokenizer = options[:tokenizer]
|
|
33
|
-
@key_format = options[:key_format]
|
|
34
|
-
|
|
35
|
-
# TODO Push into Bundle. At least the weights.
|
|
36
|
-
#
|
|
37
|
-
partial = options[:partial] || Generators::Partial::Default
|
|
38
|
-
weights = options[:weights] || Generators::Weights::Default
|
|
39
|
-
similarity = options[:similarity] || Generators::Similarity::Default
|
|
40
|
-
|
|
41
|
-
bundle_class = index.bundle_class || Bundle::Memory
|
|
42
|
-
|
|
43
|
-
@exact = bundle_class.new(:exact, self, similarity, Generators::Partial::None.new, weights)
|
|
44
|
-
@partial = bundle_class.new(:partial, self, Generators::Similarity::None.new, partial, weights)
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Return an appropriate source.
|
|
48
|
-
#
|
|
49
|
-
def source
|
|
50
|
-
@source || @index.source
|
|
51
|
-
end
|
|
52
|
-
# Return the key format.
|
|
53
|
-
#
|
|
54
|
-
# If the source has no key format, then
|
|
55
|
-
# check for an explicit key format, and
|
|
56
|
-
# if none is defined, ask the index for
|
|
57
|
-
# one.
|
|
58
|
-
#
|
|
59
|
-
def key_format
|
|
60
|
-
source.respond_to?(:key_format) && source.key_format || @key_format || index.key_format
|
|
61
|
-
end
|
|
62
|
-
# The indexer is lazily generated and cached.
|
|
63
|
-
#
|
|
64
|
-
def indexer
|
|
65
|
-
@indexer ||= source.respond_to?(:each) ? Indexers::Parallel.new(self) : Indexers::Serial.new(self)
|
|
66
|
-
end
|
|
67
|
-
# TODO This is a hack to get the parallel indexer working.
|
|
68
|
-
#
|
|
69
|
-
def categories
|
|
70
|
-
[self]
|
|
71
|
-
end
|
|
72
|
-
# Returns an appropriate tokenizer.
|
|
73
|
-
# If one isn't set on this category, will try the index,
|
|
74
|
-
# and finally the default index tokenizer.
|
|
75
|
-
#
|
|
76
|
-
def tokenizer
|
|
77
|
-
@tokenizer || @index.tokenizer || Tokenizers::Index.default
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
# Where the data is taken from.
|
|
81
|
-
#
|
|
82
|
-
def from
|
|
83
|
-
@from || name
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
def backup_caches
|
|
87
|
-
timed_exclaim "Backing up #{identifier}."
|
|
88
|
-
exact.backup
|
|
89
|
-
partial.backup
|
|
90
|
-
end
|
|
91
|
-
def restore_caches
|
|
92
|
-
timed_exclaim "Restoring #{identifier}."
|
|
93
|
-
exact.restore
|
|
94
|
-
partial.restore
|
|
95
|
-
end
|
|
96
|
-
def check_caches
|
|
97
|
-
timed_exclaim "Checking #{identifier}."
|
|
98
|
-
exact.raise_unless_cache_exists
|
|
99
|
-
partial.raise_unless_cache_exists
|
|
100
|
-
end
|
|
101
|
-
def clear_caches
|
|
102
|
-
timed_exclaim "Deleting #{identifier}."
|
|
103
|
-
exact.delete
|
|
104
|
-
partial.delete
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
# Indexes, creates the "prepared_..." file.
|
|
108
|
-
#
|
|
109
|
-
def index!
|
|
110
|
-
prepare_index_directory
|
|
111
|
-
indexer.index
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
# Generates all caches for this category.
|
|
115
|
-
#
|
|
116
|
-
def cache!
|
|
117
|
-
prepare_index_directory
|
|
118
|
-
generate_caches
|
|
119
|
-
end
|
|
120
|
-
# We need to set what formatting method should be used.
|
|
121
|
-
# Uses the one defined in the indexer.
|
|
122
|
-
#
|
|
123
|
-
def configure
|
|
124
|
-
exact[:key_format] = self.key_format
|
|
125
|
-
partial[:key_format] = self.key_format
|
|
126
|
-
end
|
|
127
|
-
def generate_caches
|
|
128
|
-
configure
|
|
129
|
-
generate_caches_from_source
|
|
130
|
-
generate_partial
|
|
131
|
-
generate_caches_from_memory
|
|
132
|
-
dump_caches
|
|
133
|
-
timed_exclaim %Q{"#{identifier}": Caching finished.}
|
|
134
|
-
end
|
|
135
|
-
def generate_caches_from_source
|
|
136
|
-
exact.generate_caches_from_source
|
|
137
|
-
end
|
|
138
|
-
def generate_partial
|
|
139
|
-
partial.generate_partial_from exact.index
|
|
140
|
-
end
|
|
141
|
-
def generate_caches_from_memory
|
|
142
|
-
partial.generate_caches_from_memory
|
|
143
|
-
end
|
|
144
|
-
def dump_caches
|
|
145
|
-
exact.dump
|
|
146
|
-
partial.dump
|
|
147
|
-
end
|
|
148
|
-
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
end
|
|
152
|
-
|
|
153
|
-
end
|
|
@@ -1,142 +0,0 @@
|
|
|
1
|
-
# TODO Move to the API.
|
|
2
|
-
#
|
|
3
|
-
module Internals
|
|
4
|
-
|
|
5
|
-
module Indexing
|
|
6
|
-
|
|
7
|
-
class Index
|
|
8
|
-
|
|
9
|
-
attr_reader :name, :categories, :after_indexing, :bundle_class, :tokenizer
|
|
10
|
-
|
|
11
|
-
# Delegators for indexing.
|
|
12
|
-
#
|
|
13
|
-
delegate :connect_backend,
|
|
14
|
-
:to => :source
|
|
15
|
-
|
|
16
|
-
each_delegate :backup_caches,
|
|
17
|
-
:cache!,
|
|
18
|
-
:check_caches,
|
|
19
|
-
:clear_caches,
|
|
20
|
-
:create_directory_structure,
|
|
21
|
-
:generate_caches,
|
|
22
|
-
:restore_caches,
|
|
23
|
-
:to => :categories
|
|
24
|
-
|
|
25
|
-
def initialize name, options = {}
|
|
26
|
-
@name = name
|
|
27
|
-
@source = options[:source]
|
|
28
|
-
@after_indexing = options[:after_indexing]
|
|
29
|
-
@bundle_class = options[:indexing_bundle_class] # TODO This should actually be a fixed parameter.
|
|
30
|
-
@tokenizer = options[:tokenizer]
|
|
31
|
-
@key_format = options[:key_format]
|
|
32
|
-
|
|
33
|
-
@categories = []
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# TODO Spec. Doc.
|
|
37
|
-
#
|
|
38
|
-
def define_category category_name, options = {}
|
|
39
|
-
new_category = Category.new category_name, self, options
|
|
40
|
-
new_category = yield new_category if block_given?
|
|
41
|
-
categories << new_category
|
|
42
|
-
new_category
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# TODO Spec. Doc.
|
|
46
|
-
#
|
|
47
|
-
def define_indexing options = {}
|
|
48
|
-
@tokenizer = Internals::Tokenizers::Index.new options
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
#
|
|
52
|
-
#
|
|
53
|
-
def define_source source
|
|
54
|
-
@source = source
|
|
55
|
-
end
|
|
56
|
-
def source
|
|
57
|
-
@source || raise_no_source
|
|
58
|
-
end
|
|
59
|
-
def raise_no_source
|
|
60
|
-
raise NoSourceSpecifiedException.new(<<-NO_SOURCE
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
No source given for index #{name}. An index needs a source.
|
|
64
|
-
Example:
|
|
65
|
-
Index::Memory.new(:with_source) do
|
|
66
|
-
source Sources::CSV.new(:title, file: 'data/books.csv')
|
|
67
|
-
category :title
|
|
68
|
-
category :author
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
NO_SOURCE
|
|
72
|
-
)
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
def define_key_format key_format
|
|
78
|
-
@key_format = key_format
|
|
79
|
-
end
|
|
80
|
-
def key_format
|
|
81
|
-
@key_format || :to_i
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
#
|
|
85
|
-
#
|
|
86
|
-
def find category_name
|
|
87
|
-
category_name = category_name.to_sym
|
|
88
|
-
|
|
89
|
-
categories.each do |category|
|
|
90
|
-
next unless category.name == category_name
|
|
91
|
-
return category
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
# Decides whether to use a parallel indexer or whether to
|
|
98
|
-
# delegate to each category to index themselves.
|
|
99
|
-
#
|
|
100
|
-
def index!
|
|
101
|
-
# TODO Duplicated in category.rb def indexer.
|
|
102
|
-
#
|
|
103
|
-
if source.respond_to?(:each)
|
|
104
|
-
warn %Q{\n\033[1mWarning\033[m, source for index "#{name}" is empty: #{source} (responds true to empty?).\n} if source.respond_to?(:empty?) && source.empty?
|
|
105
|
-
index_parallel
|
|
106
|
-
else
|
|
107
|
-
categories.each &:index!
|
|
108
|
-
end
|
|
109
|
-
end
|
|
110
|
-
# Indexes the categories in parallel.
|
|
111
|
-
#
|
|
112
|
-
# Only use where the category does not have a non-#each source defined.
|
|
113
|
-
#
|
|
114
|
-
def index_parallel
|
|
115
|
-
indexer = Indexers::Parallel.new self
|
|
116
|
-
categories.first.prepare_index_directory # TODO Unnice.
|
|
117
|
-
indexer.index
|
|
118
|
-
end
|
|
119
|
-
|
|
120
|
-
# Indexing.
|
|
121
|
-
#
|
|
122
|
-
# Note: If it is an each source we do not take a snapshot.
|
|
123
|
-
#
|
|
124
|
-
def take_snapshot
|
|
125
|
-
source.take_snapshot self unless source.respond_to? :each
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
#
|
|
129
|
-
#
|
|
130
|
-
def to_s
|
|
131
|
-
<<-INDEX
|
|
132
|
-
Indexing(#{name}):
|
|
133
|
-
#{"source: #{source}".indented_to_s}
|
|
134
|
-
#{"Categories:\n#{categories.indented_to_s}".indented_to_s}
|
|
135
|
-
INDEX
|
|
136
|
-
end
|
|
137
|
-
|
|
138
|
-
end
|
|
139
|
-
|
|
140
|
-
end
|
|
141
|
-
|
|
142
|
-
end
|
|
@@ -1,27 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
module Indexing
|
|
3
|
-
module Wrappers
|
|
4
|
-
module Category
|
|
5
|
-
|
|
6
|
-
module Location
|
|
7
|
-
|
|
8
|
-
def self.install_on category, grid, precision = 1
|
|
9
|
-
new_source = Sources::Wrappers::Location.new category.source, grid, precision
|
|
10
|
-
|
|
11
|
-
category.class_eval do
|
|
12
|
-
def tokenizer
|
|
13
|
-
@tokenizer ||= Internals::Tokenizers::Index.new
|
|
14
|
-
end
|
|
15
|
-
define_method :source do
|
|
16
|
-
new_source
|
|
17
|
-
end
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
end
|
|
25
|
-
end
|
|
26
|
-
end
|
|
27
|
-
end
|
|
@@ -1,88 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Query
|
|
4
|
-
|
|
5
|
-
# An allocation has a number of combinations:
|
|
6
|
-
# [token, index] [other_token, other_index], ...
|
|
7
|
-
#
|
|
8
|
-
class Allocation # :nodoc:all
|
|
9
|
-
|
|
10
|
-
attr_reader :count, :ids, :score, :combinations, :result_identifier
|
|
11
|
-
|
|
12
|
-
#
|
|
13
|
-
#
|
|
14
|
-
def initialize combinations, result_identifier
|
|
15
|
-
@combinations = combinations
|
|
16
|
-
@result_identifier = result_identifier
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
def hash
|
|
20
|
-
@combinations.hash
|
|
21
|
-
end
|
|
22
|
-
def eql? other_allocation
|
|
23
|
-
true # FIXME
|
|
24
|
-
# @combinations.eql? other_allocation.combinations
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
# Scores its combinations and caches the result.
|
|
28
|
-
#
|
|
29
|
-
def calculate_score weights
|
|
30
|
-
@score ||= @combinations.calculate_score(weights)
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# Asks the combinations for the (intersected) ids.
|
|
34
|
-
#
|
|
35
|
-
def calculate_ids amount, offset
|
|
36
|
-
@combinations.ids amount, offset # Calculate as many ids as are necessary.
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# This starts the searching process.
|
|
40
|
-
#
|
|
41
|
-
def process! amount, offset
|
|
42
|
-
ids = calculate_ids amount, offset
|
|
43
|
-
@count = ids.size # cache the count before throwing away the ids
|
|
44
|
-
@ids = ids.slice!(offset, amount) || [] # slice out the relevant part
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
#
|
|
48
|
-
#
|
|
49
|
-
def keep identifiers = [] # categories
|
|
50
|
-
@combinations.keep identifiers
|
|
51
|
-
end
|
|
52
|
-
#
|
|
53
|
-
#
|
|
54
|
-
def remove identifiers = [] # categories
|
|
55
|
-
@combinations.remove identifiers
|
|
56
|
-
end
|
|
57
|
-
|
|
58
|
-
# Sort highest score first.
|
|
59
|
-
#
|
|
60
|
-
def <=> other_allocation
|
|
61
|
-
other_allocation.score <=> self.score
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# Transform the allocation into result form.
|
|
65
|
-
#
|
|
66
|
-
def to_result
|
|
67
|
-
[self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# Json representation of this allocation.
|
|
71
|
-
#
|
|
72
|
-
# Note: Delegates to to_result.
|
|
73
|
-
#
|
|
74
|
-
def to_json
|
|
75
|
-
to_result.to_json
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
#
|
|
79
|
-
#
|
|
80
|
-
def to_s
|
|
81
|
-
"Allocation: #{to_result.join(', ')}"
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
end
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Query
|
|
4
|
-
# Container class for allocations.
|
|
5
|
-
#
|
|
6
|
-
class Allocations # :nodoc:all

  delegate :each, :inject, :empty?, :size, :to => :@allocations
  attr_reader :total

  # Wraps the given array of allocations.
  #
  # Note: The array is stored as is, it is not copied.
  #
  def initialize allocations = []
    @allocations = allocations
  end

  # Score each allocation.
  #
  # The given weights are handed to each allocation's calculate_score.
  #
  def calculate_score weights
    @allocations.each do |allocation|
      allocation.calculate_score weights
    end
  end

  # Sort the allocations in place.
  #
  # Note: No block is given, so the allocations' own <=> decides the order.
  #
  def sort!
    @allocations.sort!
  end

  # Reduces the amount of allocations to x.
  #
  # Only the first amount allocations are kept.
  #
  # Note: Array#shift is destructive, so the array held before this call
  #       is left with the allocations that were cut off.
  #
  def reduce_to amount
    @allocations = @allocations.shift amount
  end

  # Keeps combinations.
  #
  # Only those passed in remain.
  #
  # Note: Does nothing if no identifiers are given.
  #
  def keep identifiers = []
    @allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
  end

  # Removes combinations.
  #
  # Only those passed in are removed.
  #
  # Note: Does nothing if no identifiers are given.
  #
  def remove identifiers = []
    @allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
  end

  # Returns the top amount ids.
  #
  # Ids are collected allocation by allocation, in order, until at
  # least amount ids are available.
  #
  # Note: The result is always cut to amount ids, also if the last
  #       allocation visited pushed the collected ids over the amount.
  #
  def ids amount = 20
    collected = []
    @allocations.each do |allocation|
      break if collected.size >= amount # enough ids, skip the rest
      collected.concat allocation.ids
    end
    collected.first amount
  end

  # This is the main method of this class that will replace ids and count.
  #
  # What it does is calculate the ids and counts of its allocations
  # for being used in the results. It also calculates the total,
  # which is the sum of the counts of all allocations.
  #
  # Parameters:
  #  * amount: the amount of ids to calculate
  #  * offset: the offset from where in the result set to take the ids
  #
  # Note: With an amount of 0, an offset > 0 doesn't make much
  #       sense, as seen in the live search.
  #
  # Note: Each allocation caches its count, but not its ids (thrown away).
  #       The ids are cached in this class.
  #
  # Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
  #
  def process! amount, offset = 0
    @total = 0
    @allocations.each do |allocation|
      ids = allocation.process! amount, offset
      @total = @total + allocation.count # the total mixed in
      if ids.empty?
        # This allocation was skipped over, so use up its share of the offset.
        offset = offset - allocation.count unless offset.zero?
      else
        amount = amount - ids.size # we need less results from the following allocation
        offset = 0                 # we have already passed the offset
      end
    end
  end

  # Removes duplicate allocations in place.
  #
  # Note: Returns what Array#uniq! returns, which is nil if nothing was removed.
  #
  def uniq
    @allocations.uniq!
  end

  # Returns the internal array of allocations (not a copy).
  #
  def to_a
    @allocations
  end

  # Simply inspects the internal allocations.
  #
  def to_s
    @allocations.inspect
  end

  # Allocations for results are in the form:
  # [
  #   allocation1.to_result,
  #   allocation2.to_result
  #   ...
  # ]
  #
  # Note: Allocations whose to_result is nil are left out.
  #
  def to_result
    @allocations.map(&:to_result).compact
  end

end
|
|
115
|
-
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
end
|
|
@@ -1,80 +0,0 @@
|
|
|
1
|
-
module Internals
|
|
2
|
-
|
|
3
|
-
module Query
|
|
4
|
-
|
|
5
|
-
# Describes the combination of a token (the text) and
|
|
6
|
-
# the index (the bundle): [text, index_bundle]
|
|
7
|
-
#
|
|
8
|
-
# A combination is a single part of an allocation:
|
|
9
|
-
# [..., [text2, index_bundle2], ...]
|
|
10
|
-
#
|
|
11
|
-
# An allocation consists of a number of combinations:
|
|
12
|
-
# [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
|
|
13
|
-
#
|
|
14
|
-
class Combination # :nodoc:all

  attr_reader :token, :bundle, :category_name

  # Builds a combination from a token and a category.
  #
  # The category provides its name and, through bundle_for, the
  # bundle that is used for the token.
  #
  def initialize(token, category)
    @token, @category_name = token, category.name
    @bundle = category.bundle_for(token)
    @text   = token.text # don't want to use reset_similar already
  end

  # Hash over the token's string form and the bundle.
  #
  # Note: Required for uniq!
  #
  def hash
    key = [@token.to_s, @bundle]
    key.hash
  end

  # The weight the bundle has for this combination's text.
  #
  # Note: Memoized, the bundle is asked again only while the
  #       remembered value is nil or false.
  #
  def weight
    @weight = @bundle.weight(@text) unless @weight
    @weight
  end

  # The ids the bundle has for this combination's text.
  #
  # Note: Memoized, the bundle is asked again only while the
  #       remembered value is nil or false.
  #
  def ids
    @ids = @bundle.ids(@text) unless @ids
    @ids
  end

  # The identifier for this combination, in the form
  #   "bundle identifier:token identifier"
  #
  def identifier
    bundle_part = bundle.identifier
    token_part  = @token.identifier
    "#{bundle_part}:#{token_part}"
  end

  # Do the given identifiers include this combination's identifier?
  #
  def in?(identifiers)
    own = identifier
    identifiers.include?(own)
  end

  # The category name, followed by the parts of the token's result:
  #   [:title, 'Flarbl', :flarbl]
  #
  def to_result
    token_parts = @token.to_result
    [@category_name, *token_parts]
  end

  # Example:
  #  "exact title:Peter*:peter"
  #
  def to_s
    joined = to_result.join(':')
    "#{bundle.identifier} #{joined}"
  end

end
|
|
77
|
-
|
|
78
|
-
end
|
|
79
|
-
|
|
80
|
-
end
|