picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -1,85 +0,0 @@
|
|
|
1
|
-
module Indexed
|
|
2
|
-
|
|
3
|
-
# An index category holds a exact and a partial index for a given category.
|
|
4
|
-
#
|
|
5
|
-
# For example an index category for names holds a exact and
|
|
6
|
-
# a partial index bundle for names.
|
|
7
|
-
#
|
|
8
|
-
class Category
|
|
9
|
-
|
|
10
|
-
attr_accessor :exact
|
|
11
|
-
attr_reader :identifier, :name
|
|
12
|
-
attr_writer :partial
|
|
13
|
-
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
def initialize name, index, options = {}
|
|
17
|
-
@name = name
|
|
18
|
-
|
|
19
|
-
configuration = Configuration::Index.new index, self
|
|
20
|
-
|
|
21
|
-
@identifier = configuration.identifier
|
|
22
|
-
|
|
23
|
-
# TODO Push the defaults out into the index.
|
|
24
|
-
#
|
|
25
|
-
@partial_strategy = options[:partial] || Cacher::Partial::Default
|
|
26
|
-
similarity = options[:similarity] || Cacher::Similarity::Default
|
|
27
|
-
|
|
28
|
-
@exact = options[:exact_index_bundle] || Bundle.new(:exact, configuration, similarity)
|
|
29
|
-
@partial = options[:partial_index_bundle] || Bundle.new(:partial, configuration, similarity)
|
|
30
|
-
|
|
31
|
-
@exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
|
32
|
-
@partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
|
33
|
-
|
|
34
|
-
# TODO Extract?
|
|
35
|
-
#
|
|
36
|
-
Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
# TODO Move to Index.
|
|
40
|
-
#
|
|
41
|
-
def generate_qualifiers_from options
|
|
42
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Loads the index from cache.
|
|
46
|
-
#
|
|
47
|
-
def load_from_cache
|
|
48
|
-
timed_exclaim "Loading index #{identifier}."
|
|
49
|
-
exact.load
|
|
50
|
-
partial.load
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# Gets the weight for this token's text.
|
|
54
|
-
#
|
|
55
|
-
def weight token
|
|
56
|
-
bundle_for(token).weight token.text
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Gets the ids for this token's text.
|
|
60
|
-
#
|
|
61
|
-
def ids token
|
|
62
|
-
bundle_for(token).ids token.text
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# Returns the right index bundle for this token.
|
|
66
|
-
#
|
|
67
|
-
def bundle_for token
|
|
68
|
-
token.partial?? partial : exact
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# The partial strategy defines whether to really use the partial index.
|
|
72
|
-
#
|
|
73
|
-
def partial
|
|
74
|
-
@partial_strategy.use_exact_for_partial?? @exact : @partial
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
#
|
|
78
|
-
#
|
|
79
|
-
def combination_for token
|
|
80
|
-
weight(token) && ::Query::Combination.new(token, self)
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
end
|
data/lib/picky/indexed/index.rb
DELETED
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
module Indexed
|
|
2
|
-
|
|
3
|
-
#
|
|
4
|
-
#
|
|
5
|
-
class Index
|
|
6
|
-
|
|
7
|
-
attr_reader :name, :result_identifier, :combinator, :categories
|
|
8
|
-
|
|
9
|
-
delegate :load_from_cache,
|
|
10
|
-
:to => :categories
|
|
11
|
-
|
|
12
|
-
def initialize name, options = {}
|
|
13
|
-
@name = name
|
|
14
|
-
|
|
15
|
-
@result_identifier = options[:result_identifier] || name
|
|
16
|
-
ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
|
|
17
|
-
|
|
18
|
-
@categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# TODO Doc.
|
|
22
|
-
#
|
|
23
|
-
def define_category category_name, options = {}
|
|
24
|
-
new_category = Category.new category_name, self, options
|
|
25
|
-
categories << new_category
|
|
26
|
-
new_category
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
# Return the possible combinations for this token.
|
|
30
|
-
#
|
|
31
|
-
# A combination is a tuple <token, index_bundle>.
|
|
32
|
-
#
|
|
33
|
-
def possible_combinations token
|
|
34
|
-
categories.possible_combinations_for token
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
end
|
|
@@ -1,61 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Indexed
|
|
4
|
-
|
|
5
|
-
# TODO Spec
|
|
6
|
-
#
|
|
7
|
-
module Wrappers
|
|
8
|
-
|
|
9
|
-
# This index combines an exact and partial index.
|
|
10
|
-
# It serves to order the results such that exact hits are found first.
|
|
11
|
-
#
|
|
12
|
-
# TODO Need to use the right subtokens. Bake in?
|
|
13
|
-
#
|
|
14
|
-
class ExactFirst < Bundle
|
|
15
|
-
|
|
16
|
-
delegate :similar,
|
|
17
|
-
:identifier,
|
|
18
|
-
:name,
|
|
19
|
-
:to => :@exact
|
|
20
|
-
delegate :index,
|
|
21
|
-
:category,
|
|
22
|
-
:weight,
|
|
23
|
-
:generate_partial_from,
|
|
24
|
-
:generate_caches_from_memory,
|
|
25
|
-
:generate_derived,
|
|
26
|
-
:dump,
|
|
27
|
-
:load,
|
|
28
|
-
:to => :@partial
|
|
29
|
-
|
|
30
|
-
def initialize category
|
|
31
|
-
@exact = category.exact
|
|
32
|
-
@partial = category.partial
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
def self.wrap index_or_category
|
|
36
|
-
if index_or_category.respond_to? :categories
|
|
37
|
-
wrap_each_of index_or_category.categories
|
|
38
|
-
index_or_category
|
|
39
|
-
else
|
|
40
|
-
new index_or_category
|
|
41
|
-
end
|
|
42
|
-
end
|
|
43
|
-
# TODO Do not extract categories!
|
|
44
|
-
#
|
|
45
|
-
def self.wrap_each_of categories
|
|
46
|
-
categories.categories.collect! { |category| new(category) }
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
def ids text
|
|
50
|
-
@exact.ids(text) + @partial.ids(text)
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
def weight text
|
|
54
|
-
[@exact.weight(text) || 0, @partial.weight(text) || 0].max
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
end
|
|
@@ -1,213 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Indexing # :nodoc:all
|
|
4
|
-
|
|
5
|
-
# This is the indexing bundle.
|
|
6
|
-
# It does all menial tasks that have nothing to do
|
|
7
|
-
# with the actual index running etc.
|
|
8
|
-
#
|
|
9
|
-
class Bundle < Index::Bundle
|
|
10
|
-
|
|
11
|
-
attr_accessor :partial_strategy, :weights_strategy
|
|
12
|
-
attr_reader :files
|
|
13
|
-
|
|
14
|
-
# Path is in which directory the cache is located.
|
|
15
|
-
#
|
|
16
|
-
def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
|
|
17
|
-
super name, configuration, similarity_strategy
|
|
18
|
-
|
|
19
|
-
@partial_strategy = partial_strategy
|
|
20
|
-
@weights_strategy = weights_strategy
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Generation
|
|
24
|
-
#
|
|
25
|
-
|
|
26
|
-
# This method
|
|
27
|
-
# * loads the base index from the db
|
|
28
|
-
# * generates derived indexes
|
|
29
|
-
# * dumps all the indexes into files
|
|
30
|
-
#
|
|
31
|
-
def generate_caches_from_source
|
|
32
|
-
load_from_index_file
|
|
33
|
-
generate_caches_from_memory
|
|
34
|
-
end
|
|
35
|
-
# Generates derived indexes from the index and dumps.
|
|
36
|
-
#
|
|
37
|
-
# Note: assumes that there is something in the index
|
|
38
|
-
#
|
|
39
|
-
def generate_caches_from_memory
|
|
40
|
-
cache_from_memory_generation_message
|
|
41
|
-
generate_derived
|
|
42
|
-
end
|
|
43
|
-
def cache_from_memory_generation_message
|
|
44
|
-
timed_exclaim "CACHE FROM MEMORY #{identifier}."
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Generates the weights and similarity from the main index.
|
|
48
|
-
#
|
|
49
|
-
def generate_derived
|
|
50
|
-
generate_weights
|
|
51
|
-
generate_similarity
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Load the data from the db.
|
|
55
|
-
#
|
|
56
|
-
def load_from_index_file
|
|
57
|
-
load_from_index_generation_message
|
|
58
|
-
clear
|
|
59
|
-
retrieve
|
|
60
|
-
end
|
|
61
|
-
def load_from_index_generation_message
|
|
62
|
-
timed_exclaim "LOAD INDEX #{identifier}."
|
|
63
|
-
end
|
|
64
|
-
# Retrieves the prepared index data into the index.
|
|
65
|
-
#
|
|
66
|
-
# This is in preparation for generating
|
|
67
|
-
# derived indexes (like weights, similarity)
|
|
68
|
-
# and later dumping the optimized index.
|
|
69
|
-
#
|
|
70
|
-
def retrieve
|
|
71
|
-
key_format = self[:key_format] || :to_i
|
|
72
|
-
files.retrieve do |id, token|
|
|
73
|
-
initialize_index_for token
|
|
74
|
-
index[token] << id.send(key_format)
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
# Sets up a piece of the index for the given token.
|
|
78
|
-
#
|
|
79
|
-
def initialize_index_for token
|
|
80
|
-
index[token] ||= []
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
# Generators.
|
|
84
|
-
#
|
|
85
|
-
# TODO Move somewhere more fitting.
|
|
86
|
-
#
|
|
87
|
-
|
|
88
|
-
# Generates a new index (writes its index) using the
|
|
89
|
-
# partial caching strategy of this bundle.
|
|
90
|
-
#
|
|
91
|
-
def generate_partial
|
|
92
|
-
generator = Cacher::PartialGenerator.new self.index
|
|
93
|
-
self.index = generator.generate self.partial_strategy
|
|
94
|
-
end
|
|
95
|
-
# Generate a partial index from the given exact index.
|
|
96
|
-
#
|
|
97
|
-
def generate_partial_from exact_index
|
|
98
|
-
timed_exclaim "PARTIAL GENERATE #{identifier}."
|
|
99
|
-
self.index = exact_index
|
|
100
|
-
self.generate_partial
|
|
101
|
-
self
|
|
102
|
-
end
|
|
103
|
-
# Generates a new similarity index (writes its index) using the
|
|
104
|
-
# given similarity caching strategy.
|
|
105
|
-
#
|
|
106
|
-
def generate_similarity
|
|
107
|
-
generator = Cacher::SimilarityGenerator.new self.index
|
|
108
|
-
self.similarity = generator.generate self.similarity_strategy
|
|
109
|
-
end
|
|
110
|
-
# Generates a new weights index (writes its index) using the
|
|
111
|
-
# given weight caching strategy.
|
|
112
|
-
#
|
|
113
|
-
def generate_weights
|
|
114
|
-
generator = Cacher::WeightsGenerator.new self.index
|
|
115
|
-
self.weights = generator.generate self.weights_strategy
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
# Saves the indexes in a dump file.
|
|
119
|
-
#
|
|
120
|
-
def dump
|
|
121
|
-
dump_index
|
|
122
|
-
dump_similarity
|
|
123
|
-
dump_weights
|
|
124
|
-
dump_configuration
|
|
125
|
-
end
|
|
126
|
-
# Dumps the core index.
|
|
127
|
-
#
|
|
128
|
-
def dump_index
|
|
129
|
-
timed_exclaim "DUMP INDEX #{identifier}."
|
|
130
|
-
files.dump_index index
|
|
131
|
-
end
|
|
132
|
-
# Dumps the weights index.
|
|
133
|
-
#
|
|
134
|
-
def dump_weights
|
|
135
|
-
timed_exclaim "DUMP WEIGHTS #{identifier}."
|
|
136
|
-
files.dump_weights weights
|
|
137
|
-
end
|
|
138
|
-
# Dumps the similarity index.
|
|
139
|
-
#
|
|
140
|
-
def dump_similarity
|
|
141
|
-
timed_exclaim "DUMP SIMILARITY #{identifier}."
|
|
142
|
-
files.dump_similarity similarity
|
|
143
|
-
end
|
|
144
|
-
# Dumps the similarity index.
|
|
145
|
-
#
|
|
146
|
-
def dump_configuration
|
|
147
|
-
timed_exclaim "DUMP CONFIGURATION #{identifier}."
|
|
148
|
-
files.dump_configuration configuration
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
# Alerts the user if an index is missing.
|
|
152
|
-
#
|
|
153
|
-
def raise_unless_cache_exists
|
|
154
|
-
raise_unless_index_exists
|
|
155
|
-
raise_unless_similarity_exists
|
|
156
|
-
end
|
|
157
|
-
# Alerts the user if one of the necessary indexes
|
|
158
|
-
# (core, weights) is missing.
|
|
159
|
-
#
|
|
160
|
-
def raise_unless_index_exists
|
|
161
|
-
if partial_strategy.saved?
|
|
162
|
-
warn_if_index_small
|
|
163
|
-
raise_unless_index_ok
|
|
164
|
-
end
|
|
165
|
-
end
|
|
166
|
-
# Alerts the user if the similarity
|
|
167
|
-
# index is missing (given that it's used).
|
|
168
|
-
#
|
|
169
|
-
def raise_unless_similarity_exists
|
|
170
|
-
if similarity_strategy.saved?
|
|
171
|
-
warn_if_similarity_small
|
|
172
|
-
raise_unless_similarity_ok
|
|
173
|
-
end
|
|
174
|
-
end
|
|
175
|
-
# Warns the user if the similarity index is small.
|
|
176
|
-
#
|
|
177
|
-
def warn_if_similarity_small
|
|
178
|
-
warn_cache_small :similarity if files.similarity_cache_small?
|
|
179
|
-
end
|
|
180
|
-
# Alerts the user if the similarity index is not there.
|
|
181
|
-
#
|
|
182
|
-
def raise_unless_similarity_ok
|
|
183
|
-
raise_cache_missing :similarity unless files.similarity_cache_ok?
|
|
184
|
-
end
|
|
185
|
-
|
|
186
|
-
# TODO Spec on down.
|
|
187
|
-
#
|
|
188
|
-
|
|
189
|
-
# Warns the user if the core or weights indexes are small.
|
|
190
|
-
#
|
|
191
|
-
def warn_if_index_small
|
|
192
|
-
warn_cache_small :index if files.index_cache_small?
|
|
193
|
-
warn_cache_small :weights if files.weights_cache_small?
|
|
194
|
-
end
|
|
195
|
-
# Alerts the user if the core or weights indexes are not there.
|
|
196
|
-
#
|
|
197
|
-
def raise_unless_index_ok
|
|
198
|
-
raise_cache_missing :index unless files.index_cache_ok?
|
|
199
|
-
raise_cache_missing :weights unless files.weights_cache_ok?
|
|
200
|
-
end
|
|
201
|
-
# Outputs a warning for the given cache.
|
|
202
|
-
#
|
|
203
|
-
def warn_cache_small what
|
|
204
|
-
puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
|
205
|
-
end
|
|
206
|
-
# Raises an appropriate error message for the given cache.
|
|
207
|
-
#
|
|
208
|
-
def raise_cache_missing what
|
|
209
|
-
raise "#{what} cache for #{identifier} missing."
|
|
210
|
-
end
|
|
211
|
-
|
|
212
|
-
end
|
|
213
|
-
end
|
|
@@ -1,38 +0,0 @@
|
|
|
1
|
-
module Indexing
|
|
2
|
-
|
|
3
|
-
class Categories
|
|
4
|
-
|
|
5
|
-
attr_reader :categories
|
|
6
|
-
|
|
7
|
-
each_delegate :index,
|
|
8
|
-
:cache,
|
|
9
|
-
:generate_caches,
|
|
10
|
-
:backup_caches,
|
|
11
|
-
:restore_caches,
|
|
12
|
-
:check_caches,
|
|
13
|
-
:clear_caches,
|
|
14
|
-
:create_directory_structure,
|
|
15
|
-
:to => :categories
|
|
16
|
-
|
|
17
|
-
def initialize
|
|
18
|
-
@categories = []
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def << category
|
|
22
|
-
categories << category
|
|
23
|
-
end
|
|
24
|
-
|
|
25
|
-
def find category_name
|
|
26
|
-
category_name = category_name.to_sym
|
|
27
|
-
|
|
28
|
-
categories.each do |category|
|
|
29
|
-
next unless category.name == category_name
|
|
30
|
-
return category
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
raise %Q{Index category "#{category_name}" not found. Possible categories: "#{categories.map(&:name).join('", "')}".}
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
end
|
|
@@ -1,117 +0,0 @@
|
|
|
1
|
-
module Indexing
|
|
2
|
-
|
|
3
|
-
class Category
|
|
4
|
-
|
|
5
|
-
attr_reader :exact, :partial, :name, :configuration, :indexer
|
|
6
|
-
|
|
7
|
-
# Mandatory params:
|
|
8
|
-
# * name: Category name to use as identifier and file names.
|
|
9
|
-
# * index: Index to which this category is attached to.
|
|
10
|
-
# Options:
|
|
11
|
-
# * partial: Partial::None.new, Partial::Substring.new(from:start_char, to:up_to_char) (defaults from:-3, to:-1)
|
|
12
|
-
# * similarity: Similarity::None.new (default), Similarity::Phonetic.new(amount_of_similarly_linked_words)
|
|
13
|
-
# * source: Use if the category should use a different source.
|
|
14
|
-
# * from: The source category identifier to take the data from.
|
|
15
|
-
#
|
|
16
|
-
# Advanced Options (TODO):
|
|
17
|
-
#
|
|
18
|
-
# * weights:
|
|
19
|
-
# * tokenizer:
|
|
20
|
-
# * exact_indexing_bundle:
|
|
21
|
-
# * partial_indexing_bundle:
|
|
22
|
-
#
|
|
23
|
-
def initialize name, index, options = {}
|
|
24
|
-
@name = name
|
|
25
|
-
@from = options[:from]
|
|
26
|
-
|
|
27
|
-
# Now we have enough info to combine the index and the category.
|
|
28
|
-
#
|
|
29
|
-
@configuration = Configuration::Index.new index, self
|
|
30
|
-
|
|
31
|
-
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
|
32
|
-
@indexer = Indexers::Serial.new configuration, options[:source], @tokenizer
|
|
33
|
-
|
|
34
|
-
# TODO Push into Bundle.
|
|
35
|
-
#
|
|
36
|
-
partial = options[:partial] || Cacher::Partial::Default
|
|
37
|
-
weights = options[:weights] || Cacher::Weights::Default
|
|
38
|
-
similarity = options[:similarity] || Cacher::Similarity::Default
|
|
39
|
-
|
|
40
|
-
@exact = options[:exact_indexing_bundle] || Bundle.new(:exact, configuration, similarity, Cacher::Partial::None.new, weights)
|
|
41
|
-
@partial = options[:partial_indexing_bundle] || Bundle.new(:partial, configuration, Cacher::Similarity::None.new, partial, weights)
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
delegate :identifier, :prepare_index_directory, :to => :configuration
|
|
45
|
-
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
|
46
|
-
|
|
47
|
-
def from
|
|
48
|
-
@from || name
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# TODO Spec.
|
|
52
|
-
#
|
|
53
|
-
def backup_caches
|
|
54
|
-
timed_exclaim "Backing up #{identifier}."
|
|
55
|
-
exact.backup
|
|
56
|
-
partial.backup
|
|
57
|
-
end
|
|
58
|
-
def restore_caches
|
|
59
|
-
timed_exclaim "Restoring #{identifier}."
|
|
60
|
-
exact.restore
|
|
61
|
-
partial.restore
|
|
62
|
-
end
|
|
63
|
-
def check_caches
|
|
64
|
-
timed_exclaim "Checking #{identifier}."
|
|
65
|
-
exact.raise_unless_cache_exists
|
|
66
|
-
partial.raise_unless_cache_exists
|
|
67
|
-
end
|
|
68
|
-
def clear_caches
|
|
69
|
-
timed_exclaim "Deleting #{identifier}."
|
|
70
|
-
exact.delete
|
|
71
|
-
partial.delete
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
def index
|
|
75
|
-
prepare_index_directory
|
|
76
|
-
indexer.index
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
# Generates all caches for this category.
|
|
80
|
-
#
|
|
81
|
-
def cache
|
|
82
|
-
prepare_index_directory
|
|
83
|
-
configure
|
|
84
|
-
generate_caches
|
|
85
|
-
end
|
|
86
|
-
# We need to set what formatting method should be used.
|
|
87
|
-
# Uses the one defined in the indexer.
|
|
88
|
-
#
|
|
89
|
-
def configure
|
|
90
|
-
key_format = indexer.key_format
|
|
91
|
-
exact[:key_format] = key_format
|
|
92
|
-
partial[:key_format] = key_format
|
|
93
|
-
end
|
|
94
|
-
def generate_caches
|
|
95
|
-
generate_caches_from_source
|
|
96
|
-
generate_partial
|
|
97
|
-
generate_caches_from_memory
|
|
98
|
-
dump_caches
|
|
99
|
-
timed_exclaim "CACHE FINISHED #{identifier}."
|
|
100
|
-
end
|
|
101
|
-
def generate_caches_from_source
|
|
102
|
-
exact.generate_caches_from_source
|
|
103
|
-
end
|
|
104
|
-
def generate_partial
|
|
105
|
-
partial.generate_partial_from exact.index
|
|
106
|
-
end
|
|
107
|
-
def generate_caches_from_memory
|
|
108
|
-
partial.generate_caches_from_memory
|
|
109
|
-
end
|
|
110
|
-
def dump_caches
|
|
111
|
-
exact.dump
|
|
112
|
-
partial.dump
|
|
113
|
-
end
|
|
114
|
-
|
|
115
|
-
end
|
|
116
|
-
|
|
117
|
-
end
|
data/lib/picky/indexing/index.rb
DELETED
|
@@ -1,55 +0,0 @@
|
|
|
1
|
-
module Indexing
|
|
2
|
-
|
|
3
|
-
class Index
|
|
4
|
-
|
|
5
|
-
attr_reader :name, :source, :categories, :after_indexing
|
|
6
|
-
|
|
7
|
-
# Delegators for indexing.
|
|
8
|
-
#
|
|
9
|
-
delegate :connect_backend,
|
|
10
|
-
:to => :source
|
|
11
|
-
|
|
12
|
-
delegate :index,
|
|
13
|
-
:cache,
|
|
14
|
-
:generate_caches,
|
|
15
|
-
:backup_caches,
|
|
16
|
-
:restore_caches,
|
|
17
|
-
:check_caches,
|
|
18
|
-
:clear_caches,
|
|
19
|
-
:create_directory_structure,
|
|
20
|
-
:to => :categories
|
|
21
|
-
|
|
22
|
-
def initialize name, source, options = {}
|
|
23
|
-
@name = name
|
|
24
|
-
@source = source
|
|
25
|
-
|
|
26
|
-
@after_indexing = options[:after_indexing]
|
|
27
|
-
|
|
28
|
-
@categories = Categories.new
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# TODO Spec. Doc.
|
|
32
|
-
#
|
|
33
|
-
def define_category category_name, options = {}
|
|
34
|
-
options = default_category_options.merge options
|
|
35
|
-
|
|
36
|
-
new_category = Category.new category_name, self, options
|
|
37
|
-
categories << new_category
|
|
38
|
-
new_category
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
# By default, the category uses the index's source.
|
|
42
|
-
#
|
|
43
|
-
def default_category_options
|
|
44
|
-
{ :source => @source }
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Indexing.
|
|
48
|
-
#
|
|
49
|
-
def take_snapshot
|
|
50
|
-
source.take_snapshot self
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
end
|
|
@@ -1,82 +0,0 @@
|
|
|
1
|
-
module Query
|
|
2
|
-
# An allocation has a number of combinations:
|
|
3
|
-
# [token, index] [other_token, other_index], ...
|
|
4
|
-
#
|
|
5
|
-
class Allocation # :nodoc:all
|
|
6
|
-
|
|
7
|
-
attr_reader :count, :ids, :score, :combinations, :result_identifier
|
|
8
|
-
|
|
9
|
-
#
|
|
10
|
-
#
|
|
11
|
-
def initialize combinations, result_identifier
|
|
12
|
-
@combinations = combinations
|
|
13
|
-
@result_identifier = result_identifier
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
def hash
|
|
17
|
-
@combinations.hash
|
|
18
|
-
end
|
|
19
|
-
def eql? other_allocation
|
|
20
|
-
true # FIXME
|
|
21
|
-
# @combinations.eql? other_allocation.combinations
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
# Scores its combinations and caches the result.
|
|
25
|
-
#
|
|
26
|
-
def calculate_score weights
|
|
27
|
-
@score ||= @combinations.calculate_score(weights)
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
# Asks the combinations for the (intersected) ids.
|
|
31
|
-
#
|
|
32
|
-
def calculate_ids
|
|
33
|
-
@combinations.ids
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# This starts the searching process.
|
|
37
|
-
#
|
|
38
|
-
def process! amount, offset
|
|
39
|
-
ids = calculate_ids
|
|
40
|
-
@count = ids.size # cache the count before throwing away the ids
|
|
41
|
-
@ids = ids.slice!(offset, amount) || [] # slice out the relevant part
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
#
|
|
45
|
-
#
|
|
46
|
-
def keep identifiers = [] # categories
|
|
47
|
-
@combinations.keep identifiers
|
|
48
|
-
end
|
|
49
|
-
#
|
|
50
|
-
#
|
|
51
|
-
def remove identifiers = [] # categories
|
|
52
|
-
@combinations.remove identifiers
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Sort highest score first.
|
|
56
|
-
#
|
|
57
|
-
def <=> other_allocation
|
|
58
|
-
other_allocation.score <=> self.score
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Transform the allocation into result form.
|
|
62
|
-
#
|
|
63
|
-
def to_result
|
|
64
|
-
[self.result_identifier, self.score, self.count, @combinations.to_result, self.ids] if self.count > 0
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
# Json representation of this allocation.
|
|
68
|
-
#
|
|
69
|
-
# Note: Delegates to to_result.
|
|
70
|
-
#
|
|
71
|
-
def to_json
|
|
72
|
-
to_result.to_json
|
|
73
|
-
end
|
|
74
|
-
|
|
75
|
-
#
|
|
76
|
-
#
|
|
77
|
-
def to_s
|
|
78
|
-
"Allocation: #{to_result.join(', ')}"
|
|
79
|
-
end
|
|
80
|
-
|
|
81
|
-
end
|
|
82
|
-
end
|