picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -0,0 +1,72 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Indexed # :nodoc:all
|
|
4
|
+
|
|
5
|
+
# TODO Rewrite.
|
|
6
|
+
#
|
|
7
|
+
# A Bundle is a number of indexes
|
|
8
|
+
# per [index, category] combination.
|
|
9
|
+
#
|
|
10
|
+
# At most, there are three indexes:
|
|
11
|
+
# * *core* index (always used)
|
|
12
|
+
# * *weights* index (always used)
|
|
13
|
+
# * *similarity* index (used with similarity)
|
|
14
|
+
#
|
|
15
|
+
# In Picky, indexing is separated from the index
|
|
16
|
+
# handling itself through a parallel structure.
|
|
17
|
+
#
|
|
18
|
+
# Both use methods provided by this base class, but
|
|
19
|
+
# have very different goals:
|
|
20
|
+
#
|
|
21
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
|
22
|
+
# and providing helper functions to e.g. check the indexes.
|
|
23
|
+
#
|
|
24
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
|
25
|
+
# memory and looking up search data as fast as possible.
|
|
26
|
+
#
|
|
27
|
+
module Bundle
|
|
28
|
+
|
|
29
|
+
class Base
|
|
30
|
+
|
|
31
|
+
attr_reader :identifier, :configuration
|
|
32
|
+
attr_accessor :similarity_strategy
|
|
33
|
+
|
|
34
|
+
delegate :[], :to => :configuration
|
|
35
|
+
|
|
36
|
+
def initialize name, configuration, similarity_strategy
|
|
37
|
+
@identifier = "#{configuration.identifier}:#{name}"
|
|
38
|
+
|
|
39
|
+
@index = {}
|
|
40
|
+
@weights = {}
|
|
41
|
+
@similarity = {}
|
|
42
|
+
|
|
43
|
+
@similarity_strategy = similarity_strategy
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Get a list of similar texts.
|
|
47
|
+
#
|
|
48
|
+
# Note: Does not return itself.
|
|
49
|
+
#
|
|
50
|
+
def similar text
|
|
51
|
+
code = similarity_strategy.encoded text
|
|
52
|
+
similar_codes = code && @similarity[code]
|
|
53
|
+
similar_codes.delete text if similar_codes
|
|
54
|
+
similar_codes || []
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
# Loads all indexes.
|
|
58
|
+
#
|
|
59
|
+
def load
|
|
60
|
+
load_index
|
|
61
|
+
load_weights
|
|
62
|
+
load_similarity
|
|
63
|
+
load_configuration
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
end
|
|
71
|
+
|
|
72
|
+
end
|
|
@@ -0,0 +1,69 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Indexed # :nodoc:all
|
|
6
|
+
|
|
7
|
+
#
|
|
8
|
+
#
|
|
9
|
+
module Bundle
|
|
10
|
+
|
|
11
|
+
# This is the _actual_ index (based on memory).
|
|
12
|
+
#
|
|
13
|
+
# Handles exact/partial index, weights index, and similarity index.
|
|
14
|
+
#
|
|
15
|
+
# Delegates file handling and checking to an *Index*::*Files* object.
|
|
16
|
+
#
|
|
17
|
+
class Memory < Base
|
|
18
|
+
|
|
19
|
+
attr_accessor :index, :weights, :similarity, :configuration
|
|
20
|
+
|
|
21
|
+
delegate :[], :to => :configuration
|
|
22
|
+
|
|
23
|
+
def initialize name, configuration, *args
|
|
24
|
+
super name, configuration, *args
|
|
25
|
+
|
|
26
|
+
@configuration = {} # A hash with config options.
|
|
27
|
+
|
|
28
|
+
@backend = Internals::Index::Files.new name, configuration
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
# Get the ids for the given symbol.
|
|
32
|
+
#
|
|
33
|
+
def ids sym
|
|
34
|
+
@index[sym] || []
|
|
35
|
+
end
|
|
36
|
+
# Get a weight for the given symbol.
|
|
37
|
+
#
|
|
38
|
+
def weight sym
|
|
39
|
+
@weights[sym]
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# Loads the core index.
|
|
43
|
+
#
|
|
44
|
+
def load_index
|
|
45
|
+
self.index = @backend.load_index
|
|
46
|
+
end
|
|
47
|
+
# Loads the weights index.
|
|
48
|
+
#
|
|
49
|
+
def load_weights
|
|
50
|
+
self.weights = @backend.load_weights
|
|
51
|
+
end
|
|
52
|
+
# Loads the similarity index.
|
|
53
|
+
#
|
|
54
|
+
def load_similarity
|
|
55
|
+
self.similarity = @backend.load_similarity
|
|
56
|
+
end
|
|
57
|
+
# Loads the configuration.
|
|
58
|
+
#
|
|
59
|
+
def load_configuration
|
|
60
|
+
self.configuration = @backend.load_configuration
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
end
|
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Indexed # :nodoc:all
|
|
6
|
+
|
|
7
|
+
#
|
|
8
|
+
#
|
|
9
|
+
module Bundle
|
|
10
|
+
|
|
11
|
+
# This is the _actual_ index (based on Redis).
|
|
12
|
+
#
|
|
13
|
+
# Handles exact/partial index, weights index, and similarity index.
|
|
14
|
+
#
|
|
15
|
+
class Redis < Base
|
|
16
|
+
|
|
17
|
+
def initialize name, configuration, *args
|
|
18
|
+
super name, configuration, *args
|
|
19
|
+
|
|
20
|
+
@backend = Internals::Index::Redis.new name, configuration
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
# Get the ids for the given symbol.
|
|
24
|
+
#
|
|
25
|
+
# Ids are an array of string values in Redis.
|
|
26
|
+
#
|
|
27
|
+
def ids sym
|
|
28
|
+
@backend.ids sym
|
|
29
|
+
end
|
|
30
|
+
# Get a weight for the given symbol.
|
|
31
|
+
#
|
|
32
|
+
# A weight is a string value in Redis. TODO Convert?
|
|
33
|
+
#
|
|
34
|
+
def weight sym
|
|
35
|
+
@backend.weight sym
|
|
36
|
+
end
|
|
37
|
+
# TODO Spec. Doc.
|
|
38
|
+
#
|
|
39
|
+
def [] sym
|
|
40
|
+
@backend.setting sym
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
# Loads the core index.
|
|
44
|
+
#
|
|
45
|
+
def load_index
|
|
46
|
+
# TODO check if it is there.
|
|
47
|
+
end
|
|
48
|
+
# Loads the weights index.
|
|
49
|
+
#
|
|
50
|
+
def load_weights
|
|
51
|
+
# TODO check if it is there.
|
|
52
|
+
end
|
|
53
|
+
# Loads the similarity index.
|
|
54
|
+
#
|
|
55
|
+
def load_similarity
|
|
56
|
+
# TODO check if it is there.
|
|
57
|
+
end
|
|
58
|
+
# Loads the configuration.
|
|
59
|
+
#
|
|
60
|
+
def load_configuration
|
|
61
|
+
# TODO check if it is there.
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
end
|
|
@@ -0,0 +1,135 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Indexed
|
|
4
|
+
|
|
5
|
+
class Categories
|
|
6
|
+
|
|
7
|
+
attr_reader :categories, :category_hash, :ignore_unassigned_tokens
|
|
8
|
+
|
|
9
|
+
each_delegate :load_from_cache,
|
|
10
|
+
:to => :categories
|
|
11
|
+
|
|
12
|
+
# A list of indexed categories.
|
|
13
|
+
#
|
|
14
|
+
# Options:
|
|
15
|
+
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
|
16
|
+
# The default behaviour is that if a token does not match to
|
|
17
|
+
# any category, the query will not return anything (since a
|
|
18
|
+
# single token cannot be matched). If you set this option to
|
|
19
|
+
# true, any token that cannot be matched to a category will be
|
|
20
|
+
# simply ignored.
|
|
21
|
+
# Use this if only a few matched words are important, like for
|
|
22
|
+
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
|
23
|
+
# you only want to match the zipcode, to have the search engine
|
|
24
|
+
# display advertisements on the side for the zipcode.
|
|
25
|
+
# Nifty! :)
|
|
26
|
+
#
|
|
27
|
+
def initialize options = {}
|
|
28
|
+
clear
|
|
29
|
+
|
|
30
|
+
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Clears both the array of categories and the hash of categories.
|
|
34
|
+
#
|
|
35
|
+
def clear
|
|
36
|
+
@categories = []
|
|
37
|
+
@category_hash = {}
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
# Add the given category to the list of categories.
|
|
41
|
+
#
|
|
42
|
+
def << category
|
|
43
|
+
categories << category
|
|
44
|
+
# Note: [category] is an optimization, since I need an array
|
|
45
|
+
# of categories.
|
|
46
|
+
# It's faster to just package it in an array on loading
|
|
47
|
+
# Picky than doing it over and over with each query.
|
|
48
|
+
#
|
|
49
|
+
category_hash[category.name] = [category]
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Return all possible combinations for the given token.
|
|
53
|
+
#
|
|
54
|
+
# This checks if it needs to also search through similar
|
|
55
|
+
# tokens, if for example, the token is one with ~.
|
|
56
|
+
# If yes, it puts together all solutions.
|
|
57
|
+
#
|
|
58
|
+
def possible_combinations_for token
|
|
59
|
+
token.similar? ? similar_possible_for(token) : possible_for(token)
|
|
60
|
+
end
|
|
61
|
+
# Gets all similar tokens and puts together the possible combinations
|
|
62
|
+
# for each found similar token.
|
|
63
|
+
#
|
|
64
|
+
def similar_possible_for token
|
|
65
|
+
# Get as many similar tokens as necessary
|
|
66
|
+
#
|
|
67
|
+
tokens = similar_tokens_for token
|
|
68
|
+
# possible combinations
|
|
69
|
+
#
|
|
70
|
+
inject_possible_for tokens
|
|
71
|
+
end
|
|
72
|
+
def similar_tokens_for token
|
|
73
|
+
text = token.text
|
|
74
|
+
categories.inject([]) do |result, category|
|
|
75
|
+
next_token = token
|
|
76
|
+
# Note: We could also break off here if not all the available
|
|
77
|
+
# similars are needed.
|
|
78
|
+
# Wait for a concrete case that needs this before taking
|
|
79
|
+
# action.
|
|
80
|
+
#
|
|
81
|
+
while next_token = next_token.next_similar_token(category)
|
|
82
|
+
result << next_token if next_token && next_token.text != text
|
|
83
|
+
end
|
|
84
|
+
result
|
|
85
|
+
end
|
|
86
|
+
end
|
|
87
|
+
def inject_possible_for tokens
|
|
88
|
+
tokens.inject([]) do |result, token|
|
|
89
|
+
possible = possible_categories token
|
|
90
|
+
result + possible_for(token, possible)
|
|
91
|
+
end
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
# Returns possible Combinations for the token.
|
|
95
|
+
#
|
|
96
|
+
# Note: The preselected_categories param is an optimization.
|
|
97
|
+
#
|
|
98
|
+
# Note: Returns [] if no categories matched (will produce no result).
|
|
99
|
+
# Returns nil if this token needs to be removed from the query.
|
|
100
|
+
# (Also none of the categories matched, but the ignore unassigned
|
|
101
|
+
# tokens option is true)
|
|
102
|
+
#
|
|
103
|
+
def possible_for token, preselected_categories = nil
|
|
104
|
+
possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
|
|
105
|
+
possible.compact!
|
|
106
|
+
# This is an optimization to mark tokens that are ignored.
|
|
107
|
+
#
|
|
108
|
+
return if ignore_unassigned_tokens && possible.empty?
|
|
109
|
+
possible # wrap in combinations
|
|
110
|
+
end
|
|
111
|
+
# This returns the possible categories for this token.
|
|
112
|
+
# If the user has already preselected a category for this token,
|
|
113
|
+
# like "artist:moby", only that category is returned; if not, all categories are returned,
|
|
114
|
+
# since all are possible.
|
|
115
|
+
#
|
|
116
|
+
# Note: Once I thought this was called too often. But it is not (18.01.2011).
|
|
117
|
+
#
|
|
118
|
+
def possible_categories token
|
|
119
|
+
user_defined_categories(token) || categories
|
|
120
|
+
end
|
|
121
|
+
# This returns the array of categories if the user has defined
|
|
122
|
+
# an existing category.
|
|
123
|
+
#
|
|
124
|
+
# Note: Returns nil if the user did not define one
|
|
125
|
+
# or if he/she has defined a non-existing one.
|
|
126
|
+
#
|
|
127
|
+
def user_defined_categories token
|
|
128
|
+
category_hash[token.user_defined_category_name]
|
|
129
|
+
end
|
|
130
|
+
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
end
|
|
@@ -0,0 +1,90 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Indexed
|
|
4
|
+
|
|
5
|
+
# An index category holds an exact and a partial index for a given category.
|
|
6
|
+
#
|
|
7
|
+
# For example, an index category for names holds an exact and
|
|
8
|
+
# a partial index bundle for names.
|
|
9
|
+
#
|
|
10
|
+
class Category
|
|
11
|
+
|
|
12
|
+
attr_accessor :exact
|
|
13
|
+
attr_reader :identifier, :name
|
|
14
|
+
attr_writer :partial
|
|
15
|
+
|
|
16
|
+
#
|
|
17
|
+
#
|
|
18
|
+
def initialize name, index, options = {}
|
|
19
|
+
@name = name
|
|
20
|
+
|
|
21
|
+
configuration = Configuration::Index.new index, self
|
|
22
|
+
|
|
23
|
+
@identifier = configuration.identifier
|
|
24
|
+
|
|
25
|
+
# TODO Push the defaults out into the index.
|
|
26
|
+
#
|
|
27
|
+
@partial_strategy = options[:partial] || Internals::Generators::Partial::Default
|
|
28
|
+
similarity = options[:similarity] || Internals::Generators::Similarity::Default
|
|
29
|
+
|
|
30
|
+
bundle_class = options[:indexed_bundle_class] || Bundle::Memory
|
|
31
|
+
@exact = bundle_class.new :exact, configuration, similarity
|
|
32
|
+
@partial = bundle_class.new :partial, configuration, similarity
|
|
33
|
+
|
|
34
|
+
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
|
35
|
+
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
|
36
|
+
|
|
37
|
+
# TODO Extract?
|
|
38
|
+
#
|
|
39
|
+
Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
# TODO Move to Index.
|
|
43
|
+
#
|
|
44
|
+
def generate_qualifiers_from options
|
|
45
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# Loads the index from cache.
|
|
49
|
+
#
|
|
50
|
+
def load_from_cache
|
|
51
|
+
timed_exclaim "Loading index #{identifier}."
|
|
52
|
+
exact.load
|
|
53
|
+
partial.load
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Gets the weight for this token's text.
|
|
57
|
+
#
|
|
58
|
+
def weight token
|
|
59
|
+
bundle_for(token).weight token.text
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Gets the ids for this token's text.
|
|
63
|
+
#
|
|
64
|
+
def ids token
|
|
65
|
+
bundle_for(token).ids token.text
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
# Returns the right index bundle for this token.
|
|
69
|
+
#
|
|
70
|
+
def bundle_for token
|
|
71
|
+
token.partial?? partial : exact
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
# The partial strategy defines whether to really use the partial index.
|
|
75
|
+
#
|
|
76
|
+
def partial
|
|
77
|
+
@partial_strategy.use_exact_for_partial?? @exact : @partial
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
#
|
|
81
|
+
#
|
|
82
|
+
def combination_for token
|
|
83
|
+
weight(token) && Internals::Query::Combination.new(token, self)
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
end
|
|
87
|
+
|
|
88
|
+
end
|
|
89
|
+
|
|
90
|
+
end
|
|
@@ -0,0 +1,57 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Indexed
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
#
|
|
7
|
+
class Index
|
|
8
|
+
|
|
9
|
+
attr_reader :name, :result_identifier, :combinator, :categories
|
|
10
|
+
|
|
11
|
+
delegate :load_from_cache,
|
|
12
|
+
:to => :categories
|
|
13
|
+
|
|
14
|
+
# TODO Externalize?
|
|
15
|
+
#
|
|
16
|
+
def initialize name, options = {}
|
|
17
|
+
@name = name
|
|
18
|
+
|
|
19
|
+
@result_identifier = options[:result_identifier] || name
|
|
20
|
+
@bundle_class = options[:indexed_bundle_class] # TODO This should actually be a fixed parameter.
|
|
21
|
+
ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
|
|
22
|
+
|
|
23
|
+
@categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# TODO Doc. Externalize?
|
|
27
|
+
#
|
|
28
|
+
def define_category category_name, options = {}
|
|
29
|
+
options = default_category_options.merge options
|
|
30
|
+
|
|
31
|
+
new_category = Category.new category_name, self, options
|
|
32
|
+
categories << new_category
|
|
33
|
+
new_category
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
# By default, the category uses
|
|
37
|
+
# * the index's bundle type.
|
|
38
|
+
#
|
|
39
|
+
def default_category_options
|
|
40
|
+
{
|
|
41
|
+
:indexed_bundle_class => @bundle_class
|
|
42
|
+
}
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Return the possible combinations for this token.
|
|
46
|
+
#
|
|
47
|
+
# A combination is a tuple <token, category>.
|
|
48
|
+
#
|
|
49
|
+
def possible_combinations token
|
|
50
|
+
categories.possible_combinations_for token
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
end
|
|
File without changes
|
|
@@ -13,7 +13,7 @@ module Indexed
|
|
|
13
13
|
precision = options[:precision] || 1
|
|
14
14
|
user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
|
|
15
15
|
|
|
16
|
-
@calculation
|
|
16
|
+
@calculation = Calculations::Location.new user_grid, precision
|
|
17
17
|
end
|
|
18
18
|
|
|
19
19
|
#
|
|
@@ -28,7 +28,9 @@ module Indexed
|
|
|
28
28
|
# Load first the bundle, then extract the config.
|
|
29
29
|
#
|
|
30
30
|
bundle.load
|
|
31
|
-
|
|
31
|
+
# TODO Move the to_f to the backend.
|
|
32
|
+
#
|
|
33
|
+
minimum = bundle[:location_minimum] && bundle[:location_minimum].to_f || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
|
|
32
34
|
@calculation.minimum = minimum
|
|
33
35
|
end
|
|
34
36
|
|
|
@@ -0,0 +1,65 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
# encoding: utf-8
|
|
4
|
+
#
|
|
5
|
+
module Indexed
|
|
6
|
+
|
|
7
|
+
# TODO Spec
|
|
8
|
+
#
|
|
9
|
+
module Wrappers
|
|
10
|
+
|
|
11
|
+
# This index combines an exact and partial index.
|
|
12
|
+
# It serves to order the results such that exact hits are found first.
|
|
13
|
+
#
|
|
14
|
+
# TODO Need to use the right subtokens. Bake in?
|
|
15
|
+
#
|
|
16
|
+
class ExactFirst < Indexed::Bundle::Memory
|
|
17
|
+
|
|
18
|
+
delegate :similar,
|
|
19
|
+
:identifier,
|
|
20
|
+
:name,
|
|
21
|
+
:to => :@exact
|
|
22
|
+
delegate :index,
|
|
23
|
+
:category,
|
|
24
|
+
:weight,
|
|
25
|
+
:generate_partial_from,
|
|
26
|
+
:generate_caches_from_memory,
|
|
27
|
+
:generate_derived,
|
|
28
|
+
:dump,
|
|
29
|
+
:load,
|
|
30
|
+
:to => :@partial
|
|
31
|
+
|
|
32
|
+
def initialize category
|
|
33
|
+
@exact = category.exact
|
|
34
|
+
@partial = category.partial
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
def self.wrap index_or_category
|
|
38
|
+
if index_or_category.respond_to? :categories
|
|
39
|
+
wrap_each_of index_or_category.categories
|
|
40
|
+
index_or_category
|
|
41
|
+
else
|
|
42
|
+
new index_or_category
|
|
43
|
+
end
|
|
44
|
+
end
|
|
45
|
+
# TODO Do not extract categories!
|
|
46
|
+
#
|
|
47
|
+
def self.wrap_each_of categories
|
|
48
|
+
categories.categories.collect! { |category| new(category) }
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
def ids text
|
|
52
|
+
@exact.ids(text) + @partial.ids(text)
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
def weight text
|
|
56
|
+
[@exact.weight(text) || 0, @partial.weight(text) || 0].max
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
end
|
|
File without changes
|
|
@@ -17,7 +17,7 @@ module Indexers
|
|
|
17
17
|
# Raise a no source exception.
|
|
18
18
|
#
|
|
19
19
|
def raise_no_source
|
|
20
|
-
raise NoSourceSpecifiedException.new("No source given for #{@configuration
|
|
20
|
+
raise NoSourceSpecifiedException.new("No source given for #{@configuration}.")
|
|
21
21
|
end
|
|
22
22
|
|
|
23
23
|
# Delegates the key format to the source.
|
|
@@ -65,7 +65,7 @@ module Indexers
|
|
|
65
65
|
end
|
|
66
66
|
end
|
|
67
67
|
def indexing_message
|
|
68
|
-
timed_exclaim "INDEX #{@configuration
|
|
68
|
+
timed_exclaim "INDEX #{@configuration}" # TODO from ...
|
|
69
69
|
end
|
|
70
70
|
|
|
71
71
|
end
|
|
File without changes
|