picky 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,72 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Indexed # :nodoc:all
|
4
|
+
|
5
|
+
# TODO Rewrite.
|
6
|
+
#
|
7
|
+
# A Bundle is a number of indexes
|
8
|
+
# per [index, category] combination.
|
9
|
+
#
|
10
|
+
# At most, there are three indexes:
|
11
|
+
# * *core* index (always used)
|
12
|
+
# * *weights* index (always used)
|
13
|
+
# * *similarity* index (used with similarity)
|
14
|
+
#
|
15
|
+
# In Picky, indexing is separated from the index
|
16
|
+
# handling itself through a parallel structure.
|
17
|
+
#
|
18
|
+
# Both use methods provided by this base class, but
|
19
|
+
# have very different goals:
|
20
|
+
#
|
21
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
22
|
+
# and providing helper functions to e.g. check the indexes.
|
23
|
+
#
|
24
|
+
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
25
|
+
# memory and looking up search data as fast as possible.
|
26
|
+
#
|
27
|
+
module Bundle
|
28
|
+
|
29
|
+
class Base
|
30
|
+
|
31
|
+
attr_reader :identifier, :configuration
|
32
|
+
attr_accessor :similarity_strategy
|
33
|
+
|
34
|
+
delegate :[], :to => :configuration
|
35
|
+
|
36
|
+
def initialize name, configuration, similarity_strategy
|
37
|
+
@identifier = "#{configuration.identifier}:#{name}"
|
38
|
+
|
39
|
+
@index = {}
|
40
|
+
@weights = {}
|
41
|
+
@similarity = {}
|
42
|
+
|
43
|
+
@similarity_strategy = similarity_strategy
|
44
|
+
end
|
45
|
+
|
46
|
+
# Get a list of similar texts.
|
47
|
+
#
|
48
|
+
# Note: Does not return itself.
|
49
|
+
#
|
50
|
+
def similar text
  # Encode the text with the similarity strategy. The code is guarded
  # against being nil/false before it is used as a lookup key.
  code = similarity_strategy.encoded text
  similar_codes = code && @similarity[code]
  return [] unless similar_codes
  # Build a new array instead of calling #delete on the stored one:
  # deleting in place would permanently remove the text from the loaded
  # similarity index, so later lookups of neighbouring texts would no
  # longer find it.
  similar_codes - [text]
end
|
56
|
+
|
57
|
+
# Loads all indexes.
|
58
|
+
#
|
59
|
+
def load
|
60
|
+
load_index
|
61
|
+
load_weights
|
62
|
+
load_similarity
|
63
|
+
load_configuration
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
|
72
|
+
end
|
@@ -0,0 +1,69 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Indexed # :nodoc:all
|
6
|
+
|
7
|
+
#
|
8
|
+
#
|
9
|
+
module Bundle
|
10
|
+
|
11
|
+
# This is the _actual_ index (based on memory).
|
12
|
+
#
|
13
|
+
# Handles exact/partial index, weights index, and similarity index.
|
14
|
+
#
|
15
|
+
# Delegates file handling and checking to an *Index*::*Files* object.
|
16
|
+
#
|
17
|
+
class Memory < Base
|
18
|
+
|
19
|
+
attr_accessor :index, :weights, :similarity, :configuration
|
20
|
+
|
21
|
+
delegate :[], :to => :configuration
|
22
|
+
|
23
|
+
def initialize name, configuration, *args
|
24
|
+
super name, configuration, *args
|
25
|
+
|
26
|
+
@configuration = {} # A hash with config options.
|
27
|
+
|
28
|
+
@backend = Internals::Index::Files.new name, configuration
|
29
|
+
end
|
30
|
+
|
31
|
+
# Get the ids for the given symbol.
|
32
|
+
#
|
33
|
+
def ids sym
|
34
|
+
@index[sym] || []
|
35
|
+
end
|
36
|
+
# Get a weight for the given symbol.
|
37
|
+
#
|
38
|
+
def weight sym
|
39
|
+
@weights[sym]
|
40
|
+
end
|
41
|
+
|
42
|
+
# Loads the core index.
|
43
|
+
#
|
44
|
+
def load_index
|
45
|
+
self.index = @backend.load_index
|
46
|
+
end
|
47
|
+
# Loads the weights index.
|
48
|
+
#
|
49
|
+
def load_weights
|
50
|
+
self.weights = @backend.load_weights
|
51
|
+
end
|
52
|
+
# Loads the similarity index.
|
53
|
+
#
|
54
|
+
def load_similarity
|
55
|
+
self.similarity = @backend.load_similarity
|
56
|
+
end
|
57
|
+
# Loads the configuration.
|
58
|
+
#
|
59
|
+
def load_configuration
|
60
|
+
self.configuration = @backend.load_configuration
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
66
|
+
|
67
|
+
end
|
68
|
+
|
69
|
+
end
|
@@ -0,0 +1,70 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Indexed # :nodoc:all
|
6
|
+
|
7
|
+
#
|
8
|
+
#
|
9
|
+
module Bundle
|
10
|
+
|
11
|
+
# This is the _actual_ index (based on Redis).
|
12
|
+
#
|
13
|
+
# Handles exact/partial index, weights index, and similarity index.
|
14
|
+
#
|
15
|
+
class Redis < Base
|
16
|
+
|
17
|
+
def initialize name, configuration, *args
|
18
|
+
super name, configuration, *args
|
19
|
+
|
20
|
+
@backend = Internals::Index::Redis.new name, configuration
|
21
|
+
end
|
22
|
+
|
23
|
+
# Get the ids for the given symbol.
|
24
|
+
#
|
25
|
+
# Ids are an array of string values in Redis.
|
26
|
+
#
|
27
|
+
def ids sym
|
28
|
+
@backend.ids sym
|
29
|
+
end
|
30
|
+
# Get a weight for the given symbol.
|
31
|
+
#
|
32
|
+
# A weight is a string value in Redis. TODO Convert?
|
33
|
+
#
|
34
|
+
def weight sym
|
35
|
+
@backend.weight sym
|
36
|
+
end
|
37
|
+
# Looks up the setting for the given symbol by asking the backend's #setting. TODO Spec.
|
38
|
+
# Returns whatever the backend returns for sym.
|
39
|
+
def [] sym
|
40
|
+
@backend.setting sym
|
41
|
+
end
|
42
|
+
|
43
|
+
# Loads the core index.
|
44
|
+
#
|
45
|
+
def load_index
|
46
|
+
# TODO check if it is there.
|
47
|
+
end
|
48
|
+
# Loads the weights index.
|
49
|
+
#
|
50
|
+
def load_weights
|
51
|
+
# TODO check if it is there.
|
52
|
+
end
|
53
|
+
# Loads the similarity index.
|
54
|
+
#
|
55
|
+
def load_similarity
|
56
|
+
# TODO check if it is there.
|
57
|
+
end
|
58
|
+
# Loads the configuration.
|
59
|
+
#
|
60
|
+
def load_configuration
|
61
|
+
# TODO check if it is there.
|
62
|
+
end
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
@@ -0,0 +1,135 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Indexed
|
4
|
+
|
5
|
+
class Categories
|
6
|
+
|
7
|
+
attr_reader :categories, :category_hash, :ignore_unassigned_tokens
|
8
|
+
|
9
|
+
each_delegate :load_from_cache,
|
10
|
+
:to => :categories
|
11
|
+
|
12
|
+
# A list of indexed categories.
|
13
|
+
#
|
14
|
+
# Options:
|
15
|
+
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
16
|
+
# The default behaviour is that if a token does not match to
|
17
|
+
# any category, the query will not return anything (since a
|
18
|
+
# single token cannot be matched). If you set this option to
|
19
|
+
# true, any token that cannot be matched to a category will be
|
20
|
+
# simply ignored.
|
21
|
+
# Use this if only a few matched words are important, like for
|
22
|
+
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
23
|
+
# you only want to match the zipcode, to have the search engine
|
24
|
+
# display advertisements on the side for the zipcode.
|
25
|
+
# Nifty! :)
|
26
|
+
#
|
27
|
+
def initialize options = {}
|
28
|
+
clear
|
29
|
+
|
30
|
+
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
31
|
+
end
|
32
|
+
|
33
|
+
# Clears both the array of categories and the hash of categories.
|
34
|
+
#
|
35
|
+
def clear
|
36
|
+
@categories = []
|
37
|
+
@category_hash = {}
|
38
|
+
end
|
39
|
+
|
40
|
+
# Add the given category to the list of categories.
|
41
|
+
#
|
42
|
+
def << category
|
43
|
+
categories << category
|
44
|
+
# Note: [category] is an optimization, since I need an array
|
45
|
+
# of categories.
|
46
|
+
# It's faster to just package it in an array on loading
|
47
|
+
# Picky than doing it over and over with each query.
|
48
|
+
#
|
49
|
+
category_hash[category.name] = [category]
|
50
|
+
end
|
51
|
+
|
52
|
+
# Return all possible combinations for the given token.
|
53
|
+
#
|
54
|
+
# This checks if it needs to also search through similar
|
55
|
+
# tokens, if for example, the token is one with ~.
|
56
|
+
# If yes, it puts together all solutions.
|
57
|
+
#
|
58
|
+
def possible_combinations_for token
|
59
|
+
token.similar? ? similar_possible_for(token) : possible_for(token)
|
60
|
+
end
|
61
|
+
# Gets all similar tokens and puts together the possible combinations
|
62
|
+
# for each found similar token.
|
63
|
+
#
|
64
|
+
def similar_possible_for token
|
65
|
+
# Get as many similar tokens as necessary
|
66
|
+
#
|
67
|
+
tokens = similar_tokens_for token
|
68
|
+
# possible combinations
|
69
|
+
#
|
70
|
+
inject_possible_for tokens
|
71
|
+
end
|
72
|
+
# Collects, over all categories, the chain of similar tokens reachable
# from the given token via #next_similar_token(category).
#
# Tokens whose text equals the original token's text are left out.
# Returns an array of tokens (empty if none are found).
#
def similar_tokens_for token
  text = token.text
  categories.each_with_object([]) do |category, result|
    next_token = token
    # Note: We could also break off here if not all the available
    #       similars are needed.
    #       Wait for a concrete case that needs this before taking
    #       action.
    #
    # The loop condition already guarantees next_token is truthy in the body.
    while next_token = next_token.next_similar_token(category)
      result << next_token if next_token.text != text
    end
  end
end
|
87
|
+
# Concatenates the possible combinations of all given tokens into one
# flat array.
#
def inject_possible_for tokens
  tokens.inject([]) do |result, token|
    possible = possible_categories token
    # possible_for returns nil (not []) when ignore_unassigned_tokens is
    # set and nothing matched. Treat that as "no combinations" so that
    # Array#+ does not raise a TypeError.
    result + (possible_for(token, possible) || [])
  end
end
|
93
|
+
|
94
|
+
# Returns possible Combinations for the token.
|
95
|
+
#
|
96
|
+
# Note: The preselected_categories param is an optimization.
|
97
|
+
#
|
98
|
+
# Note: Returns [] if no categories matched (will produce no result).
|
99
|
+
# Returns nil if this token needs to be removed from the query.
|
100
|
+
# (Also none of the categories matched, but the ignore unassigned
|
101
|
+
# tokens option is true)
|
102
|
+
#
|
103
|
+
def possible_for token, preselected_categories = nil
|
104
|
+
possible = (preselected_categories || possible_categories(token)).map { |category| category.combination_for(token) }
|
105
|
+
possible.compact!
|
106
|
+
# This is an optimization to mark tokens that are ignored.
|
107
|
+
#
|
108
|
+
return if ignore_unassigned_tokens && possible.empty?
|
109
|
+
possible # wrap in combinations
|
110
|
+
end
|
111
|
+
# This returns the possible categories for this token.
|
112
|
+
# If the user has already preselected a category for this token,
|
113
|
+
# like "artist:moby", if not just return all for the given token,
|
114
|
+
# since all are possible.
|
115
|
+
#
|
116
|
+
# Note: Once I thought this was called too often. But it is not (18.01.2011).
|
117
|
+
#
|
118
|
+
def possible_categories token
|
119
|
+
user_defined_categories(token) || categories
|
120
|
+
end
|
121
|
+
# This returns the array of categories if the user has defined
|
122
|
+
# an existing category.
|
123
|
+
#
|
124
|
+
# Note: Returns nil if the user did not define one
|
125
|
+
# or if he/she has defined a non-existing one.
|
126
|
+
#
|
127
|
+
def user_defined_categories token
|
128
|
+
category_hash[token.user_defined_category_name]
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Indexed
|
4
|
+
|
5
|
+
# An index category holds an exact and a partial index for a given category.
|
6
|
+
#
|
7
|
+
# For example an index category for names holds an exact and
|
8
|
+
# a partial index bundle for names.
|
9
|
+
#
|
10
|
+
class Category
|
11
|
+
|
12
|
+
attr_accessor :exact
|
13
|
+
attr_reader :identifier, :name
|
14
|
+
attr_writer :partial
|
15
|
+
|
16
|
+
#
|
17
|
+
#
|
18
|
+
def initialize name, index, options = {}
|
19
|
+
@name = name
|
20
|
+
|
21
|
+
configuration = Configuration::Index.new index, self
|
22
|
+
|
23
|
+
@identifier = configuration.identifier
|
24
|
+
|
25
|
+
# TODO Push the defaults out into the index.
|
26
|
+
#
|
27
|
+
@partial_strategy = options[:partial] || Internals::Generators::Partial::Default
|
28
|
+
similarity = options[:similarity] || Internals::Generators::Similarity::Default
|
29
|
+
|
30
|
+
bundle_class = options[:indexed_bundle_class] || Bundle::Memory
|
31
|
+
@exact = bundle_class.new :exact, configuration, similarity
|
32
|
+
@partial = bundle_class.new :partial, configuration, similarity
|
33
|
+
|
34
|
+
# @exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
35
|
+
# @partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
36
|
+
|
37
|
+
# TODO Extract?
|
38
|
+
#
|
39
|
+
Query::Qualifiers.add(configuration.category_name, generate_qualifiers_from(options) || [name])
|
40
|
+
end
|
41
|
+
|
42
|
+
# Picks the qualifiers from the options: options[:qualifiers] if truthy, otherwise options[:qualifier] wrapped in an array.
|
43
|
+
# Evaluates to a falsy value if neither option is given (&& binds tighter than ||). TODO Move to Index.
|
44
|
+
def generate_qualifiers_from options
|
45
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
46
|
+
end
|
47
|
+
|
48
|
+
# Loads the index from cache.
|
49
|
+
#
|
50
|
+
def load_from_cache
|
51
|
+
timed_exclaim "Loading index #{identifier}."
|
52
|
+
exact.load
|
53
|
+
partial.load
|
54
|
+
end
|
55
|
+
|
56
|
+
# Gets the weight for this token's text.
|
57
|
+
#
|
58
|
+
def weight token
|
59
|
+
bundle_for(token).weight token.text
|
60
|
+
end
|
61
|
+
|
62
|
+
# Gets the ids for this token's text.
|
63
|
+
#
|
64
|
+
def ids token
|
65
|
+
bundle_for(token).ids token.text
|
66
|
+
end
|
67
|
+
|
68
|
+
# Returns the right index bundle for this token.
|
69
|
+
#
|
70
|
+
def bundle_for token
|
71
|
+
token.partial?? partial : exact
|
72
|
+
end
|
73
|
+
|
74
|
+
# The partial strategy defines whether to really use the partial index.
|
75
|
+
#
|
76
|
+
def partial
|
77
|
+
@partial_strategy.use_exact_for_partial?? @exact : @partial
|
78
|
+
end
|
79
|
+
|
80
|
+
# Builds a Query::Combination from the token and this category, but only if #weight is truthy for the token.
|
81
|
+
# Otherwise evaluates to the falsy result of #weight.
|
82
|
+
def combination_for token
|
83
|
+
weight(token) && Internals::Query::Combination.new(token, self)
|
84
|
+
end
|
85
|
+
|
86
|
+
end
|
87
|
+
|
88
|
+
end
|
89
|
+
|
90
|
+
end
|
@@ -0,0 +1,57 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Indexed
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
class Index
|
8
|
+
|
9
|
+
attr_reader :name, :result_identifier, :combinator, :categories
|
10
|
+
|
11
|
+
delegate :load_from_cache,
|
12
|
+
:to => :categories
|
13
|
+
|
14
|
+
# Options read here: :result_identifier (defaults to name), :indexed_bundle_class, :ignore_unassigned_tokens (defaults to false).
|
15
|
+
# Starts with an empty Categories list. TODO Externalize?
|
16
|
+
def initialize name, options = {}
|
17
|
+
@name = name
|
18
|
+
|
19
|
+
@result_identifier = options[:result_identifier] || name
|
20
|
+
@bundle_class = options[:indexed_bundle_class] # TODO This should actually be a fixed parameter.
|
21
|
+
ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false # TODO Move to query, somehow.
|
22
|
+
|
23
|
+
@categories = Categories.new ignore_unassigned_tokens: ignore_unassigned_tokens
|
24
|
+
end
|
25
|
+
|
26
|
+
# Creates a Category with the given name and options (merged over default_category_options), appends it to the categories, and returns it.
|
27
|
+
# TODO Externalize?
|
28
|
+
def define_category category_name, options = {}
|
29
|
+
options = default_category_options.merge options
|
30
|
+
|
31
|
+
new_category = Category.new category_name, self, options
|
32
|
+
categories << new_category
|
33
|
+
new_category
|
34
|
+
end
|
35
|
+
|
36
|
+
# By default, the category uses
|
37
|
+
# * the index's bundle type.
|
38
|
+
#
|
39
|
+
def default_category_options
|
40
|
+
{
|
41
|
+
:indexed_bundle_class => @bundle_class
|
42
|
+
}
|
43
|
+
end
|
44
|
+
|
45
|
+
# Return the possible combinations for this token.
|
46
|
+
#
|
47
|
+
# A combination is a tuple <token, index_bundle>.
|
48
|
+
#
|
49
|
+
def possible_combinations token
|
50
|
+
categories.possible_combinations_for token
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
File without changes
|
@@ -13,7 +13,7 @@ module Indexed
|
|
13
13
|
precision = options[:precision] || 1
|
14
14
|
user_grid = options[:grid] || raise("Gridsize needs to be given for location #{bundle.identifier}.")
|
15
15
|
|
16
|
-
@calculation
|
16
|
+
@calculation = Calculations::Location.new user_grid, precision
|
17
17
|
end
|
18
18
|
|
19
19
|
#
|
@@ -28,7 +28,9 @@ module Indexed
|
|
28
28
|
# Load first the bundle, then extract the config.
|
29
29
|
#
|
30
30
|
bundle.load
|
31
|
-
|
31
|
+
# TODO Move the to_f to the backend.
|
32
|
+
#
|
33
|
+
minimum = bundle[:location_minimum] && bundle[:location_minimum].to_f || raise("Configuration :location_minimum for #{bundle.identifier} missing. Did you run rake index already?")
|
32
34
|
@calculation.minimum = minimum
|
33
35
|
end
|
34
36
|
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
# encoding: utf-8
|
4
|
+
#
|
5
|
+
module Indexed
|
6
|
+
|
7
|
+
# TODO Spec
|
8
|
+
#
|
9
|
+
module Wrappers
|
10
|
+
|
11
|
+
# This index combines an exact and partial index.
|
12
|
+
# It serves to order the results such that exact hits are found first.
|
13
|
+
#
|
14
|
+
# TODO Need to use the right subtokens. Bake in?
|
15
|
+
#
|
16
|
+
class ExactFirst < Indexed::Bundle::Memory
|
17
|
+
|
18
|
+
delegate :similar,
|
19
|
+
:identifier,
|
20
|
+
:name,
|
21
|
+
:to => :@exact
|
22
|
+
delegate :index,
|
23
|
+
:category,
|
24
|
+
:weight,
|
25
|
+
:generate_partial_from,
|
26
|
+
:generate_caches_from_memory,
|
27
|
+
:generate_derived,
|
28
|
+
:dump,
|
29
|
+
:load,
|
30
|
+
:to => :@partial
|
31
|
+
|
32
|
+
def initialize category
|
33
|
+
@exact = category.exact
|
34
|
+
@partial = category.partial
|
35
|
+
end
|
36
|
+
|
37
|
+
# Wraps either a whole index or a single category.
# If the argument responds to #categories, its categories are passed to
# wrap_each_of and the argument itself is returned; otherwise a new
# wrapper around the argument is returned.
def self.wrap index_or_category
|
38
|
+
if index_or_category.respond_to? :categories
|
39
|
+
wrap_each_of index_or_category.categories
|
40
|
+
index_or_category
|
41
|
+
else
|
42
|
+
new index_or_category
|
43
|
+
end
|
44
|
+
end
|
45
|
+
# TODO Do not extract categories!
|
46
|
+
#
|
47
|
+
def self.wrap_each_of categories
|
48
|
+
categories.categories.collect! { |category| new(category) }
|
49
|
+
end
|
50
|
+
|
51
|
+
# Returns the exact bundle's ids for the text followed by the partial
# bundle's ids (joined with +, so nothing is deduplicated here).
def ids text
|
52
|
+
@exact.ids(text) + @partial.ids(text)
|
53
|
+
end
|
54
|
+
|
55
|
+
# Returns the larger of the exact and the partial weight for the text,
# counting a missing (falsy) weight as 0.
def weight text
|
56
|
+
[@exact.weight(text) || 0, @partial.weight(text) || 0].max
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
File without changes
|
@@ -17,7 +17,7 @@ module Indexers
|
|
17
17
|
# Raise a no source exception.
|
18
18
|
#
|
19
19
|
def raise_no_source
|
20
|
-
raise NoSourceSpecifiedException.new("No source given for #{@configuration
|
20
|
+
raise NoSourceSpecifiedException.new("No source given for #{@configuration}.")
|
21
21
|
end
|
22
22
|
|
23
23
|
# Delegates the key format to the source.
|
@@ -65,7 +65,7 @@ module Indexers
|
|
65
65
|
end
|
66
66
|
end
|
67
67
|
def indexing_message
|
68
|
-
timed_exclaim "INDEX #{@configuration
|
68
|
+
timed_exclaim "INDEX #{@configuration}" # TODO from ...
|
69
69
|
end
|
70
70
|
|
71
71
|
end
|
File without changes
|