picky 2.5.2 → 2.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Combinations are a number of Combination-s.
|
|
4
|
+
#
|
|
5
|
+
# They are the core of an allocation.
|
|
6
|
+
# An allocation consists of a number of combinations.
|
|
7
|
+
#
|
|
8
|
+
module Combinations # :nodoc:all
|
|
9
|
+
|
|
10
|
+
# Base Combinations contain methods for calculating score and ids.
|
|
11
|
+
#
|
|
12
|
+
class Base
|
|
13
|
+
|
|
14
|
+
attr_reader :combinations
|
|
15
|
+
|
|
16
|
+
delegate :empty?, :to => :@combinations
|
|
17
|
+
|
|
18
|
+
def initialize combinations = []
|
|
19
|
+
@combinations = combinations
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def hash
|
|
23
|
+
@combinations.hash
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Uses user specific weights to calculate a score for the combinations.
|
|
27
|
+
#
|
|
28
|
+
def calculate_score weights
|
|
29
|
+
total_score + weighted_score(weights)
|
|
30
|
+
end
|
|
31
|
+
def total_score
|
|
32
|
+
@combinations.sum &:weight
|
|
33
|
+
end
|
|
34
|
+
def weighted_score weights
|
|
35
|
+
weights.score @combinations
|
|
36
|
+
end
|
|
37
|
+
|
|
38
|
+
# Filters the tokens and identifiers such that only identifiers
|
|
39
|
+
# that are passed in, remain, including their tokens.
|
|
40
|
+
#
|
|
41
|
+
# Note: This method is not totally independent of the calculate_ids one.
|
|
42
|
+
# Since identifiers are only nullified, we need to not include the
|
|
43
|
+
# ids that have an associated identifier that is nil.
|
|
44
|
+
#
|
|
45
|
+
def keep identifiers = []
|
|
46
|
+
@combinations.reject! { |combination| !combination.in?(identifiers) }
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
# Filters the tokens and identifiers such that identifiers
|
|
50
|
+
# that are passed in, are removed, including their tokens.
|
|
51
|
+
#
|
|
52
|
+
# Note: This method is not totally independent of the calculate_ids one.
|
|
53
|
+
# Since identifiers are only nullified, we need to not include the
|
|
54
|
+
# ids that have an associated identifier that is nil.
|
|
55
|
+
#
|
|
56
|
+
def remove identifiers = []
|
|
57
|
+
@combinations.reject! { |combination| combination.in?(identifiers) }
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
#
|
|
61
|
+
#
|
|
62
|
+
def to_result
|
|
63
|
+
@combinations.map &:to_result
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
end
|
|
@@ -0,0 +1,48 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Combinations are a number of Combination-s.
|
|
4
|
+
#
|
|
5
|
+
# They are the core of an allocation.
|
|
6
|
+
# An allocation consists of a number of combinations.
|
|
7
|
+
#
|
|
8
|
+
module Combinations # :nodoc:all
|
|
9
|
+
|
|
10
|
+
# Memory Combinations contain specific methods for
|
|
11
|
+
# calculating score and ids in memory.
|
|
12
|
+
#
|
|
13
|
+
class Memory < Base
|
|
14
|
+
|
|
15
|
+
# Returns the result ids for the allocation.
|
|
16
|
+
#
|
|
17
|
+
# Sorts the ids by size and & through them in the following order (sizes):
|
|
18
|
+
# 0. [100_000, 400, 30, 2]
|
|
19
|
+
# 1. [2, 30, 400, 100_000]
|
|
20
|
+
# 2. (100_000 & (400 & (30 & 2))) # => result
|
|
21
|
+
#
|
|
22
|
+
# Note: Uses a C-optimized intersection routine (in performant.c)
|
|
23
|
+
# for speed and memory efficiency.
|
|
24
|
+
#
|
|
25
|
+
# Note: In the memory based version we ignore the (amount) needed hint.
|
|
26
|
+
# We cannot use the information to speed up the algorithm, unfortunately.
|
|
27
|
+
#
|
|
28
|
+
def ids _, _
|
|
29
|
+
return [] if @combinations.empty?
|
|
30
|
+
|
|
31
|
+
# Get the ids for each combination.
|
|
32
|
+
#
|
|
33
|
+
id_arrays = @combinations.inject([]) do |total, combination|
|
|
34
|
+
total << combination.ids
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
# Call the optimized C algorithm.
|
|
38
|
+
#
|
|
39
|
+
# Note: It orders the passed arrays by size.
|
|
40
|
+
#
|
|
41
|
+
Performant::Array.memory_efficient_intersect id_arrays
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
end
|
|
@@ -0,0 +1,86 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# Combinations are a number of Combination-s.
|
|
4
|
+
#
|
|
5
|
+
# They are the core of an allocation.
|
|
6
|
+
# An allocation consists of a number of combinations.
|
|
7
|
+
#
|
|
8
|
+
module Combinations # :nodoc:all
|
|
9
|
+
|
|
10
|
+
# Redis Combinations contain specific methods for
|
|
11
|
+
# calculating score and ids in Redis.
|
|
12
|
+
#
|
|
13
|
+
class Redis < Base
|
|
14
|
+
|
|
15
|
+
# Connect to the backend.
|
|
16
|
+
#
|
|
17
|
+
# TODO Use specific Picky Redis wrapper.
|
|
18
|
+
#
|
|
19
|
+
def self.redis
|
|
20
|
+
@redis ||= ::Redis.new :db => 15
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
attr_reader :redis
|
|
24
|
+
|
|
25
|
+
#
|
|
26
|
+
#
|
|
27
|
+
def initialize combinations
|
|
28
|
+
super combinations
|
|
29
|
+
|
|
30
|
+
@redis = self.class.redis
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Returns the result ids for the allocation.
|
|
34
|
+
#
|
|
35
|
+
def ids amount, offset
|
|
36
|
+
return [] if @combinations.empty?
|
|
37
|
+
|
|
38
|
+
identifiers = @combinations.inject([]) do |identifiers, combination|
|
|
39
|
+
identifiers << "#{combination.identifier}"
|
|
40
|
+
end
|
|
41
|
+
|
|
42
|
+
result_id = generate_intermediate_result_id
|
|
43
|
+
|
|
44
|
+
# Intersect and store.
|
|
45
|
+
#
|
|
46
|
+
redis.zinterstore result_id, identifiers
|
|
47
|
+
|
|
48
|
+
# Get the stored result.
|
|
49
|
+
#
|
|
50
|
+
results = redis.zrange result_id, offset, (offset + amount)
|
|
51
|
+
|
|
52
|
+
# Delete the stored result as it was only for temporary purposes.
|
|
53
|
+
#
|
|
54
|
+
# Note: I could also not delete it, but that would not be clean at all.
|
|
55
|
+
#
|
|
56
|
+
redis.del result_id
|
|
57
|
+
|
|
58
|
+
results
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
# Generate a multiple host/process safe result id.
|
|
62
|
+
#
|
|
63
|
+
# Note: Generated when this class loads.
|
|
64
|
+
#
|
|
65
|
+
require 'socket'
|
|
66
|
+
def self.extract_host
|
|
67
|
+
@host ||= Socket.gethostname
|
|
68
|
+
end
|
|
69
|
+
def host
|
|
70
|
+
self.class.extract_host
|
|
71
|
+
end
|
|
72
|
+
extract_host
|
|
73
|
+
def pid
|
|
74
|
+
@pid ||= Process.pid
|
|
75
|
+
end
|
|
76
|
+
# Use the host and pid (generated lazily in child processes) for the result.
|
|
77
|
+
#
|
|
78
|
+
def generate_intermediate_result_id
|
|
79
|
+
:"#{host}:#{pid}:picky:result"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
end
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# The query indexes class bundles indexes given to a query.
|
|
4
|
+
#
|
|
5
|
+
# Example:
|
|
6
|
+
# # If you call
|
|
7
|
+
# Search.new dvd_index, mp3_index, video_index
|
|
8
|
+
#
|
|
9
|
+
# # What it does is take the three given (API-) indexes and
|
|
10
|
+
# # bundle them in an index bundle.
|
|
11
|
+
#
|
|
12
|
+
class Indexes
|
|
13
|
+
|
|
14
|
+
attr_reader :indexes
|
|
15
|
+
|
|
16
|
+
# Creates a new Query::Indexes.
|
|
17
|
+
#
|
|
18
|
+
# Its job is to generate all possible combinations.
|
|
19
|
+
# Note: We cannot mix memory and redis indexes just yet.
|
|
20
|
+
#
|
|
21
|
+
def initialize *indexes, combinations_type
|
|
22
|
+
@indexes = indexes
|
|
23
|
+
@combinations_type = combinations_type
|
|
24
|
+
end
|
|
25
|
+
|
|
26
|
+
# Returns a number of prepared (sorted, reduced etc.) allocations for the given tokens.
|
|
27
|
+
#
|
|
28
|
+
def prepared_allocations_for tokens, weights = {}
|
|
29
|
+
allocations = allocations_for tokens
|
|
30
|
+
|
|
31
|
+
# Remove double allocations.
|
|
32
|
+
#
|
|
33
|
+
allocations.uniq
|
|
34
|
+
|
|
35
|
+
# Score the allocations using weights as bias.
|
|
36
|
+
#
|
|
37
|
+
allocations.calculate_score weights
|
|
38
|
+
|
|
39
|
+
# Sort the allocations.
|
|
40
|
+
# (allocations are sorted according to score, highest to lowest)
|
|
41
|
+
#
|
|
42
|
+
allocations.sort!
|
|
43
|
+
|
|
44
|
+
# Reduce the amount of allocations.
|
|
45
|
+
#
|
|
46
|
+
# allocations.reduce_to some_amount
|
|
47
|
+
|
|
48
|
+
# Remove identifiers from allocations.
|
|
49
|
+
#
|
|
50
|
+
# allocations.remove some_array_of_identifiers_to_remove
|
|
51
|
+
|
|
52
|
+
allocations
|
|
53
|
+
end
|
|
54
|
+
# Returns a number of possible allocations for the given tokens.
|
|
55
|
+
#
|
|
56
|
+
def allocations_for tokens
|
|
57
|
+
Allocations.new allocations_ary_for(tokens)
|
|
58
|
+
end
|
|
59
|
+
def allocations_ary_for tokens
|
|
60
|
+
indexes.inject([]) do |allocations, index|
|
|
61
|
+
allocations + allocation_for(tokens, index)
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
def allocation_for tokens, index
|
|
65
|
+
# Expand the combinations.
|
|
66
|
+
#
|
|
67
|
+
possible_combinations = tokens.possible_combinations_in index
|
|
68
|
+
|
|
69
|
+
# Generate all possible combinations.
|
|
70
|
+
#
|
|
71
|
+
expanded_combinations = expand_combinations_from possible_combinations
|
|
72
|
+
|
|
73
|
+
# Add the wrapped possible allocations to the ones we already have.
|
|
74
|
+
#
|
|
75
|
+
expanded_combinations.map! do |expanded_combination|
|
|
76
|
+
Allocation.new @combinations_type.new(expanded_combination), index.result_identifier # TODO Do not extract result_identifier.
|
|
77
|
+
end
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
# This is the core of the search engine. No kidding.
|
|
81
|
+
#
|
|
82
|
+
# Gets an array of
|
|
83
|
+
# [
|
|
84
|
+
# [<combinations for token1>],
|
|
85
|
+
# [<combinations for token2>],
|
|
86
|
+
# [<combinations for token3>]
|
|
87
|
+
# ]
|
|
88
|
+
#
|
|
89
|
+
# Generates all possible allocations of combinations.
|
|
90
|
+
# [
|
|
91
|
+
# [first combination of token1, first c of t2, first c of t3],
|
|
92
|
+
# [first combination of token1, first c of t2, second c of t3]
|
|
93
|
+
# ...
|
|
94
|
+
# ]
|
|
95
|
+
#
|
|
96
|
+
# Generates all possible combinations of array elements:
|
|
97
|
+
# [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
|
|
98
|
+
# Note: Also calculates the weights and sorts them accordingly.
|
|
99
|
+
#
|
|
100
|
+
# Note: This is a heavily optimized ruby version.
|
|
101
|
+
#
|
|
102
|
+
# Works like this:
|
|
103
|
+
# [1,2,3], [a,b,c], [k,l,m] are expanded to
|
|
104
|
+
# group mult: 1
|
|
105
|
+
# <- single mult ->
|
|
106
|
+
# [1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3] = 27 elements
|
|
107
|
+
# group mult: 3
|
|
108
|
+
# <- -> s/m
|
|
109
|
+
# [a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c] = 27 elements
|
|
110
|
+
# group mult: 9
|
|
111
|
+
# <> s/m
|
|
112
|
+
# [k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m] = 27 elements
|
|
113
|
+
#
|
|
114
|
+
# It is then recombined, where
|
|
115
|
+
# [
|
|
116
|
+
# [a,a,b,b,c,c]
|
|
117
|
+
# [d,e,d,e,d,e]
|
|
118
|
+
# ]
|
|
119
|
+
# becomes
|
|
120
|
+
# [
|
|
121
|
+
# [a,d],
|
|
122
|
+
# [a,e],
|
|
123
|
+
# [b,d],
|
|
124
|
+
# [b,e],
|
|
125
|
+
# [c,d],
|
|
126
|
+
# [c,e]
|
|
127
|
+
# ]
|
|
128
|
+
#
|
|
129
|
+
# Note: Not using transpose as it is slower.
|
|
130
|
+
#
|
|
131
|
+
# Returns an empty array if there are no combinations.
|
|
132
|
+
#
|
|
133
|
+
# Note: Of course I could split this method up into smaller
|
|
134
|
+
# ones, but I guess I am a bit sentimental.
|
|
135
|
+
#
|
|
136
|
+
def expand_combinations_from possible_combinations
|
|
137
|
+
# If an element has size 0, this means one of the
|
|
138
|
+
# tokens could not be allocated.
|
|
139
|
+
#
|
|
140
|
+
return [] if possible_combinations.any?(&:empty?)
|
|
141
|
+
|
|
142
|
+
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
|
143
|
+
#
|
|
144
|
+
single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
|
|
145
|
+
|
|
146
|
+
# Initialize a group multiplicator.
|
|
147
|
+
#
|
|
148
|
+
group_mult = 1
|
|
149
|
+
|
|
150
|
+
# The expanding part to line up the combinations
|
|
151
|
+
# for later combination in allocations.
|
|
152
|
+
#
|
|
153
|
+
possible_combinations.collect! do |combinations|
|
|
154
|
+
|
|
155
|
+
# Get the size of the combinations of the current token.
|
|
156
|
+
#
|
|
157
|
+
combinations_size = combinations.size
|
|
158
|
+
|
|
159
|
+
# Special case: If there is no combination for one of the tokens.
|
|
160
|
+
# In that case, we just use the same single mult for
|
|
161
|
+
# the next iteration.
|
|
162
|
+
# If there are combinations, we divide the single mult
|
|
163
|
+
# by the number of combinations.
|
|
164
|
+
#
|
|
165
|
+
single_mult /= combinations_size unless combinations_size.zero?
|
|
166
|
+
|
|
167
|
+
# Expand each combination by the single mult:
|
|
168
|
+
# [a,b,c]
|
|
169
|
+
# [a,a,a, b,b,b, c,c,c]
|
|
170
|
+
# Then, expand the result by the group mult:
|
|
171
|
+
# [a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c]
|
|
172
|
+
#
|
|
173
|
+
combinations = combinations.inject([]) do |total, combination|
|
|
174
|
+
total + Array.new(single_mult, combination)
|
|
175
|
+
end * group_mult
|
|
176
|
+
|
|
177
|
+
# Multiply the group mult by the combinations size,
|
|
178
|
+
# since the next combinations' single mult is smaller
|
|
179
|
+
# and we need to adjust for that.
|
|
180
|
+
#
|
|
181
|
+
group_mult = group_mult * combinations_size
|
|
182
|
+
|
|
183
|
+
# Return the combinations.
|
|
184
|
+
#
|
|
185
|
+
combinations
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
return [] if possible_combinations.empty?
|
|
189
|
+
|
|
190
|
+
possible_combinations.shift.zip *possible_combinations
|
|
191
|
+
end
|
|
192
|
+
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# A single qualifier.
|
|
6
|
+
#
|
|
7
|
+
class Qualifier # :nodoc:all
|
|
8
|
+
|
|
9
|
+
attr_reader :normalized_qualifier, :codes
|
|
10
|
+
|
|
11
|
+
#
|
|
12
|
+
#
|
|
13
|
+
# codes is an array.
|
|
14
|
+
#
|
|
15
|
+
def initialize normalized_qualifier, codes
|
|
16
|
+
@normalized_qualifier = normalized_qualifier
|
|
17
|
+
@codes = codes.map &:to_sym
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
# Will overwrite if the key is present in the hash.
|
|
21
|
+
#
|
|
22
|
+
def inject_into hash
|
|
23
|
+
codes.each do |code|
|
|
24
|
+
hash[code] = normalized_qualifier
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Collection class for qualifiers.
|
|
31
|
+
#
|
|
32
|
+
class Qualifiers # :nodoc:all
|
|
33
|
+
|
|
34
|
+
attr_reader :qualifiers, :normalization_mapping
|
|
35
|
+
|
|
36
|
+
delegate :<<, :to => :qualifiers
|
|
37
|
+
|
|
38
|
+
#
|
|
39
|
+
#
|
|
40
|
+
def initialize
|
|
41
|
+
@qualifiers = []
|
|
42
|
+
@normalization_mapping = {}
|
|
43
|
+
end
|
|
44
|
+
def self.instance
|
|
45
|
+
@instanec ||= new
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
# TODO Spec.
|
|
49
|
+
#
|
|
50
|
+
def self.add name, qualifiers
|
|
51
|
+
instance << Qualifier.new(name, qualifiers)
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
# Uses the qualifiers to prepare (optimize) the qualifier handling.
|
|
55
|
+
#
|
|
56
|
+
def prepare
|
|
57
|
+
qualifiers.each do |qualifier|
|
|
58
|
+
qualifier.inject_into normalization_mapping
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
# Normalizes the given qualifier.
|
|
63
|
+
#
|
|
64
|
+
# Returns nil if it is not allowed, the normalized qualifier if it is.
|
|
65
|
+
#
|
|
66
|
+
# Note: Normalizes.
|
|
67
|
+
#
|
|
68
|
+
def normalize qualifier
|
|
69
|
+
return nil if qualifier.blank?
|
|
70
|
+
|
|
71
|
+
normalization_mapping[qualifier.to_sym]
|
|
72
|
+
end
|
|
73
|
+
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
end
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
module Query
|
|
2
|
+
|
|
3
|
+
# This is a query token. Together with other tokens it makes up a query.
|
|
4
|
+
#
|
|
5
|
+
# It remembers the original form, and a normalized form.
|
|
6
|
+
#
|
|
7
|
+
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
|
|
8
|
+
#
|
|
9
|
+
class Token # :nodoc:all

  attr_reader :text, :original
  attr_writer :similar

  # A token is blank exactly if its text is blank.
  #
  # Note: Simply forwards to the text.
  #
  def blank?
    text.blank?
  end

  # Normal initializer.
  #
  # Note: Use this if you do not want a qualified and normalized token.
  #
  def initialize text
    @text = text
  end

  # Returns a qualified and normalized token.
  #
  # Note: Use this in the search engine if you need a qualified
  #       and normalized token. I.e. one prepared for a search.
  #
  def self.processed text, downcase = true
    new(text).process downcase
  end

  # Runs all preparation steps on this token and returns the token itself.
  #
  # Note: The order matters. The qualifiers are split off first, the
  #       original is remembered before downcasing, and the partial and
  #       similar markers are looked at before they are removed.
  #
  def process downcases = true
    qualify
    extract_original
    downcase if downcases
    partialize
    similarize
    remove_illegals
    symbolize
    self
  end

  # This returns an array of predefined category names if the user has given any.
  #
  # Note: Returns nil if the text had no qualifier part.
  #
  def user_defined_category_names
    @qualifiers
  end

  # Extracts a qualifier for this token and pre-assigns an allocation.
  #
  # Note: Removes the qualifier if it is not allowed.
  #
  def qualify
    @qualifiers, @text = split @text
    if @qualifiers
      @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }
      # Not allowed qualifiers have been normalized to nil. Drop them.
      #
      @qualifiers.compact!
    end
    @qualifiers
  end

  # Remembers a copy of the current text as the original.
  #
  # Note: A copy, since the text is modified in place later on.
  #
  def extract_original
    @original = @text.dup
  end

  # Downcases the text.
  #
  # Note: Modifies the text in place.
  #
  def downcase
    @text.downcase!
  end

  # Partial is a conditional setter.
  #
  # It is only settable if it hasn't been set yet.
  #
  def partial= partial
    @partial = partial if @partial.nil?
  end

  # A token is only partial if it is not similar.
  #
  def partial?
    !@similar && @partial
  end

  # If the text ends with *, partialize it. If with ", don't.
  #
  # The latter wins. So "hello*" will not be partially searched.
  #
  @@no_partial = /\"\Z/
  @@partial    = /\*\Z/
  def partialize
    # An explicit early exit. (false and return never returns.)
    #
    if @text =~ @@no_partial
      self.partial = false
      return
    end
    self.partial = true if @text =~ @@partial
  end

  # If the text ends with ~ similarize it. If with ", don't.
  #
  # The latter wins.
  #
  @@no_similar = /\"\Z/
  @@similar    = /\~\Z/
  def similarize
    # An explicit early exit. (false and return never returns.)
    #
    if @text =~ @@no_similar
      self.similar = false
      return
    end
    self.similar = true if @text =~ @@similar
  end

  # Returns whatever has been set as similar. Nil if nothing has.
  #
  def similar?
    @similar
  end

  # Normalizes this token's text.
  #
  # Note: Removes all ", * and ~ characters in place.
  #
  @@illegals = /["*~]/
  def remove_illegals
    @text.gsub! @@illegals, '' unless @text.blank?
  end

  # Converts the text into a symbol.
  #
  # NOTE(review): This creates symbols from query text. Confirm this
  #               is acceptable on the Ruby versions in use.
  #
  def symbolize
    @text = @text.to_sym
  end

  # Returns an array of possible combinations.
  #
  # Note: Asks the given index for them.
  #
  def possible_combinations_in index
    index.possible_combinations self
  end

  # Returns a token with the next similar text.
  #
  # Note: Returns nil if there is no next similar text.
  #
  # TODO Rewrite this. It is hard to understand. Also spec performance.
  #
  def next_similar_token category
    token = dup
    token if token.next_similar category.bundle_for(token)
  end

  # Sets and returns the next similar word.
  #
  # Note: Also overrides the original.
  #
  # Note: Returns nil, changing nothing, if this token is not
  #       similar or if there are no similar words left.
  #
  def next_similar bundle
    return unless similar?
    next_text = similarity(bundle).shift
    return unless next_text
    @text = @original = next_text
  end

  # Lazy similar reader.
  #
  # Note: The bundle is only used the first time.
  #
  def similarity bundle = nil
    @similarity ||= generate_similarity_for(bundle)
  end

  # Returns a copy of the words the bundle holds as similar to the text.
  #
  # Note: A copy, since next_similar shifts elements off of it.
  #
  # Note: The empty fallback is applied before the dup, so a bundle
  #       returning nil also works where nil cannot be dup'd.
  #
  def generate_similarity_for bundle
    (bundle.similar(@text) || []).dup
  end

  # Returns the pair [original, text].
  #
  def to_result
    [@original, @text]
  end

  # Internal identifier.
  #
  # Note: Prefixed with similarity if the token is similar, with index if not.
  #
  def identifier
    "#{similar? ? :similarity : :index}:#{@text}"
  end

  # If the originals & the text are the same, they are the same.
  #
  # Note: Anything that has no original and text is never the same.
  #
  def == other
    other.respond_to?(:original) &&
    other.respond_to?(:text) &&
    original == other.original &&
    text == other.text
  end

  # Displays the qualifier text and the text, joined.
  #
  # e.g. name:meier
  #
  @@split_qualifier_text = ':'
  @@split_qualifiers     = ','
  def to_s
    [@qualifiers && @qualifiers.join(@@split_qualifiers), @text].compact.join @@split_qualifier_text
  end

  private

    # Splits text into a qualifier and text.
    #
    # Returns [qualifier, text].
    #
    # Note: If nothing follows the first colon (or there is none),
    #       the qualifier is nil and what is in front is the text.
    #
    def split unqualified_text
      qualifiers, text = (unqualified_text || '').split(@@split_qualifier_text, 2)
      if text.blank?
        [nil, (qualifiers || '')]
      else
        [qualifiers.split(@@split_qualifiers), text]
      end
    end

end
|
|
197
|
+
|
|
198
|
+
end
|