picky 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,137 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
# Container class for allocations.
|
5
|
+
#
|
6
|
+
class Allocations # :nodoc:all
|
7
|
+
|
8
|
+
# TODO Remove size
|
9
|
+
#
|
10
|
+
delegate :each, :inject, :empty?, :size, :to => :@allocations
|
11
|
+
attr_reader :total
|
12
|
+
|
13
|
+
def initialize allocations = []
|
14
|
+
@allocations = allocations
|
15
|
+
end
|
16
|
+
|
17
|
+
# Score each allocation.
|
18
|
+
#
|
19
|
+
def calculate_score weights
|
20
|
+
@allocations.each do |allocation|
|
21
|
+
allocation.calculate_score weights
|
22
|
+
end
|
23
|
+
end
|
24
|
+
# Sort the allocations.
|
25
|
+
#
|
26
|
+
def sort
|
27
|
+
@allocations.sort!
|
28
|
+
end
|
29
|
+
|
30
|
+
# Reduces the amount of allocations to x.
|
31
|
+
#
|
32
|
+
def reduce_to amount
|
33
|
+
@allocations = @allocations.shift amount
|
34
|
+
end
|
35
|
+
|
36
|
+
# Keeps combinations.
|
37
|
+
#
|
38
|
+
# Only those passed in remain.
|
39
|
+
#
|
40
|
+
def keep identifiers = []
|
41
|
+
@allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
|
42
|
+
end
|
43
|
+
# Removes combinations.
|
44
|
+
#
|
45
|
+
# Only those passed in are removed.
|
46
|
+
#
|
47
|
+
# TODO Rewrite.
|
48
|
+
#
|
49
|
+
def remove identifiers = []
|
50
|
+
@allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
|
51
|
+
end
|
52
|
+
|
53
|
+
# Returns the top amount ids.
#
# Walks the allocations in their current order and collects their ids
# until at least amount ids have been gathered.
#
# Parameters:
#  * amount: the maximum number of ids to return (default 20)
#
# Returns a new array holding at most amount ids.
#
def ids amount = 20
  collected = []
  @allocations.each do |allocation|
    # Stop asking further allocations as soon as we have enough.
    break if collected.size >= amount
    collected.concat allocation.ids
  end
  # Always truncate: the last allocation visited may have pushed us past amount.
  collected.first amount
end
|
60
|
+
|
61
|
+
# Returns random ids from the allocations.
#
# Only the ids of the first allocation are considered.
#
# Parameters:
#  * amount: how many ids to pick (default 1)
#
# Returns a new array of at most amount ids, each taken from a
# different position of the first allocation's ids, or an empty
# array if there are no allocations.
#
def random_ids amount = 1
  return [] if @allocations.empty?
  # Array#sample picks from distinct random positions without
  # building and sorting a full index array first.
  @allocations.first.ids.sample amount
end
|
75
|
+
|
76
|
+
# This is the main method of this class that will replace ids and count.
|
77
|
+
#
|
78
|
+
# What it does is calculate the ids and counts of its allocations
|
79
|
+
# for being used in the results. It also calculates the total count over all allocations.
|
80
|
+
#
|
81
|
+
# Parameters:
|
82
|
+
# * amount: the amount of ids to calculate
|
83
|
+
# * offset: the offset from where in the result set to take the ids
|
84
|
+
#
|
85
|
+
# Note: With an amount of 0, an offset > 0 doesn't make much
|
86
|
+
# sense, as seen in the live search.
|
87
|
+
#
|
88
|
+
# Note: Each allocation caches its count, but not its ids (thrown away).
|
89
|
+
# The ids are cached in this class.
|
90
|
+
#
|
91
|
+
# Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
|
92
|
+
#
|
93
|
+
def process! amount, offset = 0
  @total = 0
  @allocations.each do |allocation|
    found = allocation.process! amount, offset

    # Every allocation contributes its count to the total, ids or not.
    @total += allocation.count

    unless found.empty?
      # Later allocations need to deliver fewer ids,
      # and the offset has been consumed.
      amount -= found.size
      offset = 0
      next
    end

    # Nothing delivered: this allocation used up part of the offset.
    offset -= allocation.count unless offset.zero?
  end
end
|
107
|
+
|
108
|
+
def uniq
|
109
|
+
@allocations.uniq!
|
110
|
+
end
|
111
|
+
|
112
|
+
def to_a
|
113
|
+
@allocations
|
114
|
+
end
|
115
|
+
|
116
|
+
# Simply inspects the internal allocations.
|
117
|
+
#
|
118
|
+
def to_s
|
119
|
+
@allocations.inspect
|
120
|
+
end
|
121
|
+
|
122
|
+
# Allocations for results are in the form:
|
123
|
+
# [
|
124
|
+
# allocation1.to_result,
|
125
|
+
# allocation2.to_result
|
126
|
+
# ...
|
127
|
+
# ]
|
128
|
+
#
|
129
|
+
def to_result
|
130
|
+
@allocations.map(&:to_result).compact
|
131
|
+
end
|
132
|
+
|
133
|
+
end
|
134
|
+
|
135
|
+
end
|
136
|
+
|
137
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
|
5
|
+
# Describes the combination of a token (the text) and
|
6
|
+
# the index (the bundle): [text, index_bundle]
|
7
|
+
#
|
8
|
+
# A combination is a single part of an allocation:
|
9
|
+
# [..., [text2, index_bundle2], ...]
|
10
|
+
#
|
11
|
+
# An allocation consists of a number of combinations:
|
12
|
+
# [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
|
13
|
+
#
|
14
|
+
class Combination # :nodoc:all
|
15
|
+
|
16
|
+
attr_reader :token, :bundle, :category_name
|
17
|
+
|
18
|
+
def initialize token, category
|
19
|
+
@token = token
|
20
|
+
@category_name = category.name
|
21
|
+
@bundle = category.bundle_for token
|
22
|
+
@text = @token.text # don't want to use reset_similar already
|
23
|
+
end
|
24
|
+
|
25
|
+
# Note: Required for uniq!
|
26
|
+
#
|
27
|
+
def hash
|
28
|
+
[@token.to_s, @bundle].hash
|
29
|
+
end
|
30
|
+
|
31
|
+
# Returns the weight of this combination.
|
32
|
+
#
|
33
|
+
# Note: Caching is most of the time useful.
|
34
|
+
#
|
35
|
+
def weight
|
36
|
+
@weight ||= @bundle.weight(@text)
|
37
|
+
end
|
38
|
+
|
39
|
+
# Returns an array of ids for the given text.
|
40
|
+
#
|
41
|
+
# Note: Caching is most of the time useful.
|
42
|
+
#
|
43
|
+
def ids
|
44
|
+
@ids ||= @bundle.ids(@text)
|
45
|
+
end
|
46
|
+
|
47
|
+
# The identifier for this combination.
|
48
|
+
#
|
49
|
+
def identifier
|
50
|
+
"#{bundle.identifier}:#{@token.identifier}"
|
51
|
+
end
|
52
|
+
|
53
|
+
# Is the identifier in the given identifiers?
|
54
|
+
#
|
55
|
+
def in? identifiers
|
56
|
+
identifiers.include? identifier
|
57
|
+
end
|
58
|
+
|
59
|
+
# Combines the category names with the original names.
|
60
|
+
# [
|
61
|
+
# [:title, 'Flarbl', :flarbl],
|
62
|
+
# [:category, 'Gnorf', :gnorf]
|
63
|
+
# ]
|
64
|
+
#
|
65
|
+
def to_result
|
66
|
+
[@category_name, *@token.to_result]
|
67
|
+
end
|
68
|
+
|
69
|
+
# Example:
|
70
|
+
# "exact title:Peter*:peter"
|
71
|
+
#
|
72
|
+
def to_s
|
73
|
+
"#{bundle.identifier} #{to_result.join(':')}"
|
74
|
+
end
|
75
|
+
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
|
5
|
+
# Combinations are a number of Combination-s.
|
6
|
+
#
|
7
|
+
# They are the core of an allocation.
|
8
|
+
# An allocation consists of a number of combinations.
|
9
|
+
#
|
10
|
+
module Combinations # :nodoc:all
|
11
|
+
|
12
|
+
# Base Combinations contain methods for calculating score and ids.
|
13
|
+
#
|
14
|
+
class Base
|
15
|
+
|
16
|
+
attr_reader :combinations
|
17
|
+
|
18
|
+
delegate :empty?, :to => :@combinations
|
19
|
+
|
20
|
+
def initialize combinations = []
|
21
|
+
@combinations = combinations
|
22
|
+
end
|
23
|
+
|
24
|
+
def hash
|
25
|
+
@combinations.hash
|
26
|
+
end
|
27
|
+
|
28
|
+
# Uses user specific weights to calculate a score for the combinations.
|
29
|
+
#
|
30
|
+
def calculate_score weights
|
31
|
+
total_score + weighted_score(weights)
|
32
|
+
end
|
33
|
+
def total_score
|
34
|
+
@combinations.sum &:weight
|
35
|
+
end
|
36
|
+
def weighted_score weights
|
37
|
+
weights.score @combinations
|
38
|
+
end
|
39
|
+
|
40
|
+
# Wrap the combinations into an allocation with the result_identifier.
|
41
|
+
#
|
42
|
+
def pack_into_allocation result_identifier
|
43
|
+
Allocation.new self, result_identifier
|
44
|
+
end
|
45
|
+
|
46
|
+
# Filters the tokens and identifiers such that only identifiers
|
47
|
+
# that are passed in, remain, including their tokens.
|
48
|
+
#
|
49
|
+
# Note: This method is not totally independent of the calculate_ids one.
|
50
|
+
# Since identifiers are only nullified, we need to not include the
|
51
|
+
# ids that have an associated identifier that is nil.
|
52
|
+
#
|
53
|
+
def keep identifiers = []
|
54
|
+
# TODO Rewrite to use the category!!!
|
55
|
+
#
|
56
|
+
@combinations.reject! { |combination| !combination.in?(identifiers) }
|
57
|
+
end
|
58
|
+
|
59
|
+
# Filters the tokens and identifiers such that identifiers
|
60
|
+
# that are passed in, are removed, including their tokens.
|
61
|
+
#
|
62
|
+
# Note: This method is not totally independent of the calculate_ids one.
|
63
|
+
# Since identifiers are only nullified, we need to not include the
|
64
|
+
# ids that have an associated identifier that is nil.
|
65
|
+
#
|
66
|
+
def remove identifiers = []
|
67
|
+
# TODO Rewrite to use the category!!!
|
68
|
+
#
|
69
|
+
@combinations.reject! { |combination| combination.in?(identifiers) }
|
70
|
+
end
|
71
|
+
|
72
|
+
#
|
73
|
+
#
|
74
|
+
def to_result
|
75
|
+
@combinations.map &:to_result
|
76
|
+
end
|
77
|
+
|
78
|
+
end
|
79
|
+
|
80
|
+
end
|
81
|
+
|
82
|
+
end
|
83
|
+
|
84
|
+
end
|
@@ -0,0 +1,58 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
|
5
|
+
# Combinations are a number of Combination-s.
|
6
|
+
#
|
7
|
+
# They are the core of an allocation.
|
8
|
+
# An allocation consists of a number of combinations.
|
9
|
+
#
|
10
|
+
module Combinations # :nodoc:all
|
11
|
+
|
12
|
+
# Memory Combinations contain specific methods for
|
13
|
+
# calculating score and ids in memory.
|
14
|
+
#
|
15
|
+
class Memory < Base
|
16
|
+
|
17
|
+
# Returns the result ids for the allocation.
|
18
|
+
#
|
19
|
+
# Sorts the ids by size and & through them in the following order (sizes):
|
20
|
+
# 0. [100_000, 400, 30, 2]
|
21
|
+
# 1. [2, 30, 400, 100_000]
|
22
|
+
# 2. (100_000 & (400 & (30 & 2))) # => result
|
23
|
+
#
|
24
|
+
# Note: Uses a C-optimized intersection routine for speed and memory efficiency.
|
25
|
+
#
|
26
|
+
# Note: In the memory based version we ignore the (amount) needed hint.
|
27
|
+
# TODO Not ignore it?
|
28
|
+
#
|
29
|
+
def ids _, _
  # Both hints (amount, offset) are ignored in the memory version.
  return [] if @combinations.empty?

  # One array of ids per combination.
  #
  id_arrays = @combinations.map { |combination| combination.ids }

  # Smallest arrays go first before handing them to the intersect.
  #
  id_arrays.sort! { |left, right| left.size <=> right.size }

  # Intersect using the C routine.
  #
  Performant::Array.memory_efficient_intersect id_arrays
end
|
51
|
+
|
52
|
+
end
|
53
|
+
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
|
5
|
+
# Combinations are a number of Combination-s.
|
6
|
+
#
|
7
|
+
# They are the core of an allocation.
|
8
|
+
# An allocation consists of a number of combinations.
|
9
|
+
#
|
10
|
+
module Combinations # :nodoc:all
|
11
|
+
|
12
|
+
# Redis Combinations contain specific methods for
|
13
|
+
# calculating score and ids in Redis.
|
14
|
+
#
|
15
|
+
class Redis < Base
|
16
|
+
|
17
|
+
# TODO Err… yeah. Wrap in Picky specific wrapper.
|
18
|
+
#
|
19
|
+
def initialize combinations
|
20
|
+
super combinations
|
21
|
+
|
22
|
+
@@redis ||= ::Redis.new
|
23
|
+
end
|
24
|
+
|
25
|
+
# Returns the result ids for the allocation.
#
# Intersects the Redis keys named by each combination's identifier
# into an intermediate key (ZINTERSTORE), reads the requested window
# from it (ZRANGE) and removes the intermediate key again.
#
# Parameters:
#  * amount: the amount of ids wanted
#  * offset: the index of the first id wanted
#
# Returns an empty array if there are no combinations.
#
# NOTE(review): ZRANGE treats its stop index as inclusive, so the
#               window offset..(offset + amount) can yield up to
#               amount + 1 ids. Left unchanged here since callers
#               may rely on it -- confirm before tightening.
#
def ids amount, offset
  return [] if @combinations.empty?

  identifiers = @combinations.map { |combination| combination.identifier.to_s }

  result_id = generate_intermediate_result_id

  # TODO multi?
  #
  @@redis.zinterstore result_id, identifiers

  begin
    @@redis.zrange result_id, offset, (offset + amount)
  ensure
    # Do not leave the intermediate result behind in Redis.
    #
    @@redis.del result_id
  end
end
|
43
|
+
|
44
|
+
# Generate a multiple host/process safe result id.
|
45
|
+
#
|
46
|
+
# TODO How expensive is Process.pid? If it changes once, remember forever?
|
47
|
+
#
|
48
|
+
def generate_intermediate_result_id
|
49
|
+
# TODO host -> extract host.
|
50
|
+
:"host:#{Process.pid}:picky:result"
|
51
|
+
end
|
52
|
+
|
53
|
+
end
|
54
|
+
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
@@ -0,0 +1,180 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Query
|
4
|
+
|
5
|
+
# The query indexes class bundles indexes given to a query.
|
6
|
+
#
|
7
|
+
# Example:
|
8
|
+
# # If you call
|
9
|
+
# Query::Full.new dvd_index, mp3_index, video_index
|
10
|
+
#
|
11
|
+
# # What it does is take the three given (API-) indexes and
|
12
|
+
# # bundle them in an index bundle.
|
13
|
+
#
|
14
|
+
class Indexes
|
15
|
+
|
16
|
+
attr_reader :indexes
|
17
|
+
|
18
|
+
# Creates a new Query::Indexes.
|
19
|
+
#
|
20
|
+
# Its job is to generate all possible combinations, but also
|
21
|
+
# checking whether the query indexes are all of the same type.
|
22
|
+
# Note: We cannot mix memory and redis indexes just yet.
|
23
|
+
#
|
24
|
+
def initialize *index_definitions, combinations_type
|
25
|
+
@combinations_type = combinations_type
|
26
|
+
@indexes = index_definitions.map &:indexed
|
27
|
+
end
|
28
|
+
|
29
|
+
# Returns a number of possible allocations for the given tokens.
|
30
|
+
#
|
31
|
+
def allocations_for tokens
  gathered = []

  indexes.each do |index|
    # Ask the tokens for their possible combinations in this index
    # and drop the tokens that allocate to nothing.
    #
    candidates = tokens.possible_combinations_in index
    candidates.compact!

    # Expand into all possible combinations; skip this index if there are none.
    #
    expanded = expand_combinations_from candidates
    next unless expanded

    # Wrap each expanded combination into an allocation.
    #
    expanded.map! do |combination_list|
      @combinations_type.new(combination_list).pack_into_allocation index.result_identifier
    end

    gathered += expanded
  end

  Allocations.new gathered
end
|
61
|
+
|
62
|
+
# This is the core of the search engine.
#
# Gets an array of
# [
#  [<combinations for token1>],
#  [<combinations for token2>],
#  [<combinations for token3>]
# ]
#
# Generates all possible allocations of combinations, i.e. the
# cartesian product of the given arrays, in this order:
# [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
#
# Parameters:
#  * possible_combinations: an array with one array of combinations per token
#
# Returns nil if there are no combinations at all, or if one of the
# tokens has no combinations (it cannot be allocated).
#
# Note: The given array is not modified.
#
def expand_combinations_from possible_combinations
  return if possible_combinations.empty?
  return if possible_combinations.any?(&:empty?)

  first, *rest = possible_combinations

  # Array#product yields the same elements in the same order as
  # expanding each array by hand and zipping the results together.
  #
  first.product(*rest)
end
|
175
|
+
|
176
|
+
end
|
177
|
+
|
178
|
+
end
|
179
|
+
|
180
|
+
end
|
@@ -0,0 +1,81 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
#
|
3
|
+
module Internals
|
4
|
+
|
5
|
+
#
|
6
|
+
#
|
7
|
+
module Query
|
8
|
+
|
9
|
+
# A single qualifier.
|
10
|
+
#
|
11
|
+
class Qualifier # :nodoc:all

  attr_reader :normalized_qualifier, :codes

  # Remembers the normalized qualifier and its codes.
  #
  # codes is an array; each of its entries is converted to a symbol.
  #
  def initialize normalized_qualifier, codes
    @normalized_qualifier = normalized_qualifier
    @codes = codes.map { |code| code.to_sym }
  end

  # Stores the normalized qualifier in the given hash under each code.
  #
  # An entry already present under a code is overwritten.
  #
  def inject_into hash
    target = normalized_qualifier
    codes.each { |code| hash[code] = target }
  end

end
|
33
|
+
|
34
|
+
# Collection class for qualifiers.
|
35
|
+
#
|
36
|
+
class Qualifiers # :nodoc:all
|
37
|
+
|
38
|
+
include Singleton
|
39
|
+
|
40
|
+
attr_reader :qualifiers, :normalization_mapping
|
41
|
+
|
42
|
+
delegate :<<, :to => :qualifiers
|
43
|
+
|
44
|
+
#
|
45
|
+
#
|
46
|
+
def initialize
|
47
|
+
@qualifiers = []
|
48
|
+
@normalization_mapping = {}
|
49
|
+
end
|
50
|
+
|
51
|
+
# TODO Spec.
|
52
|
+
#
|
53
|
+
def self.add name, qualifiers
|
54
|
+
instance << Qualifier.new(name, qualifiers)
|
55
|
+
end
|
56
|
+
|
57
|
+
# Uses the qualifiers to prepare (optimize) the qualifier handling.
|
58
|
+
#
|
59
|
+
def prepare
|
60
|
+
qualifiers.each do |qualifier|
|
61
|
+
qualifier.inject_into normalization_mapping
|
62
|
+
end
|
63
|
+
end
|
64
|
+
|
65
|
+
# Normalizes the given qualifier.
|
66
|
+
#
|
67
|
+
# Returns nil if it is not allowed, the normalized qualifier if it is.
|
68
|
+
#
|
69
|
+
# Note: Normalizes.
|
70
|
+
#
|
71
|
+
def normalize qualifier
|
72
|
+
return nil if qualifier.blank?
|
73
|
+
|
74
|
+
normalization_mapping[qualifier.to_sym]
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
end
|
80
|
+
|
81
|
+
end
|