picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -0,0 +1,137 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
# Container class for allocations.
|
|
5
|
+
#
|
|
6
|
+
class Allocations # :nodoc:all
|
|
7
|
+
|
|
8
|
+
# TODO Remove size
|
|
9
|
+
#
|
|
10
|
+
delegate :each, :inject, :empty?, :size, :to => :@allocations
|
|
11
|
+
attr_reader :total
|
|
12
|
+
|
|
13
|
+
# Wraps the given array of allocations.
#
# The array is stored as-is (it is not copied), so the in-place
# operations of this class (e.g. sort!, uniq!) act on the very
# array object that was passed in.
#
def initialize allocations = []
|
|
14
|
+
@allocations = allocations
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
# Score each allocation.
|
|
18
|
+
#
|
|
19
|
+
# Lets every wrapped allocation calculate its own score
# using the given weights.
#
# Returns the wrapped allocations array (the result of #each).
#
def calculate_score weights
  @allocations.each { |allocation| allocation.calculate_score weights }
end
|
|
24
|
+
# Sort the allocations.
|
|
25
|
+
#
|
|
26
|
+
# Sorts the wrapped allocations array in place (Array#sort!),
# i.e. the order is defined by the allocations' own <=>.
#
# Returns the sorted array.
#
def sort
|
|
27
|
+
@allocations.sort!
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
# Reduces the amount of allocations to x.
|
|
31
|
+
#
|
|
32
|
+
# Keeps only the first +amount+ allocations.
#
# Note: Array#shift removes the leading elements from the array that was
#       wrapped so far; the removed elements become the new wrapped array.
#
# Returns the new (reduced) array of allocations.
#
def reduce_to amount
  leading = @allocations.shift amount
  @allocations = leading
end
|
|
35
|
+
|
|
36
|
+
# Keeps combinations.
|
|
37
|
+
#
|
|
38
|
+
# Only those passed in remain.
|
|
39
|
+
#
|
|
40
|
+
# Tells each allocation to keep only the given identifiers.
#
# Does nothing (and returns nil) if no identifiers are given,
# otherwise returns the wrapped allocations array.
#
def keep identifiers = []
  return if identifiers.empty?

  @allocations.each { |allocation| allocation.keep identifiers }
end
|
|
43
|
+
# Removes combinations.
|
|
44
|
+
#
|
|
45
|
+
# Only those passed in are removed.
|
|
46
|
+
#
|
|
47
|
+
# TODO Rewrite.
|
|
48
|
+
#
|
|
49
|
+
# Tells each allocation to remove the given identifiers.
#
# Does nothing (and returns nil) if no identifiers are given,
# otherwise returns the wrapped allocations array.
#
def remove identifiers = []
  return if identifiers.empty?

  @allocations.each { |allocation| allocation.remove identifiers }
end
|
|
52
|
+
|
|
53
|
+
# Returns the top amount ids.
|
|
54
|
+
#
|
|
55
|
+
# Returns at most +amount+ ids, collected from the allocations in order.
#
# Parameters:
#  * amount: the maximum number of ids to return (default 20)
#
# Allocations are only asked for their ids as long as fewer than
# +amount+ ids have been collected.
#
# Note: The result is always cut to +amount+. Previously the limit was
#       only checked before an allocation's ids were added, so the ids of
#       the last visited allocation could push the result over +amount+.
#
def ids amount = 20
  collected = []
  @allocations.each do |allocation|
    break if collected.size >= amount
    collected.concat allocation.ids
  end
  collected.first amount
end
|
|
60
|
+
|
|
61
|
+
# Returns a random id from the allocations.
|
|
62
|
+
#
|
|
63
|
+
# Note: This is an ok algorithm for small id sets.
|
|
64
|
+
#
|
|
65
|
+
# But still TODO try for a faster one.
|
|
66
|
+
#
|
|
67
|
+
# TODO For the 1 amount random case this needs to be improved.
|
|
68
|
+
#
|
|
69
|
+
# Returns up to +amount+ randomly chosen ids of the first allocation.
#
# Parameters:
#  * amount: how many ids to pick (default 1)
#
# Returns [] if there are no allocations. No id position is picked twice;
# if +amount+ exceeds the number of ids, all ids are returned in random order.
#
# Note: Uses Array#sample instead of shuffling a separate index array.
#
def random_ids amount = 1
  return [] if @allocations.empty?

  @allocations.first.ids.sample amount
end
|
|
75
|
+
|
|
76
|
+
# This is the main method of this class that will replace ids and count.
|
|
77
|
+
#
|
|
78
|
+
# What it does is calculate the ids and counts of its allocations
|
|
79
|
+
# for being used in the results. It also calculates the total count of all allocations.
|
|
80
|
+
#
|
|
81
|
+
# Parameters:
|
|
82
|
+
# * amount: the amount of ids to calculate
|
|
83
|
+
# * offset: the offset from where in the result set to take the ids
|
|
84
|
+
#
|
|
85
|
+
# Note: With an amount of 0, an offset > 0 doesn't make much
|
|
86
|
+
# sense, as seen in the live search.
|
|
87
|
+
#
|
|
88
|
+
# Note: Each allocation caches its count, but not its ids (thrown away).
|
|
89
|
+
# The ids are cached in this class.
|
|
90
|
+
#
|
|
91
|
+
# Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
|
|
92
|
+
#
|
|
93
|
+
# Lets each allocation process itself and sums up the total count.
#
# Parameters:
#  * amount: the amount of ids still needed
#  * offset: the offset into the overall result set (default 0)
#
# Each allocation is called with the amount and offset that remain
# after the allocations before it. The sum of all allocation counts
# is stored in @total.
#
# Returns the wrapped allocations array (the result of #each).
#
def process! amount, offset = 0
  @total = 0
  @allocations.each do |allocation|
    ids = allocation.process! amount, offset
    @total += allocation.count # the total mixed in
    if ids.empty?
      # Nothing was taken from this allocation: it only consumes offset.
      offset -= allocation.count unless offset.zero?
    else
      amount -= ids.size # we need less results from the following allocation
      offset = 0         # we have already passed the offset
    end
  end
end
|
|
107
|
+
|
|
108
|
+
# Removes duplicate allocations from the wrapped array in place.
#
# Note: Array#uniq! identifies duplicates via #hash and #eql? of the
#       allocations, and returns nil if no duplicates were removed.
#
def uniq
|
|
109
|
+
@allocations.uniq!
|
|
110
|
+
end
|
|
111
|
+
|
|
112
|
+
# Returns the wrapped allocations array itself (not a copy).
#
def to_a
|
|
113
|
+
@allocations
|
|
114
|
+
end
|
|
115
|
+
|
|
116
|
+
# Simply inspects the internal allocations.
|
|
117
|
+
#
|
|
118
|
+
# Returns the #inspect string of the wrapped allocations array.
#
def to_s
|
|
119
|
+
@allocations.inspect
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Allocations for results are in the form:
|
|
123
|
+
# [
|
|
124
|
+
# allocation1.to_result,
|
|
125
|
+
# allocation2.to_result
|
|
126
|
+
# ...
|
|
127
|
+
# ]
|
|
128
|
+
#
|
|
129
|
+
# Collects the results of all allocations, leaving out
# the allocations that return nil from #to_result.
#
def to_result
  results = @allocations.map { |allocation| allocation.to_result }
  results.compact
end
|
|
132
|
+
|
|
133
|
+
end
|
|
134
|
+
|
|
135
|
+
end
|
|
136
|
+
|
|
137
|
+
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# Describes the combination of a token (the text) and
|
|
6
|
+
# the index (the bundle): [text, index_bundle]
|
|
7
|
+
#
|
|
8
|
+
# A combination is a single part of an allocation:
|
|
9
|
+
# [..., [text2, index_bundle2], ...]
|
|
10
|
+
#
|
|
11
|
+
# An allocation consists of a number of combinations:
|
|
12
|
+
# [[text1, index_bundle1], [text2, index_bundle2], [text3, index_bundle1]]
|
|
13
|
+
#
|
|
14
|
+
class Combination # :nodoc:all

  attr_reader :token, :bundle, :category_name

  # Parameters:
  #  * token:    responds to #text, #identifier, #to_result and #to_s
  #  * category: responds to #name and #bundle_for(token)
  #
  # The token's text is remembered at construction time and used
  # for all later weight and ids lookups in the bundle.
  #
  def initialize token, category
    @token         = token
    @category_name = category.name
    @bundle        = category.bundle_for token
    @text          = @token.text # don't want to use reset_similar already
  end

  # Hash over the token's string form and the bundle.
  #
  # Note: Required for uniq!
  #
  def hash
    key = [@token.to_s, @bundle]
    key.hash
  end

  # The weight the bundle holds for the remembered text.
  #
  # Note: Cached after the first lookup that yields a value.
  #
  def weight
    return @weight if @weight

    @weight = @bundle.weight @text
  end

  # The ids the bundle holds for the remembered text.
  #
  # Note: Cached after the first lookup that yields a value.
  #
  def ids
    return @ids if @ids

    @ids = @bundle.ids @text
  end

  # The identifier for this combination:
  #   "<bundle identifier>:<token identifier>"
  #
  def identifier
    bundle_part = bundle.identifier
    token_part  = @token.identifier
    "#{bundle_part}:#{token_part}"
  end

  # Is this combination's identifier one of the given identifiers?
  #
  def in? identifiers
    identifiers.include?(identifier)
  end

  # The category name followed by the elements of the token's result, e.g.
  #   [:title, 'Flarbl', :flarbl]
  #
  def to_result
    [@category_name, *@token.to_result]
  end

  # Example:
  #  "exact title:Peter*:peter"
  #
  def to_s
    joined_result = to_result.join(':')
    "#{bundle.identifier} #{joined_result}"
  end

end
|
|
77
|
+
|
|
78
|
+
end
|
|
79
|
+
|
|
80
|
+
end
|
|
@@ -0,0 +1,84 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# Combinations are a number of Combination-s.
|
|
6
|
+
#
|
|
7
|
+
# They are the core of an allocation.
|
|
8
|
+
# An allocation consists of a number of combinations.
|
|
9
|
+
#
|
|
10
|
+
module Combinations # :nodoc:all
|
|
11
|
+
|
|
12
|
+
# Base Combinations contain methods for calculating score and ids.
|
|
13
|
+
#
|
|
14
|
+
class Base

  attr_reader :combinations

  delegate :empty?, :to => :@combinations

  # Wraps the given array of combinations (stored as-is, not copied).
  #
  def initialize combinations = []
    @combinations = combinations
  end

  # The hash of the wrapped combinations array.
  #
  def hash
    @combinations.hash
  end

  # The score of these combinations: the sum of the combinations'
  # weights plus the score the given (user specific) weights
  # assign to the combinations.
  #
  def calculate_score weights
    base_score = total_score
    base_score + weighted_score(weights)
  end

  # Sum of the weights of all combinations.
  #
  def total_score
    @combinations.sum(&:weight)
  end

  # The score the given weights object calculates for the combinations.
  #
  def weighted_score weights
    weights.score(@combinations)
  end

  # Wraps these combinations into an Allocation,
  # together with the given result_identifier.
  #
  def pack_into_allocation result_identifier
    Allocation.new(self, result_identifier)
  end

  # Removes, in place, every combination whose identifier
  # is not in the given identifiers.
  #
  # Returns nil if no combination was removed (see Array#reject!).
  #
  # TODO Rewrite to use the category!!!
  #
  def keep identifiers = []
    @combinations.reject! do |combination|
      listed = combination.in? identifiers
      !listed
    end
  end

  # Removes, in place, every combination whose identifier
  # is in the given identifiers.
  #
  # Returns nil if no combination was removed (see Array#reject!).
  #
  # TODO Rewrite to use the category!!!
  #
  def remove identifiers = []
    @combinations.reject! do |combination|
      combination.in? identifiers
    end
  end

  # The results of all combinations, in order.
  #
  def to_result
    @combinations.map { |combination| combination.to_result }
  end

end
|
|
79
|
+
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
end
|
|
@@ -0,0 +1,58 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# Combinations are a number of Combination-s.
|
|
6
|
+
#
|
|
7
|
+
# They are the core of an allocation.
|
|
8
|
+
# An allocation consists of a number of combinations.
|
|
9
|
+
#
|
|
10
|
+
module Combinations # :nodoc:all
|
|
11
|
+
|
|
12
|
+
# Memory Combinations contain specific methods for
|
|
13
|
+
# calculating score and ids in memory.
|
|
14
|
+
#
|
|
15
|
+
class Memory < Base
|
|
16
|
+
|
|
17
|
+
# Returns the result ids for the allocation.
|
|
18
|
+
#
|
|
19
|
+
# Sorts the ids by size and & through them in the following order (sizes):
|
|
20
|
+
# 0. [100_000, 400, 30, 2]
|
|
21
|
+
# 1. [2, 30, 400, 100_000]
|
|
22
|
+
# 2. (100_000 & (400 & (30 & 2))) # => result
|
|
23
|
+
#
|
|
24
|
+
# Note: Uses a C-optimized intersection routine for speed and memory efficiency.
|
|
25
|
+
#
|
|
26
|
+
# Note: In the memory based version we ignore the (amount) needed hint.
|
|
27
|
+
# TODO Not ignore it?
|
|
28
|
+
#
|
|
29
|
+
# Returns the intersection of the ids of all combinations.
#
# The two parameters (amount and offset hints) are ignored
# in the memory based version.
#
# Returns [] if there are no combinations.
#
def ids _, _
  return [] if @combinations.empty?

  # One ids array per combination.
  #
  id_arrays = @combinations.map { |combination| combination.ids }

  # Smallest array first, such that the intersect can be performed faster.
  #
  id_arrays.sort! { |one, other| one.size <=> other.size }

  # Hand over to the optimized intersection routine.
  #
  Performant::Array.memory_efficient_intersect id_arrays
end
|
|
51
|
+
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
end
|
|
@@ -0,0 +1,59 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# Combinations are a number of Combination-s.
|
|
6
|
+
#
|
|
7
|
+
# They are the core of an allocation.
|
|
8
|
+
# An allocation consists of a number of combinations.
|
|
9
|
+
#
|
|
10
|
+
module Combinations # :nodoc:all
|
|
11
|
+
|
|
12
|
+
# Redis Combinations contain specific methods for
|
|
13
|
+
# calculating score and ids in Redis.
|
|
14
|
+
#
|
|
15
|
+
class Redis < Base

  # Wraps the combinations and lazily creates the Redis client
  # that is shared through the @@redis class variable.
  #
  # TODO Err... yeah. Wrap in Picky specific wrapper.
  #
  def initialize combinations
    super combinations

    @@redis ||= ::Redis.new
  end

  # Returns the result ids for the allocation.
  #
  # Parameters:
  #  * amount: the maximum number of ids to return
  #  * offset: the (zero-based) position of the first id to return
  #
  # Intersects the sorted sets stored under the combinations' identifiers
  # into an intermediate result set (ZINTERSTORE) and reads the requested
  # window from it (ZRANGE).
  #
  # Returns [] if there are no combinations or if amount is not positive.
  #
  # Note: The stop index of ZRANGE is inclusive, so the window
  #       ends at offset + amount - 1.
  #
  def ids amount, offset
    return [] if @combinations.empty?
    return [] if amount < 1

    identifiers = @combinations.map { |combination| combination.identifier.to_s }

    result_id = generate_intermediate_result_id

    # TODO multi?
    #
    @@redis.zinterstore result_id, identifiers

    @@redis.zrange result_id, offset, (offset + amount - 1)
  end

  # Generates the key under which the intermediate result is stored.
  #
  # The key contains the process id, so it differs per process.
  #
  # TODO How expensive is Process.pid? If it changes once, remember forever?
  #
  def generate_intermediate_result_id
    # TODO host -> extract host.
    :"host:#{Process.pid}:picky:result"
  end

end
|
|
54
|
+
|
|
55
|
+
end
|
|
56
|
+
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
end
|
|
@@ -0,0 +1,180 @@
|
|
|
1
|
+
module Internals
|
|
2
|
+
|
|
3
|
+
module Query
|
|
4
|
+
|
|
5
|
+
# The query indexes class bundles indexes given to a query.
|
|
6
|
+
#
|
|
7
|
+
# Example:
|
|
8
|
+
# # If you call
|
|
9
|
+
# Query::Full.new dvd_index, mp3_index, video_index
|
|
10
|
+
#
|
|
11
|
+
# # What it does is take the three given (API-) indexes and
|
|
12
|
+
# # bundle them in an index bundle.
|
|
13
|
+
#
|
|
14
|
+
class Indexes

  attr_reader :indexes

  # Creates a new Query::Indexes.
  #
  # Parameters:
  #  * index_definitions: any number of objects responding to #indexed
  #  * combinations_type: the class used to wrap each expanded combination;
  #                       its instances must respond to #pack_into_allocation
  #
  def initialize *index_definitions, combinations_type
    @combinations_type = combinations_type
    @indexes           = index_definitions.map(&:indexed)
  end

  # Returns an Allocations instance holding all possible
  # allocations for the given tokens, index by index.
  #
  def allocations_for tokens
    allocations = []

    indexes.each do |index|
      possible_combinations = tokens.possible_combinations_in index

      # Tokens that allocate to nothing (nil) can be ignored.
      # For example in a special search, where "florian" is not
      # mapped to any category.
      #
      possible_combinations.compact!

      expanded_combinations = expand_combinations_from possible_combinations

      # If there are none, try the next index.
      #
      next unless expanded_combinations

      # Wrap each expanded combination and add it to the ones we already have.
      #
      # TODO Do not extract result_identifier. Remove pack_into_allocation.
      #
      expanded_combinations.each do |expanded_combination|
        allocations << @combinations_type.new(expanded_combination).pack_into_allocation(index.result_identifier)
      end
    end

    Allocations.new allocations
  end

  # This is the core of the search engine.
  #
  # Gets an array of
  # [
  #  [<combinations for token1>],
  #  [<combinations for token2>],
  #  [<combinations for token3>]
  # ]
  #
  # Generates all possible allocations of combinations, i.e. the
  # cartesian product of the given arrays, in this order:
  # [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], ..., [3,c,m]]
  #
  # Returns nil if there are no combinations at all, or if one of the
  # tokens has no combinations (that token cannot be allocated).
  #
  # Note: Uses Array#product and does not modify the given array.
  #
  def expand_combinations_from possible_combinations
    return if possible_combinations.empty?
    return if possible_combinations.any?(&:empty?)

    first, *rest = possible_combinations
    first.product(*rest)
  end

end
|
|
177
|
+
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
end
|
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
#
|
|
3
|
+
module Internals
|
|
4
|
+
|
|
5
|
+
#
|
|
6
|
+
#
|
|
7
|
+
module Query
|
|
8
|
+
|
|
9
|
+
# A single qualifier.
|
|
10
|
+
#
|
|
11
|
+
class Qualifier # :nodoc:all

  attr_reader :normalized_qualifier, :codes

  # Parameters:
  #  * normalized_qualifier: the value all codes are mapped to
  #  * codes: an array; its elements are converted to symbols
  #
  def initialize normalized_qualifier, codes
    @normalized_qualifier = normalized_qualifier
    @codes                = codes.map { |code| code.to_sym }
  end

  # Maps each of the codes to the normalized qualifier in the given hash.
  #
  # Will overwrite if the key is present in the hash.
  #
  # Returns the codes.
  #
  def inject_into hash
    codes.each { |code| hash[code] = normalized_qualifier }
  end

end
|
|
33
|
+
|
|
34
|
+
# Collection class for qualifiers.
|
|
35
|
+
#
|
|
36
|
+
class Qualifiers # :nodoc:all

  include Singleton

  attr_reader :qualifiers, :normalization_mapping

  delegate :<<, :to => :qualifiers

  # Starts with no qualifiers and an empty normalization mapping.
  #
  def initialize
    @qualifiers            = []
    @normalization_mapping = {}
  end

  # Adds a new Qualifier (built from the name and the
  # given qualifier codes) to the singleton instance.
  #
  # TODO Spec.
  #
  def self.add name, qualifiers
    registry = instance
    registry << Qualifier.new(name, qualifiers)
  end

  # Lets every qualifier inject its codes into the normalization mapping,
  # such that #normalize is a simple hash lookup.
  #
  def prepare
    qualifiers.each { |qualifier| qualifier.inject_into normalization_mapping }
  end

  # Normalizes the given qualifier.
  #
  # Returns nil if the qualifier is blank or not in the normalization
  # mapping, the normalized qualifier if it is.
  #
  def normalize qualifier
    qualifier.blank? ? nil : normalization_mapping[qualifier.to_sym]
  end

end
|
|
78
|
+
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
end
|