picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -1,130 +0,0 @@
|
|
|
1
|
-
module Query
|
|
2
|
-
# Container class for allocations.
|
|
3
|
-
#
|
|
4
|
-
class Allocations # :nodoc:all
|
|
5
|
-
|
|
6
|
-
# TODO Remove size
|
|
7
|
-
#
|
|
8
|
-
delegate :each, :inject, :empty?, :size, :to => :@allocations
|
|
9
|
-
attr_reader :total
|
|
10
|
-
|
|
11
|
-
def initialize allocations = []
|
|
12
|
-
@allocations = allocations
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
# Score each allocation.
|
|
16
|
-
#
|
|
17
|
-
def calculate_score weights
|
|
18
|
-
@allocations.each do |allocation|
|
|
19
|
-
allocation.calculate_score weights
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
# Sort the allocations.
|
|
23
|
-
#
|
|
24
|
-
def sort
|
|
25
|
-
@allocations.sort!
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# Reduces the amount of allocations to x.
|
|
29
|
-
#
|
|
30
|
-
def reduce_to amount
|
|
31
|
-
@allocations = @allocations.shift amount
|
|
32
|
-
end
|
|
33
|
-
|
|
34
|
-
# Keeps combinations.
|
|
35
|
-
#
|
|
36
|
-
# Only those passed in remain.
|
|
37
|
-
#
|
|
38
|
-
def keep identifiers = []
|
|
39
|
-
@allocations.each { |allocation| allocation.keep identifiers } unless identifiers.empty?
|
|
40
|
-
end
|
|
41
|
-
# Removes combinations.
|
|
42
|
-
#
|
|
43
|
-
# Only those passed in are removed.
|
|
44
|
-
#
|
|
45
|
-
# TODO Rewrite
|
|
46
|
-
#
|
|
47
|
-
def remove identifiers = []
|
|
48
|
-
@allocations.each { |allocation| allocation.remove identifiers } unless identifiers.empty?
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Returns the top amount ids.
|
|
52
|
-
#
|
|
53
|
-
def ids amount = 20
|
|
54
|
-
@allocations.inject([]) do |total, allocation|
|
|
55
|
-
total.size >= amount ? (return total.shift(amount)) : total + allocation.ids
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Returns a random id from the allocations.
|
|
60
|
-
#
|
|
61
|
-
# Note: This is an ok algorithm for small id sets.
|
|
62
|
-
#
|
|
63
|
-
# But still TODO try for a faster one.
|
|
64
|
-
#
|
|
65
|
-
def random_ids amount = 1
|
|
66
|
-
return [] if @allocations.empty?
|
|
67
|
-
ids = @allocations.first.ids
|
|
68
|
-
indexes = Array.new(ids.size) { |id| id }.sort_by { rand }
|
|
69
|
-
indexes.first(amount).map { |id| ids[id] }
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
# This is the main method of this class that will replace ids and count.
|
|
73
|
-
#
|
|
74
|
-
# What it does is calculate the ids and counts of its allocations
|
|
75
|
-
# to be used in the results. It also calculates the total count.
|
|
76
|
-
#
|
|
77
|
-
# Parameters:
|
|
78
|
-
# * amount: the amount of ids to calculate
|
|
79
|
-
# * offset: the offset from where in the result set to take the ids
|
|
80
|
-
#
|
|
81
|
-
# Note: With an amount of 0, an offset > 0 doesn't make much
|
|
82
|
-
# sense, as seen in the live search.
|
|
83
|
-
#
|
|
84
|
-
# Note: Each allocation caches its count, but not its ids (thrown away).
|
|
85
|
-
# The ids are cached in this class.
|
|
86
|
-
#
|
|
87
|
-
# Note: It's possible that no ids are returned by an allocation, but a count. (In case of an offset)
|
|
88
|
-
#
|
|
89
|
-
def process! amount, offset = 0
|
|
90
|
-
@total = 0
|
|
91
|
-
current_offset = 0
|
|
92
|
-
@allocations.each do |allocation|
|
|
93
|
-
ids = allocation.process! amount, offset
|
|
94
|
-
@total = @total + allocation.count # the total mixed in
|
|
95
|
-
if ids.empty?
|
|
96
|
-
offset = offset - allocation.count unless offset.zero?
|
|
97
|
-
else
|
|
98
|
-
amount = amount - ids.size # we need less results from the following allocation
|
|
99
|
-
offset = 0 # we have already passed the offset
|
|
100
|
-
end
|
|
101
|
-
end
|
|
102
|
-
end
|
|
103
|
-
|
|
104
|
-
def uniq
|
|
105
|
-
@allocations.uniq!
|
|
106
|
-
end
|
|
107
|
-
|
|
108
|
-
def to_a
|
|
109
|
-
@allocations
|
|
110
|
-
end
|
|
111
|
-
|
|
112
|
-
# Simply inspects the internal allocations.
|
|
113
|
-
#
|
|
114
|
-
def to_s
|
|
115
|
-
@allocations.inspect
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
# Allocations for results are in the form:
|
|
119
|
-
# [
|
|
120
|
-
# allocation1.to_result,
|
|
121
|
-
# allocation2.to_result
|
|
122
|
-
# ...
|
|
123
|
-
# ]
|
|
124
|
-
#
|
|
125
|
-
def to_result
|
|
126
|
-
@allocations.map(&:to_result).compact
|
|
127
|
-
end
|
|
128
|
-
|
|
129
|
-
end
|
|
130
|
-
end
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
module Query
|
|
2
|
-
|
|
3
|
-
# Describes the combination of a token (the text) and
|
|
4
|
-
# the index (the bundle).
|
|
5
|
-
#
|
|
6
|
-
# A combination is a single part of an allocation.
|
|
7
|
-
#
|
|
8
|
-
# An allocation consists of a number of combinations.
|
|
9
|
-
#
|
|
10
|
-
class Combination # :nodoc:all
|
|
11
|
-
|
|
12
|
-
attr_reader :token, :bundle, :category_name
|
|
13
|
-
|
|
14
|
-
def initialize token, category
|
|
15
|
-
@token = token
|
|
16
|
-
@category_name = category.name
|
|
17
|
-
@bundle = category.bundle_for token
|
|
18
|
-
@text = @token.text # don't want to use reset_similar already
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Note: Required for uniq!
|
|
22
|
-
#
|
|
23
|
-
def hash
|
|
24
|
-
[@token.to_s, @bundle].hash
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
# Returns the weight of this combination.
|
|
28
|
-
#
|
|
29
|
-
# Note: Caching is useful most of the time.
|
|
30
|
-
#
|
|
31
|
-
def weight
|
|
32
|
-
@weight ||= @bundle.weight(@text)
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
# Returns an array of ids for the given text.
|
|
36
|
-
#
|
|
37
|
-
# Note: Caching is useful most of the time.
|
|
38
|
-
#
|
|
39
|
-
def ids
|
|
40
|
-
@ids ||= @bundle.ids(@text)
|
|
41
|
-
end
|
|
42
|
-
|
|
43
|
-
# The identifier for this combination.
|
|
44
|
-
#
|
|
45
|
-
def identifier
|
|
46
|
-
@category_name
|
|
47
|
-
end
|
|
48
|
-
|
|
49
|
-
# Is the identifier in the given identifiers?
|
|
50
|
-
#
|
|
51
|
-
def in? identifiers
|
|
52
|
-
identifiers.include? identifier
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Combines the category names with the original names.
|
|
56
|
-
# [
|
|
57
|
-
# [:title, 'Flarbl', :flarbl],
|
|
58
|
-
# [:category, 'Gnorf', :gnorf]
|
|
59
|
-
# ]
|
|
60
|
-
#
|
|
61
|
-
def to_result
|
|
62
|
-
[identifier, *@token.to_result]
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# Example:
|
|
66
|
-
# "exact title:Peter*:peter"
|
|
67
|
-
#
|
|
68
|
-
def to_s
|
|
69
|
-
"#{bundle.identifier} #{to_result.join(':')}"
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
end
|
|
73
|
-
|
|
74
|
-
end
|
|
@@ -1,105 +0,0 @@
|
|
|
1
|
-
module Query
|
|
2
|
-
|
|
3
|
-
# Combinations are a number of Combination-s.
|
|
4
|
-
#
|
|
5
|
-
# They are the core of an allocation.
|
|
6
|
-
#
|
|
7
|
-
class Combinations # :nodoc:all
|
|
8
|
-
|
|
9
|
-
attr_reader :combinations
|
|
10
|
-
|
|
11
|
-
delegate :empty?, :to => :@combinations
|
|
12
|
-
|
|
13
|
-
def initialize combinations = []
|
|
14
|
-
@combinations = combinations
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
def hash
|
|
18
|
-
@combinations.hash
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Uses user specific weights to calculate a score for the combinations.
|
|
22
|
-
#
|
|
23
|
-
def calculate_score weights
|
|
24
|
-
total_score + weighted_score(weights)
|
|
25
|
-
end
|
|
26
|
-
def total_score
|
|
27
|
-
@combinations.sum &:weight
|
|
28
|
-
end
|
|
29
|
-
def weighted_score weights
|
|
30
|
-
weights.score @combinations
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# Gets all ids for the allocations.
|
|
34
|
-
#
|
|
35
|
-
# Sorts the ids by size and & through them in the following order (sizes):
|
|
36
|
-
# 0. [100_000, 400, 30, 2]
|
|
37
|
-
# 1. [2, 30, 400, 100_000]
|
|
38
|
-
# 2. (100_000 & (400 & (30 & 2))) # => result
|
|
39
|
-
#
|
|
40
|
-
# Returns the ids. Also sets the count.
|
|
41
|
-
#
|
|
42
|
-
# Note: Uses a C-optimized intersection routine for speed and memory efficiency.
|
|
43
|
-
#
|
|
44
|
-
def ids
|
|
45
|
-
return [] if @combinations.empty?
|
|
46
|
-
|
|
47
|
-
# Get the ids for each combination.
|
|
48
|
-
#
|
|
49
|
-
id_arrays = @combinations.inject([]) do |total, combination|
|
|
50
|
-
total << combination.ids
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
# Order by smallest size first such that the intersect can be performed faster.
|
|
54
|
-
#
|
|
55
|
-
# TODO Move into the memory_efficient_intersect such that
|
|
56
|
-
# this precondition for a fast algorithm is always given.
|
|
57
|
-
#
|
|
58
|
-
id_arrays.sort! { |this_array, that_array| this_array.size <=> that_array.size }
|
|
59
|
-
|
|
60
|
-
# Call the optimized C algorithm.
|
|
61
|
-
#
|
|
62
|
-
Performant::Array.memory_efficient_intersect id_arrays
|
|
63
|
-
end
|
|
64
|
-
|
|
65
|
-
# Wrap the combinations into an allocation with the result_identifier.
|
|
66
|
-
#
|
|
67
|
-
def pack_into_allocation result_identifier
|
|
68
|
-
Allocation.new self, result_identifier
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# Filters the tokens and identifiers such that only identifiers
|
|
72
|
-
# that are passed in, remain, including their tokens.
|
|
73
|
-
#
|
|
74
|
-
# Note: This method is not totally independent of the calculate_ids one.
|
|
75
|
-
# Since identifiers are only nullified, we need to not include the
|
|
76
|
-
# ids that have an associated identifier that is nil.
|
|
77
|
-
#
|
|
78
|
-
def keep identifiers = []
|
|
79
|
-
# TODO Rewrite to use the category!!!
|
|
80
|
-
#
|
|
81
|
-
@combinations.reject! { |combination| !combination.in?(identifiers) }
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
# Filters the tokens and identifiers such that identifiers
|
|
85
|
-
# that are passed in, are removed, including their tokens.
|
|
86
|
-
#
|
|
87
|
-
# Note: This method is not totally independent of the calculate_ids one.
|
|
88
|
-
# Since identifiers are only nullified, we need to not include the
|
|
89
|
-
# ids that have an associated identifier that is nil.
|
|
90
|
-
#
|
|
91
|
-
def remove identifiers = []
|
|
92
|
-
# TODO Rewrite to use the category!!!
|
|
93
|
-
#
|
|
94
|
-
@combinations.reject! { |combination| combination.in?(identifiers) }
|
|
95
|
-
end
|
|
96
|
-
|
|
97
|
-
#
|
|
98
|
-
#
|
|
99
|
-
def to_result
|
|
100
|
-
@combinations.map &:to_result
|
|
101
|
-
end
|
|
102
|
-
|
|
103
|
-
end
|
|
104
|
-
|
|
105
|
-
end
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
# coding: utf-8
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
module Query
|
|
7
|
-
|
|
8
|
-
# A single qualifier.
|
|
9
|
-
#
|
|
10
|
-
class Qualifier # :nodoc:all
|
|
11
|
-
|
|
12
|
-
attr_reader :normalized_qualifier, :codes
|
|
13
|
-
|
|
14
|
-
#
|
|
15
|
-
#
|
|
16
|
-
# codes is an array.
|
|
17
|
-
#
|
|
18
|
-
def initialize normalized_qualifier, codes
|
|
19
|
-
@normalized_qualifier = normalized_qualifier
|
|
20
|
-
@codes = codes.map &:to_sym
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Will overwrite if the key is present in the hash.
|
|
24
|
-
#
|
|
25
|
-
def inject_into hash
|
|
26
|
-
codes.each do |code|
|
|
27
|
-
hash[code] = normalized_qualifier
|
|
28
|
-
end
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
end
|
|
32
|
-
|
|
33
|
-
# Collection class for qualifiers.
|
|
34
|
-
#
|
|
35
|
-
class Qualifiers # :nodoc:all
|
|
36
|
-
|
|
37
|
-
include Singleton
|
|
38
|
-
|
|
39
|
-
attr_reader :qualifiers, :normalization_mapping
|
|
40
|
-
|
|
41
|
-
delegate :<<, :to => :qualifiers
|
|
42
|
-
|
|
43
|
-
#
|
|
44
|
-
#
|
|
45
|
-
def initialize
|
|
46
|
-
@qualifiers = []
|
|
47
|
-
@normalization_mapping = {}
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
# TODO Spec.
|
|
51
|
-
#
|
|
52
|
-
def self.add name, qualifiers
|
|
53
|
-
instance << Qualifier.new(name, qualifiers)
|
|
54
|
-
end
|
|
55
|
-
|
|
56
|
-
# Uses the qualifiers to prepare (optimize) the qualifier handling.
|
|
57
|
-
#
|
|
58
|
-
def prepare
|
|
59
|
-
qualifiers.each do |qualifier|
|
|
60
|
-
qualifier.inject_into normalization_mapping
|
|
61
|
-
end
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# Normalizes the given qualifier.
|
|
65
|
-
#
|
|
66
|
-
# Returns nil if it is not allowed, the normalized qualifier if it is.
|
|
67
|
-
#
|
|
68
|
-
# Note: Normalizes.
|
|
69
|
-
#
|
|
70
|
-
def normalize qualifier
|
|
71
|
-
return nil if qualifier.blank?
|
|
72
|
-
|
|
73
|
-
normalization_mapping[qualifier.to_sym]
|
|
74
|
-
end
|
|
75
|
-
|
|
76
|
-
end
|
|
77
|
-
end
|
data/lib/picky/query/token.rb
DELETED
|
@@ -1,202 +0,0 @@
|
|
|
1
|
-
module Query
|
|
2
|
-
|
|
3
|
-
# This is a query token. Together with other tokens it makes up a query.
|
|
4
|
-
#
|
|
5
|
-
# It remembers the original form, and a normalized form.
|
|
6
|
-
#
|
|
7
|
-
# It also knows whether it needs to look for similarity (bla~), or whether it is a partial (bla*).
|
|
8
|
-
#
|
|
9
|
-
# TODO Make partial / similarity char configurable.
|
|
10
|
-
#
|
|
11
|
-
class Token # :nodoc:all
|
|
12
|
-
|
|
13
|
-
attr_reader :text, :original
|
|
14
|
-
attr_writer :similar
|
|
15
|
-
|
|
16
|
-
delegate :blank?, :to => :text
|
|
17
|
-
|
|
18
|
-
# Normal initializer.
|
|
19
|
-
#
|
|
20
|
-
# Note: Use this if you do not want a qualified and normalized token.
|
|
21
|
-
#
|
|
22
|
-
def initialize text
|
|
23
|
-
@text = text
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# Returns a qualified and normalized token.
|
|
27
|
-
#
|
|
28
|
-
# Note: Use this in the search engine if you need a qualified
|
|
29
|
-
# and normalized token. I.e. one prepared for a search.
|
|
30
|
-
#
|
|
31
|
-
def self.processed text
|
|
32
|
-
token = new text
|
|
33
|
-
token.qualify
|
|
34
|
-
token.extract_original
|
|
35
|
-
token.partialize
|
|
36
|
-
token.similarize
|
|
37
|
-
token.remove_illegals
|
|
38
|
-
token
|
|
39
|
-
end
|
|
40
|
-
|
|
41
|
-
# This returns a predefined category name if the user has given one.
|
|
42
|
-
#
|
|
43
|
-
def user_defined_category_name
|
|
44
|
-
@qualifier
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Extracts a qualifier for this token and pre-assigns an allocation.
|
|
48
|
-
#
|
|
49
|
-
# Note: Removes the qualifier if it is not allowed.
|
|
50
|
-
#
|
|
51
|
-
def qualify
|
|
52
|
-
@qualifier, @text = split @text
|
|
53
|
-
@qualifier = Query::Qualifiers.instance.normalize @qualifier
|
|
54
|
-
end
|
|
55
|
-
def extract_original
|
|
56
|
-
@original = @text.dup
|
|
57
|
-
end
|
|
58
|
-
|
|
59
|
-
# Partial is a conditional setter.
|
|
60
|
-
#
|
|
61
|
-
# It is only settable if it hasn't been set yet.
|
|
62
|
-
#
|
|
63
|
-
def partial= partial
|
|
64
|
-
@partial = partial if @partial.nil?
|
|
65
|
-
end
|
|
66
|
-
def partial?
|
|
67
|
-
!@similar && @partial
|
|
68
|
-
end
|
|
69
|
-
|
|
70
|
-
# If the text ends with *, partialize it. If with ", don't.
|
|
71
|
-
#
|
|
72
|
-
@@no_partial = /\"\Z/
|
|
73
|
-
@@partial = /\*\Z/
|
|
74
|
-
def partialize
|
|
75
|
-
self.partial = false and return if @text =~ @@no_partial
|
|
76
|
-
self.partial = true if @text =~ @@partial
|
|
77
|
-
end
|
|
78
|
-
|
|
79
|
-
# If the text ends with ~ similarize it. If with ", don't.
|
|
80
|
-
#
|
|
81
|
-
@@no_similar = /\"\Z/
|
|
82
|
-
@@similar = /\~\Z/
|
|
83
|
-
def similarize
|
|
84
|
-
self.similar = false and return if @text =~ @@no_similar
|
|
85
|
-
self.similar = true if @text =~ @@similar
|
|
86
|
-
end
|
|
87
|
-
|
|
88
|
-
def similar?
|
|
89
|
-
@similar
|
|
90
|
-
end
|
|
91
|
-
|
|
92
|
-
# Normalizes this token's text.
|
|
93
|
-
#
|
|
94
|
-
@@illegals = /["*~]/
|
|
95
|
-
def remove_illegals
|
|
96
|
-
@text.gsub! @@illegals, '' unless @text.blank?
|
|
97
|
-
end
|
|
98
|
-
|
|
99
|
-
# Visitor for tokenizer.
|
|
100
|
-
#
|
|
101
|
-
# TODO Rewrite!!!
|
|
102
|
-
#
|
|
103
|
-
def tokenize_with tokenizer
|
|
104
|
-
@text = tokenizer.normalize @text
|
|
105
|
-
end
|
|
106
|
-
# TODO spec!
|
|
107
|
-
#
|
|
108
|
-
# TODO Rewrite!!
|
|
109
|
-
#
|
|
110
|
-
def tokenized tokenizer
|
|
111
|
-
tokenizer.tokenize(@text.to_s).each do |text|
|
|
112
|
-
yield text
|
|
113
|
-
end
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
# Returns an array of possible combinations.
|
|
117
|
-
#
|
|
118
|
-
def possible_combinations_in type
|
|
119
|
-
type.possible_combinations self
|
|
120
|
-
end
|
|
121
|
-
|
|
122
|
-
# Returns a token with the next similar text.
|
|
123
|
-
#
|
|
124
|
-
# TODO Rewrite this. It is hard to understand. Also spec performance.
|
|
125
|
-
#
|
|
126
|
-
def next_similar_token category
|
|
127
|
-
token = self.dup
|
|
128
|
-
token if token.next_similar category.bundle_for(token)
|
|
129
|
-
end
|
|
130
|
-
# Sets and returns the next similar word.
|
|
131
|
-
#
|
|
132
|
-
# Note: Also overrides the original.
|
|
133
|
-
#
|
|
134
|
-
def next_similar bundle
|
|
135
|
-
@text = @original = (similarity(bundle).shift || return) if similar?
|
|
136
|
-
end
|
|
137
|
-
# Lazy similar reader.
|
|
138
|
-
#
|
|
139
|
-
def similarity bundle = nil
|
|
140
|
-
@similarity || @similarity = generate_similarity_for(bundle)
|
|
141
|
-
end
|
|
142
|
-
# Returns an enumerator that traverses over the similar.
|
|
143
|
-
#
|
|
144
|
-
# Note: The dup isn't too nice – since it is needed on account of the shift, above.
|
|
145
|
-
# (We avoid a StopIteration exception. Which of both is less evil?)
|
|
146
|
-
#
|
|
147
|
-
def generate_similarity_for bundle
|
|
148
|
-
bundle.similar(@text).dup || []
|
|
149
|
-
end
|
|
150
|
-
|
|
151
|
-
# Generates a solr term from this token.
|
|
152
|
-
#
|
|
153
|
-
# E.g. "name:heroes~0.75"
|
|
154
|
-
#
|
|
155
|
-
@@solr_fuzzy_mapping = {
|
|
156
|
-
1 => :'',
|
|
157
|
-
2 => :'',
|
|
158
|
-
3 => :'',
|
|
159
|
-
4 => :'~0.74',
|
|
160
|
-
5 => :'~0.78',
|
|
161
|
-
6 => :'~0.81',
|
|
162
|
-
7 => :'~0.83',
|
|
163
|
-
8 => :'~0.85',
|
|
164
|
-
9 => :'~0.87',
|
|
165
|
-
10 => :'~0.89'
|
|
166
|
-
}
|
|
167
|
-
@@solr_fuzzy_mapping.default = :'~0.9'
|
|
168
|
-
def to_solr
|
|
169
|
-
blank? ? '' : (to_s + @@solr_fuzzy_mapping[@text.size].to_s)
|
|
170
|
-
end
|
|
171
|
-
|
|
172
|
-
#
|
|
173
|
-
#
|
|
174
|
-
def to_result
|
|
175
|
-
[@original, @text]
|
|
176
|
-
end
|
|
177
|
-
|
|
178
|
-
# Displays the qualifier text and the text, joined.
|
|
179
|
-
#
|
|
180
|
-
# e.g. name:meier
|
|
181
|
-
#
|
|
182
|
-
def to_s
|
|
183
|
-
[@qualifier, @text].compact.join ':'
|
|
184
|
-
end
|
|
185
|
-
|
|
186
|
-
private
|
|
187
|
-
|
|
188
|
-
# Splits text into a qualifier and text.
|
|
189
|
-
#
|
|
190
|
-
# Returns [qualifier, text].
|
|
191
|
-
#
|
|
192
|
-
def split unqualified_text
|
|
193
|
-
qualifier, text = (unqualified_text || '').split(':', 2)
|
|
194
|
-
if text.blank?
|
|
195
|
-
[nil, (qualifier || '')]
|
|
196
|
-
else
|
|
197
|
-
[qualifier, text]
|
|
198
|
-
end
|
|
199
|
-
end
|
|
200
|
-
|
|
201
|
-
end
|
|
202
|
-
end
|
data/lib/picky/query/tokens.rb
DELETED
|
@@ -1,86 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
#
|
|
6
|
-
module Query
|
|
7
|
-
|
|
8
|
-
# This class primarily handles switching through similar token constellations.
|
|
9
|
-
#
|
|
10
|
-
class Tokens # :nodoc:all
|
|
11
|
-
|
|
12
|
-
# Basically delegates to its internal tokens array.
|
|
13
|
-
#
|
|
14
|
-
self.delegate *[Enumerable.instance_methods, :slice!, :[], :uniq!, :last, :reject!, :length, :size, :empty?, :each, :exit, { :to => :@tokens }].flatten
|
|
15
|
-
|
|
16
|
-
#
|
|
17
|
-
#
|
|
18
|
-
def initialize tokens = []
|
|
19
|
-
@tokens = tokens
|
|
20
|
-
end
|
|
21
|
-
|
|
22
|
-
#
|
|
23
|
-
#
|
|
24
|
-
def tokenize_with tokenizer
|
|
25
|
-
@tokens.each { |token| token.tokenize_with(tokenizer) }
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# Generates an array in the form of
|
|
29
|
-
# [
|
|
30
|
-
# [combination], # of token 1
|
|
31
|
-
# [combination, combination, combination], # of token 2
|
|
32
|
-
# [combination, combination] # of token 3
|
|
33
|
-
# ]
|
|
34
|
-
#
|
|
35
|
-
# TODO If we want token behaviour defined per Query, we can
|
|
36
|
-
# compact! here
|
|
37
|
-
#
|
|
38
|
-
def possible_combinations_in type
|
|
39
|
-
@tokens.inject([]) do |combinations, token|
|
|
40
|
-
combinations << token.possible_combinations_in(type)
|
|
41
|
-
end
|
|
42
|
-
# TODO compact! if ignore_unassigned_tokens
|
|
43
|
-
end
|
|
44
|
-
|
|
45
|
-
# Makes the last of the tokens partial.
|
|
46
|
-
#
|
|
47
|
-
def partialize_last
|
|
48
|
-
@tokens.last.partial = true unless empty?
|
|
49
|
-
end
|
|
50
|
-
|
|
51
|
-
# Caps the tokens to the maximum.
|
|
52
|
-
#
|
|
53
|
-
def cap maximum
|
|
54
|
-
@tokens.slice!(maximum..-1) if cap?(maximum)
|
|
55
|
-
end
|
|
56
|
-
def cap? maximum
|
|
57
|
-
@tokens.size > maximum
|
|
58
|
-
end
|
|
59
|
-
|
|
60
|
-
# Rejects blank tokens.
|
|
61
|
-
#
|
|
62
|
-
def reject
|
|
63
|
-
@tokens.reject! &:blank?
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# Returns a solr query.
|
|
67
|
-
#
|
|
68
|
-
def to_solr_query
|
|
69
|
-
@tokens.map(&:to_solr).join ' '
|
|
70
|
-
end
|
|
71
|
-
|
|
72
|
-
#
|
|
73
|
-
#
|
|
74
|
-
def originals
|
|
75
|
-
@tokens.map(&:original)
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
# Just join the token original texts.
|
|
79
|
-
#
|
|
80
|
-
def to_s
|
|
81
|
-
originals.join ' '
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
end
|
|
85
|
-
|
|
86
|
-
end
|