picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
data/lib/picky/query/weigher.rb
DELETED
|
@@ -1,165 +0,0 @@
|
|
|
1
|
-
module Query
|
|
2
|
-
|
|
3
|
-
# Weighs the given tokens, generates Allocations -> Allocation -> Combinations.
|
|
4
|
-
#
|
|
5
|
-
class Weigher # :nodoc:all
|
|
6
|
-
|
|
7
|
-
attr_reader :indexes
|
|
8
|
-
|
|
9
|
-
# A weigher has a number of typed indexes, for which it generates allocations.
|
|
10
|
-
#
|
|
11
|
-
def initialize types
|
|
12
|
-
@indexes = types
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
#
|
|
16
|
-
#
|
|
17
|
-
def allocations_for tokens
|
|
18
|
-
Allocations.new(indexes.inject([]) do |previous_allocations, index|
|
|
19
|
-
# Expand the combinations.
|
|
20
|
-
#
|
|
21
|
-
possible_combinations = tokens.possible_combinations_in index
|
|
22
|
-
|
|
23
|
-
# Optimization for ignoring tokens that allocate to nothing and
|
|
24
|
-
# can be ignored.
|
|
25
|
-
# For example in a special search, where "florian" is not
|
|
26
|
-
# mapped to city, zip, or category.
|
|
27
|
-
#
|
|
28
|
-
possible_combinations.compact!
|
|
29
|
-
expanded_combinations = expand_combinations_from possible_combinations
|
|
30
|
-
|
|
31
|
-
#
|
|
32
|
-
#
|
|
33
|
-
next previous_allocations if expanded_combinations.empty?
|
|
34
|
-
|
|
35
|
-
# The recombination part, where
|
|
36
|
-
# [
|
|
37
|
-
# [a,a,b,b,c,c]
|
|
38
|
-
# [d,e,d,e,d,e]
|
|
39
|
-
# ]
|
|
40
|
-
# becomes
|
|
41
|
-
# [
|
|
42
|
-
# [a,d],
|
|
43
|
-
# [a,e],
|
|
44
|
-
# [b,d],
|
|
45
|
-
# [b,e],
|
|
46
|
-
# [c,d],
|
|
47
|
-
# [c,e]
|
|
48
|
-
# ]
|
|
49
|
-
#
|
|
50
|
-
# TODO Use transpose?
|
|
51
|
-
#
|
|
52
|
-
expanded_combinations = expanded_combinations.shift.zip *expanded_combinations
|
|
53
|
-
|
|
54
|
-
# Wrap into a real combination.
|
|
55
|
-
#
|
|
56
|
-
# expanded_combinations.map! { |expanded_combination| Combinations.new(expanded_combination).pack_into_allocation(index.result_identifier) }
|
|
57
|
-
|
|
58
|
-
# Add the possible allocations to the ones we already have.
|
|
59
|
-
#
|
|
60
|
-
# previous_allocations + expanded_combinations.map(&:pack_into_allocation)
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
# Add the wrapped possible allocations to the ones we already have.
|
|
64
|
-
#
|
|
65
|
-
previous_allocations + expanded_combinations.map! do |expanded_combination|
|
|
66
|
-
Combinations.new(expanded_combination).pack_into_allocation(index.result_identifier) # TODO Do not extract result_identifier. Remove pack_into_allocation.
|
|
67
|
-
end
|
|
68
|
-
end)
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# This is the core of the search engine.
|
|
72
|
-
#
|
|
73
|
-
# Gets an array of
|
|
74
|
-
# [
|
|
75
|
-
# [<combinations for token1>],
|
|
76
|
-
# [<combinations for token2>],
|
|
77
|
-
# [<combinations for token3>]
|
|
78
|
-
# ]
|
|
79
|
-
#
|
|
80
|
-
# Generates all possible allocations of combinations.
|
|
81
|
-
# [
|
|
82
|
-
# [first combination of token1, first c of t2, first c of t3],
|
|
83
|
-
# [first combination of token1, first c of t2, second c of t3]
|
|
84
|
-
# ...
|
|
85
|
-
# ]
|
|
86
|
-
#
|
|
87
|
-
# Generates all possible combinations of array elements:
|
|
88
|
-
# [1,2,3] x [a,b,c] x [k,l,m] => [[1,a,k], [1,a,l], [1,a,m], [1,b,k], [1,b,l], [1,b,m], [1,c,k], ..., [3,c,m]]
|
|
89
|
-
# Note: Also calculates the weights and sorts them accordingly.
|
|
90
|
-
#
|
|
91
|
-
# Note: This is a heavily optimized ruby version.
|
|
92
|
-
#
|
|
93
|
-
# Works like this:
|
|
94
|
-
# [1,2,3], [a,b,c], [k,l,m] are expanded to
|
|
95
|
-
# group mult: 1
|
|
96
|
-
# <- single mult ->
|
|
97
|
-
# [1,1,1,1,1,1,1,1,1,2,2,2,2,2,2,2,2,2,3,3,3,3,3,3,3,3,3] = 27 elements
|
|
98
|
-
# group mult: 3
|
|
99
|
-
# <- -> s/m
|
|
100
|
-
# [a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c,a,a,a,b,b,b,c,c,c] = 27 elements
|
|
101
|
-
# group mult: 9
|
|
102
|
-
# <> s/m
|
|
103
|
-
# [k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m,k,l,m] = 27 elements
|
|
104
|
-
# The array elements are then combined by index (i.e. vertically) to get all combinations.
|
|
105
|
-
#
|
|
106
|
-
# Note: Of course I could split this method up into smaller
|
|
107
|
-
# ones, but I guess I am a bit sentimental.
|
|
108
|
-
#
|
|
109
|
-
def expand_combinations_from possible_combinations
|
|
110
|
-
# Generate the first multiplicator "with which" (well, not quite) to multiply the smallest amount of combinations.
|
|
111
|
-
#
|
|
112
|
-
# TODO How does this work if an element has size 0? Since below we account for size 0.
|
|
113
|
-
# Should we even continue if an element has size 0?
|
|
114
|
-
# This means one of the tokens cannot be allocated.
|
|
115
|
-
#
|
|
116
|
-
single_mult = possible_combinations.inject(1) { |total, combinations| total * combinations.size }
|
|
117
|
-
|
|
118
|
-
# Initialize a group multiplicator.
|
|
119
|
-
#
|
|
120
|
-
group_mult = 1
|
|
121
|
-
|
|
122
|
-
possible_combinations.reject!(&:empty?)
|
|
123
|
-
|
|
124
|
-
# The expanding part to line up the combinations
|
|
125
|
-
# for later combination in allocations.
|
|
126
|
-
#
|
|
127
|
-
possible_combinations.collect! do |combinations|
|
|
128
|
-
|
|
129
|
-
# Get the size of the combinations of the first token.
|
|
130
|
-
#
|
|
131
|
-
combinations_size = combinations.size
|
|
132
|
-
|
|
133
|
-
# Special case: If there is no combination for one of the tokens.
|
|
134
|
-
# In that case, we just use the same single mult for
|
|
135
|
-
# the next iteration.
|
|
136
|
-
# If there are combinations, we divide the single mult
|
|
137
|
-
# by the number of combinations.
|
|
138
|
-
#
|
|
139
|
-
single_mult /= combinations_size unless combinations_size.zero?
|
|
140
|
-
|
|
141
|
-
# Expand each combination by the single mult:
|
|
142
|
-
# [a,b,c]
|
|
143
|
-
# [a,a,a, b,b,b, c,c,c]
|
|
144
|
-
# Then, expand the result by the group mult:
|
|
145
|
-
# [a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c, a,a,a,b,b,b,c,c,c]
|
|
146
|
-
#
|
|
147
|
-
combinations = combinations.inject([]) do |total, combination|
|
|
148
|
-
total + [combination]*single_mult
|
|
149
|
-
end * group_mult
|
|
150
|
-
|
|
151
|
-
# Multiply the group mult by the combinations size,
|
|
152
|
-
# since the next combinations' single mult is smaller
|
|
153
|
-
# and we need to adjust for that.
|
|
154
|
-
#
|
|
155
|
-
group_mult = group_mult * combinations_size
|
|
156
|
-
|
|
157
|
-
# Return the combinations.
|
|
158
|
-
#
|
|
159
|
-
combinations
|
|
160
|
-
end
|
|
161
|
-
end
|
|
162
|
-
|
|
163
|
-
end
|
|
164
|
-
|
|
165
|
-
end
|
data/lib/picky/results/base.rb
DELETED
|
@@ -1,102 +0,0 @@
|
|
|
1
|
-
module Results # :nodoc:all
|
|
2
|
-
|
|
3
|
-
# This is the internal results object. Usually, to_marshal, or to_json
|
|
4
|
-
# is called on it to get a string for the answer.
|
|
5
|
-
#
|
|
6
|
-
class Base
|
|
7
|
-
|
|
8
|
-
# Duration is set externally by the query.
|
|
9
|
-
#
|
|
10
|
-
attr_writer :duration
|
|
11
|
-
attr_reader :allocations, :offset
|
|
12
|
-
|
|
13
|
-
# Takes instances of Query::Allocations as param.
|
|
14
|
-
#
|
|
15
|
-
def initialize offset = 0, allocations = Query::Allocations.new
|
|
16
|
-
@offset = offset
|
|
17
|
-
@allocations = allocations # || Query::Allocations.new
|
|
18
|
-
end
|
|
19
|
-
# Create new results and calculate the ids.
|
|
20
|
-
#
|
|
21
|
-
def self.from offset, allocations
|
|
22
|
-
results = new offset, allocations
|
|
23
|
-
results.prepare!
|
|
24
|
-
results
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
#
|
|
28
|
-
#
|
|
29
|
-
def serialize
|
|
30
|
-
{ allocations: allocations.to_result,
|
|
31
|
-
offset: offset,
|
|
32
|
-
duration: duration,
|
|
33
|
-
total: total }
|
|
34
|
-
end
|
|
35
|
-
# The default format is json.
|
|
36
|
-
#
|
|
37
|
-
def to_response options = {}
|
|
38
|
-
to_json options
|
|
39
|
-
end
|
|
40
|
-
# Convert to json format.
|
|
41
|
-
#
|
|
42
|
-
def to_json options = {}
|
|
43
|
-
serialize.to_json options
|
|
44
|
-
end
|
|
45
|
-
|
|
46
|
-
# This starts the actual processing.
|
|
47
|
-
#
|
|
48
|
-
# Without this, the allocations are not processed,
|
|
49
|
-
# and no ids are calculated.
|
|
50
|
-
#
|
|
51
|
-
def prepare!
|
|
52
|
-
allocations.process! self.max_results, self.offset
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Duration default is 0.
|
|
56
|
-
#
|
|
57
|
-
def duration
|
|
58
|
-
@duration || 0
|
|
59
|
-
end
|
|
60
|
-
# The total results. Delegates to the allocations.
|
|
61
|
-
#
|
|
62
|
-
# Caches.
|
|
63
|
-
#
|
|
64
|
-
def total
|
|
65
|
-
@total || @total = allocations.total || 0
|
|
66
|
-
end
|
|
67
|
-
|
|
68
|
-
# How many results are returned.
|
|
69
|
-
#
|
|
70
|
-
# Set in config using
|
|
71
|
-
# Results::Full.max_results = 20
|
|
72
|
-
#
|
|
73
|
-
class_inheritable_accessor :max_results
|
|
74
|
-
def max_results
|
|
75
|
-
self.class.max_results
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
# Convenience methods.
|
|
79
|
-
#
|
|
80
|
-
|
|
81
|
-
# Delegates to allocations.
|
|
82
|
-
#
|
|
83
|
-
def ids amount = 20
|
|
84
|
-
allocations.ids amount
|
|
85
|
-
end
|
|
86
|
-
# Gets an amout of random ids from the allocations.
|
|
87
|
-
#
|
|
88
|
-
# Note: Basically delegates to the allocations.
|
|
89
|
-
#
|
|
90
|
-
def random_ids amount = 1
|
|
91
|
-
allocations.random_ids amount
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
# Human readable log.
|
|
95
|
-
#
|
|
96
|
-
def to_log query
|
|
97
|
-
"|#{Time.now.to_s(:db)}|#{'%8f' % duration}|#{'%-50s' % query}|#{'%8d' % total}|#{'%4d' % offset}|#{'%2d' % allocations.size}|"
|
|
98
|
-
end
|
|
99
|
-
|
|
100
|
-
end
|
|
101
|
-
|
|
102
|
-
end
|
data/lib/picky/results/full.rb
DELETED
data/lib/picky/results/live.rb
DELETED
|
@@ -1,161 +0,0 @@
|
|
|
1
|
-
module Tokenizers # :nodoc:all
|
|
2
|
-
|
|
3
|
-
# Defines tokenizing processes used both in indexing and querying.
|
|
4
|
-
#
|
|
5
|
-
class Base
|
|
6
|
-
|
|
7
|
-
# TODO Move EMPTY_STRING top level.
|
|
8
|
-
#
|
|
9
|
-
EMPTY_STRING = ''.freeze
|
|
10
|
-
|
|
11
|
-
# Stopwords.
|
|
12
|
-
#
|
|
13
|
-
def stopwords regexp
|
|
14
|
-
@remove_stopwords_regexp = regexp
|
|
15
|
-
end
|
|
16
|
-
def remove_stopwords text
|
|
17
|
-
text.gsub! @remove_stopwords_regexp, EMPTY_STRING if @remove_stopwords_regexp
|
|
18
|
-
text
|
|
19
|
-
end
|
|
20
|
-
@@non_single_stopword_regexp = /^\b[\w:]+?\b[\.\*\~]?\s?$/
|
|
21
|
-
def remove_non_single_stopwords text
|
|
22
|
-
return text if text.match @@non_single_stopword_regexp
|
|
23
|
-
remove_stopwords text
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
# Illegals.
|
|
27
|
-
#
|
|
28
|
-
# TODO Should there be a legal?
|
|
29
|
-
#
|
|
30
|
-
def removes_characters regexp
|
|
31
|
-
@removes_characters_regexp = regexp
|
|
32
|
-
end
|
|
33
|
-
def remove_illegals text
|
|
34
|
-
text.gsub! @removes_characters_regexp, EMPTY_STRING if @removes_characters_regexp
|
|
35
|
-
text
|
|
36
|
-
end
|
|
37
|
-
|
|
38
|
-
# Splitting.
|
|
39
|
-
#
|
|
40
|
-
def splits_text_on regexp
|
|
41
|
-
@splits_text_on_regexp = regexp
|
|
42
|
-
end
|
|
43
|
-
def split text
|
|
44
|
-
text.split @splits_text_on_regexp
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Normalizing.
|
|
48
|
-
#
|
|
49
|
-
def normalizes_words regexp_replaces
|
|
50
|
-
@normalizes_words_regexp_replaces = regexp_replaces
|
|
51
|
-
end
|
|
52
|
-
def normalize_with_patterns text
|
|
53
|
-
return text unless @normalizes_words_regexp_replaces
|
|
54
|
-
|
|
55
|
-
@normalizes_words_regexp_replaces.each do |regex, replace|
|
|
56
|
-
# This should be sufficient
|
|
57
|
-
#
|
|
58
|
-
text.gsub!(regex, replace) and break
|
|
59
|
-
end
|
|
60
|
-
remove_after_normalizing_illegals text
|
|
61
|
-
text
|
|
62
|
-
end
|
|
63
|
-
|
|
64
|
-
# Illegal after normalizing.
|
|
65
|
-
#
|
|
66
|
-
def removes_characters_after_splitting regexp
|
|
67
|
-
@removes_characters_after_splitting_regexp = regexp
|
|
68
|
-
end
|
|
69
|
-
def remove_after_normalizing_illegals text
|
|
70
|
-
text.gsub! @removes_characters_after_splitting_regexp, EMPTY_STRING if @removes_characters_after_splitting_regexp
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
# Substitute Characters with this substituter.
|
|
74
|
-
#
|
|
75
|
-
# Default is European Character substitution.
|
|
76
|
-
#
|
|
77
|
-
def substitutes_characters_with substituter = CharacterSubstituters::WestEuropean.new
|
|
78
|
-
# TODO Raise if it doesn't quack substitute?
|
|
79
|
-
@substituter = substituter
|
|
80
|
-
end
|
|
81
|
-
def substitute_characters text
|
|
82
|
-
substituter?? substituter.substitute(text) : text
|
|
83
|
-
end
|
|
84
|
-
|
|
85
|
-
# Reject tokens after tokenizing based on the given criteria.
|
|
86
|
-
#
|
|
87
|
-
# Note: Currently only for indexing. TODO Redesign and write for both!
|
|
88
|
-
#
|
|
89
|
-
def reject_token_if &condition
|
|
90
|
-
@reject_condition = condition
|
|
91
|
-
end
|
|
92
|
-
def reject tokens
|
|
93
|
-
tokens.reject! &@reject_condition
|
|
94
|
-
end
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
# Returns a number of tokens, generated from the given text.
|
|
98
|
-
#
|
|
99
|
-
# Note:
|
|
100
|
-
# * preprocess, pretokenize are hooks
|
|
101
|
-
#
|
|
102
|
-
def tokenize text
|
|
103
|
-
text = preprocess text # processing the text
|
|
104
|
-
return empty_tokens if text.blank?
|
|
105
|
-
words = pretokenize text # splitting and preparations for tokenizing
|
|
106
|
-
return empty_tokens if words.empty?
|
|
107
|
-
tokens = tokens_for words # creating tokens / strings
|
|
108
|
-
process tokens # processing tokens / strings
|
|
109
|
-
end
|
|
110
|
-
|
|
111
|
-
attr_reader :substituter
|
|
112
|
-
alias substituter? substituter
|
|
113
|
-
|
|
114
|
-
def initialize options = {}
|
|
115
|
-
removes_characters options[:removes_characters] if options[:removes_characters]
|
|
116
|
-
contracts_expressions *options[:contracts_expressions] if options[:contracts_expressions]
|
|
117
|
-
stopwords options[:stopwords] if options[:stopwords]
|
|
118
|
-
normalizes_words options[:normalizes_words] if options[:normalizes_words]
|
|
119
|
-
removes_characters_after_splitting options[:removes_characters_after_splitting] if options[:removes_characters_after_splitting]
|
|
120
|
-
substitutes_characters_with options[:substitutes_characters_with] if options[:substitutes_characters_with]
|
|
121
|
-
|
|
122
|
-
# Defaults.
|
|
123
|
-
#
|
|
124
|
-
splits_text_on options[:splits_text_on] || /\s/
|
|
125
|
-
reject_token_if &(options[:reject_token_if] || :blank?)
|
|
126
|
-
end
|
|
127
|
-
|
|
128
|
-
# Hooks.
|
|
129
|
-
#
|
|
130
|
-
|
|
131
|
-
# Preprocessing.
|
|
132
|
-
#
|
|
133
|
-
def preprocess text; end
|
|
134
|
-
# Pretokenizing.
|
|
135
|
-
#
|
|
136
|
-
def pretokenize text; end
|
|
137
|
-
# Postprocessing.
|
|
138
|
-
#
|
|
139
|
-
def process tokens
|
|
140
|
-
reject tokens # Reject any tokens that don't meet criteria
|
|
141
|
-
tokens
|
|
142
|
-
end
|
|
143
|
-
|
|
144
|
-
# Converts words into real tokens.
|
|
145
|
-
#
|
|
146
|
-
def tokens_for words
|
|
147
|
-
::Query::Tokens.new words.collect! { |word| token_for word }
|
|
148
|
-
end
|
|
149
|
-
# Turns non-blank text into symbols.
|
|
150
|
-
#
|
|
151
|
-
def symbolize text
|
|
152
|
-
text.blank? ? nil : text.to_sym
|
|
153
|
-
end
|
|
154
|
-
# Returns a tokens object.
|
|
155
|
-
#
|
|
156
|
-
def empty_tokens
|
|
157
|
-
::Query::Tokens.new
|
|
158
|
-
end
|
|
159
|
-
|
|
160
|
-
end
|
|
161
|
-
end
|
|
@@ -1,58 +0,0 @@
|
|
|
1
|
-
module Tokenizers
|
|
2
|
-
|
|
3
|
-
# The base indexing tokenizer.
|
|
4
|
-
#
|
|
5
|
-
# Override in indexing subclasses and define in configuration.
|
|
6
|
-
#
|
|
7
|
-
class Index < Base
|
|
8
|
-
|
|
9
|
-
def self.default= new_default
|
|
10
|
-
@default = new_default
|
|
11
|
-
end
|
|
12
|
-
def self.default
|
|
13
|
-
@default ||= new
|
|
14
|
-
end
|
|
15
|
-
|
|
16
|
-
# Default indexing preprocessing hook.
|
|
17
|
-
#
|
|
18
|
-
# Does:
|
|
19
|
-
# 1. Character substitution.
|
|
20
|
-
# 2. Downcasing.
|
|
21
|
-
# 3. Remove illegal expressions.
|
|
22
|
-
# 4. Remove non-single stopwords. (Stopwords that occur with other words)
|
|
23
|
-
#
|
|
24
|
-
def preprocess text
|
|
25
|
-
text = substitute_characters text
|
|
26
|
-
text.downcase!
|
|
27
|
-
remove_illegals text
|
|
28
|
-
# we do not remove single stopwords for an entirely different
|
|
29
|
-
# reason than in the query tokenizer.
|
|
30
|
-
# An indexed thing with just name "UND" (a possible stopword) should not lose its name.
|
|
31
|
-
#
|
|
32
|
-
remove_non_single_stopwords text
|
|
33
|
-
text
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Default indexing pretokenizing hook.
|
|
37
|
-
#
|
|
38
|
-
# Does:
|
|
39
|
-
# 1. Split the text into words.
|
|
40
|
-
# 2. Normalize each word.
|
|
41
|
-
#
|
|
42
|
-
def pretokenize text
|
|
43
|
-
words = split text
|
|
44
|
-
words.collect! do |word|
|
|
45
|
-
normalize_with_patterns word
|
|
46
|
-
word
|
|
47
|
-
end
|
|
48
|
-
end
|
|
49
|
-
|
|
50
|
-
# Does not actually return a token, but a
|
|
51
|
-
# symbol "token".
|
|
52
|
-
#
|
|
53
|
-
def token_for text
|
|
54
|
-
symbolize text
|
|
55
|
-
end
|
|
56
|
-
|
|
57
|
-
end
|
|
58
|
-
end
|
|
@@ -1,74 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Tokenizers
|
|
4
|
-
|
|
5
|
-
# There are a few class methods that you can use to configure how a query works.
|
|
6
|
-
#
|
|
7
|
-
# removes_characters regexp
|
|
8
|
-
# illegal_after_normalizing regexp
|
|
9
|
-
# stopwords regexp
|
|
10
|
-
# contracts_expressions regexp, to_string
|
|
11
|
-
# splits_text_on regexp
|
|
12
|
-
# normalizes_words [[/regexp1/, 'replacement1'], [/regexp2/, 'replacement2']]
|
|
13
|
-
#
|
|
14
|
-
class Query < Base
|
|
15
|
-
|
|
16
|
-
def self.default= new_default
|
|
17
|
-
@default = new_default
|
|
18
|
-
end
|
|
19
|
-
def self.default
|
|
20
|
-
@default ||= new
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
attr_reader :maximum_tokens
|
|
24
|
-
|
|
25
|
-
def initialize options = {}
|
|
26
|
-
super options
|
|
27
|
-
@maximum_tokens = options[:maximum_tokens] || 5
|
|
28
|
-
end
|
|
29
|
-
|
|
30
|
-
def preprocess text
|
|
31
|
-
remove_illegals text # Remove illegal characters
|
|
32
|
-
remove_non_single_stopwords text # remove stop words
|
|
33
|
-
text
|
|
34
|
-
end
|
|
35
|
-
|
|
36
|
-
# Split the text and put some back together.
|
|
37
|
-
#
|
|
38
|
-
# TODO Make the same as in indexing?
|
|
39
|
-
#
|
|
40
|
-
def pretokenize text
|
|
41
|
-
split text
|
|
42
|
-
end
|
|
43
|
-
|
|
44
|
-
# Let each token process itself.
|
|
45
|
-
# Reject, limit, and partialize tokens.
|
|
46
|
-
#
|
|
47
|
-
def process tokens
|
|
48
|
-
tokens.tokenize_with self
|
|
49
|
-
tokens.reject # Reject any tokens that don't meet criteria
|
|
50
|
-
tokens.cap maximum_tokens # Cut off superfluous tokens
|
|
51
|
-
tokens.partialize_last # Set certain tokens as partial
|
|
52
|
-
tokens
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Called by the token.
|
|
56
|
-
#
|
|
57
|
-
# TODO Perhaps move to Normalizer?
|
|
58
|
-
#
|
|
59
|
-
def normalize text
|
|
60
|
-
text = substitute_characters text # Substitute special characters
|
|
61
|
-
text.downcase! # Downcase all text
|
|
62
|
-
normalize_with_patterns text # normalize
|
|
63
|
-
text.to_sym # symbolize
|
|
64
|
-
end
|
|
65
|
-
|
|
66
|
-
# Returns a token for a word.
|
|
67
|
-
# The basic query tokenizer uses new tokens.
|
|
68
|
-
#
|
|
69
|
-
def token_for word
|
|
70
|
-
::Query::Token.processed word
|
|
71
|
-
end
|
|
72
|
-
|
|
73
|
-
end
|
|
74
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
describe Cacher::Partial::Default do
|
|
4
|
-
|
|
5
|
-
it "should be a subtoken" do
|
|
6
|
-
Cacher::Partial::Default.should be_kind_of(Cacher::Partial::Substring)
|
|
7
|
-
end
|
|
8
|
-
it "should be a the right down to" do
|
|
9
|
-
Cacher::Partial::Default.from.should == -3
|
|
10
|
-
end
|
|
11
|
-
it "should be a the right starting at" do
|
|
12
|
-
Cacher::Partial::Default.to.should == -1
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
end
|
|
@@ -1,17 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
describe Cacher::Partial::None do
|
|
4
|
-
|
|
5
|
-
it "has the right superclass" do
|
|
6
|
-
Cacher::Partial::None.should < Cacher::Partial::Strategy
|
|
7
|
-
end
|
|
8
|
-
it "returns an empty index" do
|
|
9
|
-
Cacher::Partial::None.new.generate_from(:unimportant).should == {}
|
|
10
|
-
end
|
|
11
|
-
describe 'use_exact_for_partial?' do
|
|
12
|
-
it 'returns true' do
|
|
13
|
-
Cacher::Partial::None.new.use_exact_for_partial?.should == true
|
|
14
|
-
end
|
|
15
|
-
end
|
|
16
|
-
|
|
17
|
-
end
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
require 'spec_helper'
|
|
2
|
-
|
|
3
|
-
describe Cacher::WeightsGenerator do
|
|
4
|
-
|
|
5
|
-
context 'integration' do
|
|
6
|
-
it 'should generate the correct values' do
|
|
7
|
-
generator = Cacher::WeightsGenerator.new :a => Array.new(0),
|
|
8
|
-
:b => Array.new(1),
|
|
9
|
-
:c => Array.new(10),
|
|
10
|
-
:d => Array.new(100),
|
|
11
|
-
:e => Array.new(1000)
|
|
12
|
-
|
|
13
|
-
result = generator.generate
|
|
14
|
-
|
|
15
|
-
result[:c].should be_close 2.3, 0.011
|
|
16
|
-
result[:d].should be_close 4.6, 0.011
|
|
17
|
-
result[:e].should be_close 6.9, 0.011
|
|
18
|
-
end
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
end
|