picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
module Partial
|
|
4
|
-
|
|
5
|
-
# Does not generate a partial index.
|
|
6
|
-
#
|
|
7
|
-
class None < Strategy
|
|
8
|
-
|
|
9
|
-
# Returns an empty index.
|
|
10
|
-
#
|
|
11
|
-
def generate_from index
|
|
12
|
-
{}
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
# Returns if this strategy's generated file is saved.
|
|
16
|
-
#
|
|
17
|
-
def saved?
|
|
18
|
-
false
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Do not use the partial bundle for getting ids and weights.
|
|
22
|
-
#
|
|
23
|
-
def use_exact_for_partial?
|
|
24
|
-
true
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
end
|
|
@@ -1,21 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
module Partial
|
|
3
|
-
# Superclass for partial strategies.
|
|
4
|
-
#
|
|
5
|
-
class Strategy < Cacher::Strategy
|
|
6
|
-
|
|
7
|
-
# Defines whether to use the exact bundle
|
|
8
|
-
# instead of the partial one.
|
|
9
|
-
#
|
|
10
|
-
# Default is @false@.
|
|
11
|
-
#
|
|
12
|
-
# For example:
|
|
13
|
-
# Partial::None.new # Uses the exact index instead of the partial one.
|
|
14
|
-
#
|
|
15
|
-
def use_exact_for_partial?
|
|
16
|
-
false
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
end
|
|
20
|
-
end
|
|
21
|
-
end
|
|
@@ -1,118 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
module Partial
|
|
4
|
-
|
|
5
|
-
# Generates the right substrings for use in the substring strategy.
|
|
6
|
-
#
|
|
7
|
-
class SubstringGenerator
|
|
8
|
-
|
|
9
|
-
attr_reader :from, :to
|
|
10
|
-
|
|
11
|
-
def initialize from, to
|
|
12
|
-
@from, @to = from, to
|
|
13
|
-
|
|
14
|
-
if @to.zero?
|
|
15
|
-
def each_subtoken token, &block
|
|
16
|
-
token.each_subtoken @from, &block
|
|
17
|
-
end
|
|
18
|
-
else
|
|
19
|
-
def each_subtoken token, &block
|
|
20
|
-
token[0..@to].intern.each_subtoken @from, &block
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
|
|
24
|
-
end
|
|
25
|
-
|
|
26
|
-
end
|
|
27
|
-
|
|
28
|
-
# The subtoken partial strategy.
|
|
29
|
-
#
|
|
30
|
-
# If given "florian"
|
|
31
|
-
# it will index "florian", "floria", "flori", "flor", "flo", "fl", "f"
|
|
32
|
-
# (Depending on what the given from value is, the example is with option from: 1)
|
|
33
|
-
#
|
|
34
|
-
class Substring < Strategy
|
|
35
|
-
|
|
36
|
-
# The from option signifies where in the symbol it
|
|
37
|
-
# will start in generating the subtokens.
|
|
38
|
-
#
|
|
39
|
-
# Examples:
|
|
40
|
-
#
|
|
41
|
-
# With :hello, and to: -1 (default)
|
|
42
|
-
# * from: 1 # => [:hello, :hell, :hel, :he, :h]
|
|
43
|
-
# * from: 4 # => [:hello, :hell]
|
|
44
|
-
#
|
|
45
|
-
# With :hello, and to: -2
|
|
46
|
-
# * from: 1 # => [:hell, :hel, :he, :h]
|
|
47
|
-
# * from: 4 # => [:hell]
|
|
48
|
-
#
|
|
49
|
-
def initialize options = {}
|
|
50
|
-
from = options[:from] || 1
|
|
51
|
-
to = options[:to] || -1
|
|
52
|
-
@generator = SubstringGenerator.new from, to
|
|
53
|
-
end
|
|
54
|
-
|
|
55
|
-
# Delegator to generator#from.
|
|
56
|
-
#
|
|
57
|
-
def from
|
|
58
|
-
@generator.from
|
|
59
|
-
end
|
|
60
|
-
|
|
61
|
-
# Delegator to generator#to.
|
|
62
|
-
#
|
|
63
|
-
def to
|
|
64
|
-
@generator.to
|
|
65
|
-
end
|
|
66
|
-
|
|
67
|
-
# Generates a partial index from the given index.
|
|
68
|
-
#
|
|
69
|
-
def generate_from index
|
|
70
|
-
result = {}
|
|
71
|
-
|
|
72
|
-
# Generate for each key token the subtokens.
|
|
73
|
-
#
|
|
74
|
-
i = 0
|
|
75
|
-
index.each_key do |token|
|
|
76
|
-
i += 1
|
|
77
|
-
if i == 5000
|
|
78
|
-
timed_exclaim "Generating partial tokens for token #{token}. This appears every 5000 tokens."
|
|
79
|
-
i = 0
|
|
80
|
-
end
|
|
81
|
-
generate_for token, index, result
|
|
82
|
-
end
|
|
83
|
-
|
|
84
|
-
# Remove duplicate ids.
|
|
85
|
-
#
|
|
86
|
-
# THINK If it is unique for a subtoken, it is
|
|
87
|
-
# unique for all derived longer tokens.
|
|
88
|
-
#
|
|
89
|
-
result.each_value &:uniq!
|
|
90
|
-
|
|
91
|
-
result
|
|
92
|
-
end
|
|
93
|
-
|
|
94
|
-
private
|
|
95
|
-
|
|
96
|
-
# To each shortened token of :test
|
|
97
|
-
# :test, :tes, :te, :t
|
|
98
|
-
# add all ids of :test
|
|
99
|
-
#
|
|
100
|
-
# "token" here means just text.
|
|
101
|
-
#
|
|
102
|
-
# THINK Could be improved by appending the aforegoing ids?
|
|
103
|
-
#
|
|
104
|
-
def generate_for token, index, result
|
|
105
|
-
@generator.each_subtoken(token) do |subtoken|
|
|
106
|
-
if result[subtoken]
|
|
107
|
-
result[subtoken] += index[token] # unique
|
|
108
|
-
else
|
|
109
|
-
result[subtoken] = index[token].dup
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
end
|
|
113
|
-
|
|
114
|
-
end
|
|
115
|
-
|
|
116
|
-
end
|
|
117
|
-
|
|
118
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
# The partial generator uses a Partial::Substring strategy (from: 1) as default.
|
|
4
|
-
#
|
|
5
|
-
class PartialGenerator < Generator
|
|
6
|
-
|
|
7
|
-
# Generate a partial index based on the given index.
|
|
8
|
-
#
|
|
9
|
-
def generate strategy = Partial::Substring.new(from: 1)
|
|
10
|
-
strategy.generate_from self.index
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
end
|
|
@@ -1,77 +0,0 @@
|
|
|
1
|
-
# encoding: utf-8
|
|
2
|
-
#
|
|
3
|
-
module Cacher
|
|
4
|
-
|
|
5
|
-
module Similarity
|
|
6
|
-
|
|
7
|
-
# DoubleLevenshtone means that it's a combination of
|
|
8
|
-
# * DoubleMetaphone
|
|
9
|
-
# and
|
|
10
|
-
# * Levenshtein
|
|
11
|
-
# :)
|
|
12
|
-
#
|
|
13
|
-
class DoubleLevenshtone < Strategy
|
|
14
|
-
|
|
15
|
-
attr_reader :amount
|
|
16
|
-
|
|
17
|
-
#
|
|
18
|
-
#
|
|
19
|
-
def initialize amount = 10
|
|
20
|
-
@amount = amount
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Encodes the given symbol.
|
|
24
|
-
#
|
|
25
|
-
# Returns a symbol.
|
|
26
|
-
#
|
|
27
|
-
def encoded sym
|
|
28
|
-
codes = Text::Metaphone.double_metaphone sym.to_s
|
|
29
|
-
codes.first.to_sym unless codes.empty?
|
|
30
|
-
end
|
|
31
|
-
|
|
32
|
-
# Generates an index for the given index (in exact index style).
|
|
33
|
-
#
|
|
34
|
-
# In the following form:
|
|
35
|
-
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
|
36
|
-
#
|
|
37
|
-
def generate_from index
|
|
38
|
-
hash = hashify index.keys
|
|
39
|
-
sort hash
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
private
|
|
43
|
-
|
|
44
|
-
# Sorts the index values in place.
|
|
45
|
-
#
|
|
46
|
-
def sort index
|
|
47
|
-
index.each_pair.each do |code, ary|
|
|
48
|
-
ary.sort_by_levenshtein! code
|
|
49
|
-
ary.slice! amount, ary.size # size is not perfectly correct, but anyway
|
|
50
|
-
end
|
|
51
|
-
index
|
|
52
|
-
end
|
|
53
|
-
|
|
54
|
-
# Hashifies a list of symbols.
|
|
55
|
-
#
|
|
56
|
-
# Where:
|
|
57
|
-
# { encoded_sym => [syms] }
|
|
58
|
-
#
|
|
59
|
-
def hashify list
|
|
60
|
-
list.inject({}) do |total, element|
|
|
61
|
-
if code = encoded(element)
|
|
62
|
-
total[code] ||= []
|
|
63
|
-
total[code] << element
|
|
64
|
-
end
|
|
65
|
-
total
|
|
66
|
-
end
|
|
67
|
-
end
|
|
68
|
-
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
# ... aka Phonetic.
|
|
72
|
-
#
|
|
73
|
-
Phonetic = DoubleLevenshtone
|
|
74
|
-
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
end
|
|
@@ -1,31 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
module Similarity
|
|
4
|
-
|
|
5
|
-
# Similarity strategy that does nothing.
|
|
6
|
-
#
|
|
7
|
-
class None < Strategy
|
|
8
|
-
|
|
9
|
-
# Does not encode text. Just returns nil.
|
|
10
|
-
#
|
|
11
|
-
def encoded text
|
|
12
|
-
nil
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
# Returns an empty index.
|
|
16
|
-
#
|
|
17
|
-
def generate_from index
|
|
18
|
-
{}
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
# Returns if this strategy's generated file is saved.
|
|
22
|
-
#
|
|
23
|
-
def saved?
|
|
24
|
-
false
|
|
25
|
-
end
|
|
26
|
-
|
|
27
|
-
end
|
|
28
|
-
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
# Uses no similarity as default.
|
|
4
|
-
#
|
|
5
|
-
class SimilarityGenerator < Generator
|
|
6
|
-
|
|
7
|
-
# Generate a similarity index based on the given index.
|
|
8
|
-
#
|
|
9
|
-
def generate strategy = Similarity::None.new
|
|
10
|
-
strategy.generate_from self.index
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
end
|
|
@@ -1,39 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
module Weights
|
|
4
|
-
|
|
5
|
-
# Uses a logarithmic weight.
|
|
6
|
-
# If for a key k we have x ids, the weight is:
|
|
7
|
-
# w(x): log(x)
|
|
8
|
-
# Special case: If x < 1, then we use 0.
|
|
9
|
-
#
|
|
10
|
-
class Logarithmic < Strategy
|
|
11
|
-
|
|
12
|
-
# Generates a weights index from the given index.
|
|
13
|
-
#
|
|
14
|
-
def generate_from index
|
|
15
|
-
index.inject({}) do |hash, text_ids|
|
|
16
|
-
text, ids = *text_ids
|
|
17
|
-
weight = weight_for ids.size
|
|
18
|
-
hash[text] ||= weight.round(2) if weight
|
|
19
|
-
hash
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
|
|
23
|
-
# Sets the weight value.
|
|
24
|
-
#
|
|
25
|
-
# If the size is 0 or one, we would get -Infinity or 0.0.
|
|
26
|
-
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
|
27
|
-
#
|
|
28
|
-
# BUT: We need the value, even if 0. To designate that there is a weight!
|
|
29
|
-
#
|
|
30
|
-
def weight_for amount
|
|
31
|
-
return 0 if amount < 1
|
|
32
|
-
Math.log amount
|
|
33
|
-
end
|
|
34
|
-
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
end
|
|
38
|
-
|
|
39
|
-
end
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
module Cacher
|
|
2
|
-
|
|
3
|
-
# Uses a logarithmic algorithm as default.
|
|
4
|
-
#
|
|
5
|
-
class WeightsGenerator < Generator
|
|
6
|
-
|
|
7
|
-
# Generate a weights index based on the given index.
|
|
8
|
-
#
|
|
9
|
-
def generate strategy = Weights::Logarithmic.new
|
|
10
|
-
strategy.generate_from self.index
|
|
11
|
-
end
|
|
12
|
-
|
|
13
|
-
end
|
|
14
|
-
|
|
15
|
-
end
|
|
@@ -1,150 +0,0 @@
|
|
|
1
|
-
require 'rack/mount'
|
|
2
|
-
|
|
3
|
-
module FrontendAdapters
|
|
4
|
-
|
|
5
|
-
# TODO Rename to Routing again. Push everything back into appropriate Adapters.
|
|
6
|
-
#
|
|
7
|
-
class Rack # :nodoc:all
|
|
8
|
-
|
|
9
|
-
@@defaults = {
|
|
10
|
-
query_key: 'query'.freeze,
|
|
11
|
-
offset_key: 'offset'.freeze,
|
|
12
|
-
content_type: 'application/octet-stream'.freeze # TODO Wrong.
|
|
13
|
-
}
|
|
14
|
-
|
|
15
|
-
def initialize
|
|
16
|
-
@defaults = @@defaults.dup
|
|
17
|
-
end
|
|
18
|
-
|
|
19
|
-
#
|
|
20
|
-
#
|
|
21
|
-
def reset_routes
|
|
22
|
-
@routes = ::Rack::Mount::RouteSet.new
|
|
23
|
-
end
|
|
24
|
-
def routes
|
|
25
|
-
@routes || reset_routes
|
|
26
|
-
end
|
|
27
|
-
def finalize
|
|
28
|
-
routes.freeze
|
|
29
|
-
end
|
|
30
|
-
|
|
31
|
-
# Routing simply delegates to the route set to handle a request.
|
|
32
|
-
#
|
|
33
|
-
def call env
|
|
34
|
-
routes.call env
|
|
35
|
-
end
|
|
36
|
-
|
|
37
|
-
# API method.
|
|
38
|
-
#
|
|
39
|
-
def route options = {}
|
|
40
|
-
mappings, route_options = split options
|
|
41
|
-
mappings.each do |url, query|
|
|
42
|
-
route_one url, query, route_options
|
|
43
|
-
end
|
|
44
|
-
end
|
|
45
|
-
# Splits the route method options
|
|
46
|
-
# into real options and route options (/regexp/ => thing or 'some/path' => thing).
|
|
47
|
-
#
|
|
48
|
-
def split options
|
|
49
|
-
mappings = {}
|
|
50
|
-
route_options = {}
|
|
51
|
-
options.each_pair do |key, value|
|
|
52
|
-
if Regexp === key or String === key
|
|
53
|
-
mappings[key] = value
|
|
54
|
-
else
|
|
55
|
-
route_options[key] = value
|
|
56
|
-
end
|
|
57
|
-
end
|
|
58
|
-
[mappings, route_options]
|
|
59
|
-
end
|
|
60
|
-
def route_one url, query, route_options = {}
|
|
61
|
-
raise RouteTargetNilError.new(url) unless query
|
|
62
|
-
routes.add_route Adapters::Rack.app_for(query, route_options), default_options(url, route_options)
|
|
63
|
-
end
|
|
64
|
-
class RouteTargetNilError < StandardError
|
|
65
|
-
def initialize url
|
|
66
|
-
@url = url
|
|
67
|
-
end
|
|
68
|
-
def to_s
|
|
69
|
-
"Routing for #{@url.inspect} was defined with a nil target object, i.e. #{@url.inspect} => nil."
|
|
70
|
-
end
|
|
71
|
-
end
|
|
72
|
-
#
|
|
73
|
-
#
|
|
74
|
-
def root status
|
|
75
|
-
answer %r{^/$}, STATUSES[status]
|
|
76
|
-
end
|
|
77
|
-
#
|
|
78
|
-
#
|
|
79
|
-
def default status
|
|
80
|
-
answer nil, STATUSES[status]
|
|
81
|
-
end
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
# TODO Can Rack handle this for me?
|
|
86
|
-
#
|
|
87
|
-
# Note: Rack-mount already handles the 404.
|
|
88
|
-
#
|
|
89
|
-
STATUSES = {
|
|
90
|
-
200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] },
|
|
91
|
-
404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] }
|
|
92
|
-
}
|
|
93
|
-
|
|
94
|
-
#
|
|
95
|
-
#
|
|
96
|
-
def default_options url, route_options = {}
|
|
97
|
-
url = normalized url
|
|
98
|
-
|
|
99
|
-
options = { request_method: 'GET' }.merge route_options
|
|
100
|
-
|
|
101
|
-
options[:path_info] = url if url
|
|
102
|
-
|
|
103
|
-
options.delete :content_type
|
|
104
|
-
|
|
105
|
-
query_params = options.delete :query
|
|
106
|
-
options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params
|
|
107
|
-
|
|
108
|
-
options
|
|
109
|
-
end
|
|
110
|
-
#
|
|
111
|
-
#
|
|
112
|
-
def generate_query_string query_params
|
|
113
|
-
raise "At least one query string condition is needed." if query_params.size.zero?
|
|
114
|
-
raise "Too many query param conditions (only 1 allowed): #{query_params}" if query_params.size > 1
|
|
115
|
-
k, v = query_params.first
|
|
116
|
-
"#{k}=#{v}"
|
|
117
|
-
end
|
|
118
|
-
|
|
119
|
-
# Setup a route that answers using the given app.
|
|
120
|
-
#
|
|
121
|
-
def answer url = nil, app = nil
|
|
122
|
-
routes.add_route (app || STATUSES[200]), default_options(url)
|
|
123
|
-
end
|
|
124
|
-
|
|
125
|
-
# Returns a regular expression for the url if it is given a String-like object.
|
|
126
|
-
#
|
|
127
|
-
def normalized url
|
|
128
|
-
url.respond_to?(:to_str) ? %r{#{url}} : url
|
|
129
|
-
end
|
|
130
|
-
|
|
131
|
-
# Returns true if there are no routes defined.
|
|
132
|
-
#
|
|
133
|
-
def empty?
|
|
134
|
-
routes.length.zero?
|
|
135
|
-
end
|
|
136
|
-
|
|
137
|
-
# TODO Beautify.
|
|
138
|
-
#
|
|
139
|
-
def to_s
|
|
140
|
-
routes.instance_variable_get(:@routes).map do |route|
|
|
141
|
-
path_info = route.conditions[:path_info]
|
|
142
|
-
anchored = ::Rack::Mount::Utils.regexp_anchored?(path_info)
|
|
143
|
-
anchored_ok = anchored ? "\u2713" : " "
|
|
144
|
-
"#{anchored_ok} #{path_info.source}"
|
|
145
|
-
end.join "\n"
|
|
146
|
-
end
|
|
147
|
-
|
|
148
|
-
end
|
|
149
|
-
|
|
150
|
-
end
|
data/lib/picky/index/bundle.rb
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
module Index # :nodoc:all
|
|
2
|
-
# A Bundle is a number of indexes
|
|
3
|
-
# per [index, category] combination.
|
|
4
|
-
#
|
|
5
|
-
# At most, there are three indexes:
|
|
6
|
-
# * *core* index (always used)
|
|
7
|
-
# * *weights* index (always used)
|
|
8
|
-
# * *similarity* index (used with similarity)
|
|
9
|
-
#
|
|
10
|
-
# In Picky, indexing is separated from the index
|
|
11
|
-
# handling itself through a parallel structure.
|
|
12
|
-
#
|
|
13
|
-
# Both use methods provided by this base class, but
|
|
14
|
-
# have very different goals:
|
|
15
|
-
#
|
|
16
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
|
17
|
-
# and providing helper functions to e.g. check the indexes.
|
|
18
|
-
#
|
|
19
|
-
# * *Index*::*Bundle* is concerned with loading these index files into
|
|
20
|
-
# memory and looking up search data as fast as possible.
|
|
21
|
-
#
|
|
22
|
-
class Bundle
|
|
23
|
-
|
|
24
|
-
attr_reader :identifier, :files
|
|
25
|
-
attr_accessor :index, :weights, :similarity, :configuration, :similarity_strategy
|
|
26
|
-
|
|
27
|
-
delegate :clear, :to => :index
|
|
28
|
-
delegate :[], :[]=, :to => :configuration
|
|
29
|
-
|
|
30
|
-
def initialize name, configuration, similarity_strategy
|
|
31
|
-
@identifier = "#{configuration.identifier} (#{name})"
|
|
32
|
-
@files = Files.new name, configuration
|
|
33
|
-
|
|
34
|
-
@index = {}
|
|
35
|
-
@weights = {}
|
|
36
|
-
@similarity = {}
|
|
37
|
-
@configuration = {} # A hash with config options.
|
|
38
|
-
|
|
39
|
-
@similarity_strategy = similarity_strategy
|
|
40
|
-
end
|
|
41
|
-
|
|
42
|
-
# Get a list of similar texts.
|
|
43
|
-
#
|
|
44
|
-
# Note: Does not return itself.
|
|
45
|
-
#
|
|
46
|
-
def similar text
|
|
47
|
-
code = similarity_strategy.encoded text
|
|
48
|
-
similar_codes = code && @similarity[code]
|
|
49
|
-
similar_codes.delete text if similar_codes
|
|
50
|
-
similar_codes || []
|
|
51
|
-
end
|
|
52
|
-
|
|
53
|
-
end
|
|
54
|
-
end
|