picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
@@ -0,0 +1,146 @@
|
|
1
|
+
require 'rack/mount'
|
2
|
+
|
3
|
+
module FrontendAdapters
|
4
|
+
|
5
|
+
# TODO Rename to Routing again. Push everything back into appropriate Adapters.
|
6
|
+
#
|
7
|
+
class Rack # :nodoc:all

  # Replaces the current route set with a fresh, empty one.
  #
  # Returns the new route set.
  #
  def reset_routes
    @routes = ::Rack::Mount::RouteSet.new
  end

  # Returns the route set, creating it on first access.
  #
  def routes
    @routes || reset_routes
  end

  # Freezes the route set.
  #
  def finalize
    routes.freeze
  end

  # Routing simply delegates to the route set to handle a request.
  #
  def call env
    routes.call env
  end

  # API method.
  #
  # Takes a single hash that mixes mappings (Regexp or String keys,
  # e.g. %r{/books} => query) with route options (all other keys) and
  # adds one route per mapping, each with the same route options.
  #
  def route options = {}
    mappings, route_options = split options
    mappings.each do |url, query|
      route_one url, query, route_options
    end
  end

  # Splits the route method options
  # into real options and route options (/regexp/ => thing or 'some/path' => thing).
  #
  # Returns [mappings, route_options].
  #
  def split options
    mappings      = {}
    route_options = {}
    options.each_pair do |key, value|
      case key
      when Regexp, String then mappings[key] = value
      else route_options[key] = value
      end
    end
    [mappings, route_options]
  end

  # Adds a single route from url to the app built for the given query.
  # The route is named after query.to_s.
  #
  # Raises a RouteTargetNilError if the query is nil (or false).
  #
  def route_one url, query, route_options = {}
    raise RouteTargetNilError.new(url) unless query
    routes.add_route Adapters::Rack.app_for(query, route_options), default_options(url, route_options), {}, query.to_s
  end

  # Raised if a route is defined without a target, i.e. url => nil.
  #
  class RouteTargetNilError < StandardError
    def initialize url
      @url = url
    end
    def to_s
      "Routing for #{@url.inspect} was defined with a nil target object, i.e. #{@url.inspect} => nil."
    end
  end

  # Answers requests to the root path with the canned response for the
  # given status. For a status without an entry in STATUSES, #answer
  # falls back to the 200 response.
  #
  def root status
    answer %r{^/$}, STATUSES[status]
  end

  # Adds a route without a path condition that answers with the canned
  # response for the given status (same fallback as #root).
  #
  def default status
    answer nil, STATUSES[status]
  end

  # TODO Can Rack handle this for me?
  #
  # Note: Rack-mount already handles the 404.
  #
  STATUSES = {
    200 => lambda { |_| [200, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] },
    404 => lambda { |_| [404, { 'Content-Type' => 'text/html', 'Content-Length' => '0' }, ['']] }
  }

  # Builds the conditions hash handed to the route set for one route.
  #
  # * request_method defaults to 'GET', route_options may override it.
  # * path_info is only set if a url is given (Strings become regexps).
  # * :content_type is removed, it is not a routing condition.
  # * :query (a one-entry hash) is turned into a :query_string regexp.
  #
  def default_options url, route_options = {}
    url = normalized url

    options = { request_method: 'GET' }.merge route_options

    options[:path_info] = url if url

    options.delete :content_type

    query_params = options.delete :query
    options[:query_string] = %r{#{generate_query_string(query_params)}} if query_params

    options
  end

  # Turns a hash with exactly one entry into a "key=value" string.
  #
  # Raises if the hash is empty or has more than one entry.
  #
  def generate_query_string query_params
    raise "At least one query string condition is needed." if query_params.size.zero?
    raise "Too many query param conditions (only 1 allowed): #{query_params}" if query_params.size > 1
    k, v = query_params.first
    "#{k}=#{v}"
  end

  # Setup a route that answers using the given app.
  #
  # Without an app, the canned 200 response is used.
  #
  def answer url = nil, app = nil
    # Explicit call parentheses: "add_route (...)" with a space is
    # parsed as a parenthesized first argument and triggers a warning.
    routes.add_route((app || STATUSES[200]), default_options(url))
  end

  # Returns a regular expression for the url if it is given a String-like object.
  #
  # Anything else (a Regexp, nil) is returned unchanged.
  #
  def normalized url
    url.respond_to?(:to_str) ? %r{#{url}} : url
  end

  # Returns true if there are no routes defined.
  #
  def empty?
    routes.length.zero?
  end

  # TODO Beautify. Rewrite!
  #
  # Lists all routes, one per line, as "<anchored mark> <path regexp> => <name>".
  #
  # Routes without a path_info condition (e.g. added through #default)
  # are listed with an empty path instead of raising a NoMethodError.
  #
  def to_s
    all_routes = routes.instance_variable_get(:@routes)

    # The path_info condition is optional, see #default_options.
    sources = all_routes.map do |route|
      path_info = route.conditions[:path_info]
      path_info ? path_info.source.to_s : ''
    end
    max_length = sources.map(&:size).max || 0

    lines = all_routes.zip(sources).map do |route, source|
      path_info   = route.conditions[:path_info]
      anchored    = path_info && ::Rack::Mount::Utils.regexp_anchored?(path_info)
      anchored_ok = anchored ? "\u2713" : " "
      "#{anchored_ok} #{source.ljust(max_length)} => #{route.name}"
    end

    "Note: Anchored (\u2713) regexps are faster, e.g. /\\A.*\\Z/ or /^.*$/.\n\n" + lines.join("\n")
  end

end
|
145
|
+
|
146
|
+
end
|
@@ -1,3 +1,3 @@
|
|
1
|
-
Partial =
|
2
|
-
Similarity =
|
3
|
-
Weights =
|
1
|
+
Partial = Generators::Partial
|
2
|
+
Similarity = Generators::Similarity
|
3
|
+
Weights = Generators::Weights
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
module Partial
|
4
|
+
|
5
|
+
# Does not generate a partial index.
|
6
|
+
#
|
7
|
+
class None < Strategy
|
8
|
+
|
9
|
+
# Returns an empty index.
|
10
|
+
#
|
11
|
+
def generate_from index
|
12
|
+
{}
|
13
|
+
end
|
14
|
+
|
15
|
+
# Returns whether this strategy's generated file is saved.
|
16
|
+
#
|
17
|
+
def saved?
|
18
|
+
false
|
19
|
+
end
|
20
|
+
|
21
|
+
# Do not use the partial bundle for getting ids and weights.
|
22
|
+
#
|
23
|
+
def use_exact_for_partial?
|
24
|
+
true
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -0,0 +1,25 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
module Partial
|
4
|
+
|
5
|
+
# Superclass for partial strategies.
|
6
|
+
#
|
7
|
+
class Strategy < Generators::Strategy
|
8
|
+
|
9
|
+
# Defines whether to use the exact bundle
|
10
|
+
# instead of the partial one.
|
11
|
+
#
|
12
|
+
# Default is @false@.
|
13
|
+
#
|
14
|
+
# For example:
|
15
|
+
# Partial::None.new # Uses the exact index instead of the partial one.
|
16
|
+
#
|
17
|
+
def use_exact_for_partial?
|
18
|
+
false
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
22
|
+
|
23
|
+
end
|
24
|
+
|
25
|
+
end
|
@@ -0,0 +1,118 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
module Partial
|
4
|
+
|
5
|
+
# Generates the right substrings for use in the substring strategy.
|
6
|
+
#
|
7
|
+
class SubstringGenerator

  attr_reader :from, :to

  # from:: Passed on to Symbol#each_subtoken for every token.
  # to::   End index used to cut the token before generating its
  #        subtokens. 0 means: do not cut the token at all.
  #
  def initialize from, to
    @from, @to = from, to
  end

  # Yields each subtoken of the given token (a symbol) to the block.
  #
  # Unless to is 0, the token is first cut down to token[0..to].
  #
  # Note: This is a regular instance method on purpose. A def nested
  # inside #initialize would (re)define the method on the class, so the
  # generator constructed last would change the behaviour of all others.
  #
  def each_subtoken token, &block
    token = token[0..@to].intern unless @to.zero?
    token.each_subtoken @from, &block
  end

end
|
27
|
+
|
28
|
+
# The subtoken partial strategy.
|
29
|
+
#
|
30
|
+
# If given "florian"
|
31
|
+
# it will index "floria", "flori", "flor", "flo", "fl", "f"
|
32
|
+
# (Depending on what the given from value is, the example is with option from: 1)
|
33
|
+
#
|
34
|
+
class Substring < Strategy
|
35
|
+
|
36
|
+
# The from option signifies where in the symbol it
|
37
|
+
# will start in generating the subtokens.
|
38
|
+
#
|
39
|
+
# Examples:
|
40
|
+
#
|
41
|
+
# With :hello, and to: -1 (default)
|
42
|
+
# * from: 1 # => [:hello, :hell, :hel, :he, :h]
|
43
|
+
# * from: 4 # => [:hello, :hell]
|
44
|
+
#
|
45
|
+
# With :hello, and to: -2
|
46
|
+
# * from: 1 # => [:hell, :hel, :he, :h]
|
47
|
+
# * from: 4 # => [:hell]
|
48
|
+
#
|
49
|
+
def initialize options = {}
|
50
|
+
from = options[:from] || 1
|
51
|
+
to = options[:to] || -1
|
52
|
+
@generator = SubstringGenerator.new from, to
|
53
|
+
end
|
54
|
+
|
55
|
+
# Delegator to generator#from.
|
56
|
+
#
|
57
|
+
def from
|
58
|
+
@generator.from
|
59
|
+
end
|
60
|
+
|
61
|
+
# Delegator to generator#to.
|
62
|
+
#
|
63
|
+
def to
|
64
|
+
@generator.to
|
65
|
+
end
|
66
|
+
|
67
|
+
# Generates a partial index from the given index.
|
68
|
+
#
|
69
|
+
def generate_from index
  result = {}

  # Generate for each key token the subtokens.
  #
  # Reports progress on every 5000th token.
  #
  count = 0
  index.each_key do |token|
    count += 1
    if (count % 5000).zero?
      timed_exclaim "Generating partial tokens for token #{token}. This appears every 5000 tokens."
    end
    generate_for token, index, result
  end

  # Remove duplicate ids.
  #
  # THINK If it is unique for a subtoken, it is
  #       unique for all derived longer tokens.
  #
  # Parenthesized, as a bare "each_value &:uniq!" triggers the
  # "& interpreted as argument prefix" parser warning.
  #
  result.each_value(&:uniq!)

  result
end
|
93
|
+
|
94
|
+
private
|
95
|
+
|
96
|
+
# To each shortened token of :test
|
97
|
+
# :test, :tes, :te, :t
|
98
|
+
# add all ids of :test
|
99
|
+
#
|
100
|
+
# "token" here means just text.
|
101
|
+
#
|
102
|
+
# THINK Could be improved by appending the aforegoing ids?
|
103
|
+
#
|
104
|
+
def generate_for token, index, result
  # All subtokens of the token receive the ids of the full token.
  ids = index[token]

  @generator.each_subtoken(token) do |subtoken|
    if result[subtoken]
      # The stored array is always our own copy (see the else branch),
      # so appending in place is safe and avoids copying the growing
      # array for every token that shares this subtoken.
      result[subtoken].concat ids # unique
    else
      result[subtoken] = ids.dup
    end
  end
end
|
113
|
+
|
114
|
+
end
|
115
|
+
|
116
|
+
end
|
117
|
+
|
118
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
# The partial generator uses a Partial::Substring strategy (from: 1) as default.
|
4
|
+
#
|
5
|
+
class PartialGenerator < Base
|
6
|
+
|
7
|
+
# Generate a partial index based on the given index.
|
8
|
+
#
|
9
|
+
def generate strategy = Partial::Substring.new(from: 1)
|
10
|
+
strategy.generate_from self.index
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Similarity
|
6
|
+
|
7
|
+
# It's actually a combination of double metaphone
|
8
|
+
# and Levenshtein.
|
9
|
+
#
|
10
|
+
# It uses the double metaphone to get similar words
|
11
|
+
# and ranks them using the levenshtein.
|
12
|
+
#
|
13
|
+
class DoubleMetaphone < Phonetic
|
14
|
+
|
15
|
+
# Encodes the given symbol.
|
16
|
+
#
|
17
|
+
# Returns a symbol.
|
18
|
+
#
|
19
|
+
def encoded sym
|
20
|
+
codes = Text::Metaphone.double_metaphone sym.to_s
|
21
|
+
codes.first.to_sym unless codes.empty?
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Similarity
|
6
|
+
|
7
|
+
# It's actually a combination of metaphone
|
8
|
+
# and Levenshtein.
|
9
|
+
#
|
10
|
+
# It uses the metaphone to get similar words
|
11
|
+
# and ranks them using the levenshtein.
|
12
|
+
#
|
13
|
+
class Metaphone < Phonetic
|
14
|
+
|
15
|
+
# Encodes the given symbol.
|
16
|
+
#
|
17
|
+
# Returns a symbol.
|
18
|
+
#
|
19
|
+
def encoded sym
|
20
|
+
code = Text::Metaphone.metaphone sym.to_s
|
21
|
+
code.to_sym if code
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
module Similarity
|
4
|
+
|
5
|
+
# Similarity strategy that does nothing.
|
6
|
+
#
|
7
|
+
class None < Strategy
|
8
|
+
|
9
|
+
# Does not encode text. Just returns nil.
|
10
|
+
#
|
11
|
+
def encoded text
|
12
|
+
nil
|
13
|
+
end
|
14
|
+
|
15
|
+
# Returns an empty index.
|
16
|
+
#
|
17
|
+
def generate_from index
|
18
|
+
{}
|
19
|
+
end
|
20
|
+
|
21
|
+
# Returns whether this strategy's generated file is saved.
|
22
|
+
#
|
23
|
+
def saved?
|
24
|
+
false
|
25
|
+
end
|
26
|
+
|
27
|
+
end
|
28
|
+
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Similarity
|
6
|
+
|
7
|
+
# Superclass for the phonetic similarity strategies
|
8
|
+
# (DoubleMetaphone, Metaphone, Soundex).
|
9
|
+
#
|
10
|
+
# It uses the encoding of the subclass to group similar words
|
11
|
+
# and ranks them using the levenshtein.
|
12
|
+
#
|
13
|
+
class Phonetic < Strategy
|
14
|
+
|
15
|
+
attr_reader :amount
|
16
|
+
|
17
|
+
#
|
18
|
+
#
|
19
|
+
def initialize amount = 10
|
20
|
+
raise "In Picky 2.0+, the Similarity::Phonetic has been renamed to Similarity::DoubleMetaphone. Please use that one. Thanks!" if self.class == Phonetic
|
21
|
+
@amount = amount
|
22
|
+
end
|
23
|
+
|
24
|
+
# Generates an index for the given index (in exact index style).
|
25
|
+
#
|
26
|
+
# In the following form:
|
27
|
+
# [:meier, :mueller, :peter, :pater] => { MR: [:meier], MLR: [:mueller], PTR: [:peter, :pater] }
|
28
|
+
#
|
29
|
+
def generate_from index
|
30
|
+
hash = hashify index.keys
|
31
|
+
sort hash
|
32
|
+
end
|
33
|
+
|
34
|
+
protected
|
35
|
+
|
36
|
+
# Sorts the index values in place (using Array#sort_by_levenshtein!
# with the code they are stored under) and then cuts each of them
# down to at most +amount+ entries.
#
# Returns the given index.
#
def sort index
  index.each_pair do |code, ary|
    ary.sort_by_levenshtein! code
    ary.slice! amount, ary.size # size is not perfectly correct, but anyway
  end
  index
end
|
45
|
+
|
46
|
+
# Hashifies a list of symbols.
|
47
|
+
#
|
48
|
+
# Where:
|
49
|
+
# { encoded_sym => [syms] }
|
50
|
+
#
|
51
|
+
def hashify list
  list.each_with_object({}) do |element, total|
    code = encoded element
    # Elements that cannot be encoded (nil code) are left out.
    (total[code] ||= []) << element if code
  end
end
|
60
|
+
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
|
65
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Similarity
|
6
|
+
|
7
|
+
# It's actually a combination of soundex
|
8
|
+
# and Levenshtein.
|
9
|
+
#
|
10
|
+
# It uses the soundex to get similar words
|
11
|
+
# and ranks them using the levenshtein.
|
12
|
+
#
|
13
|
+
class Soundex < Phonetic
|
14
|
+
|
15
|
+
# Encodes the given symbol.
|
16
|
+
#
|
17
|
+
# Returns a symbol.
|
18
|
+
#
|
19
|
+
def encoded sym
|
20
|
+
code = Text::Soundex.soundex sym.to_s
|
21
|
+
code.to_sym if code
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
25
|
+
|
26
|
+
end
|
27
|
+
|
28
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
# Uses no similarity as default.
|
4
|
+
#
|
5
|
+
class SimilarityGenerator < Base
|
6
|
+
|
7
|
+
# Generate a similarity index based on the given index.
|
8
|
+
#
|
9
|
+
def generate strategy = Similarity::None.new
|
10
|
+
strategy.generate_from self.index
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
module Weights
|
4
|
+
|
5
|
+
# Uses a logarithmic weight.
|
6
|
+
# If for a key k we have x ids, the weight is:
|
7
|
+
# w(x): log(x)
|
8
|
+
# Special case: If x < 1, then we use 0.
|
9
|
+
#
|
10
|
+
class Logarithmic < Strategy
|
11
|
+
|
12
|
+
# Generates a weights index from the given index.
#
# Maps each text of the given index ({ text => ids }) to the weight
# calculated from its number of ids, rounded to two decimal places.
# A text gets no entry if #weight_for returns nil (or false) for it.
#
def generate_from index
  index.each_with_object({}) do |(text, ids), weights|
    weight = weight_for ids.size
    weights[text] ||= weight.round(2) if weight
  end
end
|
22
|
+
|
23
|
+
# Sets the weight value.
|
24
|
+
#
|
25
|
+
# If the size is 0 or one, we would get -Infinity or 0.0.
|
26
|
+
# Thus we do not set a value if there is just one. The default, dynamically, is 0.
|
27
|
+
#
|
28
|
+
# BUT: We need the value, even if 0. To designate that there is a weight!
|
29
|
+
#
|
30
|
+
def weight_for amount
|
31
|
+
return 0 if amount < 1
|
32
|
+
Math.log amount
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
36
|
+
|
37
|
+
end
|
38
|
+
|
39
|
+
end
|
@@ -0,0 +1,15 @@
|
|
1
|
+
module Generators
|
2
|
+
|
3
|
+
# Uses a logarithmic algorithm as default.
|
4
|
+
#
|
5
|
+
class WeightsGenerator < Base
|
6
|
+
|
7
|
+
# Generate a weights index based on the given index.
|
8
|
+
#
|
9
|
+
def generate strategy = Weights::Logarithmic.new
|
10
|
+
strategy.generate_from self.index
|
11
|
+
end
|
12
|
+
|
13
|
+
end
|
14
|
+
|
15
|
+
end
|
File without changes
|