picky 2.5.2 → 2.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +23 -0
- data/lib/picky/adapters/rack/live_parameters.rb +33 -0
- data/lib/picky/adapters/rack/query.rb +65 -0
- data/lib/picky/adapters/rack.rb +30 -0
- data/lib/picky/application.rb +5 -5
- data/lib/picky/backend/backend.rb +108 -0
- data/lib/picky/backend/file/basic.rb +101 -0
- data/lib/picky/backend/file/json.rb +34 -0
- data/lib/picky/backend/file/marshal.rb +34 -0
- data/lib/picky/backend/file/text.rb +56 -0
- data/lib/picky/backend/files.rb +30 -0
- data/lib/picky/backend/redis/basic.rb +85 -0
- data/lib/picky/backend/redis/list_hash.rb +49 -0
- data/lib/picky/backend/redis/string_hash.rb +40 -0
- data/lib/picky/backend/redis.rb +40 -0
- data/lib/picky/calculations/location.rb +57 -0
- data/lib/picky/categories.rb +62 -0
- data/lib/picky/categories_indexed.rb +93 -0
- data/lib/picky/categories_indexing.rb +12 -0
- data/lib/picky/category.rb +127 -0
- data/lib/picky/category_indexed.rb +64 -0
- data/lib/picky/category_indexing.rb +145 -0
- data/lib/picky/{internals/ext → ext}/maybe_compile.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{internals/ext → ext}/ruby19/performant.c +0 -0
- data/lib/picky/{internals/extensions → extensions}/array.rb +0 -0
- data/lib/picky/extensions/class.rb +11 -0
- data/lib/picky/{internals/extensions → extensions}/hash.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/module.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/object.rb +0 -0
- data/lib/picky/{internals/extensions → extensions}/symbol.rb +0 -0
- data/lib/picky/frontend_adapters/rack.rb +146 -0
- data/lib/picky/generators/aliases.rb +3 -3
- data/lib/picky/generators/base.rb +15 -0
- data/lib/picky/generators/partial/default.rb +5 -0
- data/lib/picky/generators/partial/none.rb +31 -0
- data/lib/picky/generators/partial/strategy.rb +25 -0
- data/lib/picky/generators/partial/substring.rb +118 -0
- data/lib/picky/generators/partial_generator.rb +15 -0
- data/lib/picky/generators/similarity/default.rb +7 -0
- data/lib/picky/generators/similarity/double_metaphone.rb +28 -0
- data/lib/picky/generators/similarity/metaphone.rb +28 -0
- data/lib/picky/generators/similarity/none.rb +31 -0
- data/lib/picky/generators/similarity/phonetic.rb +65 -0
- data/lib/picky/generators/similarity/soundex.rb +28 -0
- data/lib/picky/generators/similarity/strategy.rb +9 -0
- data/lib/picky/generators/similarity_generator.rb +15 -0
- data/lib/picky/generators/strategy.rb +14 -0
- data/lib/picky/generators/weights/default.rb +7 -0
- data/lib/picky/generators/weights/logarithmic.rb +39 -0
- data/lib/picky/generators/weights/strategy.rb +9 -0
- data/lib/picky/generators/weights_generator.rb +15 -0
- data/lib/picky/{internals/helpers → helpers}/measuring.rb +0 -0
- data/lib/picky/index/base.rb +119 -104
- data/lib/picky/index/base_indexed.rb +27 -0
- data/lib/picky/index/base_indexing.rb +119 -0
- data/lib/picky/index/memory.rb +6 -18
- data/lib/picky/index/redis.rb +6 -18
- data/lib/picky/indexed/bundle/base.rb +110 -0
- data/lib/picky/indexed/bundle/memory.rb +91 -0
- data/lib/picky/indexed/bundle/redis.rb +45 -0
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +35 -0
- data/lib/picky/indexed/wrappers/bundle/location.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +43 -0
- data/lib/picky/indexed/wrappers/category/location.rb +25 -0
- data/lib/picky/indexed/wrappers/exact_first.rb +55 -0
- data/lib/picky/{internals/indexers → indexers}/base.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/parallel.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/serial.rb +0 -0
- data/lib/picky/{internals/indexers → indexers}/solr.rb +0 -0
- data/lib/picky/indexes.rb +73 -0
- data/lib/picky/indexes_indexed.rb +29 -0
- data/lib/picky/indexes_indexing.rb +49 -0
- data/lib/picky/indexing/bundle/base.rb +212 -0
- data/lib/picky/indexing/bundle/memory.rb +25 -0
- data/lib/picky/indexing/bundle/redis.rb +24 -0
- data/lib/picky/indexing/bundle/super_base.rb +61 -0
- data/lib/picky/indexing/wrappers/category/location.rb +25 -0
- data/lib/picky/interfaces/live_parameters.rb +8 -8
- data/lib/picky/loader.rb +89 -95
- data/lib/picky/{internals/performant.rb → performant.rb} +0 -0
- data/lib/picky/query/allocation.rb +84 -0
- data/lib/picky/query/allocations.rb +114 -0
- data/lib/picky/query/combination.rb +76 -0
- data/lib/picky/query/combinations/base.rb +70 -0
- data/lib/picky/query/combinations/memory.rb +48 -0
- data/lib/picky/query/combinations/redis.rb +86 -0
- data/lib/picky/query/indexes.rb +195 -0
- data/lib/picky/query/qualifiers.rb +76 -0
- data/lib/picky/query/token.rb +198 -0
- data/lib/picky/query/tokens.rb +103 -0
- data/lib/picky/{internals/query → query}/weights.rb +0 -0
- data/lib/picky/results.rb +1 -1
- data/lib/picky/search.rb +6 -6
- data/lib/picky/{internals/solr → solr}/schema_generator.rb +0 -0
- data/lib/picky/sources/db.rb +7 -7
- data/lib/picky/sources/wrappers/location.rb +2 -2
- data/lib/picky/tokenizers/base.rb +224 -0
- data/lib/picky/tokenizers/index.rb +30 -0
- data/lib/picky/tokenizers/location.rb +49 -0
- data/lib/picky/tokenizers/query.rb +55 -0
- data/lib/tasks/index.rake +4 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/{internals/adapters → adapters}/rack/base_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/{internals/adapters → adapters}/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/file/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/json_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/marshal_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/file/text_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/files_spec.rb +3 -3
- data/spec/lib/{internals/index → backend}/redis/basic_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/list_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis/string_hash_spec.rb +1 -1
- data/spec/lib/{internals/index → backend}/redis_spec.rb +11 -5
- data/spec/lib/{internals/calculations → calculations}/location_spec.rb +1 -1
- data/spec/lib/{internals/indexed/categories_spec.rb → categories_indexed_spec.rb} +10 -10
- data/spec/lib/{internals/indexed/category_spec.rb → category_indexed_spec.rb} +12 -12
- data/spec/lib/{internals/indexing/category_spec.rb → category_indexing_spec.rb} +10 -10
- data/spec/lib/{internals/cores_spec.rb → cores_spec.rb} +0 -0
- data/spec/lib/{internals/extensions → extensions}/array_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/hash_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/module_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/object_spec.rb +0 -0
- data/spec/lib/{internals/extensions → extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{internals/frontend_adapters → frontend_adapters}/rack_spec.rb +10 -10
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/cacher_strategy_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial/default_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/partial/none_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/partial/substring_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/partial_generator_spec.rb +3 -3
- data/spec/lib/{internals/generators → generators}/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/none_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity/soundex_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/similarity_generator_spec.rb +2 -2
- data/spec/lib/{internals/generators → generators}/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/{internals/generators → generators}/weights_generator_spec.rb +5 -5
- data/spec/lib/{internals/helpers → helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{internals/indexed/index_spec.rb → index/base_indexed_spec.rb} +5 -5
- data/spec/lib/{internals/indexing/index_spec.rb → index/base_indexing_spec.rb} +6 -19
- data/spec/lib/index/base_spec.rb +10 -53
- data/spec/lib/{internals/indexed → indexed}/bundle/memory_spec.rb +5 -5
- data/spec/lib/{internals/indexed → indexed}/bundle/redis_spec.rb +4 -4
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/calculation_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/bundle/wrapper_spec.rb +1 -1
- data/spec/lib/{internals/indexed → indexed}/wrappers/exact_first_spec.rb +7 -7
- data/spec/lib/{internals/indexers → indexers}/base_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/parallel_spec.rb +0 -0
- data/spec/lib/{internals/indexers → indexers}/serial_spec.rb +0 -0
- data/spec/lib/indexes_class_spec.rb +30 -0
- data/spec/lib/{indexed/indexes_spec.rb → indexes_indexed_spec.rb} +1 -1
- data/spec/lib/{indexing/indexes_spec.rb → indexes_indexing_spec.rb} +8 -8
- data/spec/lib/{internals/indexing/indexes_spec.rb → indexes_spec.rb} +15 -12
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/{internals/indexing → indexing}/bundle/memory_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/redis_spec.rb +3 -3
- data/spec/lib/{internals/indexing → indexing}/bundle/super_base_spec.rb +2 -2
- data/spec/lib/{internals/interfaces → interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +5 -5
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +1 -1
- data/spec/lib/query/qualifiers_spec.rb +4 -4
- data/spec/lib/query/token_spec.rb +3 -3
- data/spec/lib/query/tokens_spec.rb +32 -32
- data/spec/lib/search_spec.rb +5 -5
- data/spec/lib/{internals/solr → solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/sources/db_spec.rb +4 -8
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/base_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/index_spec.rb +1 -1
- data/spec/lib/{internals/tokenizers → tokenizers}/query_spec.rb +1 -1
- metadata +214 -215
- data/lib/picky/aliases.rb +0 -4
- data/lib/picky/index_bundle.rb +0 -48
- data/lib/picky/indexed/indexes.rb +0 -59
- data/lib/picky/indexing/indexes.rb +0 -87
- data/lib/picky/internals/adapters/rack/base.rb +0 -27
- data/lib/picky/internals/adapters/rack/live_parameters.rb +0 -37
- data/lib/picky/internals/adapters/rack/query.rb +0 -69
- data/lib/picky/internals/adapters/rack.rb +0 -34
- data/lib/picky/internals/calculations/location.rb +0 -59
- data/lib/picky/internals/frontend_adapters/rack.rb +0 -150
- data/lib/picky/internals/generators/base.rb +0 -19
- data/lib/picky/internals/generators/partial/default.rb +0 -7
- data/lib/picky/internals/generators/partial/none.rb +0 -35
- data/lib/picky/internals/generators/partial/strategy.rb +0 -29
- data/lib/picky/internals/generators/partial/substring.rb +0 -122
- data/lib/picky/internals/generators/partial_generator.rb +0 -19
- data/lib/picky/internals/generators/similarity/default.rb +0 -9
- data/lib/picky/internals/generators/similarity/double_metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/metaphone.rb +0 -32
- data/lib/picky/internals/generators/similarity/none.rb +0 -35
- data/lib/picky/internals/generators/similarity/phonetic.rb +0 -69
- data/lib/picky/internals/generators/similarity/soundex.rb +0 -32
- data/lib/picky/internals/generators/similarity/strategy.rb +0 -11
- data/lib/picky/internals/generators/similarity_generator.rb +0 -19
- data/lib/picky/internals/generators/strategy.rb +0 -18
- data/lib/picky/internals/generators/weights/default.rb +0 -9
- data/lib/picky/internals/generators/weights/logarithmic.rb +0 -43
- data/lib/picky/internals/generators/weights/strategy.rb +0 -11
- data/lib/picky/internals/generators/weights_generator.rb +0 -19
- data/lib/picky/internals/index/backend.rb +0 -112
- data/lib/picky/internals/index/file/basic.rb +0 -105
- data/lib/picky/internals/index/file/json.rb +0 -38
- data/lib/picky/internals/index/file/marshal.rb +0 -38
- data/lib/picky/internals/index/file/text.rb +0 -60
- data/lib/picky/internals/index/files.rb +0 -34
- data/lib/picky/internals/index/redis/basic.rb +0 -89
- data/lib/picky/internals/index/redis/list_hash.rb +0 -53
- data/lib/picky/internals/index/redis/string_hash.rb +0 -44
- data/lib/picky/internals/index/redis.rb +0 -44
- data/lib/picky/internals/indexed/bundle/base.rb +0 -114
- data/lib/picky/internals/indexed/bundle/memory.rb +0 -95
- data/lib/picky/internals/indexed/bundle/redis.rb +0 -49
- data/lib/picky/internals/indexed/categories.rb +0 -140
- data/lib/picky/internals/indexed/category.rb +0 -111
- data/lib/picky/internals/indexed/index.rb +0 -63
- data/lib/picky/internals/indexed/wrappers/bundle/calculation.rb +0 -37
- data/lib/picky/internals/indexed/wrappers/bundle/location.rb +0 -44
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +0 -45
- data/lib/picky/internals/indexed/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/internals/indexing/bundle/base.rb +0 -216
- data/lib/picky/internals/indexing/bundle/memory.rb +0 -29
- data/lib/picky/internals/indexing/bundle/redis.rb +0 -28
- data/lib/picky/internals/indexing/bundle/super_base.rb +0 -65
- data/lib/picky/internals/indexing/category.rb +0 -153
- data/lib/picky/internals/indexing/index.rb +0 -142
- data/lib/picky/internals/indexing/wrappers/category/location.rb +0 -27
- data/lib/picky/internals/query/allocation.rb +0 -88
- data/lib/picky/internals/query/allocations.rb +0 -118
- data/lib/picky/internals/query/combination.rb +0 -80
- data/lib/picky/internals/query/combinations/base.rb +0 -74
- data/lib/picky/internals/query/combinations/memory.rb +0 -52
- data/lib/picky/internals/query/combinations/redis.rb +0 -90
- data/lib/picky/internals/query/indexes.rb +0 -199
- data/lib/picky/internals/query/qualifiers.rb +0 -82
- data/lib/picky/internals/query/token.rb +0 -202
- data/lib/picky/internals/query/tokens.rb +0 -109
- data/lib/picky/internals/shared/category.rb +0 -52
- data/lib/picky/internals/tokenizers/base.rb +0 -228
- data/lib/picky/internals/tokenizers/index.rb +0 -34
- data/lib/picky/internals/tokenizers/location.rb +0 -54
- data/lib/picky/internals/tokenizers/query.rb +0 -59
- data/lib/picky/internals.rb +0 -2
- data/spec/lib/aliases_spec.rb +0 -9
- data/spec/lib/index_bundle_spec.rb +0 -69
@@ -1,34 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
class Files < Backend
|
6
|
-
|
7
|
-
def initialize bundle_name, category
|
8
|
-
super bundle_name, category
|
9
|
-
|
10
|
-
# Note: We marshal the similarity, as the
|
11
|
-
# Yajl json lib cannot load symbolized
|
12
|
-
# values, just keys.
|
13
|
-
#
|
14
|
-
@index = File::JSON.new category.index_path(bundle_name, :index)
|
15
|
-
@weights = File::JSON.new category.index_path(bundle_name, :weights)
|
16
|
-
@similarity = File::Marshal.new category.index_path(bundle_name, :similarity)
|
17
|
-
@configuration = File::JSON.new category.index_path(bundle_name, :configuration)
|
18
|
-
end
|
19
|
-
|
20
|
-
def to_s
|
21
|
-
<<-FILES
|
22
|
-
Files:
|
23
|
-
#{"Index: #{@index}".indented_to_s}
|
24
|
-
#{"Weights: #{@weights}".indented_to_s}
|
25
|
-
#{"Similarity: #{@similarity}".indented_to_s}
|
26
|
-
#{"Config: #{@configuration}".indented_to_s}
|
27
|
-
FILES
|
28
|
-
end
|
29
|
-
|
30
|
-
end
|
31
|
-
|
32
|
-
end
|
33
|
-
|
34
|
-
end
|
@@ -1,89 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
class Redis
|
6
|
-
|
7
|
-
# Redis Backend Accessor.
|
8
|
-
#
|
9
|
-
# Provides necessary helper methods for its
|
10
|
-
# subclasses.
|
11
|
-
# Not directly useable, as it does not provide
|
12
|
-
# dump/load methods.
|
13
|
-
#
|
14
|
-
class Basic
|
15
|
-
|
16
|
-
attr_reader :namespace, :backend
|
17
|
-
|
18
|
-
# A Redis index cache takes a namespace, which is used
|
19
|
-
# as the key (or key prefix) by the subclasses.
|
20
|
-
#
|
21
|
-
def initialize namespace
|
22
|
-
@namespace = namespace
|
23
|
-
|
24
|
-
# TODO Turn this inside out such that people can pass in
|
25
|
-
# their own Redis instance.
|
26
|
-
#
|
27
|
-
# TODO Make the :db a real option.
|
28
|
-
#
|
29
|
-
@backend = ::Redis.new :db => 15
|
30
|
-
end
|
31
|
-
|
32
|
-
# Does nothing.
|
33
|
-
#
|
34
|
-
def load
|
35
|
-
# Nothing.
|
36
|
-
end
|
37
|
-
# We do not use Redis to retrieve data.
|
38
|
-
#
|
39
|
-
def retrieve
|
40
|
-
# Nothing.
|
41
|
-
end
|
42
|
-
|
43
|
-
# Redis does not backup.
|
44
|
-
#
|
45
|
-
def backup
|
46
|
-
# Nothing.
|
47
|
-
end
|
48
|
-
|
49
|
-
# Deletes the Redis index namespace.
|
50
|
-
#
|
51
|
-
def delete
|
52
|
-
# Not implemented here.
|
53
|
-
# Note: backend.flushdb might be the way to go,
|
54
|
-
# but since we cannot delete by key pattern,
|
55
|
-
# we don't do anything.
|
56
|
-
end
|
57
|
-
|
58
|
-
# Checks.
|
59
|
-
#
|
60
|
-
|
61
|
-
# Is this cache suspiciously small?
|
62
|
-
#
|
63
|
-
def cache_small?
|
64
|
-
size < 1
|
65
|
-
end
|
66
|
-
# Is the cache ok?
|
67
|
-
#
|
68
|
-
# A small cache is still ok.
|
69
|
-
#
|
70
|
-
def cache_ok?
|
71
|
-
size > 0
|
72
|
-
end
|
73
|
-
# Returns the number of keys in the Redis database.
|
74
|
-
#
|
75
|
-
# Note: This is a very forgiving implementation.
|
76
|
-
# But as long as Redis does not implement
|
77
|
-
# DBSIZE KEYPATTERN, we are stuck with this.
|
78
|
-
#
|
79
|
-
def size
|
80
|
-
backend.dbsize
|
81
|
-
end
|
82
|
-
|
83
|
-
end
|
84
|
-
|
85
|
-
end
|
86
|
-
|
87
|
-
end
|
88
|
-
|
89
|
-
end
|
@@ -1,53 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
class Redis
|
6
|
-
|
7
|
-
class ListHash < Basic
|
8
|
-
|
9
|
-
# Writes the hash into Redis.
|
10
|
-
#
|
11
|
-
def dump hash
|
12
|
-
clear
|
13
|
-
hash.each_pair do |key, values|
|
14
|
-
redis_key = "#{namespace}:#{key}"
|
15
|
-
i = 0
|
16
|
-
values.each do |value|
|
17
|
-
i += 1
|
18
|
-
backend.zadd redis_key, i, value
|
19
|
-
end
|
20
|
-
end
|
21
|
-
end
|
22
|
-
|
23
|
-
# Clear the index for this list.
|
24
|
-
#
|
25
|
-
# Note: Perhaps we can use a server only command.
|
26
|
-
# This is not the optimal way to do it.
|
27
|
-
#
|
28
|
-
def clear
|
29
|
-
redis_key = "#{namespace}:*"
|
30
|
-
backend.keys(redis_key).each do |key|
|
31
|
-
backend.del key
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
# Get a collection.
|
36
|
-
#
|
37
|
-
def collection sym
|
38
|
-
backend.lrange "#{namespace}:#{sym}", 0, -1
|
39
|
-
end
|
40
|
-
|
41
|
-
# Get a single value.
|
42
|
-
#
|
43
|
-
def member sym
|
44
|
-
raise "Can't retrieve a single value from a Redis ListHash. Use Index::Redis::StringHash."
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
50
|
-
|
51
|
-
end
|
52
|
-
|
53
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
class Redis
|
6
|
-
|
7
|
-
class StringHash < Basic
|
8
|
-
|
9
|
-
# Writes the hash into Redis.
|
10
|
-
#
|
11
|
-
# Note: We could use multi, but it did not help.
|
12
|
-
#
|
13
|
-
def dump hash
|
14
|
-
clear
|
15
|
-
hash.each_pair do |key, value|
|
16
|
-
backend.hset namespace, key, value
|
17
|
-
end
|
18
|
-
end
|
19
|
-
|
20
|
-
# Clears the hash.
|
21
|
-
#
|
22
|
-
def clear
|
23
|
-
backend.del namespace
|
24
|
-
end
|
25
|
-
|
26
|
-
# Get a collection.
|
27
|
-
#
|
28
|
-
def collection sym
|
29
|
-
raise "Can't retrieve a collection from a StringHash. Use Index::Redis::ListHash."
|
30
|
-
end
|
31
|
-
|
32
|
-
# Get a single value.
|
33
|
-
#
|
34
|
-
def member sym
|
35
|
-
backend.hget namespace, sym
|
36
|
-
end
|
37
|
-
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
end
|
43
|
-
|
44
|
-
end
|
@@ -1,44 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Index
|
4
|
-
|
5
|
-
# TODO Needs a reconnect to be run after forking.
|
6
|
-
#
|
7
|
-
class Redis < Backend
|
8
|
-
|
9
|
-
def initialize bundle_name, category
|
10
|
-
super bundle_name, category
|
11
|
-
|
12
|
-
# Refine a few Redis "types".
|
13
|
-
#
|
14
|
-
@index = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:index"
|
15
|
-
@weights = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:weights"
|
16
|
-
@similarity = Redis::ListHash.new "#{category.identifier}:#{bundle_name}:similarity"
|
17
|
-
@configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
|
18
|
-
end
|
19
|
-
|
20
|
-
# Delegate to the right collection.
|
21
|
-
#
|
22
|
-
def ids sym
|
23
|
-
index.collection sym
|
24
|
-
end
|
25
|
-
|
26
|
-
# Delegate to the right member value.
|
27
|
-
#
|
28
|
-
# Note: Converts to float.
|
29
|
-
#
|
30
|
-
def weight sym
|
31
|
-
weights.member(sym).to_f
|
32
|
-
end
|
33
|
-
|
34
|
-
# Delegate to a member value.
|
35
|
-
#
|
36
|
-
def setting sym
|
37
|
-
configuration.member sym
|
38
|
-
end
|
39
|
-
|
40
|
-
end
|
41
|
-
|
42
|
-
end
|
43
|
-
|
44
|
-
end
|
@@ -1,114 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Indexed # :nodoc:all
|
4
|
-
|
5
|
-
# A Bundle is a number of indexes
|
6
|
-
# per [index, category] combination.
|
7
|
-
#
|
8
|
-
# At most, there are three indexes:
|
9
|
-
# * *core* index (always used)
|
10
|
-
# * *weights* index (always used)
|
11
|
-
# * *similarity* index (used with similarity)
|
12
|
-
#
|
13
|
-
# In Picky, indexing is separated from the index
|
14
|
-
# handling itself through a parallel structure.
|
15
|
-
#
|
16
|
-
# Both use methods provided by this base class, but
|
17
|
-
# have very different goals:
|
18
|
-
#
|
19
|
-
# * *Indexing*::*Bundle* is just concerned with creating index files
|
20
|
-
# and providing helper functions to e.g. check the indexes.
|
21
|
-
#
|
22
|
-
# * *Indexed*::*Bundle* is concerned with loading these index files into
|
23
|
-
# memory and looking up search data as fast as possible.
|
24
|
-
#
|
25
|
-
module Bundle
|
26
|
-
|
27
|
-
class Base
|
28
|
-
|
29
|
-
attr_reader :identifier, :configuration
|
30
|
-
attr_accessor :similarity_strategy
|
31
|
-
attr_accessor :index, :weights, :similarity, :configuration
|
32
|
-
|
33
|
-
delegate :[], :to => :configuration
|
34
|
-
delegate :size, :to => :index
|
35
|
-
|
36
|
-
def initialize name, category, similarity_strategy
|
37
|
-
@identifier = "#{category.identifier}:#{name}"
|
38
|
-
|
39
|
-
@index = {}
|
40
|
-
@weights = {}
|
41
|
-
@similarity = {}
|
42
|
-
|
43
|
-
@similarity_strategy = similarity_strategy
|
44
|
-
end
|
45
|
-
|
46
|
-
# Get a list of similar texts.
|
47
|
-
#
|
48
|
-
# Note: Does not return itself.
|
49
|
-
#
|
50
|
-
def similar text
|
51
|
-
code = similarity_strategy.encoded text
|
52
|
-
similar_codes = code && @similarity[code]
|
53
|
-
similar_codes.delete text if similar_codes
|
54
|
-
similar_codes || []
|
55
|
-
end
|
56
|
-
|
57
|
-
# Loads all indexes.
|
58
|
-
#
|
59
|
-
def load
|
60
|
-
load_index
|
61
|
-
load_weights
|
62
|
-
load_similarity
|
63
|
-
load_configuration
|
64
|
-
end
|
65
|
-
|
66
|
-
# Loads the core index.
|
67
|
-
#
|
68
|
-
def load_index
|
69
|
-
# No loading needed.
|
70
|
-
end
|
71
|
-
# Loads the weights index.
|
72
|
-
#
|
73
|
-
def load_weights
|
74
|
-
# No loading needed.
|
75
|
-
end
|
76
|
-
# Loads the similarity index.
|
77
|
-
#
|
78
|
-
def load_similarity
|
79
|
-
# No loading needed.
|
80
|
-
end
|
81
|
-
# Loads the configuration.
|
82
|
-
#
|
83
|
-
def load_configuration
|
84
|
-
# No loading needed.
|
85
|
-
end
|
86
|
-
|
87
|
-
# Clears the core index.
|
88
|
-
#
|
89
|
-
def clear_index
|
90
|
-
# Nothing to clear.
|
91
|
-
end
|
92
|
-
# Clears the weights index.
|
93
|
-
#
|
94
|
-
def clear_weights
|
95
|
-
# Nothing to clear.
|
96
|
-
end
|
97
|
-
# Clears the similarity index.
|
98
|
-
#
|
99
|
-
def clear_similarity
|
100
|
-
# Nothing to clear.
|
101
|
-
end
|
102
|
-
# Clears the configuration.
|
103
|
-
#
|
104
|
-
def clear_configuration
|
105
|
-
# Nothing to clear.
|
106
|
-
end
|
107
|
-
|
108
|
-
end
|
109
|
-
|
110
|
-
end
|
111
|
-
|
112
|
-
end
|
113
|
-
|
114
|
-
end
|
@@ -1,95 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
# encoding: utf-8
|
4
|
-
#
|
5
|
-
module Indexed # :nodoc:all
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
module Bundle
|
10
|
-
|
11
|
-
# This is the _actual_ index (based on memory).
|
12
|
-
#
|
13
|
-
# Handles exact/partial index, weights index, and similarity index.
|
14
|
-
#
|
15
|
-
# Delegates file handling and checking to an *Index*::*Files* object.
|
16
|
-
#
|
17
|
-
class Memory < Base
|
18
|
-
|
19
|
-
delegate :[], :to => :configuration
|
20
|
-
|
21
|
-
def initialize name, configuration, *args
|
22
|
-
super name, configuration, *args
|
23
|
-
|
24
|
-
@configuration = {} # A hash with config options.
|
25
|
-
|
26
|
-
@backend = Internals::Index::Files.new name, configuration
|
27
|
-
end
|
28
|
-
|
29
|
-
def to_s
|
30
|
-
<<-MEMORY
|
31
|
-
Memory
|
32
|
-
#{@backend.indented_to_s}
|
33
|
-
MEMORY
|
34
|
-
end
|
35
|
-
|
36
|
-
# Get the ids for the given symbol.
|
37
|
-
#
|
38
|
-
def ids sym
|
39
|
-
@index[sym] || []
|
40
|
-
end
|
41
|
-
# Get a weight for the given symbol.
|
42
|
-
#
|
43
|
-
def weight sym
|
44
|
-
@weights[sym]
|
45
|
-
end
|
46
|
-
|
47
|
-
# Loads the core index.
|
48
|
-
#
|
49
|
-
def load_index
|
50
|
-
self.index = @backend.load_index
|
51
|
-
end
|
52
|
-
# Loads the weights index.
|
53
|
-
#
|
54
|
-
def load_weights
|
55
|
-
self.weights = @backend.load_weights
|
56
|
-
end
|
57
|
-
# Loads the similarity index.
|
58
|
-
#
|
59
|
-
def load_similarity
|
60
|
-
self.similarity = @backend.load_similarity
|
61
|
-
end
|
62
|
-
# Loads the configuration.
|
63
|
-
#
|
64
|
-
def load_configuration
|
65
|
-
self.configuration = @backend.load_configuration
|
66
|
-
end
|
67
|
-
|
68
|
-
# Clears the core index.
|
69
|
-
#
|
70
|
-
def clear_index
|
71
|
-
self.index = {}
|
72
|
-
end
|
73
|
-
# Clears the weights index.
|
74
|
-
#
|
75
|
-
def clear_weights
|
76
|
-
self.weights = {}
|
77
|
-
end
|
78
|
-
# Clears the similarity index.
|
79
|
-
#
|
80
|
-
def clear_similarity
|
81
|
-
self.similarity = {}
|
82
|
-
end
|
83
|
-
# Clears the configuration.
|
84
|
-
#
|
85
|
-
def clear_configuration
|
86
|
-
self.configuration = {}
|
87
|
-
end
|
88
|
-
|
89
|
-
end
|
90
|
-
|
91
|
-
end
|
92
|
-
|
93
|
-
end
|
94
|
-
|
95
|
-
end
|
@@ -1,49 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
# encoding: utf-8
|
4
|
-
#
|
5
|
-
module Indexed # :nodoc:all
|
6
|
-
|
7
|
-
#
|
8
|
-
#
|
9
|
-
module Bundle
|
10
|
-
|
11
|
-
# This is the _actual_ index (based on Redis).
|
12
|
-
#
|
13
|
-
# Handles exact/partial index, weights index, and similarity index.
|
14
|
-
#
|
15
|
-
class Redis < Base
|
16
|
-
|
17
|
-
def initialize name, category, *args
|
18
|
-
super name, category, *args
|
19
|
-
|
20
|
-
@backend = Internals::Index::Redis.new name, category
|
21
|
-
end
|
22
|
-
|
23
|
-
# Get the ids for the given symbol.
|
24
|
-
#
|
25
|
-
# Ids are an array of string values in Redis.
|
26
|
-
#
|
27
|
-
def ids sym
|
28
|
-
@backend.ids sym
|
29
|
-
end
|
30
|
-
# Get a weight for the given symbol.
|
31
|
-
#
|
32
|
-
# A weight is stored as a string in Redis; the backend converts it to a float.
|
33
|
-
#
|
34
|
-
def weight sym
|
35
|
-
@backend.weight sym
|
36
|
-
end
|
37
|
-
# Settings of this bundle can be accessed via [].
|
38
|
-
#
|
39
|
-
def [] sym
|
40
|
-
@backend.setting sym
|
41
|
-
end
|
42
|
-
|
43
|
-
end
|
44
|
-
|
45
|
-
end
|
46
|
-
|
47
|
-
end
|
48
|
-
|
49
|
-
end
|
@@ -1,140 +0,0 @@
|
|
1
|
-
module Internals
|
2
|
-
|
3
|
-
module Indexed
|
4
|
-
|
5
|
-
class Categories
|
6
|
-
|
7
|
-
attr_reader :categories, :category_hash, :ignore_unassigned_tokens
|
8
|
-
|
9
|
-
each_delegate :load_from_cache,
|
10
|
-
:analyze,
|
11
|
-
:to => :categories
|
12
|
-
|
13
|
-
# A list of indexed categories.
|
14
|
-
#
|
15
|
-
# Options:
|
16
|
-
# * ignore_unassigned_tokens: Ignore the given token if it cannot be matched to a category.
|
17
|
-
# The default behaviour is that if a token does not match to
|
18
|
-
# any category, the query will not return anything (since a
|
19
|
-
# single token cannot be matched). If you set this option to
|
20
|
-
# true, any token that cannot be matched to a category will be
|
21
|
-
# simply ignored.
|
22
|
-
# Use this if only a few matched words are important, like for
|
23
|
-
# example of the query "Jonathan Myers 86455 Las Cucarachas"
|
24
|
-
# you only want to match the zipcode, to have the search engine
|
25
|
-
# display advertisements on the side for the zipcode.
|
26
|
-
# Nifty! :)
|
27
|
-
#
|
28
|
-
def initialize options = {}
|
29
|
-
clear
|
30
|
-
|
31
|
-
@ignore_unassigned_tokens = options[:ignore_unassigned_tokens] || false
|
32
|
-
end
|
33
|
-
|
34
|
-
def to_s
|
35
|
-
categories.indented_to_s
|
36
|
-
end
|
37
|
-
|
38
|
-
# Clears both the array of categories and the hash of categories.
|
39
|
-
#
|
40
|
-
def clear
|
41
|
-
@categories = []
|
42
|
-
@category_hash = {}
|
43
|
-
end
|
44
|
-
|
45
|
-
# Add the given category to the list of categories.
|
46
|
-
#
|
47
|
-
def << category
|
48
|
-
categories << category
|
49
|
-
category_hash[category.name] = category
|
50
|
-
end
|
51
|
-
|
52
|
-
# Return all possible combinations for the given token.
|
53
|
-
#
|
54
|
-
# This checks if it needs to also search through similar
|
55
|
-
# tokens, if for example, the token is one with ~.
|
56
|
-
# If yes, it puts together all solutions.
|
57
|
-
#
|
58
|
-
def possible_combinations_for token
|
59
|
-
token.similar? ? similar_possible_for(token) : possible_for(token)
|
60
|
-
end
|
61
|
-
# Gets all similar tokens and puts together the possible combinations
|
62
|
-
# for each found similar token.
|
63
|
-
#
|
64
|
-
def similar_possible_for token
|
65
|
-
# Get as many tokens as necessary
|
66
|
-
#
|
67
|
-
tokens = similar_tokens_for token
|
68
|
-
# possible combinations
|
69
|
-
#
|
70
|
-
inject_possible_for tokens
|
71
|
-
end
|
72
|
-
def similar_tokens_for token
|
73
|
-
text = token.text
|
74
|
-
categories.inject([]) do |result, category|
|
75
|
-
next_token = token
|
76
|
-
# Note: We could also break off here if not all the available
|
77
|
-
# similars are needed.
|
78
|
-
# Wait for a concrete case that needs this before taking
|
79
|
-
# action.
|
80
|
-
#
|
81
|
-
while next_token = next_token.next_similar_token(category)
|
82
|
-
result << next_token if next_token && next_token.text != text
|
83
|
-
end
|
84
|
-
result
|
85
|
-
end
|
86
|
-
end
|
87
|
-
def inject_possible_for tokens
|
88
|
-
tokens.inject([]) do |result, token|
|
89
|
-
possible = possible_categories token
|
90
|
-
result + possible_for(token, possible)
|
91
|
-
end
|
92
|
-
end
|
93
|
-
|
94
|
-
# Returns possible Combinations for the token.
|
95
|
-
#
|
96
|
-
# Note: The preselected_categories param is an optimization.
|
97
|
-
#
|
98
|
-
# Note: Returns [] if no categories matched (will produce no result).
|
99
|
-
# Returns nil if this token needs to be removed from the query.
|
100
|
-
# (Also none of the categories matched, but the ignore unassigned
|
101
|
-
# tokens option is true)
|
102
|
-
#
|
103
|
-
def possible_for token, preselected_categories = nil
|
104
|
-
possible = (preselected_categories || possible_categories(token)).inject([]) do |combinations, category|
|
105
|
-
combination = category.combination_for token
|
106
|
-
combination ? combinations << combination : combinations
|
107
|
-
end
|
108
|
-
# This is an optimization to mark tokens that are ignored.
|
109
|
-
#
|
110
|
-
return if ignore_unassigned_tokens && possible.empty?
|
111
|
-
possible # wrap in combinations
|
112
|
-
end
|
113
|
-
# This returns the possible categories for this token.
|
114
|
-
# If the user has preselected categories for this token,
|
115
|
-
# like "artist:moby", only those are used; otherwise all
|
116
|
-
# categories are returned, since all are possible.
|
117
|
-
#
|
118
|
-
# Note: Once I thought this was called too often. But it is not (18.01.2011).
|
119
|
-
#
|
120
|
-
def possible_categories token
|
121
|
-
user_defined_categories(token) || categories
|
122
|
-
end
|
123
|
-
# This returns the array of categories if the user has defined
|
124
|
-
# an existing category.
|
125
|
-
#
|
126
|
-
# Note: Returns nil if the user did not define one
|
127
|
-
# or [] if he/she has defined a non-existing one.
|
128
|
-
#
|
129
|
-
def user_defined_categories token
|
130
|
-
names = token.user_defined_category_names
|
131
|
-
names && names.map do |name|
|
132
|
-
category_hash[name]
|
133
|
-
end.compact
|
134
|
-
end
|
135
|
-
|
136
|
-
end
|
137
|
-
|
138
|
-
end
|
139
|
-
|
140
|
-
end
|