picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/backend/files.rb
CHANGED
|
@@ -1,22 +1,26 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
module Backend
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
super bundle_name, category
|
|
5
|
+
class Files < Base
|
|
7
6
|
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
7
|
+
def initialize bundle
|
|
8
|
+
super bundle
|
|
9
|
+
|
|
10
|
+
# Note: We marshal the similarity, as the
|
|
11
|
+
# Yajl json lib cannot load symbolized
|
|
12
|
+
# values, just keys.
|
|
13
|
+
#
|
|
14
|
+
@inverted = File::JSON.new bundle.index_path(:inverted)
|
|
15
|
+
@weights = File::JSON.new bundle.index_path(:weights)
|
|
16
|
+
@similarity = File::Marshal.new bundle.index_path(:similarity)
|
|
17
|
+
@configuration = File::JSON.new bundle.index_path(:configuration)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
def to_s
|
|
21
|
+
"#{self.class}(#{[@inverted, @weights, @similarity, @configuration].join(', ')})"
|
|
22
|
+
end
|
|
17
23
|
|
|
18
|
-
def to_s
|
|
19
|
-
"#{self.class}(#{[@prepared, @inverted, @weights, @similarity, @configuration].join(', ')})"
|
|
20
24
|
end
|
|
21
25
|
|
|
22
26
|
end
|
|
@@ -1,89 +1,93 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
module Backend
|
|
4
4
|
|
|
5
|
-
|
|
6
|
-
#
|
|
7
|
-
# Provides necessary helper methods for its
|
|
8
|
-
# subclasses.
|
|
9
|
-
# Not directly useable, as it does not provide
|
|
10
|
-
# dump/load methods.
|
|
11
|
-
#
|
|
12
|
-
class Basic
|
|
5
|
+
class Redis
|
|
13
6
|
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
#
|
|
17
|
-
#
|
|
7
|
+
# Redis Backend Accessor.
|
|
8
|
+
#
|
|
9
|
+
# Provides necessary helper methods for its
|
|
10
|
+
# subclasses.
|
|
11
|
+
# Not directly useable, as it does not provide
|
|
12
|
+
# dump/load methods.
|
|
18
13
|
#
|
|
19
|
-
|
|
20
|
-
|
|
14
|
+
class Basic
|
|
15
|
+
|
|
16
|
+
attr_reader :namespace, :backend
|
|
21
17
|
|
|
22
|
-
#
|
|
23
|
-
#
|
|
18
|
+
# An index cache takes a path, without file extension,
|
|
19
|
+
# which will be provided by the subclasses.
|
|
24
20
|
#
|
|
25
|
-
|
|
21
|
+
def initialize namespace
|
|
22
|
+
@namespace = namespace
|
|
23
|
+
|
|
24
|
+
# TODO Turn this inside out such that people can pass in
|
|
25
|
+
# their own Redis instance.
|
|
26
|
+
#
|
|
27
|
+
# TODO Make the :db a real option.
|
|
28
|
+
#
|
|
29
|
+
@backend = ::Redis.new :db => 15
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Does nothing.
|
|
26
33
|
#
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
# We do not use Redis to retrieve data.
|
|
36
|
-
#
|
|
37
|
-
def retrieve
|
|
38
|
-
# Nothing.
|
|
39
|
-
end
|
|
34
|
+
def load
|
|
35
|
+
# Nothing.
|
|
36
|
+
end
|
|
37
|
+
# We do not use Redis to retrieve data.
|
|
38
|
+
#
|
|
39
|
+
def retrieve
|
|
40
|
+
# Nothing.
|
|
41
|
+
end
|
|
40
42
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
43
|
+
# Redis does not backup.
|
|
44
|
+
#
|
|
45
|
+
def backup
|
|
46
|
+
# Nothing.
|
|
47
|
+
end
|
|
46
48
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
49
|
+
# Deletes the Redis index namespace.
|
|
50
|
+
#
|
|
51
|
+
def delete
|
|
52
|
+
# Not implemented here.
|
|
53
|
+
# Note: backend.flushdb might be the way to go,
|
|
54
|
+
# but since we cannot delete by key pattern,
|
|
55
|
+
# we don't do anything.
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
# Checks.
|
|
59
|
+
#
|
|
55
60
|
|
|
56
|
-
|
|
57
|
-
|
|
61
|
+
# Is this cache suspiciously small?
|
|
62
|
+
#
|
|
63
|
+
def cache_small?
|
|
64
|
+
size < 1
|
|
65
|
+
end
|
|
58
66
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
67
|
+
# Is the cache ok?
|
|
68
|
+
#
|
|
69
|
+
# A small cache is still ok.
|
|
70
|
+
#
|
|
71
|
+
def cache_ok?
|
|
72
|
+
size > 0
|
|
73
|
+
end
|
|
64
74
|
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
75
|
+
# Extracts the size of the file in Bytes.
|
|
76
|
+
#
|
|
77
|
+
# Note: This is a very forgiving implementation.
|
|
78
|
+
# But as long as Redis does not implement
|
|
79
|
+
# DBSIZE KEYPATTERN, we are stuck with this.
|
|
80
|
+
#
|
|
81
|
+
def size
|
|
82
|
+
backend.dbsize
|
|
83
|
+
end
|
|
72
84
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
#
|
|
79
|
-
def size
|
|
80
|
-
backend.dbsize
|
|
81
|
-
end
|
|
85
|
+
#
|
|
86
|
+
#
|
|
87
|
+
def to_s
|
|
88
|
+
"#{self.class}(#{namespace}:*)"
|
|
89
|
+
end
|
|
82
90
|
|
|
83
|
-
#
|
|
84
|
-
#
|
|
85
|
-
def to_s
|
|
86
|
-
"#{self.class}(#{namespace}:*)"
|
|
87
91
|
end
|
|
88
92
|
|
|
89
93
|
end
|
|
@@ -1,45 +1,49 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
class
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
i
|
|
16
|
-
|
|
1
|
+
module Picky
|
|
2
|
+
|
|
3
|
+
module Backend
|
|
4
|
+
|
|
5
|
+
class Redis
|
|
6
|
+
|
|
7
|
+
class ListHash < Basic
|
|
8
|
+
|
|
9
|
+
# Writes the hash into Redis.
|
|
10
|
+
#
|
|
11
|
+
def dump hash
|
|
12
|
+
clear
|
|
13
|
+
hash.each_pair do |key, values|
|
|
14
|
+
redis_key = "#{namespace}:#{key}"
|
|
15
|
+
i = 0
|
|
16
|
+
values.each do |value|
|
|
17
|
+
i += 1
|
|
18
|
+
backend.zadd redis_key, i, value
|
|
19
|
+
end
|
|
17
20
|
end
|
|
18
21
|
end
|
|
19
|
-
end
|
|
20
22
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
23
|
+
# Clear the index for this list.
|
|
24
|
+
#
|
|
25
|
+
# Note: Perhaps we can use a server only command.
|
|
26
|
+
# This is not the optimal way to do it.
|
|
27
|
+
#
|
|
28
|
+
def clear
|
|
29
|
+
redis_key = "#{namespace}:*"
|
|
30
|
+
backend.keys(redis_key).each do |key|
|
|
31
|
+
backend.del key
|
|
32
|
+
end
|
|
30
33
|
end
|
|
31
|
-
end
|
|
32
34
|
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
35
|
+
# Get a collection.
|
|
36
|
+
#
|
|
37
|
+
def collection key
|
|
38
|
+
backend.zrange "#{namespace}:#{key}", 0, -1
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
# Get a single value.
|
|
42
|
+
#
|
|
43
|
+
def member key
|
|
44
|
+
raise "Can't retrieve single value :#{key} from a Redis ListHash. Use Indexes::Redis::StringHash."
|
|
45
|
+
end
|
|
38
46
|
|
|
39
|
-
# Get a single value.
|
|
40
|
-
#
|
|
41
|
-
def member key
|
|
42
|
-
raise "Can't retrieve single value :#{key} from a Redis ListHash. Use Index::Redis::StringHash."
|
|
43
47
|
end
|
|
44
48
|
|
|
45
49
|
end
|
|
@@ -1,36 +1,40 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
module Backend
|
|
4
4
|
|
|
5
|
-
class
|
|
5
|
+
class Redis
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
7
|
+
class StringHash < Basic
|
|
8
|
+
|
|
9
|
+
# Writes the hash into Redis.
|
|
10
|
+
#
|
|
11
|
+
# Note: We could use multi, but it did not help.
|
|
12
|
+
#
|
|
13
|
+
def dump hash
|
|
14
|
+
clear
|
|
15
|
+
hash.each_pair do |key, value|
|
|
16
|
+
backend.hset namespace, key, value
|
|
17
|
+
end
|
|
15
18
|
end
|
|
16
|
-
end
|
|
17
19
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
20
|
+
# Clears the hash.
|
|
21
|
+
#
|
|
22
|
+
def clear
|
|
23
|
+
backend.del namespace
|
|
24
|
+
end
|
|
23
25
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
26
|
+
# Get a collection.
|
|
27
|
+
#
|
|
28
|
+
def collection key
|
|
29
|
+
raise "Can't retrieve collection for :#{key} from a StringHash. Use Indexes::Redis::ListHash."
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
# Get a single value.
|
|
33
|
+
#
|
|
34
|
+
def member key
|
|
35
|
+
backend.hget namespace, key
|
|
36
|
+
end
|
|
29
37
|
|
|
30
|
-
# Get a single value.
|
|
31
|
-
#
|
|
32
|
-
def member key
|
|
33
|
-
backend.hget namespace, key
|
|
34
38
|
end
|
|
35
39
|
|
|
36
40
|
end
|
data/lib/picky/backend/redis.rb
CHANGED
|
@@ -1,38 +1,42 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
class Redis < Base
|
|
3
|
+
module Backend
|
|
6
4
|
|
|
7
|
-
|
|
8
|
-
|
|
5
|
+
#
|
|
6
|
+
#
|
|
7
|
+
class Redis < Base
|
|
8
|
+
|
|
9
|
+
def initialize bundle
|
|
10
|
+
super bundle
|
|
11
|
+
|
|
12
|
+
# Refine a few Redis "types".
|
|
13
|
+
#
|
|
14
|
+
@inverted = Redis::ListHash.new "#{bundle.identifier}:inverted"
|
|
15
|
+
@weights = Redis::StringHash.new "#{bundle.identifier}:weights"
|
|
16
|
+
@similarity = Redis::ListHash.new "#{bundle.identifier}:similarity"
|
|
17
|
+
@configuration = Redis::StringHash.new "#{bundle.identifier}:configuration"
|
|
18
|
+
end
|
|
9
19
|
|
|
10
|
-
#
|
|
20
|
+
# Delegate to the right collection.
|
|
11
21
|
#
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
@configuration = Redis::StringHash.new "#{category.identifier}:#{bundle_name}:configuration"
|
|
16
|
-
end
|
|
22
|
+
def ids sym
|
|
23
|
+
inverted.collection sym
|
|
24
|
+
end
|
|
17
25
|
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
26
|
+
# Delegate to the right member value.
|
|
27
|
+
#
|
|
28
|
+
# Note: Converts to float.
|
|
29
|
+
#
|
|
30
|
+
def weight sym
|
|
31
|
+
weights.member(sym).to_f
|
|
32
|
+
end
|
|
23
33
|
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
weights.member(sym).to_f
|
|
30
|
-
end
|
|
34
|
+
# Delegate to a member value.
|
|
35
|
+
#
|
|
36
|
+
def setting sym
|
|
37
|
+
configuration.member sym
|
|
38
|
+
end
|
|
31
39
|
|
|
32
|
-
# Delegate to a member value.
|
|
33
|
-
#
|
|
34
|
-
def setting sym
|
|
35
|
-
configuration.member sym
|
|
36
40
|
end
|
|
37
41
|
|
|
38
42
|
end
|
data/lib/picky/bundle.rb
CHANGED
|
@@ -1,62 +1,87 @@
|
|
|
1
|
-
|
|
2
|
-
#
|
|
3
|
-
#
|
|
4
|
-
# At most, there are three indexes:
|
|
5
|
-
# * *core* index (always used)
|
|
6
|
-
# * *weights* index (always used)
|
|
7
|
-
# * *similarity* index (used with similarity)
|
|
8
|
-
#
|
|
9
|
-
# In Picky, indexing is separated from the index
|
|
10
|
-
# handling itself through a parallel structure.
|
|
11
|
-
#
|
|
12
|
-
# Both use methods provided by this base class, but
|
|
13
|
-
# have very different goals:
|
|
14
|
-
#
|
|
15
|
-
# * *Indexing*::*Bundle*::*Base* is just concerned with creating index
|
|
16
|
-
# files / redis entries and providing helper functions to e.g. check
|
|
17
|
-
# the indexes.
|
|
18
|
-
#
|
|
19
|
-
# * *Index*::*Bundle*::*Base* is concerned with loading these index files into
|
|
20
|
-
# memory / redis and looking up search data as fast as possible.
|
|
21
|
-
#
|
|
22
|
-
class Bundle
|
|
23
|
-
|
|
24
|
-
attr_reader :identifier,
|
|
25
|
-
:files
|
|
26
|
-
attr_accessor :inverted,
|
|
27
|
-
:weights,
|
|
28
|
-
:similarity,
|
|
29
|
-
:configuration,
|
|
30
|
-
:similarity_strategy
|
|
31
|
-
|
|
32
|
-
delegate :clear, :to => :inverted
|
|
33
|
-
delegate :[], :[]=, :to => :configuration
|
|
34
|
-
|
|
35
|
-
def initialize name, category, similarity_strategy
|
|
36
|
-
@identifier = "#{category.identifier}:#{name}"
|
|
37
|
-
@files = Backend::Files.new name, category
|
|
38
|
-
|
|
39
|
-
@inverted = {}
|
|
40
|
-
@weights = {}
|
|
41
|
-
@similarity = {}
|
|
42
|
-
@configuration = {} # A hash with config options.
|
|
43
|
-
|
|
44
|
-
@similarity_strategy = similarity_strategy
|
|
45
|
-
end
|
|
46
|
-
|
|
47
|
-
# Get a list of similar texts.
|
|
1
|
+
module Picky
|
|
2
|
+
# A Bundle is a number of indexes
|
|
3
|
+
# per [index, category] combination.
|
|
48
4
|
#
|
|
49
|
-
#
|
|
5
|
+
# At most, there are three indexes:
|
|
6
|
+
# * *core* index (always used)
|
|
7
|
+
# * *weights* index (always used)
|
|
8
|
+
# * *similarity* index (used with similarity)
|
|
50
9
|
#
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
10
|
+
# In Picky, indexing is separated from the index
|
|
11
|
+
# handling itself through a parallel structure.
|
|
12
|
+
#
|
|
13
|
+
# Both use methods provided by this base class, but
|
|
14
|
+
# have very different goals:
|
|
15
|
+
#
|
|
16
|
+
# * *Indexing*::*Bundle*::*Base* is just concerned with creating index
|
|
17
|
+
# files / redis entries and providing helper functions to e.g. check
|
|
18
|
+
# the indexes.
|
|
19
|
+
#
|
|
20
|
+
# * *Index*::*Bundle*::*Base* is concerned with loading these index files into
|
|
21
|
+
# memory / redis and looking up search data as fast as possible.
|
|
22
|
+
#
|
|
23
|
+
class Bundle
|
|
57
24
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
25
|
+
attr_reader :name,
|
|
26
|
+
:category
|
|
27
|
+
|
|
28
|
+
attr_accessor :inverted,
|
|
29
|
+
:weights,
|
|
30
|
+
:similarity,
|
|
31
|
+
:configuration,
|
|
32
|
+
:similarity_strategy
|
|
61
33
|
|
|
34
|
+
delegate :clear, :to => :inverted
|
|
35
|
+
delegate :[], :[]=, :to => :configuration
|
|
36
|
+
delegate :index_directory, :to => :category
|
|
37
|
+
|
|
38
|
+
def initialize name, category, similarity_strategy, options = {}
|
|
39
|
+
@name = name
|
|
40
|
+
@category = category
|
|
41
|
+
|
|
42
|
+
@inverted = {}
|
|
43
|
+
@weights = {}
|
|
44
|
+
@similarity = {}
|
|
45
|
+
@configuration = {} # A hash with config options.
|
|
46
|
+
|
|
47
|
+
@similarity_strategy = similarity_strategy
|
|
48
|
+
end
|
|
49
|
+
def identifier
|
|
50
|
+
"#{category.identifier}:#{name}"
|
|
51
|
+
end
|
|
52
|
+
|
|
53
|
+
# Get a list of similar texts.
|
|
54
|
+
#
|
|
55
|
+
# Note: Does not return itself.
|
|
56
|
+
#
|
|
57
|
+
def similar text
|
|
58
|
+
code = similarity_strategy.encoded text
|
|
59
|
+
similar_codes = code && @similarity[code]
|
|
60
|
+
similar_codes.delete text if similar_codes
|
|
61
|
+
similar_codes || []
|
|
62
|
+
end
|
|
63
|
+
|
|
64
|
+
# If a key format is set, use it, else delegate to the category.
|
|
65
|
+
#
|
|
66
|
+
def key_format
|
|
67
|
+
@key_format || @category.key_format
|
|
68
|
+
end
|
|
69
|
+
|
|
70
|
+
# Path and partial filename of a specific subindex.
|
|
71
|
+
#
|
|
72
|
+
# Subindexes are:
|
|
73
|
+
# * inverted index
|
|
74
|
+
# * weights index
|
|
75
|
+
# * partial index
|
|
76
|
+
# * similarity index
|
|
77
|
+
#
|
|
78
|
+
def index_path type
|
|
79
|
+
::File.join index_directory, "#{category.name}_#{name}_#{type}"
|
|
80
|
+
end
|
|
81
|
+
|
|
82
|
+
def to_s
|
|
83
|
+
"#{self.class}(#{identifier})"
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
end
|
|
62
87
|
end
|
|
File without changes
|
|
@@ -1,55 +1,59 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
# A location calculation recalculates a 1-d location
|
|
4
|
-
# to the Picky internal 1-d "grid".
|
|
5
|
-
#
|
|
6
|
-
# For example, if you have a location x == 12.3456,
|
|
7
|
-
# it will be recalculated into 3, if the minimum is 9
|
|
8
|
-
# and the gridlength is 1.
|
|
9
|
-
#
|
|
10
|
-
class Location
|
|
11
|
-
|
|
12
|
-
attr_reader :minimum, :precision, :grid
|
|
13
|
-
|
|
14
|
-
def initialize user_grid, precision = nil
|
|
15
|
-
@user_grid = user_grid
|
|
16
|
-
@precision = precision || 1
|
|
17
|
-
@grid = @user_grid / (@precision + 0.5)
|
|
18
|
-
end
|
|
19
|
-
|
|
20
|
-
def minimum= minimum
|
|
21
|
-
# Add a margin of 1 user grid.
|
|
22
|
-
#
|
|
23
|
-
minimum -= @user_grid
|
|
24
|
-
|
|
25
|
-
# Add plus 1 grid so that the index key never falls on 0.
|
|
26
|
-
# Why? to_i maps by default to 0.
|
|
27
|
-
#
|
|
28
|
-
minimum -= @grid
|
|
1
|
+
module Picky
|
|
29
2
|
|
|
30
|
-
|
|
31
|
-
end
|
|
3
|
+
module Calculations # :nodoc:all
|
|
32
4
|
|
|
5
|
+
# A location calculation recalculates a 1-d location
|
|
6
|
+
# to the Picky internal 1-d "grid".
|
|
33
7
|
#
|
|
8
|
+
# For example, if you have a location x == 12.3456,
|
|
9
|
+
# it will be recalculated into 3, if the minimum is 9
|
|
10
|
+
# and the gridlength is 1.
|
|
34
11
|
#
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
12
|
+
class Location
|
|
13
|
+
|
|
14
|
+
attr_reader :minimum, :precision, :grid
|
|
15
|
+
|
|
16
|
+
def initialize user_grid, precision = nil
|
|
17
|
+
@user_grid = user_grid
|
|
18
|
+
@precision = precision || 1
|
|
19
|
+
@grid = @user_grid / (@precision + 0.5)
|
|
20
|
+
end
|
|
21
|
+
|
|
22
|
+
def minimum= minimum
|
|
23
|
+
# Add a margin of 1 user grid.
|
|
24
|
+
#
|
|
25
|
+
minimum -= @user_grid
|
|
26
|
+
|
|
27
|
+
# Add plus 1 grid so that the index key never falls on 0.
|
|
28
|
+
# Why? to_i maps by default to 0.
|
|
29
|
+
#
|
|
30
|
+
minimum -= @grid
|
|
31
|
+
|
|
32
|
+
@minimum = minimum
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
#
|
|
36
|
+
#
|
|
37
|
+
def add_margin length
|
|
38
|
+
@minimum -= length
|
|
39
|
+
end
|
|
40
|
+
|
|
41
|
+
#
|
|
42
|
+
#
|
|
43
|
+
def recalculated_range location
|
|
44
|
+
range recalculate(location)
|
|
45
|
+
end
|
|
46
|
+
#
|
|
47
|
+
#
|
|
48
|
+
def range around_location
|
|
49
|
+
(around_location - @precision)..(around_location + @precision)
|
|
50
|
+
end
|
|
51
|
+
#
|
|
52
|
+
#
|
|
53
|
+
def recalculate location
|
|
54
|
+
((location - @minimum) / @grid).floor
|
|
55
|
+
end
|
|
38
56
|
|
|
39
|
-
#
|
|
40
|
-
#
|
|
41
|
-
def recalculated_range location
|
|
42
|
-
range recalculate(location)
|
|
43
|
-
end
|
|
44
|
-
#
|
|
45
|
-
#
|
|
46
|
-
def range around_location
|
|
47
|
-
(around_location - @precision)..(around_location + @precision)
|
|
48
|
-
end
|
|
49
|
-
#
|
|
50
|
-
#
|
|
51
|
-
def recalculate location
|
|
52
|
-
((location - @minimum) / @grid).floor
|
|
53
57
|
end
|
|
54
58
|
|
|
55
59
|
end
|