picky 1.4.1 → 1.4.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
@@ -0,0 +1,43 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
module Weights
|
6
|
+
|
7
|
+
# Logarithmic weights strategy.
#
# For a key that maps to x ids, the weight is
#   w(x) = log(x)
# rounded to two decimals. For x < 1 the weight is 0.
#
class Logarithmic < Strategy

  # Builds a weights hash from the given index.
  #
  # Every entry of the index is taken apart into a text and its ids.
  # The result maps each text to the rounded weight of ids.size.
  # If a text shows up more than once, the first stored weight is kept.
  #
  def generate_from(index)
    weights = {}
    index.each do |text_ids|
      text, ids = *text_ids
      weight    = weight_for(ids.size)
      weights[text] ||= weight.round(2) if weight
    end
    weights
  end

  # Calculates the weight for the given amount of ids.
  #
  # Amounts below 1 get the weight 0, since the logarithm would be
  # -Infinity for 0. An amount of exactly 1 yields 0.0.
  #
  # Note: A value is always returned, even if it is 0, so that every
  # key is designated as having a weight.
  #
  def weight_for(amount)
    amount < 1 ? 0 : Math.log(amount)
  end

end
|
38
|
+
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Generators
|
4
|
+
|
5
|
+
# Generator for weights indexes.
#
# Falls back to the logarithmic strategy if none is given.
#
class WeightsGenerator < Base

  # Produces a weights index by handing this generator's index
  # over to the strategy.
  #
  def generate(strategy = Weights::Logarithmic.new)
    strategy.generate_from(self.index)
  end

end
|
16
|
+
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
File without changes
|
@@ -0,0 +1,113 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
# Common interface of an index backend for a single bundle.
#
# Creates the prepared text file accessor and forwards dumping, loading,
# cache checks, backup, restore and delete to the index, weights,
# similarity and configuration objects.
#
# Note: Only @prepared is assigned here. The index, weights, similarity
# and configuration readers return nil unless a subclass assigns them.
#
class Backend

  attr_reader :bundle_name,
              :prepared,
              :index,
              :weights,
              :similarity,
              :configuration

  delegate :index_name, :category_name, :to => :@config

  # Remembers the bundle name and config, and sets up the prepared
  # text file at the config's prepared index path.
  #
  def initialize(bundle_name, config)
    @bundle_name = bundle_name
    @config      = config
    @prepared    = File::Text.new(config.prepared_index_path)
  end

  # Retrieving data.
  #
  # Passes the given block on to the prepared file.
  #
  def retrieve(&block)
    prepared.retrieve(&block)
  end

  # Dumping.
  #
  def dump_index(index_hash)
    index.dump(index_hash)
  end
  def dump_weights(weights_hash)
    weights.dump(weights_hash)
  end
  def dump_similarity(similarity_hash)
    similarity.dump(similarity_hash)
  end
  def dump_configuration(configuration_hash)
    configuration.dump(configuration_hash)
  end

  # Loading.
  #
  def load_index
    index.load
  end
  def load_similarity
    similarity.load
  end
  def load_weights
    weights.load
  end
  def load_configuration
    configuration.load
  end

  # Cache ok?
  #
  def index_cache_ok?
    index.cache_ok?
  end
  def similarity_cache_ok?
    similarity.cache_ok?
  end
  def weights_cache_ok?
    weights.cache_ok?
  end

  # Cache small?
  #
  def index_cache_small?
    index.cache_small?
  end
  def similarity_cache_small?
    similarity.cache_small?
  end
  def weights_cache_small?
    weights.cache_small?
  end

  # Backs up all four parts, in the order
  # index, weights, similarity, configuration.
  #
  def backup
    index.backup
    weights.backup
    similarity.backup
    configuration.backup
  end

  # Restores all four parts, in the same order as #backup.
  #
  def restore
    index.restore
    weights.restore
    similarity.restore
    configuration.restore
  end

  # Deletes all four parts, in the same order as #backup.
  #
  def delete
    index.delete
    weights.delete
    similarity.delete
    configuration.delete
  end

end
|
110
|
+
|
111
|
+
end
|
112
|
+
|
113
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
# Handles all aspects of index files, such as dumping/loading.
|
6
|
+
#
|
7
|
+
module File
|
8
|
+
|
9
|
+
# Base class for all index files.
#
# Provides the path handling, backup/restore/delete and cache checks
# shared by its subclasses.
# Not directly useable as an index file, as it does not provide
# dump/load methods.
#
class Basic

  attr_reader :cache_path

  # An index cache takes a path, without file extension.
  # The extension is appended from #extension, which subclasses
  # override.
  #
  def initialize cache_path
    @cache_path = "#{cache_path}.#{extension}"
  end

  # The default extension for index files is "index".
  #
  def extension
    :index
  end

  # Copies the index file into a directory named "backup" right
  # under the directory the index file is in. The directory is
  # created if it does not exist yet.
  #
  def backup
    prepare_backup backup_directory
    # Copy into the backup directory. (This used to reference an
    # undefined local and raised a NameError.)
    FileUtils.cp cache_path, backup_directory, verbose: true
  end
  # The backup directory of this file.
  # Equal to the file's dirname plus /backup
  #
  def backup_directory
    ::File.join ::File.dirname(cache_path), 'backup'
  end
  # Prepares the backup directory for the file:
  # Creates target unless it already exists.
  #
  def prepare_backup target
    FileUtils.mkdir target unless Dir.exist?(target)
  end

  # Copies the file from its backup location back
  # to the original location.
  #
  def restore
    FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
  end
  # The backup filename: the given path with a "backup" directory
  # inserted in front of the file name.
  #
  def backup_file_path_of path
    dir, name = ::File.split path
    ::File.join dir, 'backup', name
  end

  # Deletes the file. Does nothing if it does not exist.
  #
  # Uses FileUtils instead of shelling out, so paths containing spaces
  # or shell metacharacters are handled correctly.
  #
  def delete
    FileUtils.rm_rf cache_path
  end

  # Checks.
  #

  # Is this cache file suspiciously small?
  # (less than 8 Bytes of size)
  #
  def cache_small?
    size_of(cache_path) < 8
  end
  # Is the cache ok? (existing and larger than
  # zero Bytes in size)
  #
  # A small cache is still ok.
  #
  def cache_ok?
    size_of(cache_path) > 0
  end
  # Extracts the size of the file in Bytes.
  #
  # Returns 0 if path is not an existing regular file.
  #
  def size_of path
    ::File.file?(path) ? ::File.size(path) : 0
  end

end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
module File
|
6
|
+
|
7
|
+
# Index files dumped in the JSON format.
#
class JSON < Basic

  # Uses the extension "json".
  #
  def extension
    :json
  end
  # Loads the index hash from json format, with symbolized keys.
  #
  # The file is opened with a block so that the handle is closed
  # once parsing is done (or has failed).
  #
  def load
    ::File.open(cache_path, 'r') do |file|
      Yajl::Parser.parse file, symbolize_keys: true
    end
  end
  # Dumps the index hash in json format.
  #
  def dump hash
    hash.dump_json cache_path
  end
  # A json file does not provide retrieve functionality.
  #
  # Always raises a RuntimeError.
  #
  def retrieve
    raise "Can't retrieve from JSON file. Use text file."
  end

end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
module File
|
6
|
+
|
7
|
+
# Index data in the Ruby Marshal format.
#
class Marshal < Basic

  # Uses the extension "dump".
  #
  def extension
    :dump
  end
  # Loads the index hash from marshal format.
  #
  # The file is opened with a block so that the handle is closed
  # once loading is done (or has failed).
  #
  # Note: Marshal.load must only be used on trusted files.
  #
  def load
    ::File.open(cache_path, 'r:binary') do |file|
      ::Marshal.load file
    end
  end
  # Dumps the index hash in marshal format.
  #
  def dump hash
    hash.dump_marshalled cache_path
  end
  # A marshal file does not provide retrieve functionality.
  #
  # Always raises a RuntimeError.
  #
  def retrieve
    raise "Can't retrieve from marshalled file. Use text file."
  end

end
|
33
|
+
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
37
|
+
|
38
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
module File
|
6
|
+
|
7
|
+
# Index data in the text format.
#
# Text files can only be read line by line (#retrieve) and opened for
# writing (#open_for_indexing). Loading and dumping are not supported.
#
class Text < Basic

  # Uses the extension "txt".
  #
  def extension
    :txt
  end

  # Not supported for text files. Always raises a RuntimeError.
  #
  def load
    raise "Can't load from text file. Use JSON or Marshal."
  end

  # Not supported for text files. Always raises a RuntimeError.
  #
  def dump(hash)
    raise "Can't dump to text file. Use JSON or Marshal."
  end

  # Reads prepared index data, one entry per line, in the form
  #   id,data\n
  #
  # Each line is split at its first comma only.
  # Yields the id as a string and the data, without line ending,
  # as a symbol.
  #
  def retrieve
    ::File.open(cache_path, 'r:binary') do |file|
      file.each_line do |line|
        id, token = line.split(',', 2)
        yield id, token.chomp.to_sym
      end
    end
  end

  # Opens the file for (binary) writing and passes it to the block.
  #
  def open_for_indexing(&block)
    ::File.open(cache_path, 'w:binary', &block)
  end

end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
# File based backend.
#
# Index, weights and configuration are kept in JSON files,
# the similarity in a marshalled file.
#
class Files < Backend

  # Sets up the four index files for the given bundle, using the
  # paths the config provides for each of them.
  #
  def initialize(bundle_name, config)
    super(bundle_name, config)

    path_for = lambda { |type| config.index_path(bundle_name, type) }

    # Note: The similarity is marshalled, as the Yajl json lib
    # cannot load symbolized values, just keys.
    #
    @index         = File::JSON.new(path_for.call(:index))
    @weights       = File::JSON.new(path_for.call(:weights))
    @similarity    = File::Marshal.new(path_for.call(:similarity))
    @configuration = File::JSON.new(path_for.call(:configuration))
  end

end
|
21
|
+
|
22
|
+
end
|
23
|
+
|
24
|
+
end
|
@@ -0,0 +1,77 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
class Redis
|
6
|
+
|
7
|
+
# Redis Backend Accessor.
#
# Keeps a namespace and a Redis connection for its subclasses.
# Not directly useable, as it does not provide a dump method.
# Most other operations are placeholders that do nothing yet.
#
class Basic

  attr_reader :namespace

  # Takes the namespace this accessor works in and
  # opens a new Redis connection with default settings.
  #
  def initialize(namespace)
    @namespace = namespace
    @backend   = ::Redis.new
  end

  # Does nothing. Returns nil.
  #
  def load
    nil
  end

  # We do not use Redis to retrieve data.
  # Does nothing and returns nil.
  #
  def retrieve
    nil
  end

  # Redis does not backup.
  # Does nothing and returns nil.
  #
  def backup
    nil
  end

  # Intended to delete the Redis index namespace.
  #
  # TODO Not implemented yet, currently does nothing.
  #
  def delete
    nil
  end

  # Checks.
  #

  # Is this cache suspiciously small?
  #
  # TODO Not implemented yet, always false.
  #
  def cache_small?
    false
  end

  # Is the cache ok?
  #
  # TODO Not implemented yet, always false.
  #
  def cache_ok?
    false
  end

  # TODO Not implemented yet, always returns nil.
  #
  def size_of(path)
    nil
  end

end
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
@@ -0,0 +1,46 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
class Redis
|
6
|
+
|
7
|
+
# Redis accessor for a hash of key => list of values.
#
# Each list is stored as a Redis sorted set under "namespace:key",
# scored by the position of the value in the list.
#
class ListHash < Basic

  # Writes the hash into Redis.
  #
  # For each key, the existing Redis key is deleted and the values
  # are added again, scored 1..n in list order, inside a MULTI block.
  #
  # TODO Performance: Write the values as you get them instead of
  #      putting them together in an array first.
  #
  def dump hash
    hash.each_pair do |key, values|
      redis_key = "#{namespace}:#{key}"
      @backend.multi do
        @backend.del redis_key

        values.each_with_index do |value, position|
          @backend.zadd redis_key, position + 1, value
        end
      end
    end
  end

  # Get a collection.
  #
  # Reads the whole sorted set written by #dump, in score order.
  # (A list command such as LRANGE fails with a WRONGTYPE error
  # on keys that hold a sorted set.)
  #
  def collection sym
    @backend.zrange "#{namespace}:#{sym}", 0, -1
  end

  # Get a single value.
  #
  # Not supported. Always raises a RuntimeError.
  #
  def member sym
    raise "Can't retrieve a single value from a Redis ListHash. Use Index::Redis::StringHash."
  end

end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|
45
|
+
|
46
|
+
end
|
@@ -0,0 +1,35 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
class Redis
|
6
|
+
|
7
|
+
# Redis accessor for a hash of key => single value.
#
# All pairs are stored as fields of one Redis hash named
# after the namespace.
#
class StringHash < Basic

  # Writes the hash into Redis, one field per pair.
  #
  def dump(hash)
    hash.each_pair { |field, value| @backend.hset(namespace, field, value) }
  end

  # Get a collection.
  #
  # Not supported. Always raises a RuntimeError.
  #
  def collection(sym)
    raise "Can't retrieve a collection from a StringHash. Use Index::Redis::ListHash."
  end

  # Get a single value: the field sym of the namespace hash.
  #
  def member(sym)
    @backend.hget(namespace, sym)
  end

end
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
end
|
34
|
+
|
35
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
module Internals
|
2
|
+
|
3
|
+
module Index
|
4
|
+
|
5
|
+
# Redis based backend.
#
# Index and similarity are kept as list hashes,
# weights and configuration as string hashes.
#
# TODO Needs a reconnect to be run after forking.
#
class Redis < Backend

  # Sets up the four Redis accessors, each namespaced as
  #   identifier:bundle_name:type
  #
  def initialize(bundle_name, config)
    super(bundle_name, config)

    namespace_for = lambda { |type| "#{config.identifier}:#{bundle_name}:#{type}" }

    # TODO
    #
    @index         = Redis::ListHash.new(namespace_for.call(:index))
    @weights       = Redis::StringHash.new(namespace_for.call(:weights))
    @similarity    = Redis::ListHash.new(namespace_for.call(:similarity))
    @configuration = Redis::StringHash.new(namespace_for.call(:configuration))
  end

  # The ids for the given symbol, read from the index collection.
  #
  def ids(sym)
    @index.collection(sym)
  end

  # The weight for the given symbol.
  #
  # Note: Converts to float.
  #
  def weight(sym)
    @weights.member(sym).to_f
  end

  # The configuration setting for the given symbol.
  #
  def setting(sym)
    @configuration.member(sym)
  end

end
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
end
|