picky 1.4.1 → 1.4.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/{alias_instances.rb → aliases.rb} +1 -3
- data/lib/picky/application.rb +18 -19
- data/lib/picky/cores.rb +1 -1
- data/lib/picky/generators/aliases.rb +3 -0
- data/lib/picky/index/base.rb +179 -0
- data/lib/picky/index/memory.rb +28 -0
- data/lib/picky/index/redis.rb +28 -0
- data/lib/picky/{indexes_api.rb → index_bundle.rb} +16 -16
- data/lib/picky/indexed/indexes.rb +11 -7
- data/lib/picky/indexing/indexes.rb +14 -8
- data/lib/picky/internals/adapters/rack/base.rb +27 -0
- data/lib/picky/internals/adapters/rack/live_parameters.rb +37 -0
- data/lib/picky/internals/adapters/rack/query.rb +63 -0
- data/lib/picky/internals/adapters/rack.rb +34 -0
- data/lib/picky/{calculations → internals/calculations}/location.rb +0 -0
- data/lib/picky/{cli.rb → internals/cli.rb} +0 -0
- data/lib/picky/{configuration → internals/configuration}/index.rb +8 -2
- data/lib/picky/{ext → internals/ext}/maybe_compile.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/extconf.rb +0 -0
- data/lib/picky/{ext → internals/ext}/ruby19/performant.c +0 -0
- data/lib/picky/{extensions → internals/extensions}/array.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/hash.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/module.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/object.rb +0 -0
- data/lib/picky/{extensions → internals/extensions}/symbol.rb +0 -0
- data/lib/picky/internals/frontend_adapters/rack.rb +154 -0
- data/lib/picky/internals/generators/base.rb +19 -0
- data/lib/picky/internals/generators/partial/default.rb +7 -0
- data/lib/picky/internals/generators/partial/none.rb +35 -0
- data/lib/picky/internals/generators/partial/strategy.rb +29 -0
- data/lib/picky/internals/generators/partial/substring.rb +122 -0
- data/lib/picky/internals/generators/partial_generator.rb +19 -0
- data/lib/picky/internals/generators/similarity/default.rb +9 -0
- data/lib/picky/internals/generators/similarity/double_levenshtone.rb +81 -0
- data/lib/picky/internals/generators/similarity/none.rb +35 -0
- data/lib/picky/internals/generators/similarity/strategy.rb +11 -0
- data/lib/picky/internals/generators/similarity_generator.rb +19 -0
- data/lib/picky/internals/generators/strategy.rb +18 -0
- data/lib/picky/internals/generators/weights/default.rb +9 -0
- data/lib/picky/internals/generators/weights/logarithmic.rb +43 -0
- data/lib/picky/internals/generators/weights/strategy.rb +11 -0
- data/lib/picky/internals/generators/weights_generator.rb +19 -0
- data/lib/picky/{helpers → internals/helpers}/measuring.rb +0 -0
- data/lib/picky/internals/index/backend.rb +113 -0
- data/lib/picky/internals/index/file/basic.rb +101 -0
- data/lib/picky/internals/index/file/json.rb +38 -0
- data/lib/picky/internals/index/file/marshal.rb +38 -0
- data/lib/picky/internals/index/file/text.rb +60 -0
- data/lib/picky/internals/index/files.rb +24 -0
- data/lib/picky/internals/index/redis/basic.rb +77 -0
- data/lib/picky/internals/index/redis/list_hash.rb +46 -0
- data/lib/picky/internals/index/redis/string_hash.rb +35 -0
- data/lib/picky/internals/index/redis.rb +44 -0
- data/lib/picky/internals/indexed/bundle/base.rb +72 -0
- data/lib/picky/internals/indexed/bundle/memory.rb +69 -0
- data/lib/picky/internals/indexed/bundle/redis.rb +70 -0
- data/lib/picky/internals/indexed/categories.rb +135 -0
- data/lib/picky/internals/indexed/category.rb +90 -0
- data/lib/picky/internals/indexed/index.rb +57 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/calculation.rb +0 -0
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/location.rb +4 -2
- data/lib/picky/{indexed → internals/indexed}/wrappers/bundle/wrapper.rb +1 -1
- data/lib/picky/internals/indexed/wrappers/exact_first.rb +65 -0
- data/lib/picky/{indexers → internals/indexers}/no_source_specified_error.rb +0 -0
- data/lib/picky/{indexers → internals/indexers}/serial.rb +2 -2
- data/lib/picky/{indexers → internals/indexers}/solr.rb +0 -0
- data/lib/picky/internals/indexing/bundle/base.rb +219 -0
- data/lib/picky/internals/indexing/bundle/memory.rb +25 -0
- data/lib/picky/internals/indexing/bundle/redis.rb +28 -0
- data/lib/picky/internals/indexing/bundle/super_base.rb +65 -0
- data/lib/picky/internals/indexing/categories.rb +42 -0
- data/lib/picky/internals/indexing/category.rb +120 -0
- data/lib/picky/internals/indexing/index.rb +67 -0
- data/lib/picky/{performant.rb → internals/performant.rb} +0 -0
- data/lib/picky/internals/query/allocation.rb +88 -0
- data/lib/picky/internals/query/allocations.rb +137 -0
- data/lib/picky/internals/query/combination.rb +80 -0
- data/lib/picky/internals/query/combinations/base.rb +84 -0
- data/lib/picky/internals/query/combinations/memory.rb +58 -0
- data/lib/picky/internals/query/combinations/redis.rb +59 -0
- data/lib/picky/internals/query/indexes.rb +180 -0
- data/lib/picky/internals/query/qualifiers.rb +81 -0
- data/lib/picky/internals/query/token.rb +215 -0
- data/lib/picky/internals/query/tokens.rb +89 -0
- data/lib/picky/{query → internals/query}/weights.rb +0 -0
- data/lib/picky/internals/results/base.rb +106 -0
- data/lib/picky/internals/results/full.rb +17 -0
- data/lib/picky/internals/results/live.rb +17 -0
- data/lib/picky/{solr → internals/solr}/schema_generator.rb +0 -0
- data/lib/picky/internals/tokenizers/base.rb +166 -0
- data/lib/picky/internals/tokenizers/index.rb +63 -0
- data/lib/picky/internals/tokenizers/query.rb +79 -0
- data/lib/picky/loader.rb +148 -112
- data/lib/picky/query/base.rb +57 -26
- data/lib/picky/query/full.rb +1 -1
- data/lib/picky/query/live.rb +1 -1
- data/lib/picky/sources/db.rb +27 -6
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/try.rake +2 -2
- data/spec/lib/aliases_spec.rb +9 -0
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/generators/aliases_spec.rb +1 -0
- data/spec/lib/{index_api_spec.rb → index/base_spec.rb} +7 -7
- data/spec/lib/index_bundle_spec.rb +71 -0
- data/spec/lib/indexed/indexes_spec.rb +61 -0
- data/spec/lib/indexing/indexes_spec.rb +94 -24
- data/spec/lib/{adapters → internals/adapters}/rack/base_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/live_parameters_spec.rb +2 -2
- data/spec/lib/{adapters → internals/adapters}/rack/query_spec.rb +2 -2
- data/spec/lib/{calculations → internals/calculations}/location_spec.rb +0 -0
- data/spec/lib/{cli_spec.rb → internals/cli_spec.rb} +4 -1
- data/spec/lib/{configuration → internals/configuration}/index_spec.rb +1 -1
- data/spec/lib/{cores_spec.rb → internals/cores_spec.rb} +0 -0
- data/spec/lib/{extensions → internals/extensions}/array_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/hash_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/module_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/object_spec.rb +0 -0
- data/spec/lib/{extensions → internals/extensions}/symbol_spec.rb +0 -0
- data/spec/lib/{frontend_adapters → internals/frontend_adapters}/rack_spec.rb +11 -11
- data/spec/lib/{cacher → internals/generators}/cacher_strategy_spec.rb +2 -2
- data/spec/lib/internals/generators/partial/default_spec.rb +17 -0
- data/spec/lib/internals/generators/partial/none_spec.rb +17 -0
- data/spec/lib/{cacher → internals/generators}/partial/substring_spec.rb +26 -27
- data/spec/lib/{cacher → internals/generators}/partial_generator_spec.rb +5 -5
- data/spec/lib/{cacher → internals/generators}/similarity/double_levenshtone_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/similarity/none_spec.rb +2 -2
- data/spec/lib/{cacher → internals/generators}/similarity_generator_spec.rb +4 -4
- data/spec/lib/{cacher → internals/generators}/weights/logarithmic_spec.rb +2 -2
- data/spec/lib/internals/generators/weights_generator_spec.rb +21 -0
- data/spec/lib/{helpers → internals/helpers}/measuring_spec.rb +0 -0
- data/spec/lib/{index → internals/index}/file/basic_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/json_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/marshal_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/file/text_spec.rb +2 -2
- data/spec/lib/{index → internals/index}/files_spec.rb +2 -2
- data/spec/lib/{indexed/bundle_spec.rb → internals/indexed/bundle/memory_spec.rb} +4 -5
- data/spec/lib/{indexed → internals/indexed}/categories_spec.rb +13 -13
- data/spec/lib/{indexed → internals/indexed}/category_spec.rb +59 -32
- data/spec/lib/{indexed → internals/indexed}/index_spec.rb +5 -5
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/calculation_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/bundle/wrapper_spec.rb +0 -0
- data/spec/lib/{indexed → internals/indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/{indexers → internals/indexers}/serial_spec.rb +0 -0
- data/spec/lib/{indexing/bundle_partial_generation_speed_spec.rb → internals/indexing/bundle/memory_partial_generation_speed_spec.rb} +3 -3
- data/spec/lib/{indexing/bundle_spec.rb → internals/indexing/bundle/memory_spec.rb} +3 -3
- data/spec/lib/{index/bundle_spec.rb → internals/indexing/bundle/super_base_spec.rb} +9 -3
- data/spec/lib/{indexing → internals/indexing}/category_spec.rb +3 -3
- data/spec/lib/{indexing → internals/indexing}/index_spec.rb +3 -3
- data/spec/lib/internals/indexing/indexes_spec.rb +36 -0
- data/spec/lib/{interfaces → internals/interfaces}/live_parameters_spec.rb +0 -0
- data/spec/lib/internals/results/base_spec.rb +105 -0
- data/spec/lib/internals/results/full_spec.rb +78 -0
- data/spec/lib/internals/results/live_spec.rb +88 -0
- data/spec/lib/{solr → internals/solr}/schema_generator_spec.rb +0 -0
- data/spec/lib/{tokenizers → internals/tokenizers}/base_spec.rb +3 -3
- data/spec/lib/{tokenizers → internals/tokenizers}/index_spec.rb +9 -9
- data/spec/lib/{tokenizers → internals/tokenizers}/query_spec.rb +11 -11
- data/spec/lib/query/allocation_spec.rb +12 -12
- data/spec/lib/query/allocations_spec.rb +19 -19
- data/spec/lib/query/base_spec.rb +28 -4
- data/spec/lib/query/combination_spec.rb +8 -9
- data/spec/lib/query/combinations/base_spec.rb +116 -0
- data/spec/lib/query/{combinations_spec.rb → combinations/memory_spec.rb} +14 -14
- data/spec/lib/query/combinations/redis_spec.rb +132 -0
- data/spec/lib/query/full_spec.rb +2 -2
- data/spec/lib/query/indexes_spec.rb +81 -0
- data/spec/lib/query/live_spec.rb +3 -3
- data/spec/lib/query/qualifiers_spec.rb +6 -6
- data/spec/lib/query/token_spec.rb +38 -38
- data/spec/lib/query/tokens_spec.rb +35 -35
- data/spec/lib/sources/db_spec.rb +23 -18
- metadata +212 -181
- data/lib/picky/adapters/rack/base.rb +0 -23
- data/lib/picky/adapters/rack/live_parameters.rb +0 -33
- data/lib/picky/adapters/rack/query.rb +0 -59
- data/lib/picky/adapters/rack.rb +0 -28
- data/lib/picky/cacher/convenience.rb +0 -3
- data/lib/picky/cacher/generator.rb +0 -15
- data/lib/picky/cacher/partial/default.rb +0 -5
- data/lib/picky/cacher/partial/none.rb +0 -31
- data/lib/picky/cacher/partial/strategy.rb +0 -21
- data/lib/picky/cacher/partial/substring.rb +0 -118
- data/lib/picky/cacher/partial_generator.rb +0 -15
- data/lib/picky/cacher/similarity/default.rb +0 -7
- data/lib/picky/cacher/similarity/double_levenshtone.rb +0 -77
- data/lib/picky/cacher/similarity/none.rb +0 -31
- data/lib/picky/cacher/similarity/strategy.rb +0 -9
- data/lib/picky/cacher/similarity_generator.rb +0 -15
- data/lib/picky/cacher/strategy.rb +0 -12
- data/lib/picky/cacher/weights/default.rb +0 -7
- data/lib/picky/cacher/weights/logarithmic.rb +0 -39
- data/lib/picky/cacher/weights/strategy.rb +0 -9
- data/lib/picky/cacher/weights_generator.rb +0 -15
- data/lib/picky/frontend_adapters/rack.rb +0 -150
- data/lib/picky/index/bundle.rb +0 -54
- data/lib/picky/index/file/basic.rb +0 -97
- data/lib/picky/index/file/json.rb +0 -34
- data/lib/picky/index/file/marshal.rb +0 -34
- data/lib/picky/index/file/text.rb +0 -56
- data/lib/picky/index/files.rb +0 -118
- data/lib/picky/index_api.rb +0 -175
- data/lib/picky/indexed/bundle.rb +0 -54
- data/lib/picky/indexed/categories.rb +0 -131
- data/lib/picky/indexed/category.rb +0 -85
- data/lib/picky/indexed/index.rb +0 -39
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -61
- data/lib/picky/indexing/bundle.rb +0 -213
- data/lib/picky/indexing/categories.rb +0 -38
- data/lib/picky/indexing/category.rb +0 -117
- data/lib/picky/indexing/index.rb +0 -55
- data/lib/picky/query/allocation.rb +0 -82
- data/lib/picky/query/allocations.rb +0 -130
- data/lib/picky/query/combination.rb +0 -74
- data/lib/picky/query/combinations.rb +0 -105
- data/lib/picky/query/qualifiers.rb +0 -77
- data/lib/picky/query/token.rb +0 -202
- data/lib/picky/query/tokens.rb +0 -86
- data/lib/picky/query/weigher.rb +0 -165
- data/lib/picky/results/base.rb +0 -102
- data/lib/picky/results/full.rb +0 -13
- data/lib/picky/results/live.rb +0 -13
- data/lib/picky/tokenizers/base.rb +0 -161
- data/lib/picky/tokenizers/index.rb +0 -58
- data/lib/picky/tokenizers/query.rb +0 -74
- data/spec/lib/cacher/partial/default_spec.rb +0 -15
- data/spec/lib/cacher/partial/none_spec.rb +0 -17
- data/spec/lib/cacher/weights_generator_spec.rb +0 -21
- data/spec/lib/results/base_spec.rb +0 -257
- data/spec/lib/results/live_spec.rb +0 -15
|
@@ -0,0 +1,43 @@
|
|
|
1
|
+
module Internals

  module Generators

    module Weights

      # Logarithmic weighting strategy.
      #
      # For a key that maps to x ids, the weight is
      #   w(x) = log(x)
      # rounded to two decimals. For x < 1 the weight is 0.
      #
      class Logarithmic < Strategy

        # Builds a weights hash from the given index.
        #
        # The index is enumerated as [text, ids] pairs. The result maps
        # each text to the rounded weight of the number of its ids.
        #
        def generate_from index
          index.each_with_object({}) do |text_ids, weights|
            text, ids = *text_ids
            weight = weight_for ids.size
            next unless weight
            weights[text] ||= weight.round(2)
          end
        end

        # Calculates the weight for the given amount of ids.
        #
        # The logarithm of 0 would be -Infinity, so amounts
        # below 1 are mapped to 0. An amount of 1 yields 0.0.
        #
        # Note: A value is always returned, even if it is 0,
        # to designate that there is a weight.
        #
        def weight_for amount
          amount < 1 ? 0 : Math.log(amount)
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
module Internals

  module Generators

    # Generates weights for an index.
    # The strategy used by default is the logarithmic one.
    #
    class WeightsGenerator < Base

      # Returns whatever the given strategy generates from this
      # generator's index.
      #
      # If no strategy is passed, a new Weights::Logarithmic is used.
      #
      def generate strategy = Weights::Logarithmic.new
        strategy.generate_from(self.index)
      end

    end

  end

end
|
|
File without changes
|
|
@@ -0,0 +1,113 @@
|
|
|
1
|
+
module Internals

  module Index

    # Common interface of an index backend.
    #
    # This class itself only sets up the prepared text file.
    # The index, weights, similarity and configuration stores
    # are assigned by the subclasses (see Files and Redis).
    #
    class Backend

      attr_reader :bundle_name
      attr_reader :prepared, :index, :weights, :similarity, :configuration

      delegate :index_name, :category_name, :to => :@config

      # Remembers bundle name and config and sets up
      # the prepared text file at the config's prepared index path.
      #
      def initialize bundle_name, config
        @bundle_name = bundle_name
        @config      = config
        @prepared    = File::Text.new(config.prepared_index_path)
      end

      # Retrieving data.
      #
      # Forwards the block to the prepared file.
      #
      def retrieve &block
        prepared.retrieve(&block)
      end

      # Dumping.
      #
      # Each method hands the given hash to the respective store.
      #
      def dump_index index_hash
        index.dump(index_hash)
      end
      def dump_weights weights_hash
        weights.dump(weights_hash)
      end
      def dump_similarity similarity_hash
        similarity.dump(similarity_hash)
      end
      def dump_configuration configuration_hash
        configuration.dump(configuration_hash)
      end

      # Loading.
      #
      # Each method returns what the respective store loads.
      #
      def load_index
        index.load
      end
      def load_similarity
        similarity.load
      end
      def load_weights
        weights.load
      end
      def load_configuration
        configuration.load
      end

      # Cache ok?
      #
      # Asks the respective store whether its cache is ok.
      #
      def index_cache_ok?
        index.cache_ok?
      end
      def similarity_cache_ok?
        similarity.cache_ok?
      end
      def weights_cache_ok?
        weights.cache_ok?
      end

      # Cache small?
      #
      # Asks the respective store whether its cache is small.
      #
      def index_cache_small?
        index.cache_small?
      end
      def similarity_cache_small?
        similarity.cache_small?
      end
      def weights_cache_small?
        weights.cache_small?
      end

      # Backs up all four stores, in the order
      # index, weights, similarity, configuration.
      #
      def backup
        index.backup
        weights.backup
        similarity.backup
        configuration.backup
      end

      # Restores all four stores, in the order
      # index, weights, similarity, configuration.
      #
      def restore
        index.restore
        weights.restore
        similarity.restore
        configuration.restore
      end

      # Deletes all four stores, in the order
      # index, weights, similarity, configuration.
      #
      def delete
        index.delete
        weights.delete
        similarity.delete
        configuration.delete
      end

    end

  end

end
|
|
@@ -0,0 +1,101 @@
|
|
|
1
|
+
module Internals

  module Index

    # Handles all aspects of index files, such as dumping/loading.
    #
    module File

      # Base class for all index files.
      #
      # Provides necessary helper methods for its
      # subclasses.
      # Not directly useable, as it does not provide
      # dump/load methods.
      #
      class Basic

        attr_reader :cache_path

        # An index cache takes a path, without file extension.
        # The extension is appended here and is provided by
        # the subclasses through #extension.
        #
        def initialize cache_path
          @cache_path = "#{cache_path}.#{extension}"
        end

        # The default extension for index files is "index".
        #
        def extension
          :index
        end

        # Will copy the index file to a location that
        # is in a directory named "backup" right under
        # the directory the index file is in.
        #
        # The backup directory is created if it does not exist yet.
        #
        def backup
          target = backup_directory
          prepare_backup target
          FileUtils.cp cache_path, target, verbose: true
        end
        # The backup directory of this file.
        # Equal to the file's dirname plus /backup
        #
        def backup_directory
          ::File.join ::File.dirname(cache_path), 'backup'
        end
        # Prepares the backup directory for the file.
        #
        # Creates the target directory unless it is already there.
        #
        def prepare_backup target
          FileUtils.mkdir target unless ::File.directory?(target)
        end

        # Copies the file from its backup location back
        # to the original location.
        #
        def restore
          FileUtils.cp backup_file_path_of(cache_path), cache_path, verbose: true
        end
        # The backup filename.
        #
        # Equal to the given path with a "backup" directory
        # inserted before the file name.
        #
        def backup_file_path_of path
          dir, name = ::File.split path
          ::File.join dir, 'backup', name
        end

        # Deletes the file.
        #
        # Does not go through a shell, so paths containing spaces
        # or shell metacharacters are handled as is. A missing
        # file is not an error.
        #
        def delete
          FileUtils.rm_rf cache_path
        end

        # Checks.
        #

        # Is this cache file suspiciously small?
        # (less than 8 Bytes of size)
        #
        def cache_small?
          size_of(cache_path) < 8
        end
        # Is the cache ok? (existing and larger than
        # zero Bytes in size)
        #
        # A small cache is still ok.
        #
        def cache_ok?
          size_of(cache_path) > 0
        end
        # Extracts the size of the file in Bytes.
        #
        # Returns 0 if the file does not exist or is empty.
        #
        def size_of path
          ::File.size?(path) || 0
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module Internals

  module Index

    module File

      # Index files dumped in the JSON format.
      #
      class JSON < Basic

        # Uses the extension "json".
        #
        def extension
          :json
        end
        # Loads the index hash from json format.
        #
        # Keys are symbolized. The file is opened in block
        # form so that the handle is closed after parsing.
        #
        def load
          ::File.open(cache_path, 'r') do |file|
            Yajl::Parser.parse file, symbolize_keys: true
          end
        end
        # Dumps the index hash in json format.
        #
        def dump hash
          hash.dump_json cache_path
        end
        # A json file does not provide retrieve functionality.
        #
        # Always raises.
        #
        def retrieve
          raise "Can't retrieve from JSON file. Use text file."
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
module Internals

  module Index

    module File

      # Index data in the Ruby Marshal format.
      #
      class Marshal < Basic

        # Uses the extension "dump".
        #
        def extension
          :dump
        end
        # Loads the index hash from marshal format.
        #
        # The file is opened in block form so that the
        # handle is closed after loading.
        #
        # NOTE(review): Marshal.load must only be used on trusted
        # files - presumably these are files dumped by this class.
        #
        def load
          ::File.open(cache_path, 'r:binary') do |file|
            ::Marshal.load file
          end
        end
        # Dumps the index hash in marshal format.
        #
        def dump hash
          hash.dump_marshalled cache_path
        end
        # A marshal file does not provide retrieve functionality.
        #
        # Always raises.
        #
        def retrieve
          raise "Can't retrieve from marshalled file. Use text file."
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,60 @@
|
|
|
1
|
+
module Internals

  module Index

    module File

      # Index data dumped in the text format.
      #
      class Text < Basic

        # Uses the extension "txt".
        #
        def extension
          :txt
        end
        # Loading is not supported, as text files
        # are used exclusively for prepared data files.
        #
        # Always raises.
        #
        def load
          raise "Can't load from text file. Use JSON or Marshal."
        end
        # Dumping is not supported, as text files
        # are used exclusively for prepared data files.
        #
        # Always raises.
        #
        def dump hash
          raise "Can't dump to text file. Use JSON or Marshal."
        end

        # Retrieves prepared index data in the form
        # * id,data\n
        # * id,data\n
        # * id,data\n
        #
        # Each line is split at its first comma only.
        #
        # Yields an id string and a symbol token.
        #
        def retrieve
          ::File.open(cache_path, 'r:binary') do |file|
            file.each_line do |line|
              id, token = line.split(',', 2)
              token.chomp!
              yield id, token.to_sym
            end
          end
        end

        # Opens the text file for writing (binary mode)
        # and passes the given block on to File.open.
        #
        def open_for_indexing &block
          ::File.open(cache_path, 'w:binary', &block)
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
module Internals

  module Index

    # File based backend.
    #
    # Index, weights and configuration are JSON files,
    # the similarity is a Marshal file.
    #
    class Files < Backend

      # Sets up the four index files. Their paths come from
      # the config's index_path for this bundle name.
      #
      def initialize bundle_name, config
        super

        # Note: We marshal the similarity, as the
        #       Yajl json lib cannot load symbolized
        #       values, just keys.
        #
        @index         = File::JSON.new(config.index_path(bundle_name, :index))
        @weights       = File::JSON.new(config.index_path(bundle_name, :weights))
        @similarity    = File::Marshal.new(config.index_path(bundle_name, :similarity))
        @configuration = File::JSON.new(config.index_path(bundle_name, :configuration))
      end

    end

  end

end
|
|
@@ -0,0 +1,77 @@
|
|
|
1
|
+
module Internals

  module Index

    class Redis

      # Redis Backend Accessor.
      #
      # Provides necessary helper methods for its
      # subclasses.
      # Not directly useable, as it does not provide
      # dump/load methods.
      #
      class Basic

        attr_reader :namespace

        # Takes the namespace under which the data is stored
        # and creates a new Redis client with default options.
        #
        def initialize namespace
          @namespace = namespace

          @backend = ::Redis.new
        end

        # Does nothing.
        #
        def load
          # Nothing.
        end
        # We do not use Redis to retrieve data.
        #
        def retrieve
          # Nothing.
        end

        # Redis does not backup.
        #
        def backup
          # Nothing.
        end

        # Redis does not backup, so there is nothing to restore.
        #
        # Defined so that restoring a backend that uses
        # this class does not fail on a missing method.
        #
        def restore
          # Nothing.
        end

        # Deletes the Redis index namespace.
        #
        # Not implemented yet, does nothing.
        #
        def delete
          # TODO @backend.
        end

        # Checks.
        #

        # Is this cache suspiciously small?
        #
        # Not implemented yet, always false.
        #
        def cache_small?
          false # TODO
        end
        # Is the cache ok?
        #
        # A small cache is still ok.
        #
        # Not implemented yet, always false.
        #
        def cache_ok?
          false # TODO
        end
        # Extracts the size.
        #
        # Not implemented yet, always nil.
        #
        def size_of path
          # TODO
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
module Internals

  module Index

    class Redis

      # Stores a hash of collections in Redis.
      #
      # Each key's values are kept in a Redis sorted set under
      # "namespace:key", scored by their position.
      #
      class ListHash < Basic

        # Writes the hash into Redis.
        #
        # For each key, the existing entry is deleted and
        # the values are added with scores 1, 2, 3, ...
        # inside a MULTI block.
        #
        # TODO Performance: rpush as you get the values instead of putting it together in an array first.
        #
        def dump hash
          hash.each_pair do |key, values|
            redis_key = "#{namespace}:#{key}"
            i = 0
            @backend.multi do
              @backend.del redis_key

              values.each do |value|
                i += 1
                @backend.zadd redis_key, i, value
              end
            end
          end
        end

        # Get a collection.
        #
        # The values are written with zadd, so they have to be
        # read with zrange. A list command (lrange) on a sorted
        # set key is rejected by Redis.
        #
        def collection sym
          @backend.zrange "#{namespace}:#{sym}", 0, -1
        end

        # Get a single value.
        #
        # Not supported, always raises.
        #
        def member sym
          raise "Can't retrieve a single value from a Redis ListHash. Use Index::Redis::StringHash."
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
module Internals

  module Index

    class Redis

      # Stores a flat hash of single values in one
      # Redis hash, which is named by the namespace.
      #
      class StringHash < Basic

        # Writes the hash into Redis, one hset per pair.
        #
        def dump hash
          hash.each_pair { |field, value| @backend.hset(namespace, field, value) }
        end

        # Get a collection.
        #
        # Not supported, always raises.
        #
        def collection sym
          raise "Can't retrieve a collection from a StringHash. Use Index::Redis::ListHash."
        end

        # Get a single value.
        #
        # Returns what hget returns for the given field.
        #
        def member sym
          @backend.hget(namespace, sym)
        end

      end

    end

  end

end
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
module Internals

  module Index

    # Redis based backend.
    #
    # TODO Needs a reconnect to be run after forking.
    #
    class Redis < Backend

      # Sets up the four Redis stores. Each store is namespaced
      # with "identifier:bundle_name:type".
      #
      def initialize bundle_name, config
        super

        # TODO
        #
        prefix = "#{config.identifier}:#{bundle_name}"

        @index         = Redis::ListHash.new("#{prefix}:index")
        @weights       = Redis::StringHash.new("#{prefix}:weights")
        @similarity    = Redis::ListHash.new("#{prefix}:similarity")
        @configuration = Redis::StringHash.new("#{prefix}:configuration")
      end

      # Returns the collection that the index holds for the symbol.
      #
      def ids sym
        @index.collection(sym)
      end

      # Returns the member value that the weights hold for the symbol.
      #
      # Note: Converts to float.
      #
      def weight sym
        stored = @weights.member(sym)
        stored.to_f
      end

      # Returns the member value that the configuration
      # holds for the symbol.
      #
      def setting sym
        @configuration.member(sym)
      end

    end

  end

end
|