picky 3.2.0 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
- data/aux/picky/cli.rb +3 -1
- data/lib/picky/backends/backend.rb +16 -0
- data/lib/picky/backends/file/basic.rb +18 -9
- data/lib/picky/backends/file/json.rb +1 -0
- data/lib/picky/backends/file.rb +8 -4
- data/lib/picky/backends/helpers/file.rb +6 -0
- data/lib/picky/backends/memory/basic.rb +10 -2
- data/lib/picky/backends/memory/json.rb +1 -6
- data/lib/picky/backends/memory/marshal.rb +1 -6
- data/lib/picky/backends/memory/text.rb +1 -0
- data/lib/picky/backends/memory.rb +8 -4
- data/lib/picky/backends/redis/basic.rb +12 -9
- data/lib/picky/backends/redis.rb +10 -4
- data/lib/picky/bundle.rb +14 -0
- data/lib/picky/bundle_indexed.rb +110 -0
- data/lib/picky/bundle_indexing.rb +177 -0
- data/lib/picky/bundle_realtime.rb +80 -0
- data/lib/picky/categories.rb +5 -1
- data/lib/picky/category.rb +12 -20
- data/lib/picky/category_indexed.rb +3 -6
- data/lib/picky/category_indexing.rb +19 -18
- data/lib/picky/category_realtime.rb +5 -10
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generators/partial/default.rb +1 -1
- data/lib/picky/generators/partial/postfix.rb +30 -0
- data/lib/picky/generators/partial/substring.rb +8 -2
- data/lib/picky/index.rb +3 -3
- data/lib/picky/index_indexing.rb +3 -2
- data/lib/picky/indexers/base.rb +0 -8
- data/lib/picky/indexers/parallel.rb +1 -1
- data/lib/picky/loader.rb +15 -15
- data/lib/picky/query/qualifier_category_mapper.rb +1 -1
- data/lib/picky/rack/harakiri.rb +3 -1
- data/lib/picky/sources/db.rb +11 -0
- data/lib/picky/statistics.rb +2 -2
- data/lib/picky/tokenizer.rb +1 -1
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/picky/wrappers/bundle/calculation.rb +45 -0
- data/lib/picky/wrappers/bundle/delegators.rb +69 -0
- data/lib/picky/wrappers/bundle/exact_partial.rb +38 -0
- data/lib/picky/{indexed/wrappers → wrappers}/bundle/location.rb +6 -4
- data/lib/picky/wrappers/bundle/wrapper.rb +29 -0
- data/lib/picky/wrappers/category/exact_first.rb +55 -0
- data/lib/picky/wrappers/category/location.rb +33 -0
- data/lib/picky/{sources/wrappers → wrappers/sources}/base.rb +7 -3
- data/lib/picky/{sources/wrappers → wrappers/sources}/location.rb +3 -3
- data/lib/picky.rb +10 -11
- data/spec/aux/picky/cli_spec.rb +5 -5
- data/spec/lib/backends/backend_spec.rb +39 -0
- data/spec/lib/backends/file/basic_spec.rb +59 -0
- data/spec/lib/backends/file_spec.rb +105 -0
- data/spec/lib/backends/memory/basic_spec.rb +43 -15
- data/spec/lib/backends/memory_spec.rb +108 -54
- data/spec/lib/backends/redis/basic_spec.rb +81 -57
- data/spec/lib/backends/redis_spec.rb +120 -66
- data/spec/lib/category_indexed_spec.rb +12 -12
- data/spec/lib/category_indexing_spec.rb +23 -23
- data/spec/lib/category_spec.rb +14 -14
- data/spec/lib/cores_spec.rb +2 -2
- data/spec/lib/extensions/object_spec.rb +7 -7
- data/spec/lib/generators/partial/postfix_spec.rb +131 -0
- data/spec/lib/generators/partial/substring_spec.rb +29 -4
- data/spec/lib/generators/weights_generator_spec.rb +3 -3
- data/spec/lib/index_indexing_spec.rb +11 -15
- data/spec/lib/index_spec.rb +8 -8
- data/spec/lib/indexed/bundle_realtime_spec.rb +18 -18
- data/spec/lib/indexed/bundle_spec.rb +21 -21
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +9 -9
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +8 -8
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +16 -16
- data/spec/lib/indexers/base_spec.rb +6 -25
- data/spec/lib/indexes_spec.rb +33 -22
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +2 -2
- data/spec/lib/indexing/bundle_spec.rb +27 -28
- data/spec/lib/sources/wrappers/base_spec.rb +7 -7
- data/spec/lib/sources/wrappers/location_spec.rb +8 -8
- metadata +48 -38
- data/lib/picky/indexed/bundle.rb +0 -125
- data/lib/picky/indexed/bundle_realtime.rb +0 -76
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +0 -47
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +0 -47
- data/lib/picky/indexed/wrappers/category/location.rb +0 -31
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/indexing/bundle.rb +0 -183
- data/lib/picky/indexing/wrappers/category/location.rb +0 -29
data/aux/picky/cli.rb
CHANGED
@@ -100,8 +100,10 @@ module Picky
|
|
100
100
|
|
101
101
|
# Maps commands to the other gem's command.
|
102
102
|
#
|
103
|
+
# TODO Try to load the other gems and get the commands dynamically.
|
104
|
+
#
|
103
105
|
@@mapping = {
|
104
|
-
:generate => [Generate, :'{sinatra_client,unicorn_server,
|
106
|
+
:generate => [Generate, :'{client,server,sinatra_client,unicorn_server,all_in_one}', :'app_directory_name'],
|
105
107
|
:help => [Help],
|
106
108
|
:live => [Live, 'host:port/path (default: localhost:8080/admin)', 'port (default: 4568)'],
|
107
109
|
:search => [Search, :url_or_path, 'amount of ids (default 20)'],
|
@@ -4,6 +4,22 @@ module Picky
|
|
4
4
|
|
5
5
|
class Backend
|
6
6
|
|
7
|
+
attr_reader :inverted,
|
8
|
+
:weights,
|
9
|
+
:similarity,
|
10
|
+
:configuration
|
11
|
+
|
12
|
+
def initialize options = {}
|
13
|
+
@inverted = options[:inverted]
|
14
|
+
@weights = options[:weights]
|
15
|
+
@similarity = options[:similarity]
|
16
|
+
@configuration = options[:configuration]
|
17
|
+
end
|
18
|
+
|
19
|
+
def extract_lambda_or thing, *args
|
20
|
+
thing && (thing.respond_to?(:call) && thing.call(*args) || thing)
|
21
|
+
end
|
22
|
+
|
7
23
|
#
|
8
24
|
#
|
9
25
|
def to_s
|
@@ -21,13 +21,16 @@ module Picky
|
|
21
21
|
# An index cache takes a path, without file extension,
|
22
22
|
# which will be provided by the subclasses.
|
23
23
|
#
|
24
|
-
def initialize cache_path
|
24
|
+
def initialize cache_path, options = {}
|
25
25
|
@cache_path = "#{cache_path}.file.#{extension}"
|
26
26
|
|
27
27
|
# This is the mapping file with the in-memory hash for the
|
28
28
|
# file position/offset mappings.
|
29
29
|
#
|
30
30
|
@mapping_file = Memory::JSON.new "#{cache_path}.file_mapping.#{extension}"
|
31
|
+
|
32
|
+
@empty = options[:empty]
|
33
|
+
@initial = options[:initial]
|
31
34
|
end
|
32
35
|
|
33
36
|
# The default extension for index files is "index".
|
@@ -36,20 +39,20 @@ module Picky
|
|
36
39
|
:index
|
37
40
|
end
|
38
41
|
|
42
|
+
# The empty index that is used for putting the index
|
43
|
+
# together before it is saved into the files.
|
44
|
+
#
|
45
|
+
def empty
|
46
|
+
@empty && @empty.clone || {}
|
47
|
+
end
|
48
|
+
|
39
49
|
# The initial content before loading.
|
40
50
|
#
|
41
51
|
# Note: We could also load the mapping file
|
42
52
|
# as in #load.
|
43
53
|
#
|
44
54
|
def initial
|
45
|
-
nil
|
46
|
-
end
|
47
|
-
|
48
|
-
# The empty index that is used for putting the index
|
49
|
-
# together before it is saved into the files.
|
50
|
-
#
|
51
|
-
def empty
|
52
|
-
{}
|
55
|
+
@initial && @initial.clone || nil
|
53
56
|
end
|
54
57
|
|
55
58
|
# Will copy the index file to a location that
|
@@ -96,6 +99,12 @@ module Picky
|
|
96
99
|
size_of(cache_path) > 0
|
97
100
|
end
|
98
101
|
|
102
|
+
#
|
103
|
+
#
|
104
|
+
def to_s
|
105
|
+
"#{self.class}(#{cache_path},#{mapping_file.cache_path})"
|
106
|
+
end
|
107
|
+
|
99
108
|
end
|
100
109
|
|
101
110
|
end
|
data/lib/picky/backends/file.rb
CHANGED
@@ -14,25 +14,29 @@ module Picky
|
|
14
14
|
# [:token] # => [id, id, id, id, id] (an array of ids)
|
15
15
|
#
|
16
16
|
def create_inverted bundle
|
17
|
-
|
17
|
+
extract_lambda_or(inverted, bundle) ||
|
18
|
+
JSON.new(bundle.index_path(:inverted))
|
18
19
|
end
|
19
20
|
# Returns an object that on #initial, #load returns an object that responds to:
|
20
21
|
# [:token] # => 1.23 (a weight)
|
21
22
|
#
|
22
23
|
def create_weights bundle
|
23
|
-
|
24
|
+
extract_lambda_or(weights, bundle) ||
|
25
|
+
JSON.new(bundle.index_path(:weights))
|
24
26
|
end
|
25
27
|
# Returns an object that on #initial, #load returns an object that responds to:
|
26
28
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
27
29
|
#
|
28
30
|
def create_similarity bundle
|
29
|
-
|
31
|
+
extract_lambda_or(similarity, bundle) ||
|
32
|
+
JSON.new(bundle.index_path(:similarity))
|
30
33
|
end
|
31
34
|
# Returns an object that on #initial, #load returns an object that responds to:
|
32
35
|
# [:key] # => value (a value for this config key)
|
33
36
|
#
|
34
37
|
def create_configuration bundle
|
35
|
-
|
38
|
+
extract_lambda_or(configuration, bundle) ||
|
39
|
+
JSON.new(bundle.index_path(:configuration))
|
36
40
|
end
|
37
41
|
|
38
42
|
# Currently, the loaded ids are intersected using
|
@@ -22,8 +22,10 @@ module Picky
|
|
22
22
|
# An index cache takes a path, without file extension,
|
23
23
|
# which will be provided by the subclasses.
|
24
24
|
#
|
25
|
-
def initialize cache_path
|
25
|
+
def initialize cache_path, options = {}
|
26
26
|
@cache_path = "#{cache_path}.memory.#{extension}"
|
27
|
+
@empty = options[:empty]
|
28
|
+
@initial = options[:initial]
|
27
29
|
end
|
28
30
|
|
29
31
|
# The default extension for index files is "index".
|
@@ -36,7 +38,13 @@ module Picky
|
|
36
38
|
# together before it is dumped into the files.
|
37
39
|
#
|
38
40
|
def empty
|
39
|
-
{}
|
41
|
+
@empty && @empty.clone || {}
|
42
|
+
end
|
43
|
+
|
44
|
+
# The initial content before loading from file.
|
45
|
+
#
|
46
|
+
def initial
|
47
|
+
@initial && @initial.clone || {}
|
40
48
|
end
|
41
49
|
|
42
50
|
# Will copy the index file to a location that
|
@@ -14,12 +14,6 @@ module Picky
|
|
14
14
|
:json
|
15
15
|
end
|
16
16
|
|
17
|
-
# The initial content before loading.
|
18
|
-
#
|
19
|
-
def initial
|
20
|
-
{}
|
21
|
-
end
|
22
|
-
|
23
17
|
# Loads the index hash from json format.
|
24
18
|
#
|
25
19
|
def load
|
@@ -29,6 +23,7 @@ module Picky
|
|
29
23
|
# Dumps the index hash in json format.
|
30
24
|
#
|
31
25
|
def dump hash
|
26
|
+
create_directory cache_path
|
32
27
|
hash.dump_json cache_path
|
33
28
|
end
|
34
29
|
|
@@ -14,12 +14,6 @@ module Picky
|
|
14
14
|
:dump
|
15
15
|
end
|
16
16
|
|
17
|
-
# The initial content before loading.
|
18
|
-
#
|
19
|
-
def initial
|
20
|
-
{}
|
21
|
-
end
|
22
|
-
|
23
17
|
# Loads the index hash from marshal format.
|
24
18
|
#
|
25
19
|
def load
|
@@ -29,6 +23,7 @@ module Picky
|
|
29
23
|
# Dumps the index hash in marshal format.
|
30
24
|
#
|
31
25
|
def dump hash
|
26
|
+
create_directory cache_path
|
32
27
|
hash.dump_marshal cache_path
|
33
28
|
end
|
34
29
|
|
@@ -8,25 +8,29 @@ module Picky
|
|
8
8
|
# [:token] # => [id, id, id, id, id] (an array of ids)
|
9
9
|
#
|
10
10
|
def create_inverted bundle
|
11
|
-
|
11
|
+
extract_lambda_or(inverted, bundle) ||
|
12
|
+
JSON.new(bundle.index_path(:inverted))
|
12
13
|
end
|
13
14
|
# Returns an object that on #initial, #load returns an object that responds to:
|
14
15
|
# [:token] # => 1.23 (a weight)
|
15
16
|
#
|
16
17
|
def create_weights bundle
|
17
|
-
|
18
|
+
extract_lambda_or(weights, bundle) ||
|
19
|
+
JSON.new(bundle.index_path(:weights))
|
18
20
|
end
|
19
21
|
# Returns an object that on #initial, #load returns an object that responds to:
|
20
22
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
21
23
|
#
|
22
24
|
def create_similarity bundle
|
23
|
-
|
25
|
+
extract_lambda_or(similarity, bundle) ||
|
26
|
+
Marshal.new(bundle.index_path(:similarity))
|
24
27
|
end
|
25
28
|
# Returns an object that on #initial, #load returns an object that responds to:
|
26
29
|
# [:key] # => value (a value for this config key)
|
27
30
|
#
|
28
31
|
def create_configuration bundle
|
29
|
-
|
32
|
+
extract_lambda_or(configuration, bundle) ||
|
33
|
+
JSON.new(bundle.index_path(:configuration))
|
30
34
|
end
|
31
35
|
|
32
36
|
# Returns the result ids for the allocation.
|
@@ -18,9 +18,19 @@ module Picky
|
|
18
18
|
# An index cache takes a path, without file extension,
|
19
19
|
# which will be provided by the subclasses.
|
20
20
|
#
|
21
|
-
def initialize client, namespace
|
21
|
+
def initialize client, namespace, options = {}
|
22
22
|
@client = client
|
23
23
|
@namespace = namespace
|
24
|
+
|
25
|
+
@empty = options[:empty]
|
26
|
+
@initial = options[:initial]
|
27
|
+
end
|
28
|
+
|
29
|
+
# The empty index that is used for putting the index
|
30
|
+
# together.
|
31
|
+
#
|
32
|
+
def empty
|
33
|
+
@empty && @empty.clone || {}
|
24
34
|
end
|
25
35
|
|
26
36
|
# The initial content before loading.
|
@@ -29,7 +39,7 @@ module Picky
|
|
29
39
|
# this just returns the same thing as #load.
|
30
40
|
#
|
31
41
|
def initial
|
32
|
-
self
|
42
|
+
@initial && @initial.clone || self
|
33
43
|
end
|
34
44
|
|
35
45
|
# Returns itself.
|
@@ -38,13 +48,6 @@ module Picky
|
|
38
48
|
self
|
39
49
|
end
|
40
50
|
|
41
|
-
# The empty index that is used for putting the index
|
42
|
-
# together.
|
43
|
-
#
|
44
|
-
def empty
|
45
|
-
{}
|
46
|
-
end
|
47
|
-
|
48
51
|
# We do not use Redis to retrieve data.
|
49
52
|
#
|
50
53
|
def retrieve
|
data/lib/picky/backends/redis.rb
CHANGED
@@ -9,6 +9,8 @@ module Picky
|
|
9
9
|
attr_reader :client
|
10
10
|
|
11
11
|
def initialize options = {}
|
12
|
+
super options
|
13
|
+
|
12
14
|
require 'redis'
|
13
15
|
@client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
|
14
16
|
rescue LoadError => e
|
@@ -19,25 +21,29 @@ module Picky
|
|
19
21
|
# [:token] # => [id, id, id, id, id] (an array of ids)
|
20
22
|
#
|
21
23
|
def create_inverted bundle
|
22
|
-
|
24
|
+
extract_lambda_or(inverted, client, bundle) ||
|
25
|
+
List.new(client, "#{bundle.identifier}:inverted")
|
23
26
|
end
|
24
27
|
# Returns an object that on #initial, #load returns an object that responds to:
|
25
28
|
# [:token] # => 1.23 (a weight)
|
26
29
|
#
|
27
30
|
def create_weights bundle
|
28
|
-
|
31
|
+
extract_lambda_or(weights, client, bundle) ||
|
32
|
+
Float.new(client, "#{bundle.identifier}:weights")
|
29
33
|
end
|
30
34
|
# Returns an object that on #initial, #load returns an object that responds to:
|
31
35
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
32
36
|
#
|
33
37
|
def create_similarity bundle
|
34
|
-
|
38
|
+
extract_lambda_or(similarity, client, bundle) ||
|
39
|
+
List.new(client, "#{bundle.identifier}:similarity")
|
35
40
|
end
|
36
41
|
# Returns an object that on #initial, #load returns an object that responds to:
|
37
42
|
# [:key] # => value (a value for this config key)
|
38
43
|
#
|
39
44
|
def create_configuration bundle
|
40
|
-
|
45
|
+
extract_lambda_or(configuration, client, bundle) ||
|
46
|
+
String.new(client, "#{bundle.identifier}:configuration")
|
41
47
|
end
|
42
48
|
|
43
49
|
# Returns the result ids for the allocation.
|
data/lib/picky/bundle.rb
CHANGED
@@ -47,6 +47,11 @@ module Picky
|
|
47
47
|
@name = name
|
48
48
|
@category = category
|
49
49
|
|
50
|
+
# TODO Tidy up a bit.
|
51
|
+
#
|
52
|
+
@key_format = options[:key_format]
|
53
|
+
@prepared = Backends::Memory::Text.new category.prepared_index_path
|
54
|
+
|
50
55
|
@weights_strategy = weights_strategy
|
51
56
|
@partial_strategy = partial_strategy
|
52
57
|
@similarity_strategy = similarity_strategy
|
@@ -59,6 +64,15 @@ module Picky
|
|
59
64
|
@backend_weights = backend.create_weights self
|
60
65
|
@backend_similarity = backend.create_similarity self
|
61
66
|
@backend_configuration = backend.create_configuration self
|
67
|
+
|
68
|
+
# Initial indexes.
|
69
|
+
#
|
70
|
+
@inverted = @backend_inverted.initial
|
71
|
+
@weights = @backend_weights.initial
|
72
|
+
@similarity = @backend_similarity.initial
|
73
|
+
@configuration = @backend_configuration.initial
|
74
|
+
|
75
|
+
@realtime_mapping = {} # id -> ary of syms. TODO Always instantiate?
|
62
76
|
end
|
63
77
|
def identifier
|
64
78
|
"#{category.identifier}:#{name}"
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
# An indexed bundle is a number of memory/redis
|
4
|
+
# indexes that compose the indexes for a single category:
|
5
|
+
# * core (inverted) index
|
6
|
+
# * weights index
|
7
|
+
# * similarity index
|
8
|
+
# * index configuration
|
9
|
+
#
|
10
|
+
# Indexed refers to them being indexed.
|
11
|
+
# This class notably offers the methods:
|
12
|
+
# * load
|
13
|
+
# * clear
|
14
|
+
#
|
15
|
+
# To (re)load or clear the current indexes.
|
16
|
+
#
|
17
|
+
class Bundle
|
18
|
+
|
19
|
+
attr_reader :realtime_mapping
|
20
|
+
|
21
|
+
# Get the ids for the given symbol.
|
22
|
+
#
|
23
|
+
# Returns a (potentially empty) array of ids.
|
24
|
+
#
|
25
|
+
def ids sym
|
26
|
+
@inverted[sym] || []
|
27
|
+
end
|
28
|
+
|
29
|
+
# Get a weight for the given symbol.
|
30
|
+
#
|
31
|
+
# Returns a number, or nil.
|
32
|
+
#
|
33
|
+
def weight sym
|
34
|
+
@weights[sym]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Get settings for this bundle.
|
38
|
+
#
|
39
|
+
# Returns an object.
|
40
|
+
#
|
41
|
+
def [] sym
|
42
|
+
@configuration[sym]
|
43
|
+
end
|
44
|
+
|
45
|
+
# Loads all indexes.
|
46
|
+
#
|
47
|
+
# Loading loads index objects from the backend.
|
48
|
+
# They should each respond to [] and return something appropriate.
|
49
|
+
#
|
50
|
+
def load
|
51
|
+
load_inverted
|
52
|
+
load_weights
|
53
|
+
load_similarity
|
54
|
+
load_configuration
|
55
|
+
end
|
56
|
+
|
57
|
+
# Loads the core index.
|
58
|
+
#
|
59
|
+
def load_inverted
|
60
|
+
self.inverted = @backend_inverted.load
|
61
|
+
end
|
62
|
+
# Loads the weights index.
|
63
|
+
#
|
64
|
+
def load_weights
|
65
|
+
self.weights = @backend_weights.load
|
66
|
+
end
|
67
|
+
# Loads the similarity index.
|
68
|
+
#
|
69
|
+
def load_similarity
|
70
|
+
self.similarity = @backend_similarity.load
|
71
|
+
end
|
72
|
+
# Loads the configuration.
|
73
|
+
#
|
74
|
+
def load_configuration
|
75
|
+
self.configuration = @backend_configuration.load
|
76
|
+
end
|
77
|
+
|
78
|
+
# Clears all indexes.
|
79
|
+
#
|
80
|
+
def clear
|
81
|
+
clear_inverted
|
82
|
+
clear_weights
|
83
|
+
clear_similarity
|
84
|
+
clear_configuration
|
85
|
+
end
|
86
|
+
|
87
|
+
# Clears the core index.
|
88
|
+
#
|
89
|
+
def clear_inverted
|
90
|
+
inverted.clear
|
91
|
+
end
|
92
|
+
# Clears the weights index.
|
93
|
+
#
|
94
|
+
def clear_weights
|
95
|
+
weights.clear
|
96
|
+
end
|
97
|
+
# Clears the similarity index.
|
98
|
+
#
|
99
|
+
def clear_similarity
|
100
|
+
similarity.clear
|
101
|
+
end
|
102
|
+
# Clears the configuration.
|
103
|
+
#
|
104
|
+
def clear_configuration
|
105
|
+
configuration.clear
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
# A Bundle is a number of indexes
|
4
|
+
# per [index, category] combination.
|
5
|
+
#
|
6
|
+
# At most, there are three indexes:
|
7
|
+
# * *core* index (always used)
|
8
|
+
# * *weights* index (always used)
|
9
|
+
# * *similarity* index (used with similarity)
|
10
|
+
#
|
11
|
+
# In Picky, indexing is separated from the index
|
12
|
+
# handling itself through a parallel structure.
|
13
|
+
#
|
14
|
+
# Both use methods provided by this base class, but
|
15
|
+
# have very different goals:
|
16
|
+
#
|
17
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
18
|
+
# and providing helper functions to e.g. check the indexes.
|
19
|
+
#
|
20
|
+
# * *Index*::*Bundle* is concerned with loading these index files into
|
21
|
+
# memory and looking up search data as fast as possible.
|
22
|
+
#
|
23
|
+
# This is the indexing bundle.
|
24
|
+
#
|
25
|
+
# It does all menial tasks that have nothing to do
|
26
|
+
# with the actual index running etc.
|
27
|
+
# (Find these in Indexed::Bundle)
|
28
|
+
#
|
29
|
+
class Bundle
|
30
|
+
|
31
|
+
attr_reader :backend,
|
32
|
+
:prepared
|
33
|
+
|
34
|
+
# When indexing, clear only clears the inverted index.
|
35
|
+
#
|
36
|
+
delegate :clear, :to => :inverted
|
37
|
+
|
38
|
+
# Sets up a piece of the index for the given token.
|
39
|
+
#
|
40
|
+
def initialize_inverted_index_for token
|
41
|
+
self.inverted[token] ||= []
|
42
|
+
end
|
43
|
+
|
44
|
+
# Generation
|
45
|
+
#
|
46
|
+
|
47
|
+
# This method
|
48
|
+
# * Loads the base index from the "prepared..." file.
|
49
|
+
# * Generates derived indexes.
|
50
|
+
# * Dumps all the indexes into files.
|
51
|
+
#
|
52
|
+
def generate_caches_from_source
|
53
|
+
load_from_prepared_index_file
|
54
|
+
generate_caches_from_memory
|
55
|
+
end
|
56
|
+
# Generates derived indexes from the index and dumps.
|
57
|
+
#
|
58
|
+
# Note: assumes that there is something in the index
|
59
|
+
#
|
60
|
+
def generate_caches_from_memory
|
61
|
+
cache_from_memory_generation_message
|
62
|
+
generate_derived
|
63
|
+
end
|
64
|
+
def cache_from_memory_generation_message
|
65
|
+
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
66
|
+
end
|
67
|
+
|
68
|
+
# Generates the weights and similarity from the main index.
|
69
|
+
#
|
70
|
+
def generate_derived
|
71
|
+
generate_weights
|
72
|
+
generate_similarity
|
73
|
+
end
|
74
|
+
|
75
|
+
# "Empties" the index(es) by getting a new empty
|
76
|
+
# internal backend instance.
|
77
|
+
#
|
78
|
+
def empty
|
79
|
+
empty_inverted
|
80
|
+
empty_configuration
|
81
|
+
end
|
82
|
+
def empty_inverted
|
83
|
+
@inverted = @backend_inverted.empty
|
84
|
+
end
|
85
|
+
def empty_configuration
|
86
|
+
@configuration = @backend_configuration.empty
|
87
|
+
end
|
88
|
+
|
89
|
+
# Load the data from the prepared index file.
|
90
|
+
#
|
91
|
+
def load_from_prepared_index_file
|
92
|
+
load_from_prepared_index_generation_message
|
93
|
+
retrieve
|
94
|
+
end
|
95
|
+
def load_from_prepared_index_generation_message
|
96
|
+
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
97
|
+
end
|
98
|
+
# Retrieves the prepared index data into the index.
|
99
|
+
#
|
100
|
+
# This is in preparation for generating
|
101
|
+
# derived indexes (like weights, similarity)
|
102
|
+
# and later dumping the optimized index.
|
103
|
+
#
|
104
|
+
# TODO Move this out to the category?
|
105
|
+
#
|
106
|
+
def retrieve
|
107
|
+
format = key_format || :to_i
|
108
|
+
empty_inverted
|
109
|
+
prepared.retrieve do |id, token|
|
110
|
+
initialize_inverted_index_for token
|
111
|
+
self.inverted[token] << id.send(format)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# Generates a new index (writes its index) using the
|
116
|
+
# partial caching strategy of this bundle.
|
117
|
+
#
|
118
|
+
def generate_partial
|
119
|
+
generator = Generators::PartialGenerator.new self.inverted
|
120
|
+
self.inverted = generator.generate self.partial_strategy
|
121
|
+
end
|
122
|
+
# Generate a partial index from the given exact inverted index.
|
123
|
+
#
|
124
|
+
def generate_partial_from exact_inverted_index
|
125
|
+
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
126
|
+
self.inverted = exact_inverted_index
|
127
|
+
self.generate_partial
|
128
|
+
self
|
129
|
+
end
|
130
|
+
# Generates a new weights index (writes its index) using the
|
131
|
+
# given weight caching strategy.
|
132
|
+
#
|
133
|
+
def generate_weights
|
134
|
+
generator = Generators::WeightsGenerator.new self.inverted
|
135
|
+
self.weights = generator.generate self.weights_strategy
|
136
|
+
end
|
137
|
+
# Generates a new similarity index (writes its index) using the
|
138
|
+
# given similarity caching strategy.
|
139
|
+
#
|
140
|
+
def generate_similarity
|
141
|
+
generator = Generators::SimilarityGenerator.new self.inverted
|
142
|
+
self.similarity = generator.generate self.similarity_strategy
|
143
|
+
end
|
144
|
+
|
145
|
+
# Saves the indexes in a dump file.
|
146
|
+
#
|
147
|
+
def dump
|
148
|
+
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
149
|
+
dump_inverted
|
150
|
+
dump_similarity
|
151
|
+
dump_weights
|
152
|
+
dump_configuration
|
153
|
+
end
|
154
|
+
# Dumps the core index.
|
155
|
+
#
|
156
|
+
def dump_inverted
|
157
|
+
@backend_inverted.dump self.inverted
|
158
|
+
end
|
159
|
+
# Dumps the weights index.
|
160
|
+
#
|
161
|
+
def dump_weights
|
162
|
+
@backend_weights.dump self.weights
|
163
|
+
end
|
164
|
+
# Dumps the similarity index.
|
165
|
+
#
|
166
|
+
def dump_similarity
|
167
|
+
@backend_similarity.dump self.similarity
|
168
|
+
end
|
169
|
+
# Dumps the configuration.
|
170
|
+
#
|
171
|
+
def dump_configuration
|
172
|
+
@backend_configuration.dump self.configuration
|
173
|
+
end
|
174
|
+
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|