picky 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/aux/picky/cli.rb +3 -1
- data/lib/picky/backends/backend.rb +16 -0
- data/lib/picky/backends/file/basic.rb +18 -9
- data/lib/picky/backends/file/json.rb +1 -0
- data/lib/picky/backends/file.rb +8 -4
- data/lib/picky/backends/helpers/file.rb +6 -0
- data/lib/picky/backends/memory/basic.rb +10 -2
- data/lib/picky/backends/memory/json.rb +1 -6
- data/lib/picky/backends/memory/marshal.rb +1 -6
- data/lib/picky/backends/memory/text.rb +1 -0
- data/lib/picky/backends/memory.rb +8 -4
- data/lib/picky/backends/redis/basic.rb +12 -9
- data/lib/picky/backends/redis.rb +10 -4
- data/lib/picky/bundle.rb +14 -0
- data/lib/picky/bundle_indexed.rb +110 -0
- data/lib/picky/bundle_indexing.rb +177 -0
- data/lib/picky/bundle_realtime.rb +80 -0
- data/lib/picky/categories.rb +5 -1
- data/lib/picky/category.rb +12 -20
- data/lib/picky/category_indexed.rb +3 -6
- data/lib/picky/category_indexing.rb +19 -18
- data/lib/picky/category_realtime.rb +5 -10
- data/lib/picky/extensions/symbol.rb +1 -1
- data/lib/picky/generators/partial/default.rb +1 -1
- data/lib/picky/generators/partial/postfix.rb +30 -0
- data/lib/picky/generators/partial/substring.rb +8 -2
- data/lib/picky/index.rb +3 -3
- data/lib/picky/index_indexing.rb +3 -2
- data/lib/picky/indexers/base.rb +0 -8
- data/lib/picky/indexers/parallel.rb +1 -1
- data/lib/picky/loader.rb +15 -15
- data/lib/picky/query/qualifier_category_mapper.rb +1 -1
- data/lib/picky/rack/harakiri.rb +3 -1
- data/lib/picky/sources/db.rb +11 -0
- data/lib/picky/statistics.rb +2 -2
- data/lib/picky/tokenizer.rb +1 -1
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/picky/wrappers/bundle/calculation.rb +45 -0
- data/lib/picky/wrappers/bundle/delegators.rb +69 -0
- data/lib/picky/wrappers/bundle/exact_partial.rb +38 -0
- data/lib/picky/{indexed/wrappers → wrappers}/bundle/location.rb +6 -4
- data/lib/picky/wrappers/bundle/wrapper.rb +29 -0
- data/lib/picky/wrappers/category/exact_first.rb +55 -0
- data/lib/picky/wrappers/category/location.rb +33 -0
- data/lib/picky/{sources/wrappers → wrappers/sources}/base.rb +7 -3
- data/lib/picky/{sources/wrappers → wrappers/sources}/location.rb +3 -3
- data/lib/picky.rb +10 -11
- data/spec/aux/picky/cli_spec.rb +5 -5
- data/spec/lib/backends/backend_spec.rb +39 -0
- data/spec/lib/backends/file/basic_spec.rb +59 -0
- data/spec/lib/backends/file_spec.rb +105 -0
- data/spec/lib/backends/memory/basic_spec.rb +43 -15
- data/spec/lib/backends/memory_spec.rb +108 -54
- data/spec/lib/backends/redis/basic_spec.rb +81 -57
- data/spec/lib/backends/redis_spec.rb +120 -66
- data/spec/lib/category_indexed_spec.rb +12 -12
- data/spec/lib/category_indexing_spec.rb +23 -23
- data/spec/lib/category_spec.rb +14 -14
- data/spec/lib/cores_spec.rb +2 -2
- data/spec/lib/extensions/object_spec.rb +7 -7
- data/spec/lib/generators/partial/postfix_spec.rb +131 -0
- data/spec/lib/generators/partial/substring_spec.rb +29 -4
- data/spec/lib/generators/weights_generator_spec.rb +3 -3
- data/spec/lib/index_indexing_spec.rb +11 -15
- data/spec/lib/index_spec.rb +8 -8
- data/spec/lib/indexed/bundle_realtime_spec.rb +18 -18
- data/spec/lib/indexed/bundle_spec.rb +21 -21
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +9 -9
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +8 -8
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +16 -16
- data/spec/lib/indexers/base_spec.rb +6 -25
- data/spec/lib/indexes_spec.rb +33 -22
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +2 -2
- data/spec/lib/indexing/bundle_spec.rb +27 -28
- data/spec/lib/sources/wrappers/base_spec.rb +7 -7
- data/spec/lib/sources/wrappers/location_spec.rb +8 -8
- metadata +48 -38
- data/lib/picky/indexed/bundle.rb +0 -125
- data/lib/picky/indexed/bundle_realtime.rb +0 -76
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +0 -47
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +0 -47
- data/lib/picky/indexed/wrappers/category/location.rb +0 -31
- data/lib/picky/indexed/wrappers/exact_first.rb +0 -59
- data/lib/picky/indexing/bundle.rb +0 -183
- data/lib/picky/indexing/wrappers/category/location.rb +0 -29
data/aux/picky/cli.rb
CHANGED
@@ -100,8 +100,10 @@ module Picky
|
|
100
100
|
|
101
101
|
# Maps commands to the other gem's command.
|
102
102
|
#
|
103
|
+
# TODO Try to load the other gems and get the commands dynamically.
|
104
|
+
#
|
103
105
|
@@mapping = {
|
104
|
-
:generate => [Generate, :'{sinatra_client,unicorn_server,
|
106
|
+
:generate => [Generate, :'{client,server,sinatra_client,unicorn_server,all_in_one}', :'app_directory_name'],
|
105
107
|
:help => [Help],
|
106
108
|
:live => [Live, 'host:port/path (default: localhost:8080/admin)', 'port (default: 4568)'],
|
107
109
|
:search => [Search, :url_or_path, 'amount of ids (default 20)'],
|
@@ -4,6 +4,22 @@ module Picky
|
|
4
4
|
|
5
5
|
class Backend
|
6
6
|
|
7
|
+
attr_reader :inverted,
|
8
|
+
:weights,
|
9
|
+
:similarity,
|
10
|
+
:configuration
|
11
|
+
|
12
|
+
def initialize options = {}
|
13
|
+
@inverted = options[:inverted]
|
14
|
+
@weights = options[:weights]
|
15
|
+
@similarity = options[:similarity]
|
16
|
+
@configuration = options[:configuration]
|
17
|
+
end
|
18
|
+
|
19
|
+
def extract_lambda_or thing, *args
|
20
|
+
thing && (thing.respond_to?(:call) && thing.call(*args) || thing)
|
21
|
+
end
|
22
|
+
|
7
23
|
#
|
8
24
|
#
|
9
25
|
def to_s
|
@@ -21,13 +21,16 @@ module Picky
|
|
21
21
|
# An index cache takes a path, without file extension,
|
22
22
|
# which will be provided by the subclasses.
|
23
23
|
#
|
24
|
-
def initialize cache_path
|
24
|
+
def initialize cache_path, options = {}
|
25
25
|
@cache_path = "#{cache_path}.file.#{extension}"
|
26
26
|
|
27
27
|
# This is the mapping file with the in-memory hash for the
|
28
28
|
# file position/offset mappings.
|
29
29
|
#
|
30
30
|
@mapping_file = Memory::JSON.new "#{cache_path}.file_mapping.#{extension}"
|
31
|
+
|
32
|
+
@empty = options[:empty]
|
33
|
+
@initial = options[:initial]
|
31
34
|
end
|
32
35
|
|
33
36
|
# The default extension for index files is "index".
|
@@ -36,20 +39,20 @@ module Picky
|
|
36
39
|
:index
|
37
40
|
end
|
38
41
|
|
42
|
+
# The empty index that is used for putting the index
|
43
|
+
# together before it is saved into the files.
|
44
|
+
#
|
45
|
+
def empty
|
46
|
+
@empty && @empty.clone || {}
|
47
|
+
end
|
48
|
+
|
39
49
|
# The initial content before loading.
|
40
50
|
#
|
41
51
|
# Note: We could also load the mapping file
|
42
52
|
# as in #load.
|
43
53
|
#
|
44
54
|
def initial
|
45
|
-
nil
|
46
|
-
end
|
47
|
-
|
48
|
-
# The empty index that is used for putting the index
|
49
|
-
# together before it is saved into the files.
|
50
|
-
#
|
51
|
-
def empty
|
52
|
-
{}
|
55
|
+
@initial && @initial.clone || nil
|
53
56
|
end
|
54
57
|
|
55
58
|
# Will copy the index file to a location that
|
@@ -96,6 +99,12 @@ module Picky
|
|
96
99
|
size_of(cache_path) > 0
|
97
100
|
end
|
98
101
|
|
102
|
+
#
|
103
|
+
#
|
104
|
+
def to_s
|
105
|
+
"#{self.class}(#{cache_path},#{mapping_file.cache_path})"
|
106
|
+
end
|
107
|
+
|
99
108
|
end
|
100
109
|
|
101
110
|
end
|
data/lib/picky/backends/file.rb
CHANGED
@@ -14,25 +14,29 @@ module Picky
|
|
14
14
|
# [:token] # => [id, id, id, id, id] (an array of ids)
|
15
15
|
#
|
16
16
|
def create_inverted bundle
|
17
|
-
|
17
|
+
extract_lambda_or(inverted, bundle) ||
|
18
|
+
JSON.new(bundle.index_path(:inverted))
|
18
19
|
end
|
19
20
|
# Returns an object that on #initial, #load returns an object that responds to:
|
20
21
|
# [:token] # => 1.23 (a weight)
|
21
22
|
#
|
22
23
|
def create_weights bundle
|
23
|
-
|
24
|
+
extract_lambda_or(weights, bundle) ||
|
25
|
+
JSON.new(bundle.index_path(:weights))
|
24
26
|
end
|
25
27
|
# Returns an object that on #initial, #load returns an object that responds to:
|
26
28
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
27
29
|
#
|
28
30
|
def create_similarity bundle
|
29
|
-
|
31
|
+
extract_lambda_or(similarity, bundle) ||
|
32
|
+
JSON.new(bundle.index_path(:similarity))
|
30
33
|
end
|
31
34
|
# Returns an object that on #initial, #load returns an object that responds to:
|
32
35
|
# [:key] # => value (a value for this config key)
|
33
36
|
#
|
34
37
|
def create_configuration bundle
|
35
|
-
|
38
|
+
extract_lambda_or(configuration, bundle) ||
|
39
|
+
JSON.new(bundle.index_path(:configuration))
|
36
40
|
end
|
37
41
|
|
38
42
|
# Currently, the loaded ids are intersected using
|
@@ -22,8 +22,10 @@ module Picky
|
|
22
22
|
# An index cache takes a path, without file extension,
|
23
23
|
# which will be provided by the subclasses.
|
24
24
|
#
|
25
|
-
def initialize cache_path
|
25
|
+
def initialize cache_path, options = {}
|
26
26
|
@cache_path = "#{cache_path}.memory.#{extension}"
|
27
|
+
@empty = options[:empty]
|
28
|
+
@initial = options[:initial]
|
27
29
|
end
|
28
30
|
|
29
31
|
# The default extension for index files is "index".
|
@@ -36,7 +38,13 @@ module Picky
|
|
36
38
|
# together before it is dumped into the files.
|
37
39
|
#
|
38
40
|
def empty
|
39
|
-
{}
|
41
|
+
@empty && @empty.clone || {}
|
42
|
+
end
|
43
|
+
|
44
|
+
# The initial content before loading from file.
|
45
|
+
#
|
46
|
+
def initial
|
47
|
+
@initial && @initial.clone || {}
|
40
48
|
end
|
41
49
|
|
42
50
|
# Will copy the index file to a location that
|
@@ -14,12 +14,6 @@ module Picky
|
|
14
14
|
:json
|
15
15
|
end
|
16
16
|
|
17
|
-
# The initial content before loading.
|
18
|
-
#
|
19
|
-
def initial
|
20
|
-
{}
|
21
|
-
end
|
22
|
-
|
23
17
|
# Loads the index hash from json format.
|
24
18
|
#
|
25
19
|
def load
|
@@ -29,6 +23,7 @@ module Picky
|
|
29
23
|
# Dumps the index hash in json format.
|
30
24
|
#
|
31
25
|
def dump hash
|
26
|
+
create_directory cache_path
|
32
27
|
hash.dump_json cache_path
|
33
28
|
end
|
34
29
|
|
@@ -14,12 +14,6 @@ module Picky
|
|
14
14
|
:dump
|
15
15
|
end
|
16
16
|
|
17
|
-
# The initial content before loading.
|
18
|
-
#
|
19
|
-
def initial
|
20
|
-
{}
|
21
|
-
end
|
22
|
-
|
23
17
|
# Loads the index hash from marshal format.
|
24
18
|
#
|
25
19
|
def load
|
@@ -29,6 +23,7 @@ module Picky
|
|
29
23
|
# Dumps the index hash in marshal format.
|
30
24
|
#
|
31
25
|
def dump hash
|
26
|
+
create_directory cache_path
|
32
27
|
hash.dump_marshal cache_path
|
33
28
|
end
|
34
29
|
|
@@ -8,25 +8,29 @@ module Picky
|
|
8
8
|
# [:token] # => [id, id, id, id, id] (an array of ids)
|
9
9
|
#
|
10
10
|
def create_inverted bundle
|
11
|
-
|
11
|
+
extract_lambda_or(inverted, bundle) ||
|
12
|
+
JSON.new(bundle.index_path(:inverted))
|
12
13
|
end
|
13
14
|
# Returns an object that on #initial, #load returns an object that responds to:
|
14
15
|
# [:token] # => 1.23 (a weight)
|
15
16
|
#
|
16
17
|
def create_weights bundle
|
17
|
-
|
18
|
+
extract_lambda_or(weights, bundle) ||
|
19
|
+
JSON.new(bundle.index_path(:weights))
|
18
20
|
end
|
19
21
|
# Returns an object that on #initial, #load returns an object that responds to:
|
20
22
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
21
23
|
#
|
22
24
|
def create_similarity bundle
|
23
|
-
|
25
|
+
extract_lambda_or(similarity, bundle) ||
|
26
|
+
Marshal.new(bundle.index_path(:similarity))
|
24
27
|
end
|
25
28
|
# Returns an object that on #initial, #load returns an object that responds to:
|
26
29
|
# [:key] # => value (a value for this config key)
|
27
30
|
#
|
28
31
|
def create_configuration bundle
|
29
|
-
|
32
|
+
extract_lambda_or(configuration, bundle) ||
|
33
|
+
JSON.new(bundle.index_path(:configuration))
|
30
34
|
end
|
31
35
|
|
32
36
|
# Returns the result ids for the allocation.
|
@@ -18,9 +18,19 @@ module Picky
|
|
18
18
|
# An index cache takes a path, without file extension,
|
19
19
|
# which will be provided by the subclasses.
|
20
20
|
#
|
21
|
-
def initialize client, namespace
|
21
|
+
def initialize client, namespace, options = {}
|
22
22
|
@client = client
|
23
23
|
@namespace = namespace
|
24
|
+
|
25
|
+
@empty = options[:empty]
|
26
|
+
@initial = options[:initial]
|
27
|
+
end
|
28
|
+
|
29
|
+
# The empty index that is used for putting the index
|
30
|
+
# together.
|
31
|
+
#
|
32
|
+
def empty
|
33
|
+
@empty && @empty.clone || {}
|
24
34
|
end
|
25
35
|
|
26
36
|
# The initial content before loading.
|
@@ -29,7 +39,7 @@ module Picky
|
|
29
39
|
# this just returns the same thing as #load.
|
30
40
|
#
|
31
41
|
def initial
|
32
|
-
self
|
42
|
+
@initial && @initial.clone || self
|
33
43
|
end
|
34
44
|
|
35
45
|
# Returns itself.
|
@@ -38,13 +48,6 @@ module Picky
|
|
38
48
|
self
|
39
49
|
end
|
40
50
|
|
41
|
-
# The empty index that is used for putting the index
|
42
|
-
# together.
|
43
|
-
#
|
44
|
-
def empty
|
45
|
-
{}
|
46
|
-
end
|
47
|
-
|
48
51
|
# We do not use Redis to retrieve data.
|
49
52
|
#
|
50
53
|
def retrieve
|
data/lib/picky/backends/redis.rb
CHANGED
@@ -9,6 +9,8 @@ module Picky
|
|
9
9
|
attr_reader :client
|
10
10
|
|
11
11
|
def initialize options = {}
|
12
|
+
super options
|
13
|
+
|
12
14
|
require 'redis'
|
13
15
|
@client = options[:client] || ::Redis.new(:db => (options[:db] || 15))
|
14
16
|
rescue LoadError => e
|
@@ -19,25 +21,29 @@ module Picky
|
|
19
21
|
# [:token] # => [id, id, id, id, id] (an array of ids)
|
20
22
|
#
|
21
23
|
def create_inverted bundle
|
22
|
-
|
24
|
+
extract_lambda_or(inverted, client, bundle) ||
|
25
|
+
List.new(client, "#{bundle.identifier}:inverted")
|
23
26
|
end
|
24
27
|
# Returns an object that on #initial, #load returns an object that responds to:
|
25
28
|
# [:token] # => 1.23 (a weight)
|
26
29
|
#
|
27
30
|
def create_weights bundle
|
28
|
-
|
31
|
+
extract_lambda_or(weights, client, bundle) ||
|
32
|
+
Float.new(client, "#{bundle.identifier}:weights")
|
29
33
|
end
|
30
34
|
# Returns an object that on #initial, #load returns an object that responds to:
|
31
35
|
# [:encoded] # => [:original, :original] (an array of original symbols this similarity encoded thing maps to)
|
32
36
|
#
|
33
37
|
def create_similarity bundle
|
34
|
-
|
38
|
+
extract_lambda_or(similarity, client, bundle) ||
|
39
|
+
List.new(client, "#{bundle.identifier}:similarity")
|
35
40
|
end
|
36
41
|
# Returns an object that on #initial, #load returns an object that responds to:
|
37
42
|
# [:key] # => value (a value for this config key)
|
38
43
|
#
|
39
44
|
def create_configuration bundle
|
40
|
-
|
45
|
+
extract_lambda_or(configuration, client, bundle) ||
|
46
|
+
String.new(client, "#{bundle.identifier}:configuration")
|
41
47
|
end
|
42
48
|
|
43
49
|
# Returns the result ids for the allocation.
|
data/lib/picky/bundle.rb
CHANGED
@@ -47,6 +47,11 @@ module Picky
|
|
47
47
|
@name = name
|
48
48
|
@category = category
|
49
49
|
|
50
|
+
# TODO Tidy up a bit.
|
51
|
+
#
|
52
|
+
@key_format = options[:key_format]
|
53
|
+
@prepared = Backends::Memory::Text.new category.prepared_index_path
|
54
|
+
|
50
55
|
@weights_strategy = weights_strategy
|
51
56
|
@partial_strategy = partial_strategy
|
52
57
|
@similarity_strategy = similarity_strategy
|
@@ -59,6 +64,15 @@ module Picky
|
|
59
64
|
@backend_weights = backend.create_weights self
|
60
65
|
@backend_similarity = backend.create_similarity self
|
61
66
|
@backend_configuration = backend.create_configuration self
|
67
|
+
|
68
|
+
# Initial indexes.
|
69
|
+
#
|
70
|
+
@inverted = @backend_inverted.initial
|
71
|
+
@weights = @backend_weights.initial
|
72
|
+
@similarity = @backend_similarity.initial
|
73
|
+
@configuration = @backend_configuration.initial
|
74
|
+
|
75
|
+
@realtime_mapping = {} # id -> ary of syms. TODO Always instantiate?
|
62
76
|
end
|
63
77
|
def identifier
|
64
78
|
"#{category.identifier}:#{name}"
|
@@ -0,0 +1,110 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
# An indexed bundle is a number of memory/redis
|
4
|
+
# indexes that compose the indexes for a single category:
|
5
|
+
# * core (inverted) index
|
6
|
+
# * weights index
|
7
|
+
# * similarity index
|
8
|
+
# * index configuration
|
9
|
+
#
|
10
|
+
# Indexed refers to them being indexed.
|
11
|
+
# This class notably offers the methods:
|
12
|
+
# * load
|
13
|
+
# * clear
|
14
|
+
#
|
15
|
+
# To (re)load or clear the current indexes.
|
16
|
+
#
|
17
|
+
class Bundle
|
18
|
+
|
19
|
+
attr_reader :realtime_mapping
|
20
|
+
|
21
|
+
# Get the ids for the given symbol.
|
22
|
+
#
|
23
|
+
# Returns a (potentially empty) array of ids.
|
24
|
+
#
|
25
|
+
def ids sym
|
26
|
+
@inverted[sym] || []
|
27
|
+
end
|
28
|
+
|
29
|
+
# Get a weight for the given symbol.
|
30
|
+
#
|
31
|
+
# Returns a number, or nil.
|
32
|
+
#
|
33
|
+
def weight sym
|
34
|
+
@weights[sym]
|
35
|
+
end
|
36
|
+
|
37
|
+
# Get settings for this bundle.
|
38
|
+
#
|
39
|
+
# Returns an object.
|
40
|
+
#
|
41
|
+
def [] sym
|
42
|
+
@configuration[sym]
|
43
|
+
end
|
44
|
+
|
45
|
+
# Loads all indexes.
|
46
|
+
#
|
47
|
+
# Loading loads index objects from the backend.
|
48
|
+
# They should each respond to [] and return something appropriate.
|
49
|
+
#
|
50
|
+
def load
|
51
|
+
load_inverted
|
52
|
+
load_weights
|
53
|
+
load_similarity
|
54
|
+
load_configuration
|
55
|
+
end
|
56
|
+
|
57
|
+
# Loads the core index.
|
58
|
+
#
|
59
|
+
def load_inverted
|
60
|
+
self.inverted = @backend_inverted.load
|
61
|
+
end
|
62
|
+
# Loads the weights index.
|
63
|
+
#
|
64
|
+
def load_weights
|
65
|
+
self.weights = @backend_weights.load
|
66
|
+
end
|
67
|
+
# Loads the similarity index.
|
68
|
+
#
|
69
|
+
def load_similarity
|
70
|
+
self.similarity = @backend_similarity.load
|
71
|
+
end
|
72
|
+
# Loads the configuration.
|
73
|
+
#
|
74
|
+
def load_configuration
|
75
|
+
self.configuration = @backend_configuration.load
|
76
|
+
end
|
77
|
+
|
78
|
+
# Clears all indexes.
|
79
|
+
#
|
80
|
+
def clear
|
81
|
+
clear_inverted
|
82
|
+
clear_weights
|
83
|
+
clear_similarity
|
84
|
+
clear_configuration
|
85
|
+
end
|
86
|
+
|
87
|
+
# Clears the core index.
|
88
|
+
#
|
89
|
+
def clear_inverted
|
90
|
+
inverted.clear
|
91
|
+
end
|
92
|
+
# Clears the weights index.
|
93
|
+
#
|
94
|
+
def clear_weights
|
95
|
+
weights.clear
|
96
|
+
end
|
97
|
+
# Clears the similarity index.
|
98
|
+
#
|
99
|
+
def clear_similarity
|
100
|
+
similarity.clear
|
101
|
+
end
|
102
|
+
# Clears the configuration.
|
103
|
+
#
|
104
|
+
def clear_configuration
|
105
|
+
configuration.clear
|
106
|
+
end
|
107
|
+
|
108
|
+
end
|
109
|
+
|
110
|
+
end
|
@@ -0,0 +1,177 @@
|
|
1
|
+
module Picky
|
2
|
+
|
3
|
+
# A Bundle is a number of indexes
|
4
|
+
# per [index, category] combination.
|
5
|
+
#
|
6
|
+
# At most, there are three indexes:
|
7
|
+
# * *core* index (always used)
|
8
|
+
# * *weights* index (always used)
|
9
|
+
# * *similarity* index (used with similarity)
|
10
|
+
#
|
11
|
+
# In Picky, indexing is separated from the index
|
12
|
+
# handling itself through a parallel structure.
|
13
|
+
#
|
14
|
+
# Both use methods provided by this base class, but
|
15
|
+
# have very different goals:
|
16
|
+
#
|
17
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
18
|
+
# and providing helper functions to e.g. check the indexes.
|
19
|
+
#
|
20
|
+
# * *Index*::*Bundle* is concerned with loading these index files into
|
21
|
+
# memory and looking up search data as fast as possible.
|
22
|
+
#
|
23
|
+
# This is the indexing bundle.
|
24
|
+
#
|
25
|
+
# It does all menial tasks that have nothing to do
|
26
|
+
# with the actual index running etc.
|
27
|
+
# (Find these in the indexed part of Bundle, in bundle_indexed.rb)
|
28
|
+
#
|
29
|
+
class Bundle
|
30
|
+
|
31
|
+
attr_reader :backend,
|
32
|
+
:prepared
|
33
|
+
|
34
|
+
# When indexing, clear only clears the inverted index.
|
35
|
+
#
|
36
|
+
delegate :clear, :to => :inverted
|
37
|
+
|
38
|
+
# Sets up a piece of the index for the given token.
|
39
|
+
#
|
40
|
+
def initialize_inverted_index_for token
|
41
|
+
self.inverted[token] ||= []
|
42
|
+
end
|
43
|
+
|
44
|
+
# Generation
|
45
|
+
#
|
46
|
+
|
47
|
+
# This method
|
48
|
+
# * Loads the base index from the "prepared..." file.
|
49
|
+
# * Generates derived indexes.
|
50
|
+
# * Does not dump the indexes into files (see #dump).
|
51
|
+
#
|
52
|
+
def generate_caches_from_source
|
53
|
+
load_from_prepared_index_file
|
54
|
+
generate_caches_from_memory
|
55
|
+
end
|
56
|
+
# Generates derived indexes from the index (dumping is done separately by #dump).
|
57
|
+
#
|
58
|
+
# Note: assumes that there is something in the index
|
59
|
+
#
|
60
|
+
def generate_caches_from_memory
|
61
|
+
cache_from_memory_generation_message
|
62
|
+
generate_derived
|
63
|
+
end
|
64
|
+
def cache_from_memory_generation_message
|
65
|
+
timed_exclaim %Q{"#{identifier}": Caching from intermediate in-memory index.}
|
66
|
+
end
|
67
|
+
|
68
|
+
# Generates the weights and similarity from the main index.
|
69
|
+
#
|
70
|
+
def generate_derived
|
71
|
+
generate_weights
|
72
|
+
generate_similarity
|
73
|
+
end
|
74
|
+
|
75
|
+
# "Empties" the index(es) by getting a new empty
|
76
|
+
# internal backend instance.
|
77
|
+
#
|
78
|
+
def empty
|
79
|
+
empty_inverted
|
80
|
+
empty_configuration
|
81
|
+
end
|
82
|
+
def empty_inverted
|
83
|
+
@inverted = @backend_inverted.empty
|
84
|
+
end
|
85
|
+
def empty_configuration
|
86
|
+
@configuration = @backend_configuration.empty
|
87
|
+
end
|
88
|
+
|
89
|
+
# Loads the data from the prepared index file.
|
90
|
+
#
|
91
|
+
def load_from_prepared_index_file
|
92
|
+
load_from_prepared_index_generation_message
|
93
|
+
retrieve
|
94
|
+
end
|
95
|
+
def load_from_prepared_index_generation_message
|
96
|
+
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
97
|
+
end
|
98
|
+
# Retrieves the prepared index data into the index.
|
99
|
+
#
|
100
|
+
# This is in preparation for generating
|
101
|
+
# derived indexes (like weights, similarity)
|
102
|
+
# and later dumping the optimized index.
|
103
|
+
#
|
104
|
+
# TODO Move this out to the category?
|
105
|
+
#
|
106
|
+
def retrieve
|
107
|
+
format = key_format || :to_i
|
108
|
+
empty_inverted
|
109
|
+
prepared.retrieve do |id, token|
|
110
|
+
initialize_inverted_index_for token
|
111
|
+
self.inverted[token] << id.send(format)
|
112
|
+
end
|
113
|
+
end
|
114
|
+
|
115
|
+
# Generates a new index (writes its index) using the
|
116
|
+
# partial caching strategy of this bundle.
|
117
|
+
#
|
118
|
+
def generate_partial
|
119
|
+
generator = Generators::PartialGenerator.new self.inverted
|
120
|
+
self.inverted = generator.generate self.partial_strategy
|
121
|
+
end
|
122
|
+
# Generate a partial index from the given exact inverted index.
|
123
|
+
#
|
124
|
+
def generate_partial_from exact_inverted_index
|
125
|
+
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
126
|
+
self.inverted = exact_inverted_index
|
127
|
+
self.generate_partial
|
128
|
+
self
|
129
|
+
end
|
130
|
+
# Generates a new weights index (writes its index) using the
|
131
|
+
# given weight caching strategy.
|
132
|
+
#
|
133
|
+
def generate_weights
|
134
|
+
generator = Generators::WeightsGenerator.new self.inverted
|
135
|
+
self.weights = generator.generate self.weights_strategy
|
136
|
+
end
|
137
|
+
# Generates a new similarity index (writes its index) using the
|
138
|
+
# given similarity caching strategy.
|
139
|
+
#
|
140
|
+
def generate_similarity
|
141
|
+
generator = Generators::SimilarityGenerator.new self.inverted
|
142
|
+
self.similarity = generator.generate self.similarity_strategy
|
143
|
+
end
|
144
|
+
|
145
|
+
# Saves the indexes in a dump file.
|
146
|
+
#
|
147
|
+
def dump
|
148
|
+
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
149
|
+
dump_inverted
|
150
|
+
dump_similarity
|
151
|
+
dump_weights
|
152
|
+
dump_configuration
|
153
|
+
end
|
154
|
+
# Dumps the core index.
|
155
|
+
#
|
156
|
+
def dump_inverted
|
157
|
+
@backend_inverted.dump self.inverted
|
158
|
+
end
|
159
|
+
# Dumps the weights index.
|
160
|
+
#
|
161
|
+
def dump_weights
|
162
|
+
@backend_weights.dump self.weights
|
163
|
+
end
|
164
|
+
# Dumps the similarity index.
|
165
|
+
#
|
166
|
+
def dump_similarity
|
167
|
+
@backend_similarity.dump self.similarity
|
168
|
+
end
|
169
|
+
# Dumps the configuration.
|
170
|
+
#
|
171
|
+
def dump_configuration
|
172
|
+
@backend_configuration.dump self.configuration
|
173
|
+
end
|
174
|
+
|
175
|
+
end
|
176
|
+
|
177
|
+
end
|