picky 0.11.2 → 0.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/Index_api.rb +49 -0
- data/lib/picky/alias_instances.rb +4 -1
- data/lib/picky/application.rb +16 -15
- data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
- data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
- data/lib/picky/configuration/index.rb +67 -0
- data/lib/picky/cores.rb +3 -0
- data/lib/picky/index/bundle.rb +35 -51
- data/lib/picky/index/file/basic.rb +39 -5
- data/lib/picky/index/file/json.rb +10 -0
- data/lib/picky/index/file/marshal.rb +10 -0
- data/lib/picky/index/file/text.rb +22 -0
- data/lib/picky/index/files.rb +11 -36
- data/lib/picky/indexed/bundle.rb +61 -0
- data/lib/picky/{index → indexed}/categories.rb +1 -1
- data/lib/picky/{index → indexed}/category.rb +13 -16
- data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
- data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
- data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexers/serial.rb +64 -0
- data/lib/picky/indexers/solr.rb +1 -3
- data/lib/picky/indexes_api.rb +41 -0
- data/lib/picky/indexing/bundle.rb +43 -13
- data/lib/picky/indexing/category.rb +17 -64
- data/lib/picky/indexing/{type.rb → index.rb} +13 -3
- data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
- data/lib/picky/loader.rb +17 -22
- data/lib/picky/query/base.rb +1 -1
- data/lib/picky/rack/harakiri.rb +9 -2
- data/lib/picky/signals.rb +1 -1
- data/lib/picky/sources/base.rb +14 -14
- data/lib/picky/sources/couch.rb +8 -7
- data/lib/picky/sources/csv.rb +10 -10
- data/lib/picky/sources/db.rb +8 -8
- data/lib/picky/sources/delicious.rb +2 -2
- data/lib/picky/sources/wrappers/location.rb +3 -3
- data/lib/picky/tokenizers/base.rb +1 -11
- data/lib/picky/tokenizers/index.rb +0 -1
- data/lib/picky/tokenizers/query.rb +0 -1
- data/lib/tasks/index.rake +4 -4
- data/lib/tasks/shortcuts.rake +4 -4
- data/lib/tasks/try.rake +8 -8
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +13 -12
- data/spec/lib/application_spec.rb +10 -38
- data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
- data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
- data/spec/lib/configuration/index_spec.rb +80 -0
- data/spec/lib/cores_spec.rb +1 -1
- data/spec/lib/index/file/text_spec.rb +1 -1
- data/spec/lib/index/files_spec.rb +12 -32
- data/spec/lib/indexed/bundle_spec.rb +119 -0
- data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
- data/spec/lib/{index → indexed}/category_spec.rb +6 -6
- data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
- data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/serial_spec.rb +62 -0
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
- data/spec/lib/indexing/bundle_spec.rb +9 -14
- data/spec/lib/indexing/category_spec.rb +9 -125
- data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
- data/spec/lib/query/base_spec.rb +1 -1
- data/spec/lib/query/full_spec.rb +1 -1
- data/spec/lib/query/live_spec.rb +2 -4
- data/spec/lib/sources/couch_spec.rb +5 -5
- data/spec/lib/sources/db_spec.rb +6 -7
- data/spec/lib/tokenizers/base_spec.rb +1 -24
- data/spec/lib/tokenizers/query_spec.rb +0 -1
- metadata +38 -41
- data/lib/picky/bundle.rb +0 -33
- data/lib/picky/configuration/indexes.rb +0 -51
- data/lib/picky/configuration/queries.rb +0 -15
- data/lib/picky/indexers/base.rb +0 -85
- data/lib/picky/indexers/default.rb +0 -3
- data/lib/picky/type.rb +0 -46
- data/lib/picky/types.rb +0 -41
- data/lib/tasks/cache.rake +0 -46
- data/spec/lib/configuration/indexes_spec.rb +0 -28
- data/spec/lib/index/bundle_spec.rb +0 -151
- data/spec/lib/indexers/base_spec.rb +0 -89
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexed
|
4
|
+
|
5
|
+
# This is the _actual_ index.
|
6
|
+
#
|
7
|
+
# Handles exact/partial index, weights index, and similarity index.
|
8
|
+
#
|
9
|
+
# Delegates file handling and checking to an *Indexed*::*Files* object.
|
10
|
+
#
|
11
|
+
class Bundle < Index::Bundle
|
12
|
+
|
13
|
+
# Get the ids for the given symbol.
|
14
|
+
#
|
15
|
+
def ids sym
|
16
|
+
@index[sym] || []
|
17
|
+
end
|
18
|
+
# Get a weight for the given symbol.
|
19
|
+
#
|
20
|
+
def weight sym
|
21
|
+
@weights[sym]
|
22
|
+
end
|
23
|
+
|
24
|
+
# Load the data from the db.
|
25
|
+
#
|
26
|
+
def load_from_index_file
|
27
|
+
load_from_index_generation_message
|
28
|
+
clear
|
29
|
+
retrieve
|
30
|
+
end
|
31
|
+
# Notifies the user that the index is being loaded.
|
32
|
+
#
|
33
|
+
def load_from_index_generation_message
|
34
|
+
timed_exclaim "LOAD INDEX #{identifier}."
|
35
|
+
end
|
36
|
+
|
37
|
+
# Loads all indexes.
|
38
|
+
#
|
39
|
+
def load
|
40
|
+
load_index
|
41
|
+
load_similarity
|
42
|
+
load_weights
|
43
|
+
end
|
44
|
+
# Loads the core index.
|
45
|
+
#
|
46
|
+
def load_index
|
47
|
+
self.index = files.load_index
|
48
|
+
end
|
49
|
+
# Loads the weights index.
|
50
|
+
#
|
51
|
+
def load_weights
|
52
|
+
self.weights = files.load_weights
|
53
|
+
end
|
54
|
+
# Loads the similarity index.
|
55
|
+
#
|
56
|
+
def load_similarity
|
57
|
+
self.similarity = files.load_similarity
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
@@ -1,38 +1,41 @@
|
|
1
|
-
module
|
1
|
+
module Indexed
|
2
2
|
|
3
|
-
# An index category holds a exact and a partial index for a given
|
3
|
+
# An index category holds a exact and a partial index for a given category.
|
4
4
|
#
|
5
5
|
# For example an index category for names holds a exact and
|
6
6
|
# a partial index bundle for names.
|
7
7
|
#
|
8
8
|
class Category
|
9
9
|
|
10
|
-
attr_reader :
|
10
|
+
attr_reader :exact, :partial, :identifier, :name
|
11
11
|
|
12
12
|
#
|
13
13
|
#
|
14
|
-
def initialize name,
|
14
|
+
def initialize name, index, options = {}
|
15
15
|
@name = name
|
16
|
-
|
16
|
+
|
17
|
+
configuration = Configuration::Index.new index, self
|
18
|
+
|
19
|
+
@identifier = configuration.identifier
|
17
20
|
|
18
21
|
similarity = options[:similarity] || Cacher::Similarity::Default
|
19
22
|
|
20
|
-
@exact = options[:exact_index_bundle] || Bundle.new(:exact,
|
21
|
-
@partial = options[:partial_index_bundle] || Bundle.new(:partial,
|
23
|
+
@exact = options[:exact_index_bundle] || Bundle.new(:exact, configuration, similarity)
|
24
|
+
@partial = options[:partial_index_bundle] || Bundle.new(:partial, configuration, similarity)
|
22
25
|
|
23
26
|
@exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
24
27
|
@partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
25
28
|
|
26
29
|
# Extract?
|
27
30
|
#
|
28
|
-
qualifiers = generate_qualifiers_from options
|
29
|
-
Query::Qualifiers.add(
|
31
|
+
qualifiers = generate_qualifiers_from options || [name]
|
32
|
+
Query::Qualifiers.add(configuration.category_name, qualifiers) if qualifiers
|
30
33
|
end
|
31
34
|
|
32
35
|
# TODO Move to Index.
|
33
36
|
#
|
34
37
|
def generate_qualifiers_from options
|
35
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
38
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
36
39
|
end
|
37
40
|
|
38
41
|
# Loads the index from cache.
|
@@ -43,12 +46,6 @@ module Index
|
|
43
46
|
partial.load
|
44
47
|
end
|
45
48
|
|
46
|
-
# TODO Move to initializer?
|
47
|
-
#
|
48
|
-
def identifier
|
49
|
-
@identifier ||= "#{type.name} #{name}"
|
50
|
-
end
|
51
|
-
|
52
49
|
# Gets the weight for this token's text.
|
53
50
|
#
|
54
51
|
def weight token
|
@@ -1,10 +1,8 @@
|
|
1
|
-
module
|
1
|
+
module Indexed
|
2
2
|
|
3
|
-
# This class is for multiple types.
|
4
3
|
#
|
5
|
-
# For example, you could have types books, isbn.
|
6
4
|
#
|
7
|
-
class
|
5
|
+
class Index
|
8
6
|
|
9
7
|
attr_reader :name, :result_type, :combinator, :categories
|
10
8
|
|
@@ -22,8 +20,10 @@ module Index
|
|
22
20
|
|
23
21
|
# TODO Spec. Doc.
|
24
22
|
#
|
25
|
-
def add_category
|
26
|
-
|
23
|
+
def add_category category_name, options = {}
|
24
|
+
new_category = Category.new category_name, self, options
|
25
|
+
categories << new_category
|
26
|
+
new_category
|
27
27
|
end
|
28
28
|
|
29
29
|
#
|
@@ -1,11 +1,11 @@
|
|
1
|
-
module
|
1
|
+
module Indexed
|
2
2
|
|
3
|
-
class
|
3
|
+
class Indexes
|
4
4
|
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :indexes, :index_mapping
|
6
6
|
|
7
7
|
each_delegate :load_from_cache,
|
8
|
-
:to => :
|
8
|
+
:to => :indexes
|
9
9
|
|
10
10
|
def initialize
|
11
11
|
clear
|
@@ -14,8 +14,8 @@ module Index
|
|
14
14
|
# TODO Spec.
|
15
15
|
#
|
16
16
|
def clear
|
17
|
-
@
|
18
|
-
@
|
17
|
+
@indexes = []
|
18
|
+
@index_mapping = {}
|
19
19
|
end
|
20
20
|
|
21
21
|
# TODO Spec.
|
@@ -26,14 +26,14 @@ module Index
|
|
26
26
|
|
27
27
|
# TODO Spec
|
28
28
|
#
|
29
|
-
def register
|
30
|
-
self.
|
31
|
-
self.
|
29
|
+
def register index
|
30
|
+
self.indexes << index
|
31
|
+
self.index_mapping[index.name] = index
|
32
32
|
end
|
33
33
|
def [] name
|
34
34
|
name = name.to_sym
|
35
35
|
|
36
|
-
|
36
|
+
index_mapping[name]
|
37
37
|
end
|
38
38
|
|
39
39
|
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
|
-
module
|
3
|
+
module Indexed
|
4
4
|
|
5
5
|
# FIXME and spec
|
6
6
|
#
|
7
7
|
module Wrappers
|
8
8
|
|
9
9
|
# This index combines an exact and partial index.
|
10
|
-
# It serves to order the results such that exact
|
10
|
+
# It serves to order the results such that exact hits are found first.
|
11
11
|
#
|
12
12
|
# TODO Need to use the right subtokens. Bake in?
|
13
13
|
#
|
@@ -17,7 +17,7 @@ module Index
|
|
17
17
|
:identifier,
|
18
18
|
:name,
|
19
19
|
:to => :@exact
|
20
|
-
delegate :
|
20
|
+
delegate :index,
|
21
21
|
:category,
|
22
22
|
:weight,
|
23
23
|
:generate_partial_from,
|
@@ -32,12 +32,12 @@ module Index
|
|
32
32
|
@partial = category.partial
|
33
33
|
end
|
34
34
|
|
35
|
-
def self.wrap
|
36
|
-
if
|
37
|
-
wrap_each_of
|
38
|
-
|
35
|
+
def self.wrap index_or_category
|
36
|
+
if index_or_category.respond_to? :categories
|
37
|
+
wrap_each_of index_or_category.categories
|
38
|
+
index_or_category
|
39
39
|
else
|
40
|
-
new
|
40
|
+
new index_or_category
|
41
41
|
end
|
42
42
|
end
|
43
43
|
# TODO Do not extract categories!
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexers
|
4
|
+
|
5
|
+
# The indexer defines the control flow.
|
6
|
+
#
|
7
|
+
class Serial
|
8
|
+
|
9
|
+
attr_accessor :tokenizer, :source
|
10
|
+
|
11
|
+
def initialize configuration, source, tokenizer
|
12
|
+
@configuration = configuration
|
13
|
+
@source = source || raise_no_source
|
14
|
+
@tokenizer = tokenizer
|
15
|
+
end
|
16
|
+
|
17
|
+
# Raise a no source exception.
|
18
|
+
#
|
19
|
+
def raise_no_source
|
20
|
+
raise NoSourceSpecifiedException.new("No source given for #{@configuration.identifier}.")
|
21
|
+
end
|
22
|
+
|
23
|
+
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
24
|
+
#
|
25
|
+
# Note: Puts together the parts first in an array, then releasing the array from time to time by joining.
|
26
|
+
#
|
27
|
+
def index
|
28
|
+
indexing_message
|
29
|
+
process
|
30
|
+
end
|
31
|
+
def process
|
32
|
+
comma = ?,
|
33
|
+
newline = ?\n
|
34
|
+
|
35
|
+
# TODO Move open to config?
|
36
|
+
#
|
37
|
+
# @category.prepared_index do |file|
|
38
|
+
# source.harvest(@index, @category) do |indexed_id, text|
|
39
|
+
# tokenizer.tokenize(text).each do |token_text|
|
40
|
+
# next unless token_text
|
41
|
+
# file.buffer indexed_id << comma << token_text << newline
|
42
|
+
# end
|
43
|
+
# file.write_maybe
|
44
|
+
# end
|
45
|
+
# end
|
46
|
+
#
|
47
|
+
@configuration.prepared_index_file do |file|
|
48
|
+
result = []
|
49
|
+
source.harvest(@configuration.index, @configuration.category) do |indexed_id, text|
|
50
|
+
tokenizer.tokenize(text).each do |token_text|
|
51
|
+
next unless token_text
|
52
|
+
result << indexed_id << comma << token_text << newline
|
53
|
+
end
|
54
|
+
file.write(result.join) && result.clear if result.size > 100_000
|
55
|
+
end
|
56
|
+
file.write result.join
|
57
|
+
end
|
58
|
+
end
|
59
|
+
def indexing_message
|
60
|
+
timed_exclaim "INDEX #{@configuration.identifier}"
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
end
|
data/lib/picky/indexers/solr.rb
CHANGED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Comfortable API convenience class, splits methods to indexes.
|
2
|
+
#
|
3
|
+
class IndexesAPI
|
4
|
+
|
5
|
+
attr_reader :indexes, :index_mapping
|
6
|
+
|
7
|
+
delegate :reload,
|
8
|
+
:load_from_cache,
|
9
|
+
:to => :@indexed
|
10
|
+
|
11
|
+
delegate :check_caches,
|
12
|
+
:find,
|
13
|
+
:generate_cache_only,
|
14
|
+
:generate_index_only,
|
15
|
+
:index,
|
16
|
+
:index_for_tests,
|
17
|
+
:to => :@indexing
|
18
|
+
|
19
|
+
def initialize
|
20
|
+
@indexes = []
|
21
|
+
@index_mapping = {}
|
22
|
+
|
23
|
+
@indexed = Indexed::Indexes.new
|
24
|
+
@indexing = Indexing::Indexes.new
|
25
|
+
end
|
26
|
+
|
27
|
+
def register index
|
28
|
+
self.indexes << index
|
29
|
+
self.index_mapping[index.name] = index
|
30
|
+
|
31
|
+
@indexing.register index.indexing
|
32
|
+
@indexed.register index.indexed # TODO Even necessary?
|
33
|
+
end
|
34
|
+
|
35
|
+
def [] name
|
36
|
+
name = name.to_sym
|
37
|
+
|
38
|
+
self.index_mapping[name]
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -6,17 +6,15 @@ module Indexing
|
|
6
6
|
# It does all menial tasks that have nothing to do
|
7
7
|
# with the actual index running etc.
|
8
8
|
#
|
9
|
-
|
10
|
-
#
|
11
|
-
class Bundle < ::Bundle
|
9
|
+
class Bundle < Index::Bundle
|
12
10
|
|
13
11
|
attr_accessor :partial_strategy, :weights_strategy
|
14
12
|
attr_reader :files
|
15
13
|
|
16
14
|
# Path is in which directory the cache is located.
|
17
15
|
#
|
18
|
-
def initialize name,
|
19
|
-
super name,
|
16
|
+
def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
|
17
|
+
super name, configuration, similarity_strategy
|
20
18
|
|
21
19
|
@partial_strategy = partial_strategy
|
22
20
|
@weights_strategy = weights_strategy
|
@@ -63,7 +61,11 @@ module Indexing
|
|
63
61
|
def load_from_index_generation_message
|
64
62
|
timed_exclaim "LOAD INDEX #{identifier}."
|
65
63
|
end
|
66
|
-
# Retrieves the data into the index.
|
64
|
+
# Retrieves the prepared index data into the index.
|
65
|
+
#
|
66
|
+
# This is in preparation for generating
|
67
|
+
# derived indexes (like weights, similarity)
|
68
|
+
# and later dumping the optimized index.
|
67
69
|
#
|
68
70
|
def retrieve
|
69
71
|
files.retrieve do |id, token|
|
@@ -71,6 +73,8 @@ module Indexing
|
|
71
73
|
index[token] << id
|
72
74
|
end
|
73
75
|
end
|
76
|
+
# Sets up a piece of the index for the given token.
|
77
|
+
#
|
74
78
|
def initialize_index_for token
|
75
79
|
index[token] ||= []
|
76
80
|
end
|
@@ -81,12 +85,14 @@ module Indexing
|
|
81
85
|
#
|
82
86
|
|
83
87
|
# Generates a new index (writes its index) using the
|
84
|
-
#
|
88
|
+
# partial caching strategy of this bundle.
|
85
89
|
#
|
86
90
|
def generate_partial
|
87
91
|
generator = Cacher::PartialGenerator.new self.index
|
88
92
|
self.index = generator.generate self.partial_strategy
|
89
93
|
end
|
94
|
+
# Generate a partial index from the given exact index.
|
95
|
+
#
|
90
96
|
def generate_partial_from exact_index
|
91
97
|
timed_exclaim "PARTIAL GENERATE #{identifier}."
|
92
98
|
self.index = exact_index
|
@@ -108,25 +114,31 @@ module Indexing
|
|
108
114
|
self.weights = generator.generate self.weights_strategy
|
109
115
|
end
|
110
116
|
|
111
|
-
# Saves the
|
117
|
+
# Saves the indexes in a dump file.
|
112
118
|
#
|
113
119
|
def dump
|
114
120
|
dump_index
|
115
121
|
dump_similarity
|
116
122
|
dump_weights
|
117
123
|
end
|
124
|
+
# Dumps the core index.
|
125
|
+
#
|
118
126
|
def dump_index
|
119
127
|
timed_exclaim "DUMP INDEX #{identifier}."
|
120
128
|
files.dump_index index
|
121
129
|
end
|
122
|
-
|
123
|
-
|
124
|
-
files.dump_similarity similarity
|
125
|
-
end
|
130
|
+
# Dumps the weights index.
|
131
|
+
#
|
126
132
|
def dump_weights
|
127
133
|
timed_exclaim "DUMP WEIGHTS #{identifier}."
|
128
134
|
files.dump_weights weights
|
129
135
|
end
|
136
|
+
# Dumps the similarity index.
|
137
|
+
#
|
138
|
+
def dump_similarity
|
139
|
+
timed_exclaim "DUMP SIMILARITY #{identifier}."
|
140
|
+
files.dump_similarity similarity
|
141
|
+
end
|
130
142
|
|
131
143
|
# Alerts the user if an index is missing.
|
132
144
|
#
|
@@ -134,38 +146,56 @@ module Indexing
|
|
134
146
|
raise_unless_index_exists
|
135
147
|
raise_unless_similarity_exists
|
136
148
|
end
|
149
|
+
# Alerts the user if one of the necessary indexes
|
150
|
+
# (core, weights) is missing.
|
151
|
+
#
|
137
152
|
def raise_unless_index_exists
|
138
153
|
if partial_strategy.saved?
|
139
154
|
warn_if_index_small
|
140
155
|
raise_unless_index_ok
|
141
156
|
end
|
142
157
|
end
|
158
|
+
# Alerts the user if the similarity
|
159
|
+
# index is missing (given that it's used).
|
160
|
+
#
|
143
161
|
def raise_unless_similarity_exists
|
144
162
|
if similarity_strategy.saved?
|
145
163
|
warn_if_similarity_small
|
146
164
|
raise_unless_similarity_ok
|
147
165
|
end
|
148
166
|
end
|
167
|
+
# Warns the user if the similarity index is small.
|
168
|
+
#
|
149
169
|
def warn_if_similarity_small
|
150
170
|
warn_cache_small :similarity if files.similarity_cache_small?
|
151
171
|
end
|
172
|
+
# Alerts the user if the similarity index is not there.
|
173
|
+
#
|
152
174
|
def raise_unless_similarity_ok
|
153
175
|
raise_cache_missing :similarity unless files.similarity_cache_ok?
|
154
176
|
end
|
177
|
+
|
155
178
|
# TODO Spec on down.
|
156
179
|
#
|
180
|
+
|
181
|
+
# Warns the user if the core or weights indexes are small.
|
182
|
+
#
|
157
183
|
def warn_if_index_small
|
158
184
|
warn_cache_small :index if files.index_cache_small?
|
159
185
|
warn_cache_small :weights if files.weights_cache_small?
|
160
186
|
end
|
187
|
+
# Alerts the user if the core or weights indexes are not there.
|
188
|
+
#
|
161
189
|
def raise_unless_index_ok
|
162
190
|
raise_cache_missing :index unless files.index_cache_ok?
|
163
191
|
raise_cache_missing :weights unless files.weights_cache_ok?
|
164
192
|
end
|
193
|
+
# Outputs a warning for the given cache.
|
194
|
+
#
|
165
195
|
def warn_cache_small what
|
166
196
|
puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
167
197
|
end
|
168
|
-
# Raises an appropriate error message.
|
198
|
+
# Raises an appropriate error message for the given cache.
|
169
199
|
#
|
170
200
|
def raise_cache_missing what
|
171
201
|
raise "#{what} cache for #{identifier} missing."
|
@@ -2,20 +2,17 @@ module Indexing
|
|
2
2
|
|
3
3
|
class Category
|
4
4
|
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :exact, :partial, :name, :configuration, :indexer
|
6
6
|
|
7
|
-
|
8
|
-
#
|
9
|
-
def initialize name, type, options = {}
|
7
|
+
def initialize name, index, options = {}
|
10
8
|
@name = name
|
11
|
-
@type = type
|
12
9
|
|
13
|
-
|
10
|
+
# Now we have enough info to combine the index and the category.
|
11
|
+
#
|
12
|
+
@configuration = Configuration::Index.new index, self
|
14
13
|
|
15
|
-
@tokenizer
|
16
|
-
@
|
17
|
-
@indexed_as = options[:as] || name
|
18
|
-
@virtual = options[:virtual] || false # TODO What is this again?
|
14
|
+
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
15
|
+
@indexer = Indexers::Serial.new configuration, options[:source], @tokenizer #, :as => options[:as] # TODO option as.
|
19
16
|
|
20
17
|
# TODO Push into Bundle.
|
21
18
|
#
|
@@ -23,28 +20,12 @@ module Indexing
|
|
23
20
|
weights = options[:weights] || Cacher::Weights::Default
|
24
21
|
similarity = options[:similarity] || Cacher::Similarity::Default
|
25
22
|
|
26
|
-
@exact = options[:exact_indexing_bundle] || Bundle.new(:exact,
|
27
|
-
@partial = options[:partial_indexing_bundle] || Bundle.new(:partial,
|
28
|
-
|
29
|
-
# TODO Move to Query.
|
30
|
-
#
|
31
|
-
# @remove = options[:remove] || false
|
32
|
-
# @filter = options[:filter] || true
|
33
|
-
|
34
|
-
@options = options # TODO Remove?
|
23
|
+
@exact = options[:exact_indexing_bundle] || Bundle.new(:exact, configuration, similarity, Cacher::Partial::None.new, weights)
|
24
|
+
@partial = options[:partial_indexing_bundle] || Bundle.new(:partial, configuration, Cacher::Similarity::None.new, partial, weights)
|
35
25
|
end
|
36
26
|
|
37
|
-
|
38
|
-
|
39
|
-
def identifier
|
40
|
-
@identifier ||= "#{type.name} #{name}"
|
41
|
-
end
|
42
|
-
|
43
|
-
# Note: Most of the time the source of the type is used.
|
44
|
-
#
|
45
|
-
def source
|
46
|
-
@source || type.source
|
47
|
-
end
|
27
|
+
delegate :identifier, :prepare_index_directory, :to => :configuration
|
28
|
+
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
48
29
|
|
49
30
|
# TODO Spec.
|
50
31
|
#
|
@@ -68,16 +49,16 @@ module Indexing
|
|
68
49
|
exact.delete
|
69
50
|
partial.delete
|
70
51
|
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
52
|
+
|
53
|
+
def index
|
54
|
+
prepare_index_directory
|
55
|
+
indexer.index
|
56
|
+
end
|
76
57
|
|
77
58
|
# Generates all caches for this category.
|
78
59
|
#
|
79
60
|
def cache
|
80
|
-
|
61
|
+
prepare_index_directory
|
81
62
|
generate_caches
|
82
63
|
end
|
83
64
|
def generate_caches
|
@@ -101,34 +82,6 @@ module Indexing
|
|
101
82
|
partial.dump
|
102
83
|
end
|
103
84
|
|
104
|
-
# TODO Partially move to type. Duplicate Code in indexers/field.rb.
|
105
|
-
#
|
106
|
-
# TODO Use the Files object.
|
107
|
-
#
|
108
|
-
def search_index_root
|
109
|
-
File.join PICKY_ROOT, 'index'
|
110
|
-
end
|
111
|
-
def cache_directory
|
112
|
-
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
113
|
-
end
|
114
|
-
def search_index_file_name
|
115
|
-
File.join cache_directory, "prepared_#{name}_index.txt"
|
116
|
-
end
|
117
|
-
def index
|
118
|
-
prepare_cache_directory
|
119
|
-
# files.create_directory # TODO Make this possible!
|
120
|
-
indexer.index
|
121
|
-
end
|
122
|
-
def prepare_cache_directory
|
123
|
-
FileUtils.mkdir_p cache_directory
|
124
|
-
end
|
125
|
-
def indexer
|
126
|
-
@indexer || @indexer = @indexer_class.new(type, self)
|
127
|
-
end
|
128
|
-
def virtual?
|
129
|
-
!!virtual
|
130
|
-
end
|
131
|
-
|
132
85
|
end
|
133
86
|
|
134
87
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Indexing
|
2
2
|
|
3
|
-
class
|
3
|
+
class Index
|
4
4
|
|
5
5
|
attr_reader :name, :source, :categories, :after_indexing
|
6
6
|
|
@@ -30,8 +30,18 @@ module Indexing
|
|
30
30
|
|
31
31
|
# TODO Spec. Doc.
|
32
32
|
#
|
33
|
-
def add_category
|
34
|
-
|
33
|
+
def add_category category_name, options = {}
|
34
|
+
options = default_category_options.merge options
|
35
|
+
|
36
|
+
new_category = Category.new category_name, self, options
|
37
|
+
categories << new_category
|
38
|
+
new_category
|
39
|
+
end
|
40
|
+
|
41
|
+
# By default, the category uses the index's source.
|
42
|
+
#
|
43
|
+
def default_category_options
|
44
|
+
{ :source => @source }
|
35
45
|
end
|
36
46
|
|
37
47
|
# Indexing.
|