picky 0.11.2 → 0.12.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/Index_api.rb +49 -0
- data/lib/picky/alias_instances.rb +4 -1
- data/lib/picky/application.rb +16 -15
- data/lib/picky/cacher/partial/{subtoken.rb → substring.rb} +19 -18
- data/lib/picky/{character_substitution/european.rb → character_substituters/west_european.rb} +2 -2
- data/lib/picky/configuration/index.rb +67 -0
- data/lib/picky/cores.rb +3 -0
- data/lib/picky/index/bundle.rb +35 -51
- data/lib/picky/index/file/basic.rb +39 -5
- data/lib/picky/index/file/json.rb +10 -0
- data/lib/picky/index/file/marshal.rb +10 -0
- data/lib/picky/index/file/text.rb +22 -0
- data/lib/picky/index/files.rb +11 -36
- data/lib/picky/indexed/bundle.rb +61 -0
- data/lib/picky/{index → indexed}/categories.rb +1 -1
- data/lib/picky/{index → indexed}/category.rb +13 -16
- data/lib/picky/{index/type.rb → indexed/index.rb} +6 -6
- data/lib/picky/{index/types.rb → indexed/indexes.rb} +10 -10
- data/lib/picky/{index → indexed}/wrappers/exact_first.rb +8 -8
- data/lib/picky/indexers/no_source_specified_error.rb +1 -1
- data/lib/picky/indexers/serial.rb +64 -0
- data/lib/picky/indexers/solr.rb +1 -3
- data/lib/picky/indexes_api.rb +41 -0
- data/lib/picky/indexing/bundle.rb +43 -13
- data/lib/picky/indexing/category.rb +17 -64
- data/lib/picky/indexing/{type.rb → index.rb} +13 -3
- data/lib/picky/indexing/{types.rb → indexes.rb} +22 -22
- data/lib/picky/loader.rb +17 -22
- data/lib/picky/query/base.rb +1 -1
- data/lib/picky/rack/harakiri.rb +9 -2
- data/lib/picky/signals.rb +1 -1
- data/lib/picky/sources/base.rb +14 -14
- data/lib/picky/sources/couch.rb +8 -7
- data/lib/picky/sources/csv.rb +10 -10
- data/lib/picky/sources/db.rb +8 -8
- data/lib/picky/sources/delicious.rb +2 -2
- data/lib/picky/sources/wrappers/location.rb +3 -3
- data/lib/picky/tokenizers/base.rb +1 -11
- data/lib/picky/tokenizers/index.rb +0 -1
- data/lib/picky/tokenizers/query.rb +0 -1
- data/lib/tasks/index.rake +4 -4
- data/lib/tasks/shortcuts.rake +4 -4
- data/lib/tasks/try.rake +8 -8
- data/project_prototype/Gemfile +1 -1
- data/project_prototype/app/application.rb +13 -12
- data/spec/lib/application_spec.rb +10 -38
- data/spec/lib/cacher/partial/{subtoken_spec.rb → substring_spec.rb} +0 -0
- data/spec/lib/{character_substitution/european_spec.rb → character_substituters/west_european_spec.rb} +6 -2
- data/spec/lib/configuration/index_spec.rb +80 -0
- data/spec/lib/cores_spec.rb +1 -1
- data/spec/lib/index/file/text_spec.rb +1 -1
- data/spec/lib/index/files_spec.rb +12 -32
- data/spec/lib/indexed/bundle_spec.rb +119 -0
- data/spec/lib/{indexing → indexed}/categories_spec.rb +13 -14
- data/spec/lib/{index → indexed}/category_spec.rb +6 -6
- data/spec/lib/{index/type_spec.rb → indexed/index_spec.rb} +3 -3
- data/spec/lib/{index → indexed}/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/serial_spec.rb +62 -0
- data/spec/lib/indexing/bundle_partial_generation_speed_spec.rb +7 -5
- data/spec/lib/indexing/bundle_spec.rb +9 -14
- data/spec/lib/indexing/category_spec.rb +9 -125
- data/spec/lib/indexing/{type_spec.rb → index_spec.rb} +3 -3
- data/spec/lib/query/base_spec.rb +1 -1
- data/spec/lib/query/full_spec.rb +1 -1
- data/spec/lib/query/live_spec.rb +2 -4
- data/spec/lib/sources/couch_spec.rb +5 -5
- data/spec/lib/sources/db_spec.rb +6 -7
- data/spec/lib/tokenizers/base_spec.rb +1 -24
- data/spec/lib/tokenizers/query_spec.rb +0 -1
- metadata +38 -41
- data/lib/picky/bundle.rb +0 -33
- data/lib/picky/configuration/indexes.rb +0 -51
- data/lib/picky/configuration/queries.rb +0 -15
- data/lib/picky/indexers/base.rb +0 -85
- data/lib/picky/indexers/default.rb +0 -3
- data/lib/picky/type.rb +0 -46
- data/lib/picky/types.rb +0 -41
- data/lib/tasks/cache.rake +0 -46
- data/spec/lib/configuration/indexes_spec.rb +0 -28
- data/spec/lib/index/bundle_spec.rb +0 -151
- data/spec/lib/indexers/base_spec.rb +0 -89
@@ -0,0 +1,61 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexed
|
4
|
+
|
5
|
+
# This is the _actual_ index.
|
6
|
+
#
|
7
|
+
# Handles exact/partial index, weights index, and similarity index.
|
8
|
+
#
|
9
|
+
# Delegates file handling and checking to an *Indexed*::*Files* object.
|
10
|
+
#
|
11
|
+
class Bundle < Index::Bundle
|
12
|
+
|
13
|
+
# Get the ids for the given symbol.
|
14
|
+
#
|
15
|
+
def ids sym
|
16
|
+
@index[sym] || []
|
17
|
+
end
|
18
|
+
# Get a weight for the given symbol.
|
19
|
+
#
|
20
|
+
def weight sym
|
21
|
+
@weights[sym]
|
22
|
+
end
|
23
|
+
|
24
|
+
# Load the data from the db.
|
25
|
+
#
|
26
|
+
def load_from_index_file
|
27
|
+
load_from_index_generation_message
|
28
|
+
clear
|
29
|
+
retrieve
|
30
|
+
end
|
31
|
+
# Notifies the user that the index is being loaded.
|
32
|
+
#
|
33
|
+
def load_from_index_generation_message
|
34
|
+
timed_exclaim "LOAD INDEX #{identifier}."
|
35
|
+
end
|
36
|
+
|
37
|
+
# Loads all indexes.
|
38
|
+
#
|
39
|
+
def load
|
40
|
+
load_index
|
41
|
+
load_similarity
|
42
|
+
load_weights
|
43
|
+
end
|
44
|
+
# Loads the core index.
|
45
|
+
#
|
46
|
+
def load_index
|
47
|
+
self.index = files.load_index
|
48
|
+
end
|
49
|
+
# Loads the weights index.
|
50
|
+
#
|
51
|
+
def load_weights
|
52
|
+
self.weights = files.load_weights
|
53
|
+
end
|
54
|
+
# Loads the similarity index.
|
55
|
+
#
|
56
|
+
def load_similarity
|
57
|
+
self.similarity = files.load_similarity
|
58
|
+
end
|
59
|
+
|
60
|
+
end
|
61
|
+
end
|
@@ -1,38 +1,41 @@
|
|
1
|
-
module
|
1
|
+
module Indexed
|
2
2
|
|
3
|
-
# An index category holds a exact and a partial index for a given
|
3
|
+
# An index category holds an exact and a partial index for a given category.
|
4
4
|
#
|
5
5
|
# For example, an index category for names holds an exact and
|
6
6
|
# a partial index bundle for names.
|
7
7
|
#
|
8
8
|
class Category
|
9
9
|
|
10
|
-
attr_reader :
|
10
|
+
attr_reader :exact, :partial, :identifier, :name
|
11
11
|
|
12
12
|
#
|
13
13
|
#
|
14
|
-
def initialize name,
|
14
|
+
def initialize name, index, options = {}
|
15
15
|
@name = name
|
16
|
-
|
16
|
+
|
17
|
+
configuration = Configuration::Index.new index, self
|
18
|
+
|
19
|
+
@identifier = configuration.identifier
|
17
20
|
|
18
21
|
similarity = options[:similarity] || Cacher::Similarity::Default
|
19
22
|
|
20
|
-
@exact = options[:exact_index_bundle] || Bundle.new(:exact,
|
21
|
-
@partial = options[:partial_index_bundle] || Bundle.new(:partial,
|
23
|
+
@exact = options[:exact_index_bundle] || Bundle.new(:exact, configuration, similarity)
|
24
|
+
@partial = options[:partial_index_bundle] || Bundle.new(:partial, configuration, similarity)
|
22
25
|
|
23
26
|
@exact = exact_lambda.call(@exact, @partial) if exact_lambda = options[:exact_lambda]
|
24
27
|
@partial = partial_lambda.call(@exact, @partial) if partial_lambda = options[:partial_lambda]
|
25
28
|
|
26
29
|
# Extract?
|
27
30
|
#
|
28
|
-
qualifiers = generate_qualifiers_from options
|
29
|
-
Query::Qualifiers.add(
|
31
|
+
# Parenthesised on purpose: without the parentheses Ruby parses this as
# generate_qualifiers_from(options || [name]), so the [name] fallback never applies.
qualifiers = generate_qualifiers_from(options) || [name]
|
32
|
+
Query::Qualifiers.add(configuration.category_name, qualifiers) if qualifiers
|
30
33
|
end
|
31
34
|
|
32
35
|
# TODO Move to Index.
|
33
36
|
#
|
34
37
|
def generate_qualifiers_from options
|
35
|
-
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
38
|
+
options[:qualifiers] || options[:qualifier] && [options[:qualifier]]
|
36
39
|
end
|
37
40
|
|
38
41
|
# Loads the index from cache.
|
@@ -43,12 +46,6 @@ module Index
|
|
43
46
|
partial.load
|
44
47
|
end
|
45
48
|
|
46
|
-
# TODO Move to initializer?
|
47
|
-
#
|
48
|
-
def identifier
|
49
|
-
@identifier ||= "#{type.name} #{name}"
|
50
|
-
end
|
51
|
-
|
52
49
|
# Gets the weight for this token's text.
|
53
50
|
#
|
54
51
|
def weight token
|
@@ -1,10 +1,8 @@
|
|
1
|
-
module
|
1
|
+
module Indexed
|
2
2
|
|
3
|
-
# This class is for multiple types.
|
4
3
|
#
|
5
|
-
# For example, you could have types books, isbn.
|
6
4
|
#
|
7
|
-
class
|
5
|
+
class Index
|
8
6
|
|
9
7
|
attr_reader :name, :result_type, :combinator, :categories
|
10
8
|
|
@@ -22,8 +20,10 @@ module Index
|
|
22
20
|
|
23
21
|
# TODO Spec. Doc.
|
24
22
|
#
|
25
|
-
def add_category
|
26
|
-
|
23
|
+
def add_category category_name, options = {}
|
24
|
+
new_category = Category.new category_name, self, options
|
25
|
+
categories << new_category
|
26
|
+
new_category
|
27
27
|
end
|
28
28
|
|
29
29
|
#
|
@@ -1,11 +1,11 @@
|
|
1
|
-
module
|
1
|
+
module Indexed
|
2
2
|
|
3
|
-
class
|
3
|
+
class Indexes
|
4
4
|
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :indexes, :index_mapping
|
6
6
|
|
7
7
|
each_delegate :load_from_cache,
|
8
|
-
:to => :
|
8
|
+
:to => :indexes
|
9
9
|
|
10
10
|
def initialize
|
11
11
|
clear
|
@@ -14,8 +14,8 @@ module Index
|
|
14
14
|
# TODO Spec.
|
15
15
|
#
|
16
16
|
def clear
|
17
|
-
@
|
18
|
-
@
|
17
|
+
@indexes = []
|
18
|
+
@index_mapping = {}
|
19
19
|
end
|
20
20
|
|
21
21
|
# TODO Spec.
|
@@ -26,14 +26,14 @@ module Index
|
|
26
26
|
|
27
27
|
# TODO Spec
|
28
28
|
#
|
29
|
-
def register
|
30
|
-
self.
|
31
|
-
self.
|
29
|
+
def register index
|
30
|
+
self.indexes << index
|
31
|
+
self.index_mapping[index.name] = index
|
32
32
|
end
|
33
33
|
def [] name
|
34
34
|
name = name.to_sym
|
35
35
|
|
36
|
-
|
36
|
+
index_mapping[name]
|
37
37
|
end
|
38
38
|
|
39
39
|
end
|
@@ -1,13 +1,13 @@
|
|
1
1
|
# encoding: utf-8
|
2
2
|
#
|
3
|
-
module
|
3
|
+
module Indexed
|
4
4
|
|
5
5
|
# FIXME and spec
|
6
6
|
#
|
7
7
|
module Wrappers
|
8
8
|
|
9
9
|
# This index combines an exact and partial index.
|
10
|
-
# It serves to order the results such that exact
|
10
|
+
# It serves to order the results such that exact hits are found first.
|
11
11
|
#
|
12
12
|
# TODO Need to use the right subtokens. Bake in?
|
13
13
|
#
|
@@ -17,7 +17,7 @@ module Index
|
|
17
17
|
:identifier,
|
18
18
|
:name,
|
19
19
|
:to => :@exact
|
20
|
-
delegate :
|
20
|
+
delegate :index,
|
21
21
|
:category,
|
22
22
|
:weight,
|
23
23
|
:generate_partial_from,
|
@@ -32,12 +32,12 @@ module Index
|
|
32
32
|
@partial = category.partial
|
33
33
|
end
|
34
34
|
|
35
|
-
def self.wrap
|
36
|
-
if
|
37
|
-
wrap_each_of
|
38
|
-
|
35
|
+
def self.wrap index_or_category
|
36
|
+
if index_or_category.respond_to? :categories
|
37
|
+
wrap_each_of index_or_category.categories
|
38
|
+
index_or_category
|
39
39
|
else
|
40
|
-
new
|
40
|
+
new index_or_category
|
41
41
|
end
|
42
42
|
end
|
43
43
|
# TODO Do not extract categories!
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
module Indexers
|
4
|
+
|
5
|
+
# The indexer defines the control flow.
|
6
|
+
#
|
7
|
+
class Serial
|
8
|
+
|
9
|
+
attr_accessor :tokenizer, :source
|
10
|
+
|
11
|
+
def initialize configuration, source, tokenizer
|
12
|
+
@configuration = configuration
|
13
|
+
@source = source || raise_no_source
|
14
|
+
@tokenizer = tokenizer
|
15
|
+
end
|
16
|
+
|
17
|
+
# Raise a no source exception.
|
18
|
+
#
|
19
|
+
def raise_no_source
|
20
|
+
raise NoSourceSpecifiedException.new("No source given for #{@configuration.identifier}.")
|
21
|
+
end
|
22
|
+
|
23
|
+
# Selects the original id (indexed id) and a column to process. The column data is called "token".
|
24
|
+
#
|
25
|
+
# Note: Puts the parts together in an array first, then releases the array from time to time by joining and writing it.
|
26
|
+
#
|
27
|
+
def index
|
28
|
+
indexing_message
|
29
|
+
process
|
30
|
+
end
|
31
|
+
def process
|
32
|
+
comma = ?,
|
33
|
+
newline = ?\n
|
34
|
+
|
35
|
+
# TODO Move open to config?
|
36
|
+
#
|
37
|
+
# @category.prepared_index do |file|
|
38
|
+
# source.harvest(@index, @category) do |indexed_id, text|
|
39
|
+
# tokenizer.tokenize(text).each do |token_text|
|
40
|
+
# next unless token_text
|
41
|
+
# file.buffer indexed_id << comma << token_text << newline
|
42
|
+
# end
|
43
|
+
# file.write_maybe
|
44
|
+
# end
|
45
|
+
# end
|
46
|
+
#
|
47
|
+
@configuration.prepared_index_file do |file|
|
48
|
+
result = []
|
49
|
+
source.harvest(@configuration.index, @configuration.category) do |indexed_id, text|
|
50
|
+
tokenizer.tokenize(text).each do |token_text|
|
51
|
+
next unless token_text
|
52
|
+
result << indexed_id << comma << token_text << newline
|
53
|
+
end
|
54
|
+
file.write(result.join) && result.clear if result.size > 100_000
|
55
|
+
end
|
56
|
+
file.write result.join
|
57
|
+
end
|
58
|
+
end
|
59
|
+
def indexing_message
|
60
|
+
timed_exclaim "INDEX #{@configuration.identifier}"
|
61
|
+
end
|
62
|
+
|
63
|
+
end
|
64
|
+
end
|
data/lib/picky/indexers/solr.rb
CHANGED
@@ -0,0 +1,41 @@
|
|
1
|
+
# Convenience API class: delegates each method to either the indexed or the indexing indexes.
|
2
|
+
#
|
3
|
+
class IndexesAPI
|
4
|
+
|
5
|
+
attr_reader :indexes, :index_mapping
|
6
|
+
|
7
|
+
delegate :reload,
|
8
|
+
:load_from_cache,
|
9
|
+
:to => :@indexed
|
10
|
+
|
11
|
+
delegate :check_caches,
|
12
|
+
:find,
|
13
|
+
:generate_cache_only,
|
14
|
+
:generate_index_only,
|
15
|
+
:index,
|
16
|
+
:index_for_tests,
|
17
|
+
:to => :@indexing
|
18
|
+
|
19
|
+
def initialize
|
20
|
+
@indexes = []
|
21
|
+
@index_mapping = {}
|
22
|
+
|
23
|
+
@indexed = Indexed::Indexes.new
|
24
|
+
@indexing = Indexing::Indexes.new
|
25
|
+
end
|
26
|
+
|
27
|
+
def register index
|
28
|
+
self.indexes << index
|
29
|
+
self.index_mapping[index.name] = index
|
30
|
+
|
31
|
+
@indexing.register index.indexing
|
32
|
+
@indexed.register index.indexed # TODO Even necessary?
|
33
|
+
end
|
34
|
+
|
35
|
+
def [] name
|
36
|
+
name = name.to_sym
|
37
|
+
|
38
|
+
self.index_mapping[name]
|
39
|
+
end
|
40
|
+
|
41
|
+
end
|
@@ -6,17 +6,15 @@ module Indexing
|
|
6
6
|
# It does all menial tasks that have nothing to do
|
7
7
|
# with the actual index running etc.
|
8
8
|
#
|
9
|
-
|
10
|
-
#
|
11
|
-
class Bundle < ::Bundle
|
9
|
+
class Bundle < Index::Bundle
|
12
10
|
|
13
11
|
attr_accessor :partial_strategy, :weights_strategy
|
14
12
|
attr_reader :files
|
15
13
|
|
16
14
|
# Path is in which directory the cache is located.
|
17
15
|
#
|
18
|
-
def initialize name,
|
19
|
-
super name,
|
16
|
+
def initialize name, configuration, similarity_strategy, partial_strategy, weights_strategy
|
17
|
+
super name, configuration, similarity_strategy
|
20
18
|
|
21
19
|
@partial_strategy = partial_strategy
|
22
20
|
@weights_strategy = weights_strategy
|
@@ -63,7 +61,11 @@ module Indexing
|
|
63
61
|
def load_from_index_generation_message
|
64
62
|
timed_exclaim "LOAD INDEX #{identifier}."
|
65
63
|
end
|
66
|
-
# Retrieves the data into the index.
|
64
|
+
# Retrieves the prepared index data into the index.
|
65
|
+
#
|
66
|
+
# This is in preparation for generating
|
67
|
+
# derived indexes (like weights, similarity)
|
68
|
+
# and later dumping the optimized index.
|
67
69
|
#
|
68
70
|
def retrieve
|
69
71
|
files.retrieve do |id, token|
|
@@ -71,6 +73,8 @@ module Indexing
|
|
71
73
|
index[token] << id
|
72
74
|
end
|
73
75
|
end
|
76
|
+
# Sets up a piece of the index for the given token.
|
77
|
+
#
|
74
78
|
def initialize_index_for token
|
75
79
|
index[token] ||= []
|
76
80
|
end
|
@@ -81,12 +85,14 @@ module Indexing
|
|
81
85
|
#
|
82
86
|
|
83
87
|
# Generates a new index (writes its index) using the
|
84
|
-
#
|
88
|
+
# partial caching strategy of this bundle.
|
85
89
|
#
|
86
90
|
def generate_partial
|
87
91
|
generator = Cacher::PartialGenerator.new self.index
|
88
92
|
self.index = generator.generate self.partial_strategy
|
89
93
|
end
|
94
|
+
# Generate a partial index from the given exact index.
|
95
|
+
#
|
90
96
|
def generate_partial_from exact_index
|
91
97
|
timed_exclaim "PARTIAL GENERATE #{identifier}."
|
92
98
|
self.index = exact_index
|
@@ -108,25 +114,31 @@ module Indexing
|
|
108
114
|
self.weights = generator.generate self.weights_strategy
|
109
115
|
end
|
110
116
|
|
111
|
-
# Saves the
|
117
|
+
# Saves the indexes in a dump file.
|
112
118
|
#
|
113
119
|
def dump
|
114
120
|
dump_index
|
115
121
|
dump_similarity
|
116
122
|
dump_weights
|
117
123
|
end
|
124
|
+
# Dumps the core index.
|
125
|
+
#
|
118
126
|
def dump_index
|
119
127
|
timed_exclaim "DUMP INDEX #{identifier}."
|
120
128
|
files.dump_index index
|
121
129
|
end
|
122
|
-
|
123
|
-
|
124
|
-
files.dump_similarity similarity
|
125
|
-
end
|
130
|
+
# Dumps the weights index.
|
131
|
+
#
|
126
132
|
def dump_weights
|
127
133
|
timed_exclaim "DUMP WEIGHTS #{identifier}."
|
128
134
|
files.dump_weights weights
|
129
135
|
end
|
136
|
+
# Dumps the similarity index.
|
137
|
+
#
|
138
|
+
def dump_similarity
|
139
|
+
timed_exclaim "DUMP SIMILARITY #{identifier}."
|
140
|
+
files.dump_similarity similarity
|
141
|
+
end
|
130
142
|
|
131
143
|
# Alerts the user if an index is missing.
|
132
144
|
#
|
@@ -134,38 +146,56 @@ module Indexing
|
|
134
146
|
raise_unless_index_exists
|
135
147
|
raise_unless_similarity_exists
|
136
148
|
end
|
149
|
+
# Alerts the user if one of the necessary indexes
|
150
|
+
# (core, weights) is missing.
|
151
|
+
#
|
137
152
|
def raise_unless_index_exists
|
138
153
|
if partial_strategy.saved?
|
139
154
|
warn_if_index_small
|
140
155
|
raise_unless_index_ok
|
141
156
|
end
|
142
157
|
end
|
158
|
+
# Alerts the user if the similarity
|
159
|
+
# index is missing (given that it's used).
|
160
|
+
#
|
143
161
|
def raise_unless_similarity_exists
|
144
162
|
if similarity_strategy.saved?
|
145
163
|
warn_if_similarity_small
|
146
164
|
raise_unless_similarity_ok
|
147
165
|
end
|
148
166
|
end
|
167
|
+
# Warns the user if the similarity index is small.
|
168
|
+
#
|
149
169
|
def warn_if_similarity_small
|
150
170
|
warn_cache_small :similarity if files.similarity_cache_small?
|
151
171
|
end
|
172
|
+
# Alerts the user if the similarity index is not there.
|
173
|
+
#
|
152
174
|
def raise_unless_similarity_ok
|
153
175
|
raise_cache_missing :similarity unless files.similarity_cache_ok?
|
154
176
|
end
|
177
|
+
|
155
178
|
# TODO Spec on down.
|
156
179
|
#
|
180
|
+
|
181
|
+
# Warns the user if the core or weights indexes are small.
|
182
|
+
#
|
157
183
|
def warn_if_index_small
|
158
184
|
warn_cache_small :index if files.index_cache_small?
|
159
185
|
warn_cache_small :weights if files.weights_cache_small?
|
160
186
|
end
|
187
|
+
# Alerts the user if the core or weights indexes are not there.
|
188
|
+
#
|
161
189
|
def raise_unless_index_ok
|
162
190
|
raise_cache_missing :index unless files.index_cache_ok?
|
163
191
|
raise_cache_missing :weights unless files.weights_cache_ok?
|
164
192
|
end
|
193
|
+
# Outputs a warning for the given cache.
|
194
|
+
#
|
165
195
|
def warn_cache_small what
|
166
196
|
puts "Warning: #{what} cache for #{identifier} smaller than 16 bytes."
|
167
197
|
end
|
168
|
-
# Raises an appropriate error message.
|
198
|
+
# Raises an appropriate error message for the given cache.
|
169
199
|
#
|
170
200
|
def raise_cache_missing what
|
171
201
|
raise "#{what} cache for #{identifier} missing."
|
@@ -2,20 +2,17 @@ module Indexing
|
|
2
2
|
|
3
3
|
class Category
|
4
4
|
|
5
|
-
attr_reader :
|
5
|
+
attr_reader :exact, :partial, :name, :configuration, :indexer
|
6
6
|
|
7
|
-
|
8
|
-
#
|
9
|
-
def initialize name, type, options = {}
|
7
|
+
def initialize name, index, options = {}
|
10
8
|
@name = name
|
11
|
-
@type = type
|
12
9
|
|
13
|
-
|
10
|
+
# Now we have enough info to combine the index and the category.
|
11
|
+
#
|
12
|
+
@configuration = Configuration::Index.new index, self
|
14
13
|
|
15
|
-
@tokenizer
|
16
|
-
@
|
17
|
-
@indexed_as = options[:as] || name
|
18
|
-
@virtual = options[:virtual] || false # TODO What is this again?
|
14
|
+
@tokenizer = options[:tokenizer] || Tokenizers::Index.default
|
15
|
+
@indexer = Indexers::Serial.new configuration, options[:source], @tokenizer #, :as => options[:as] # TODO option as.
|
19
16
|
|
20
17
|
# TODO Push into Bundle.
|
21
18
|
#
|
@@ -23,28 +20,12 @@ module Indexing
|
|
23
20
|
weights = options[:weights] || Cacher::Weights::Default
|
24
21
|
similarity = options[:similarity] || Cacher::Similarity::Default
|
25
22
|
|
26
|
-
@exact = options[:exact_indexing_bundle] || Bundle.new(:exact,
|
27
|
-
@partial = options[:partial_indexing_bundle] || Bundle.new(:partial,
|
28
|
-
|
29
|
-
# TODO Move to Query.
|
30
|
-
#
|
31
|
-
# @remove = options[:remove] || false
|
32
|
-
# @filter = options[:filter] || true
|
33
|
-
|
34
|
-
@options = options # TODO Remove?
|
23
|
+
@exact = options[:exact_indexing_bundle] || Bundle.new(:exact, configuration, similarity, Cacher::Partial::None.new, weights)
|
24
|
+
@partial = options[:partial_indexing_bundle] || Bundle.new(:partial, configuration, Cacher::Similarity::None.new, partial, weights)
|
35
25
|
end
|
36
26
|
|
37
|
-
|
38
|
-
|
39
|
-
def identifier
|
40
|
-
@identifier ||= "#{type.name} #{name}"
|
41
|
-
end
|
42
|
-
|
43
|
-
# Note: Most of the time the source of the type is used.
|
44
|
-
#
|
45
|
-
def source
|
46
|
-
@source || type.source
|
47
|
-
end
|
27
|
+
delegate :identifier, :prepare_index_directory, :to => :configuration
|
28
|
+
delegate :source, :source=, :tokenizer, :tokenizer=, :to => :indexer
|
48
29
|
|
49
30
|
# TODO Spec.
|
50
31
|
#
|
@@ -68,16 +49,16 @@ module Indexing
|
|
68
49
|
exact.delete
|
69
50
|
partial.delete
|
70
51
|
end
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
52
|
+
|
53
|
+
def index
|
54
|
+
prepare_index_directory
|
55
|
+
indexer.index
|
56
|
+
end
|
76
57
|
|
77
58
|
# Generates all caches for this category.
|
78
59
|
#
|
79
60
|
def cache
|
80
|
-
|
61
|
+
prepare_index_directory
|
81
62
|
generate_caches
|
82
63
|
end
|
83
64
|
def generate_caches
|
@@ -101,34 +82,6 @@ module Indexing
|
|
101
82
|
partial.dump
|
102
83
|
end
|
103
84
|
|
104
|
-
# TODO Partially move to type. Duplicate Code in indexers/field.rb.
|
105
|
-
#
|
106
|
-
# TODO Use the Files object.
|
107
|
-
#
|
108
|
-
def search_index_root
|
109
|
-
File.join PICKY_ROOT, 'index'
|
110
|
-
end
|
111
|
-
def cache_directory
|
112
|
-
File.join search_index_root, PICKY_ENVIRONMENT, type.name.to_s
|
113
|
-
end
|
114
|
-
def search_index_file_name
|
115
|
-
File.join cache_directory, "prepared_#{name}_index.txt"
|
116
|
-
end
|
117
|
-
def index
|
118
|
-
prepare_cache_directory
|
119
|
-
# files.create_directory # TODO Make this possible!
|
120
|
-
indexer.index
|
121
|
-
end
|
122
|
-
def prepare_cache_directory
|
123
|
-
FileUtils.mkdir_p cache_directory
|
124
|
-
end
|
125
|
-
def indexer
|
126
|
-
@indexer || @indexer = @indexer_class.new(type, self)
|
127
|
-
end
|
128
|
-
def virtual?
|
129
|
-
!!virtual
|
130
|
-
end
|
131
|
-
|
132
85
|
end
|
133
86
|
|
134
87
|
end
|
@@ -1,6 +1,6 @@
|
|
1
1
|
module Indexing
|
2
2
|
|
3
|
-
class
|
3
|
+
class Index
|
4
4
|
|
5
5
|
attr_reader :name, :source, :categories, :after_indexing
|
6
6
|
|
@@ -30,8 +30,18 @@ module Indexing
|
|
30
30
|
|
31
31
|
# TODO Spec. Doc.
|
32
32
|
#
|
33
|
-
def add_category
|
34
|
-
|
33
|
+
def add_category category_name, options = {}
|
34
|
+
options = default_category_options.merge options
|
35
|
+
|
36
|
+
new_category = Category.new category_name, self, options
|
37
|
+
categories << new_category
|
38
|
+
new_category
|
39
|
+
end
|
40
|
+
|
41
|
+
# By default, the category uses the index's source.
|
42
|
+
#
|
43
|
+
def default_category_options
|
44
|
+
{ :source => @source }
|
35
45
|
end
|
36
46
|
|
37
47
|
# Indexing.
|