picky 2.6.0 → 2.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +4 -4
- data/lib/picky/application.rb +6 -7
- data/lib/picky/backend/{backend.rb → base.rb} +31 -14
- data/lib/picky/backend/file/basic.rb +12 -4
- data/lib/picky/backend/file/json.rb +5 -5
- data/lib/picky/backend/file/text.rb +1 -1
- data/lib/picky/backend/files.rb +3 -9
- data/lib/picky/backend/redis/basic.rb +8 -0
- data/lib/picky/backend/redis/list_hash.rb +5 -5
- data/lib/picky/backend/redis/string_hash.rb +5 -5
- data/lib/picky/backend/redis.rb +5 -5
- data/lib/picky/bundle.rb +62 -0
- data/lib/picky/categories.rb +10 -9
- data/lib/picky/categories_indexed.rb +12 -7
- data/lib/picky/categories_indexing.rb +7 -9
- data/lib/picky/category.rb +38 -26
- data/lib/picky/category_indexed.rb +4 -20
- data/lib/picky/category_indexing.rb +71 -68
- data/lib/picky/generators/base.rb +6 -6
- data/lib/picky/generators/partial/substring.rb +28 -26
- data/lib/picky/generators/partial_generator.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +5 -5
- data/lib/picky/generators/similarity_generator.rb +2 -2
- data/lib/picky/generators/weights/logarithmic.rb +3 -3
- data/lib/picky/generators/weights_generator.rb +2 -2
- data/lib/picky/index/base.rb +13 -10
- data/lib/picky/index/base_indexed.rb +2 -0
- data/lib/picky/index/base_indexing.rb +65 -57
- data/lib/picky/indexed/bundle/base.rb +21 -86
- data/lib/picky/indexed/bundle/memory.rb +5 -12
- data/lib/picky/indexed/bundle/redis.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +3 -3
- data/lib/picky/indexers/base.rb +20 -3
- data/lib/picky/indexers/parallel.rb +32 -14
- data/lib/picky/indexers/serial.rb +29 -26
- data/lib/picky/indexes.rb +5 -3
- data/lib/picky/indexes_indexed.rb +3 -15
- data/lib/picky/indexes_indexing.rb +18 -21
- data/lib/picky/indexing/bundle/base.rb +64 -45
- data/lib/picky/indexing/bundle/memory.rb +0 -4
- data/lib/picky/loader.rb +7 -6
- data/lib/picky/query/allocation.rb +3 -3
- data/lib/picky/query/token.rb +5 -1
- data/lib/picky/search.rb +5 -0
- data/lib/picky/sources/base.rb +21 -2
- data/lib/picky/sources/db.rb +0 -7
- data/lib/picky/statistics.rb +9 -12
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/checks.rake +8 -6
- data/lib/tasks/index.rake +14 -20
- data/lib/tasks/server.rake +18 -2
- data/lib/tasks/statistics.rake +27 -14
- data/lib/tasks/todo.rake +2 -2
- data/lib/tasks/try.rake +12 -27
- data/spec/lib/application_spec.rb +1 -1
- data/spec/lib/backend/file/basic_spec.rb +6 -6
- data/spec/lib/backend/file/json_spec.rb +11 -6
- data/spec/lib/backend/file/marshal_spec.rb +11 -6
- data/spec/lib/backend/files_spec.rb +21 -7
- data/spec/lib/backend/redis/basic_spec.rb +6 -0
- data/spec/lib/backend/redis/list_hash_spec.rb +9 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +7 -1
- data/spec/lib/backend/redis_spec.rb +22 -12
- data/spec/lib/categories_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +12 -33
- data/spec/lib/category_spec.rb +22 -0
- data/spec/lib/index/base_indexing_spec.rb +30 -0
- data/spec/lib/indexed/bundle/memory_spec.rb +13 -20
- data/spec/lib/indexers/base_spec.rb +39 -4
- data/spec/lib/indexers/parallel_spec.rb +2 -10
- data/spec/lib/indexers/serial_spec.rb +11 -26
- data/spec/lib/indexes_class_spec.rb +4 -4
- data/spec/lib/indexes_indexed_spec.rb +2 -2
- data/spec/lib/indexes_indexing_spec.rb +6 -10
- data/spec/lib/indexes_spec.rb +3 -3
- data/spec/lib/indexing/bundle/{super_base_spec.rb → base_spec.rb} +2 -2
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +3 -3
- data/spec/lib/indexing/bundle/memory_spec.rb +16 -14
- data/spec/lib/indexing/bundle/redis_spec.rb +18 -16
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/token_spec.rb +5 -7
- data/spec/lib/sources/base_spec.rb +53 -0
- data/spec/lib/sources/db_spec.rb +0 -7
- metadata +11 -12
- data/lib/picky/indexers/solr.rb +0 -56
- data/lib/picky/indexing/bundle/super_base.rb +0 -61
- data/lib/picky/solr/schema_generator.rb +0 -74
- data/lib/tasks/search.rake +0 -9
- data/lib/tasks/shortcuts.rake +0 -32
- data/lib/tasks/solr.rake +0 -36
@@ -2,30 +2,23 @@
|
|
2
2
|
#
|
3
3
|
class Indexes
|
4
4
|
|
5
|
-
instance_delegate :
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
:
|
17
|
-
:backup_caches,
|
18
|
-
:restore_caches,
|
19
|
-
:check_caches,
|
20
|
-
:clear_caches,
|
21
|
-
:create_directory_structure,
|
5
|
+
instance_delegate :index,
|
6
|
+
:check,
|
7
|
+
:clear,
|
8
|
+
:backup,
|
9
|
+
:restore,
|
10
|
+
:index_for_tests,
|
11
|
+
:tokenizer
|
12
|
+
|
13
|
+
each_delegate :check,
|
14
|
+
:clear,
|
15
|
+
:backup,
|
16
|
+
:restore,
|
22
17
|
:to => :indexes
|
23
18
|
|
24
19
|
# Runs the indexers in parallel (prepare + cache).
|
25
20
|
#
|
26
21
|
def index randomly = true
|
27
|
-
take_snapshot
|
28
|
-
|
29
22
|
# Run in parallel.
|
30
23
|
#
|
31
24
|
timed_exclaim "Indexing using #{Cores.max_processors} processors, in #{randomly ? 'random' : 'given'} order."
|
@@ -41,9 +34,13 @@ class Indexes
|
|
41
34
|
# without forking and shouting ;)
|
42
35
|
#
|
43
36
|
def index_for_tests
|
44
|
-
take_snapshot
|
45
|
-
|
46
37
|
indexes.each(&:index)
|
47
38
|
end
|
48
39
|
|
40
|
+
# TODO Doc. Spec. Split into Query/Index.
|
41
|
+
#
|
42
|
+
def tokenizer
|
43
|
+
Tokenizers::Index.default
|
44
|
+
end
|
45
|
+
|
49
46
|
end
|
@@ -1,30 +1,48 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
1
|
module Indexing # :nodoc:all
|
4
2
|
|
3
|
+
# A Bundle is a number of indexes
|
4
|
+
# per [index, category] combination.
|
5
|
+
#
|
6
|
+
# At most, there are three indexes:
|
7
|
+
# * *core* index (always used)
|
8
|
+
# * *weights* index (always used)
|
9
|
+
# * *similarity* index (used with similarity)
|
10
|
+
#
|
11
|
+
# In Picky, indexing is separated from the index
|
12
|
+
# handling itself through a parallel structure.
|
13
|
+
#
|
14
|
+
# Both use methods provided by this base class, but
|
15
|
+
# have very different goals:
|
16
|
+
#
|
17
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
18
|
+
# and providing helper functions to e.g. check the indexes.
|
19
|
+
#
|
20
|
+
# * *Index*::*Bundle* is concerned with loading these index files into
|
21
|
+
# memory and looking up search data as fast as possible.
|
22
|
+
#
|
5
23
|
module Bundle
|
6
24
|
|
7
25
|
# This is the indexing bundle.
|
26
|
+
#
|
8
27
|
# It does all menial tasks that have nothing to do
|
9
28
|
# with the actual index running etc.
|
10
29
|
#
|
11
|
-
class Base <
|
30
|
+
class Base < ::Bundle
|
12
31
|
|
13
|
-
attr_accessor :partial_strategy,
|
32
|
+
attr_accessor :partial_strategy,
|
33
|
+
:weights_strategy
|
14
34
|
|
15
|
-
|
16
|
-
#
|
17
|
-
def initialize name, category, similarity_strategy, partial_strategy, weights_strategy
|
35
|
+
def initialize name, category, weights_strategy, partial_strategy, similarity_strategy
|
18
36
|
super name, category, similarity_strategy
|
19
37
|
|
20
|
-
@
|
21
|
-
@
|
38
|
+
@weights_strategy = weights_strategy
|
39
|
+
@partial_strategy = partial_strategy
|
22
40
|
end
|
23
41
|
|
24
42
|
# Sets up a piece of the index for the given token.
|
25
43
|
#
|
26
|
-
def
|
27
|
-
|
44
|
+
def initialize_inverted_index_for token
|
45
|
+
self.inverted[token] ||= []
|
28
46
|
end
|
29
47
|
|
30
48
|
# Generation
|
@@ -36,7 +54,7 @@ module Indexing # :nodoc:all
|
|
36
54
|
# * Dumps all the indexes into files.
|
37
55
|
#
|
38
56
|
def generate_caches_from_source
|
39
|
-
|
57
|
+
load_from_prepared_index_file
|
40
58
|
generate_caches_from_memory
|
41
59
|
end
|
42
60
|
# Generates derived indexes from the index and dumps.
|
@@ -60,13 +78,13 @@ module Indexing # :nodoc:all
|
|
60
78
|
|
61
79
|
# Load the data from the prepared index file.
|
62
80
|
#
|
63
|
-
def
|
64
|
-
|
81
|
+
def load_from_prepared_index_file
|
82
|
+
load_from_prepared_index_generation_message
|
65
83
|
clear
|
66
84
|
retrieve
|
67
85
|
end
|
68
|
-
def
|
69
|
-
timed_exclaim %Q{"#{identifier}": Loading
|
86
|
+
def load_from_prepared_index_generation_message
|
87
|
+
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
70
88
|
end
|
71
89
|
# Retrieves the prepared index data into the index.
|
72
90
|
#
|
@@ -77,8 +95,8 @@ module Indexing # :nodoc:all
|
|
77
95
|
def retrieve
|
78
96
|
key_format = self[:key_format] || :to_i
|
79
97
|
files.retrieve do |id, token|
|
80
|
-
|
81
|
-
|
98
|
+
initialize_inverted_index_for token
|
99
|
+
self.inverted[token] << id.send(key_format)
|
82
100
|
end
|
83
101
|
end
|
84
102
|
|
@@ -86,63 +104,64 @@ module Indexing # :nodoc:all
|
|
86
104
|
# partial caching strategy of this bundle.
|
87
105
|
#
|
88
106
|
def generate_partial
|
89
|
-
generator = Generators::PartialGenerator.new self.
|
90
|
-
self.
|
107
|
+
generator = Generators::PartialGenerator.new self.inverted
|
108
|
+
self.inverted = generator.generate self.partial_strategy
|
91
109
|
end
|
92
|
-
# Generate a partial index from the given exact index.
|
110
|
+
# Generate a partial index from the given exact inverted index.
|
93
111
|
#
|
94
|
-
def generate_partial_from
|
112
|
+
def generate_partial_from exact_inverted_index
|
95
113
|
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
96
|
-
self.
|
114
|
+
self.inverted = exact_inverted_index
|
97
115
|
self.generate_partial
|
98
116
|
self
|
99
117
|
end
|
100
|
-
# Generates a new similarity index (writes its index) using the
|
101
|
-
# given similarity caching strategy.
|
102
|
-
#
|
103
|
-
def generate_similarity
|
104
|
-
generator = Generators::SimilarityGenerator.new self.index
|
105
|
-
self.similarity = generator.generate self.similarity_strategy
|
106
|
-
end
|
107
118
|
# Generates a new weights index (writes its index) using the
|
108
119
|
# given weight caching strategy.
|
109
120
|
#
|
110
121
|
def generate_weights
|
111
|
-
generator = Generators::WeightsGenerator.new self.
|
122
|
+
generator = Generators::WeightsGenerator.new self.inverted
|
112
123
|
self.weights = generator.generate self.weights_strategy
|
113
124
|
end
|
125
|
+
# Generates a new similarity index (writes its index) using the
|
126
|
+
# given similarity caching strategy.
|
127
|
+
#
|
128
|
+
def generate_similarity
|
129
|
+
generator = Generators::SimilarityGenerator.new self.inverted
|
130
|
+
self.similarity = generator.generate self.similarity_strategy
|
131
|
+
end
|
114
132
|
|
115
133
|
# Saves the indexes in a dump file.
|
116
134
|
#
|
117
135
|
def dump
|
118
|
-
|
136
|
+
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
137
|
+
dump_inverted
|
119
138
|
dump_similarity
|
120
139
|
dump_weights
|
121
140
|
dump_configuration
|
122
141
|
end
|
123
142
|
# Dumps the core index.
|
124
143
|
#
|
125
|
-
def
|
126
|
-
timed_exclaim %Q{"#{identifier}": Dumping index.}
|
127
|
-
backend.
|
144
|
+
def dump_inverted
|
145
|
+
# timed_exclaim %Q{"#{identifier}": Dumping inverted index.}
|
146
|
+
backend.dump_inverted self.inverted
|
128
147
|
end
|
129
148
|
# Dumps the weights index.
|
130
149
|
#
|
131
150
|
def dump_weights
|
132
|
-
timed_exclaim %Q{"#{identifier}": Dumping weights
|
133
|
-
backend.dump_weights weights
|
151
|
+
# timed_exclaim %Q{"#{identifier}": Dumping index weights.}
|
152
|
+
backend.dump_weights self.weights
|
134
153
|
end
|
135
154
|
# Dumps the similarity index.
|
136
155
|
#
|
137
156
|
def dump_similarity
|
138
|
-
timed_exclaim %Q{"#{identifier}": Dumping similarity
|
139
|
-
backend.dump_similarity similarity
|
157
|
+
# timed_exclaim %Q{"#{identifier}": Dumping similarity index.}
|
158
|
+
backend.dump_similarity self.similarity
|
140
159
|
end
|
141
160
|
# Dumps the configuration.
|
142
161
|
#
|
143
162
|
def dump_configuration
|
144
|
-
timed_exclaim %Q{"#{identifier}": Dumping configuration
|
145
|
-
backend.dump_configuration configuration
|
163
|
+
# timed_exclaim %Q{"#{identifier}": Dumping configuration.}
|
164
|
+
backend.dump_configuration self.configuration
|
146
165
|
end
|
147
166
|
|
148
167
|
# Alerts the user if an index is missing.
|
@@ -195,14 +214,14 @@ module Indexing # :nodoc:all
|
|
195
214
|
# Warns the user if the core or weights indexes are small.
|
196
215
|
#
|
197
216
|
def warn_if_index_small
|
198
|
-
warn_cache_small :
|
199
|
-
warn_cache_small :weights
|
217
|
+
warn_cache_small :inverted if backend.inverted_cache_small?
|
218
|
+
warn_cache_small :weights if backend.weights_cache_small?
|
200
219
|
end
|
201
220
|
# Alerts the user if the core or weights indexes are not there.
|
202
221
|
#
|
203
222
|
def raise_unless_index_ok
|
204
|
-
raise_cache_missing :
|
205
|
-
raise_cache_missing :weights
|
223
|
+
raise_cache_missing :inverted unless backend.inverted_cache_ok?
|
224
|
+
raise_cache_missing :weights unless backend.weights_cache_ok?
|
206
225
|
end
|
207
226
|
|
208
227
|
end
|
data/lib/picky/loader.rb
CHANGED
@@ -145,7 +145,7 @@ module Loader # :nodoc:all
|
|
145
145
|
|
146
146
|
# Index store handling.
|
147
147
|
#
|
148
|
-
load_relative 'backend/
|
148
|
+
load_relative 'backend/base'
|
149
149
|
|
150
150
|
load_relative 'backend/redis'
|
151
151
|
load_relative 'backend/redis/basic'
|
@@ -158,10 +158,11 @@ module Loader # :nodoc:all
|
|
158
158
|
load_relative 'backend/file/json'
|
159
159
|
|
160
160
|
load_relative 'backend/files'
|
161
|
-
|
161
|
+
|
162
162
|
# Indexing and Indexed things.
|
163
163
|
#
|
164
|
-
load_relative '
|
164
|
+
load_relative 'bundle'
|
165
|
+
|
165
166
|
load_relative 'indexing/bundle/base'
|
166
167
|
load_relative 'indexing/bundle/memory'
|
167
168
|
load_relative 'indexing/bundle/redis'
|
@@ -260,17 +261,17 @@ module Loader # :nodoc:all
|
|
260
261
|
load_relative 'category'
|
261
262
|
load_relative 'category_indexed'
|
262
263
|
load_relative 'category_indexing'
|
263
|
-
|
264
|
+
|
264
265
|
load_relative 'categories'
|
265
266
|
load_relative 'categories_indexed'
|
266
267
|
load_relative 'categories_indexing'
|
267
|
-
|
268
|
+
|
268
269
|
load_relative 'index/base'
|
269
270
|
load_relative 'index/base_indexed'
|
270
271
|
load_relative 'index/base_indexing'
|
271
272
|
load_relative 'index/memory'
|
272
273
|
load_relative 'index/redis'
|
273
|
-
|
274
|
+
|
274
275
|
load_relative 'indexes'
|
275
276
|
load_relative 'indexes_indexed'
|
276
277
|
load_relative 'indexes_indexing'
|
@@ -6,7 +6,7 @@ module Query
|
|
6
6
|
class Allocation # :nodoc:all
|
7
7
|
|
8
8
|
attr_reader :count, :ids, :score, :combinations, :result_identifier
|
9
|
-
|
9
|
+
|
10
10
|
#
|
11
11
|
#
|
12
12
|
def initialize combinations, result_identifier
|
@@ -76,9 +76,9 @@ module Query
|
|
76
76
|
#
|
77
77
|
#
|
78
78
|
def to_s
|
79
|
-
"Allocation
|
79
|
+
"Allocation(#{to_result})"
|
80
80
|
end
|
81
81
|
|
82
82
|
end
|
83
|
-
|
83
|
+
|
84
84
|
end
|
data/lib/picky/query/token.rb
CHANGED
@@ -17,6 +17,8 @@ module Query
|
|
17
17
|
#
|
18
18
|
# Note: Use this if you do not want a qualified and normalized token.
|
19
19
|
#
|
20
|
+
# TODO text, qualifiers
|
21
|
+
#
|
20
22
|
def initialize text
|
21
23
|
@text = text
|
22
24
|
end
|
@@ -50,6 +52,8 @@ module Query
|
|
50
52
|
#
|
51
53
|
# Note: Removes the qualifier if it is not allowed.
|
52
54
|
#
|
55
|
+
# TODO Extract this since it is Search-based.
|
56
|
+
#
|
53
57
|
def qualify
|
54
58
|
@qualifiers, @text = split @text
|
55
59
|
@qualifiers && @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }.compact!
|
@@ -159,7 +163,7 @@ module Query
|
|
159
163
|
# Internal identifier.
|
160
164
|
#
|
161
165
|
def identifier
|
162
|
-
"#{similar?? :similarity : :
|
166
|
+
"#{similar?? :similarity : :inverted}:#{@text}"
|
163
167
|
end
|
164
168
|
|
165
169
|
# If the originals & the text are the same, they are the same.
|
data/lib/picky/search.rb
CHANGED
@@ -24,6 +24,7 @@ class Search
|
|
24
24
|
# * weights: A hash of weights, or a Query::Weights object.
|
25
25
|
#
|
26
26
|
# TODO Add identifiers_to_remove (rename) and reduce_allocations_to_amount (rename).
|
27
|
+
# TODO categories_to_remove ?
|
27
28
|
#
|
28
29
|
# It is also possible to define the tokenizer and weights like so.
|
29
30
|
# Example:
|
@@ -57,9 +58,13 @@ class Search
|
|
57
58
|
options && Tokenizers::Query.new(options)
|
58
59
|
end
|
59
60
|
end
|
61
|
+
|
62
|
+
# Returns the tokenizer if set or if not, the query tokenizer.
|
63
|
+
#
|
60
64
|
def tokenizer
|
61
65
|
@tokenizer || Tokenizers::Query.default
|
62
66
|
end
|
67
|
+
|
63
68
|
# TODO Doc. Spec.
|
64
69
|
#
|
65
70
|
# Example:
|
data/lib/picky/sources/base.rb
CHANGED
@@ -18,7 +18,7 @@ module Sources
|
|
18
18
|
# A source has 1 mandatory and 2 optional methods:
|
19
19
|
# * connect_backend (_optional_): called once for each type/category pair.
|
20
20
|
# * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
|
21
|
-
# * take_snapshot (_optional_): called once for each
|
21
|
+
# * take_snapshot (_optional_): called once for each index or category (if indexing a single category).
|
22
22
|
#
|
23
23
|
# This base class "implements" all these methods, but they don't do anything.
|
24
24
|
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
@@ -56,7 +56,9 @@ module Sources
|
|
56
56
|
|
57
57
|
# Used to take a snapshot of your data if it is fast changing.
|
58
58
|
#
|
59
|
-
# Called once for each
|
59
|
+
# Called once for each index before harvesting.
|
60
|
+
# If it has been called on a source already by an index,
|
61
|
+
# it won't be called again for a category inside that index.
|
60
62
|
#
|
61
63
|
# Example:
|
62
64
|
# * In a DB source, a table based on the source's select statement is created.
|
@@ -65,6 +67,23 @@ module Sources
|
|
65
67
|
|
66
68
|
end
|
67
69
|
|
70
|
+
# Runs the given block, taking a snapshot first unless one has been taken already.
|
71
|
+
#
|
72
|
+
# Example:
|
73
|
+
# * In a DB source, a table based on the source's select statement is created.
|
74
|
+
#
|
75
|
+
def with_snapshot index
|
76
|
+
connect_backend
|
77
|
+
@snapshot_taken ||= 0
|
78
|
+
if @snapshot_taken.zero?
|
79
|
+
timed_exclaim %Q{"#{index.identifier}": Taking snapshot of source data (if supported).}
|
80
|
+
take_snapshot index
|
81
|
+
end
|
82
|
+
@snapshot_taken += 1
|
83
|
+
yield
|
84
|
+
@snapshot_taken -= 1
|
85
|
+
end
|
86
|
+
|
68
87
|
end
|
69
88
|
|
70
89
|
end
|
data/lib/picky/sources/db.rb
CHANGED
@@ -85,8 +85,6 @@ module Sources
|
|
85
85
|
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
86
86
|
#
|
87
87
|
def take_snapshot index
|
88
|
-
connect_backend
|
89
|
-
|
90
88
|
origin = snapshot_table_name index.name
|
91
89
|
on_database = database.connection
|
92
90
|
|
@@ -110,8 +108,6 @@ module Sources
|
|
110
108
|
# Counts all the entries that are used for the index.
|
111
109
|
#
|
112
110
|
def count index_name
|
113
|
-
connect_backend
|
114
|
-
|
115
111
|
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
116
112
|
end
|
117
113
|
|
@@ -124,8 +120,6 @@ module Sources
|
|
124
120
|
# Harvests the data to index in chunks.
|
125
121
|
#
|
126
122
|
def harvest category, &block
|
127
|
-
connect_backend
|
128
|
-
|
129
123
|
(0..count(category.index_name)).step(chunksize) do |offset|
|
130
124
|
get_data category, offset, &block
|
131
125
|
end
|
@@ -134,7 +128,6 @@ module Sources
|
|
134
128
|
# Gets the data from the backend.
|
135
129
|
#
|
136
130
|
def get_data category, offset, &block # :nodoc:
|
137
|
-
|
138
131
|
select_statement = harvest_statement_with_offset category, offset
|
139
132
|
|
140
133
|
# TODO Rewrite ASAP.
|
data/lib/picky/statistics.rb
CHANGED
@@ -5,8 +5,8 @@
|
|
5
5
|
#
|
6
6
|
class Statistics # :nodoc:all
|
7
7
|
|
8
|
-
def
|
9
|
-
@
|
8
|
+
def initialize
|
9
|
+
@indexes = ["\033[1mIndexes analysis\033[m:"]
|
10
10
|
end
|
11
11
|
|
12
12
|
def preamble
|
@@ -28,18 +28,15 @@ PREAMBLE
|
|
28
28
|
|
29
29
|
# Gathers information about the indexes.
|
30
30
|
#
|
31
|
-
def analyze
|
32
|
-
|
33
|
-
|
34
|
-
@indexes = ["\033[1mIndexes analysis\033[m:"]
|
35
|
-
Indexes.analyze.each_pair do |name, index|
|
31
|
+
def analyze object
|
32
|
+
object.each_category do |category|
|
36
33
|
@indexes << <<-ANALYSIS
|
37
|
-
#{"#{
|
38
|
-
#{"
|
39
|
-
#{"
|
34
|
+
#{"#{category.index_name}".indented_to_s}\n
|
35
|
+
#{"#{category.name}".indented_to_s(4)}\n
|
36
|
+
#{"exact\n#{Analyzer.new.analyze(category.indexed_exact).indented_to_s}".indented_to_s(6)}\n
|
37
|
+
#{"partial\n#{Analyzer.new.analyze(category.indexed_partial).indented_to_s}".indented_to_s(6)}
|
40
38
|
ANALYSIS
|
41
39
|
end
|
42
|
-
@indexes = @indexes.join "\n"
|
43
40
|
end
|
44
41
|
|
45
42
|
# Outputs all gathered statistics.
|
@@ -49,7 +46,7 @@ ANALYSIS
|
|
49
46
|
|
50
47
|
Picky Configuration:
|
51
48
|
|
52
|
-
#{[@preamble, @application, @indexes].compact.join("\n")}
|
49
|
+
#{[@preamble, @application, @indexes.join("\n")].compact.join("\n")}
|
53
50
|
STATS
|
54
51
|
end
|
55
52
|
|
data/lib/tasks/checks.rake
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
# Checks to help the user.
|
2
2
|
#
|
3
|
-
|
3
|
+
desc 'Checks if index files are small/missing (index, category optional).'
|
4
|
+
task :check, [:index, :category] => :application do |_, options|
|
5
|
+
index, category = options.index, options.category
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
7
|
+
specific = Indexes
|
8
|
+
specific = specific[index] if index
|
9
|
+
specific = specific[category] if category
|
10
|
+
specific.check
|
10
11
|
|
12
|
+
puts "All checked indexes look ok."
|
11
13
|
end
|
data/lib/tasks/index.rake
CHANGED
@@ -1,32 +1,26 @@
|
|
1
1
|
# Indexing tasks.
|
2
2
|
#
|
3
|
+
desc "Generate the index (index, category optional)."
|
4
|
+
task :index, [:index, :category] => :application do |_, options|
|
5
|
+
index, category = options.index, options.category
|
6
|
+
|
7
|
+
specific = Indexes
|
8
|
+
specific = specific[index] if index
|
9
|
+
specific = specific[category] if category
|
10
|
+
specific.index
|
11
|
+
end
|
12
|
+
|
3
13
|
namespace :index do
|
4
14
|
|
5
|
-
|
15
|
+
# Advanced usage.
|
16
|
+
#
|
17
|
+
# desc "Takes a snapshot, indexes, and caches in random order."
|
6
18
|
task :randomly => :application do
|
7
19
|
Indexes.index true
|
8
20
|
end
|
9
|
-
desc "Takes a snapshot, indexes, and caches in order given."
|
21
|
+
# desc "Takes a snapshot, indexes, and caches in order given."
|
10
22
|
task :ordered => :application do
|
11
23
|
Indexes.index false
|
12
24
|
end
|
13
25
|
|
14
|
-
# desc "Generates the index snapshots."
|
15
|
-
#
|
16
|
-
# Note: Hidden since it is only needed by pro users.
|
17
|
-
#
|
18
|
-
# desc "Generate the data snapshots (intermediate table on a DB source)"
|
19
|
-
task :generate_snapshots => :application do
|
20
|
-
Indexes.take_snapshot
|
21
|
-
end
|
22
|
-
|
23
|
-
desc "Generates a specific index from index snapshots (category optional)."
|
24
|
-
task :specific, [:index, :category] => :application do |_, options|
|
25
|
-
index, category = options.index, options.category
|
26
|
-
|
27
|
-
specific = Indexes[index]
|
28
|
-
specific = specific[category] if category
|
29
|
-
specific.index
|
30
|
-
end
|
31
|
-
|
32
26
|
end
|
data/lib/tasks/server.rake
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Server tasks, like starting/stopping/restarting.
|
2
2
|
#
|
3
|
+
desc "Start the server."
|
4
|
+
task :start do
|
5
|
+
Rake::Task[:'server:start'].invoke
|
6
|
+
end
|
7
|
+
desc "Stop the server."
|
8
|
+
task :stop do
|
9
|
+
Rake::Task[:'server:stop'].invoke
|
10
|
+
end
|
11
|
+
|
3
12
|
namespace :server do
|
4
13
|
|
5
14
|
# desc "Start the unicorns. (Wehee!)"
|
@@ -30,8 +39,15 @@ namespace :server do
|
|
30
39
|
end
|
31
40
|
|
32
41
|
def current_pid
|
33
|
-
|
34
|
-
pid
|
42
|
+
pidfile = 'tmp/pids/unicorn.pid'
|
43
|
+
pid = `cat #{File.join(PICKY_ROOT, pidfile)}`
|
44
|
+
if pid.blank?
|
45
|
+
puts
|
46
|
+
puts "No server running (no #{pidfile} found)."
|
47
|
+
puts
|
48
|
+
else
|
49
|
+
pid.chomp
|
50
|
+
end
|
35
51
|
end
|
36
52
|
|
37
53
|
end
|
data/lib/tasks/statistics.rake
CHANGED
@@ -1,21 +1,34 @@
|
|
1
1
|
# Statistics tasks.
|
2
2
|
#
|
3
|
+
desc "Analyzes indexes (index, category optional)."
|
4
|
+
task :analyze, [:index, :category] => :'stats:prepare' do |_, options|
|
5
|
+
index, category = options.index, options.category
|
6
|
+
|
7
|
+
specific = Indexes
|
8
|
+
specific = specific[index] if index
|
9
|
+
specific = specific[category] if category
|
10
|
+
|
11
|
+
statistics = Statistics.new
|
12
|
+
|
13
|
+
begin
|
14
|
+
statistics.analyze specific
|
15
|
+
rescue StandardError
|
16
|
+
puts "\n\033[31mNote: rake analyze needs prepared indexes. Run rake index first.\033[m\n\n"
|
17
|
+
raise
|
18
|
+
end
|
19
|
+
|
20
|
+
puts statistics
|
21
|
+
end
|
22
|
+
|
23
|
+
task :stats => :'stats:prepare' do
|
24
|
+
stats = Statistics.new
|
25
|
+
puts stats.application
|
26
|
+
end
|
27
|
+
|
3
28
|
namespace :stats do
|
29
|
+
|
4
30
|
task :prepare => :application do
|
5
31
|
require File.expand_path('../../picky/statistics', __FILE__)
|
6
|
-
statistics = Statistics.instance
|
7
|
-
end
|
8
|
-
task :app => :prepare do
|
9
|
-
Statistics.instance.application
|
10
|
-
puts Statistics.instance
|
11
|
-
end
|
12
|
-
task :analyze => :prepare do
|
13
|
-
begin
|
14
|
-
Statistics.instance.analyze
|
15
|
-
rescue StandardError
|
16
|
-
puts "\n\033[31mNote: rake analyze needs prepared indexes. Run rake index first.\033[m\n\n"
|
17
|
-
raise
|
18
|
-
end
|
19
|
-
puts Statistics.instance
|
20
32
|
end
|
33
|
+
|
21
34
|
end
|
data/lib/tasks/todo.rake
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
desc "Finds where Picky still needs input from you."
|
2
2
|
task :"to#{}do" do
|
3
|
-
if system "grep -e '
|
3
|
+
if system "grep -e 'TO#{}DO.*' -n --color=always -R *"
|
4
4
|
puts "Picky needs a bit of input from you there. Thanks."
|
5
5
|
else
|
6
|
-
puts "Picky seems to be fine (no
|
6
|
+
puts "Picky seems to be fine (no TO#{}DOs found)."
|
7
7
|
end
|
8
8
|
end
|