picky 2.6.0 → 2.7.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +4 -4
- data/lib/picky/application.rb +6 -7
- data/lib/picky/backend/{backend.rb → base.rb} +31 -14
- data/lib/picky/backend/file/basic.rb +12 -4
- data/lib/picky/backend/file/json.rb +5 -5
- data/lib/picky/backend/file/text.rb +1 -1
- data/lib/picky/backend/files.rb +3 -9
- data/lib/picky/backend/redis/basic.rb +8 -0
- data/lib/picky/backend/redis/list_hash.rb +5 -5
- data/lib/picky/backend/redis/string_hash.rb +5 -5
- data/lib/picky/backend/redis.rb +5 -5
- data/lib/picky/bundle.rb +62 -0
- data/lib/picky/categories.rb +10 -9
- data/lib/picky/categories_indexed.rb +12 -7
- data/lib/picky/categories_indexing.rb +7 -9
- data/lib/picky/category.rb +38 -26
- data/lib/picky/category_indexed.rb +4 -20
- data/lib/picky/category_indexing.rb +71 -68
- data/lib/picky/generators/base.rb +6 -6
- data/lib/picky/generators/partial/substring.rb +28 -26
- data/lib/picky/generators/partial_generator.rb +3 -3
- data/lib/picky/generators/similarity/phonetic.rb +5 -5
- data/lib/picky/generators/similarity_generator.rb +2 -2
- data/lib/picky/generators/weights/logarithmic.rb +3 -3
- data/lib/picky/generators/weights_generator.rb +2 -2
- data/lib/picky/index/base.rb +13 -10
- data/lib/picky/index/base_indexed.rb +2 -0
- data/lib/picky/index/base_indexing.rb +65 -57
- data/lib/picky/indexed/bundle/base.rb +21 -86
- data/lib/picky/indexed/bundle/memory.rb +5 -12
- data/lib/picky/indexed/bundle/redis.rb +42 -0
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +3 -3
- data/lib/picky/indexers/base.rb +20 -3
- data/lib/picky/indexers/parallel.rb +32 -14
- data/lib/picky/indexers/serial.rb +29 -26
- data/lib/picky/indexes.rb +5 -3
- data/lib/picky/indexes_indexed.rb +3 -15
- data/lib/picky/indexes_indexing.rb +18 -21
- data/lib/picky/indexing/bundle/base.rb +64 -45
- data/lib/picky/indexing/bundle/memory.rb +0 -4
- data/lib/picky/loader.rb +7 -6
- data/lib/picky/query/allocation.rb +3 -3
- data/lib/picky/query/token.rb +5 -1
- data/lib/picky/search.rb +5 -0
- data/lib/picky/sources/base.rb +21 -2
- data/lib/picky/sources/db.rb +0 -7
- data/lib/picky/statistics.rb +9 -12
- data/lib/picky/tokenizers/location.rb +1 -1
- data/lib/tasks/checks.rake +8 -6
- data/lib/tasks/index.rake +14 -20
- data/lib/tasks/server.rake +18 -2
- data/lib/tasks/statistics.rake +27 -14
- data/lib/tasks/todo.rake +2 -2
- data/lib/tasks/try.rake +12 -27
- data/spec/lib/application_spec.rb +1 -1
- data/spec/lib/backend/file/basic_spec.rb +6 -6
- data/spec/lib/backend/file/json_spec.rb +11 -6
- data/spec/lib/backend/file/marshal_spec.rb +11 -6
- data/spec/lib/backend/files_spec.rb +21 -7
- data/spec/lib/backend/redis/basic_spec.rb +6 -0
- data/spec/lib/backend/redis/list_hash_spec.rb +9 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +7 -1
- data/spec/lib/backend/redis_spec.rb +22 -12
- data/spec/lib/categories_indexed_spec.rb +2 -2
- data/spec/lib/category_indexing_spec.rb +12 -33
- data/spec/lib/category_spec.rb +22 -0
- data/spec/lib/index/base_indexing_spec.rb +30 -0
- data/spec/lib/indexed/bundle/memory_spec.rb +13 -20
- data/spec/lib/indexers/base_spec.rb +39 -4
- data/spec/lib/indexers/parallel_spec.rb +2 -10
- data/spec/lib/indexers/serial_spec.rb +11 -26
- data/spec/lib/indexes_class_spec.rb +4 -4
- data/spec/lib/indexes_indexed_spec.rb +2 -2
- data/spec/lib/indexes_indexing_spec.rb +6 -10
- data/spec/lib/indexes_spec.rb +3 -3
- data/spec/lib/indexing/bundle/{super_base_spec.rb → base_spec.rb} +2 -2
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +3 -3
- data/spec/lib/indexing/bundle/memory_spec.rb +16 -14
- data/spec/lib/indexing/bundle/redis_spec.rb +18 -16
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/token_spec.rb +5 -7
- data/spec/lib/sources/base_spec.rb +53 -0
- data/spec/lib/sources/db_spec.rb +0 -7
- metadata +11 -12
- data/lib/picky/indexers/solr.rb +0 -56
- data/lib/picky/indexing/bundle/super_base.rb +0 -61
- data/lib/picky/solr/schema_generator.rb +0 -74
- data/lib/tasks/search.rake +0 -9
- data/lib/tasks/shortcuts.rake +0 -32
- data/lib/tasks/solr.rake +0 -36
@@ -2,30 +2,23 @@
|
|
2
2
|
#
|
3
3
|
class Indexes
|
4
4
|
|
5
|
-
instance_delegate :
|
6
|
-
:
|
7
|
-
:
|
8
|
-
:
|
9
|
-
:
|
10
|
-
:
|
11
|
-
:
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
:
|
17
|
-
:backup_caches,
|
18
|
-
:restore_caches,
|
19
|
-
:check_caches,
|
20
|
-
:clear_caches,
|
21
|
-
:create_directory_structure,
|
5
|
+
instance_delegate :index,
|
6
|
+
:check,
|
7
|
+
:clear,
|
8
|
+
:backup,
|
9
|
+
:restore,
|
10
|
+
:index_for_tests,
|
11
|
+
:tokenizer
|
12
|
+
|
13
|
+
each_delegate :check,
|
14
|
+
:clear,
|
15
|
+
:backup,
|
16
|
+
:restore,
|
22
17
|
:to => :indexes
|
23
18
|
|
24
19
|
# Runs the indexers in parallel (prepare + cache).
|
25
20
|
#
|
26
21
|
def index randomly = true
|
27
|
-
take_snapshot
|
28
|
-
|
29
22
|
# Run in parallel.
|
30
23
|
#
|
31
24
|
timed_exclaim "Indexing using #{Cores.max_processors} processors, in #{randomly ? 'random' : 'given'} order."
|
@@ -41,9 +34,13 @@ class Indexes
|
|
41
34
|
# without forking and shouting ;)
|
42
35
|
#
|
43
36
|
def index_for_tests
|
44
|
-
take_snapshot
|
45
|
-
|
46
37
|
indexes.each(&:index)
|
47
38
|
end
|
48
39
|
|
40
|
+
# TODO Doc. Spec. Split into Query/Index.
|
41
|
+
#
|
42
|
+
def tokenizer
|
43
|
+
Tokenizers::Index.default
|
44
|
+
end
|
45
|
+
|
49
46
|
end
|
@@ -1,30 +1,48 @@
|
|
1
|
-
# encoding: utf-8
|
2
|
-
#
|
3
1
|
module Indexing # :nodoc:all
|
4
2
|
|
3
|
+
# A Bundle is a number of indexes
|
4
|
+
# per [index, category] combination.
|
5
|
+
#
|
6
|
+
# At most, there are three indexes:
|
7
|
+
# * *core* index (always used)
|
8
|
+
# * *weights* index (always used)
|
9
|
+
# * *similarity* index (used with similarity)
|
10
|
+
#
|
11
|
+
# In Picky, indexing is separated from the index
|
12
|
+
# handling itself through a parallel structure.
|
13
|
+
#
|
14
|
+
# Both use methods provided by this base class, but
|
15
|
+
# have very different goals:
|
16
|
+
#
|
17
|
+
# * *Indexing*::*Bundle* is just concerned with creating index files
|
18
|
+
# and providing helper functions to e.g. check the indexes.
|
19
|
+
#
|
20
|
+
# * *Index*::*Bundle* is concerned with loading these index files into
|
21
|
+
# memory and looking up search data as fast as possible.
|
22
|
+
#
|
5
23
|
module Bundle
|
6
24
|
|
7
25
|
# This is the indexing bundle.
|
26
|
+
#
|
8
27
|
# It does all menial tasks that have nothing to do
|
9
28
|
# with the actual index running etc.
|
10
29
|
#
|
11
|
-
class Base <
|
30
|
+
class Base < ::Bundle
|
12
31
|
|
13
|
-
attr_accessor :partial_strategy,
|
32
|
+
attr_accessor :partial_strategy,
|
33
|
+
:weights_strategy
|
14
34
|
|
15
|
-
|
16
|
-
#
|
17
|
-
def initialize name, category, similarity_strategy, partial_strategy, weights_strategy
|
35
|
+
def initialize name, category, weights_strategy, partial_strategy, similarity_strategy
|
18
36
|
super name, category, similarity_strategy
|
19
37
|
|
20
|
-
@
|
21
|
-
@
|
38
|
+
@weights_strategy = weights_strategy
|
39
|
+
@partial_strategy = partial_strategy
|
22
40
|
end
|
23
41
|
|
24
42
|
# Sets up a piece of the index for the given token.
|
25
43
|
#
|
26
|
-
def
|
27
|
-
|
44
|
+
def initialize_inverted_index_for token
|
45
|
+
self.inverted[token] ||= []
|
28
46
|
end
|
29
47
|
|
30
48
|
# Generation
|
@@ -36,7 +54,7 @@ module Indexing # :nodoc:all
|
|
36
54
|
# * Dumps all the indexes into files.
|
37
55
|
#
|
38
56
|
def generate_caches_from_source
|
39
|
-
|
57
|
+
load_from_prepared_index_file
|
40
58
|
generate_caches_from_memory
|
41
59
|
end
|
42
60
|
# Generates derived indexes from the index and dumps.
|
@@ -60,13 +78,13 @@ module Indexing # :nodoc:all
|
|
60
78
|
|
61
79
|
# Load the data from the db.
|
62
80
|
#
|
63
|
-
def
|
64
|
-
|
81
|
+
def load_from_prepared_index_file
|
82
|
+
load_from_prepared_index_generation_message
|
65
83
|
clear
|
66
84
|
retrieve
|
67
85
|
end
|
68
|
-
def
|
69
|
-
timed_exclaim %Q{"#{identifier}": Loading
|
86
|
+
def load_from_prepared_index_generation_message
|
87
|
+
timed_exclaim %Q{"#{identifier}": Loading prepared data into memory.}
|
70
88
|
end
|
71
89
|
# Retrieves the prepared index data into the index.
|
72
90
|
#
|
@@ -77,8 +95,8 @@ module Indexing # :nodoc:all
|
|
77
95
|
def retrieve
|
78
96
|
key_format = self[:key_format] || :to_i
|
79
97
|
files.retrieve do |id, token|
|
80
|
-
|
81
|
-
|
98
|
+
initialize_inverted_index_for token
|
99
|
+
self.inverted[token] << id.send(key_format)
|
82
100
|
end
|
83
101
|
end
|
84
102
|
|
@@ -86,63 +104,64 @@ module Indexing # :nodoc:all
|
|
86
104
|
# partial caching strategy of this bundle.
|
87
105
|
#
|
88
106
|
def generate_partial
|
89
|
-
generator = Generators::PartialGenerator.new self.
|
90
|
-
self.
|
107
|
+
generator = Generators::PartialGenerator.new self.inverted
|
108
|
+
self.inverted = generator.generate self.partial_strategy
|
91
109
|
end
|
92
|
-
# Generate a partial index from the given exact index.
|
110
|
+
# Generate a partial index from the given exact inverted index.
|
93
111
|
#
|
94
|
-
def generate_partial_from
|
112
|
+
def generate_partial_from exact_inverted_index
|
95
113
|
timed_exclaim %Q{"#{identifier}": Generating partial index for index.}
|
96
|
-
self.
|
114
|
+
self.inverted = exact_inverted_index
|
97
115
|
self.generate_partial
|
98
116
|
self
|
99
117
|
end
|
100
|
-
# Generates a new similarity index (writes its index) using the
|
101
|
-
# given similarity caching strategy.
|
102
|
-
#
|
103
|
-
def generate_similarity
|
104
|
-
generator = Generators::SimilarityGenerator.new self.index
|
105
|
-
self.similarity = generator.generate self.similarity_strategy
|
106
|
-
end
|
107
118
|
# Generates a new weights index (writes its index) using the
|
108
119
|
# given weight caching strategy.
|
109
120
|
#
|
110
121
|
def generate_weights
|
111
|
-
generator = Generators::WeightsGenerator.new self.
|
122
|
+
generator = Generators::WeightsGenerator.new self.inverted
|
112
123
|
self.weights = generator.generate self.weights_strategy
|
113
124
|
end
|
125
|
+
# Generates a new similarity index (writes its index) using the
|
126
|
+
# given similarity caching strategy.
|
127
|
+
#
|
128
|
+
def generate_similarity
|
129
|
+
generator = Generators::SimilarityGenerator.new self.inverted
|
130
|
+
self.similarity = generator.generate self.similarity_strategy
|
131
|
+
end
|
114
132
|
|
115
133
|
# Saves the indexes in a dump file.
|
116
134
|
#
|
117
135
|
def dump
|
118
|
-
|
136
|
+
timed_exclaim %Q{"#{identifier}": Dumping data.}
|
137
|
+
dump_inverted
|
119
138
|
dump_similarity
|
120
139
|
dump_weights
|
121
140
|
dump_configuration
|
122
141
|
end
|
123
142
|
# Dumps the core index.
|
124
143
|
#
|
125
|
-
def
|
126
|
-
timed_exclaim %Q{"#{identifier}": Dumping index.}
|
127
|
-
backend.
|
144
|
+
def dump_inverted
|
145
|
+
# timed_exclaim %Q{"#{identifier}": Dumping inverted index.}
|
146
|
+
backend.dump_inverted self.inverted
|
128
147
|
end
|
129
148
|
# Dumps the weights index.
|
130
149
|
#
|
131
150
|
def dump_weights
|
132
|
-
timed_exclaim %Q{"#{identifier}": Dumping weights
|
133
|
-
backend.dump_weights weights
|
151
|
+
# timed_exclaim %Q{"#{identifier}": Dumping index weights.}
|
152
|
+
backend.dump_weights self.weights
|
134
153
|
end
|
135
154
|
# Dumps the similarity index.
|
136
155
|
#
|
137
156
|
def dump_similarity
|
138
|
-
timed_exclaim %Q{"#{identifier}": Dumping similarity
|
139
|
-
backend.dump_similarity similarity
|
157
|
+
# timed_exclaim %Q{"#{identifier}": Dumping similarity index.}
|
158
|
+
backend.dump_similarity self.similarity
|
140
159
|
end
|
141
160
|
# Dumps the configuration.
|
142
161
|
#
|
143
162
|
def dump_configuration
|
144
|
-
timed_exclaim %Q{"#{identifier}": Dumping configuration
|
145
|
-
backend.dump_configuration configuration
|
163
|
+
# timed_exclaim %Q{"#{identifier}": Dumping configuration.}
|
164
|
+
backend.dump_configuration self.configuration
|
146
165
|
end
|
147
166
|
|
148
167
|
# Alerts the user if an index is missing.
|
@@ -195,14 +214,14 @@ module Indexing # :nodoc:all
|
|
195
214
|
# Warns the user if the core or weights indexes are small.
|
196
215
|
#
|
197
216
|
def warn_if_index_small
|
198
|
-
warn_cache_small :
|
199
|
-
warn_cache_small :weights
|
217
|
+
warn_cache_small :inverted if backend.inverted_cache_small?
|
218
|
+
warn_cache_small :weights if backend.weights_cache_small?
|
200
219
|
end
|
201
220
|
# Alerts the user if the core or weights indexes are not there.
|
202
221
|
#
|
203
222
|
def raise_unless_index_ok
|
204
|
-
raise_cache_missing :
|
205
|
-
raise_cache_missing :weights
|
223
|
+
raise_cache_missing :inverted unless backend.inverted_cache_ok?
|
224
|
+
raise_cache_missing :weights unless backend.weights_cache_ok?
|
206
225
|
end
|
207
226
|
|
208
227
|
end
|
data/lib/picky/loader.rb
CHANGED
@@ -145,7 +145,7 @@ module Loader # :nodoc:all
|
|
145
145
|
|
146
146
|
# Index store handling.
|
147
147
|
#
|
148
|
-
load_relative 'backend/
|
148
|
+
load_relative 'backend/base'
|
149
149
|
|
150
150
|
load_relative 'backend/redis'
|
151
151
|
load_relative 'backend/redis/basic'
|
@@ -158,10 +158,11 @@ module Loader # :nodoc:all
|
|
158
158
|
load_relative 'backend/file/json'
|
159
159
|
|
160
160
|
load_relative 'backend/files'
|
161
|
-
|
161
|
+
|
162
162
|
# Indexing and Indexed things.
|
163
163
|
#
|
164
|
-
load_relative '
|
164
|
+
load_relative 'bundle'
|
165
|
+
|
165
166
|
load_relative 'indexing/bundle/base'
|
166
167
|
load_relative 'indexing/bundle/memory'
|
167
168
|
load_relative 'indexing/bundle/redis'
|
@@ -260,17 +261,17 @@ module Loader # :nodoc:all
|
|
260
261
|
load_relative 'category'
|
261
262
|
load_relative 'category_indexed'
|
262
263
|
load_relative 'category_indexing'
|
263
|
-
|
264
|
+
|
264
265
|
load_relative 'categories'
|
265
266
|
load_relative 'categories_indexed'
|
266
267
|
load_relative 'categories_indexing'
|
267
|
-
|
268
|
+
|
268
269
|
load_relative 'index/base'
|
269
270
|
load_relative 'index/base_indexed'
|
270
271
|
load_relative 'index/base_indexing'
|
271
272
|
load_relative 'index/memory'
|
272
273
|
load_relative 'index/redis'
|
273
|
-
|
274
|
+
|
274
275
|
load_relative 'indexes'
|
275
276
|
load_relative 'indexes_indexed'
|
276
277
|
load_relative 'indexes_indexing'
|
@@ -6,7 +6,7 @@ module Query
|
|
6
6
|
class Allocation # :nodoc:all
|
7
7
|
|
8
8
|
attr_reader :count, :ids, :score, :combinations, :result_identifier
|
9
|
-
|
9
|
+
|
10
10
|
#
|
11
11
|
#
|
12
12
|
def initialize combinations, result_identifier
|
@@ -76,9 +76,9 @@ module Query
|
|
76
76
|
#
|
77
77
|
#
|
78
78
|
def to_s
|
79
|
-
"Allocation
|
79
|
+
"Allocation(#{to_result})"
|
80
80
|
end
|
81
81
|
|
82
82
|
end
|
83
|
-
|
83
|
+
|
84
84
|
end
|
data/lib/picky/query/token.rb
CHANGED
@@ -17,6 +17,8 @@ module Query
|
|
17
17
|
#
|
18
18
|
# Note: Use this if you do not want a qualified and normalized token.
|
19
19
|
#
|
20
|
+
# TODO text, qualifiers
|
21
|
+
#
|
20
22
|
def initialize text
|
21
23
|
@text = text
|
22
24
|
end
|
@@ -50,6 +52,8 @@ module Query
|
|
50
52
|
#
|
51
53
|
# Note: Removes the qualifier if it is not allowed.
|
52
54
|
#
|
55
|
+
# TODO Extract this since it is Search-based.
|
56
|
+
#
|
53
57
|
def qualify
|
54
58
|
@qualifiers, @text = split @text
|
55
59
|
@qualifiers && @qualifiers.collect! { |qualifier| Query::Qualifiers.instance.normalize qualifier }.compact!
|
@@ -159,7 +163,7 @@ module Query
|
|
159
163
|
# Internal identifier.
|
160
164
|
#
|
161
165
|
def identifier
|
162
|
-
"#{similar?? :similarity : :
|
166
|
+
"#{similar?? :similarity : :inverted}:#{@text}"
|
163
167
|
end
|
164
168
|
|
165
169
|
# If the originals & the text are the same, they are the same.
|
data/lib/picky/search.rb
CHANGED
@@ -24,6 +24,7 @@ class Search
|
|
24
24
|
# * weights: A hash of weights, or a Query::Weights object.
|
25
25
|
#
|
26
26
|
# TODO Add identifiers_to_remove (rename) and reduce_allocations_to_amount (rename).
|
27
|
+
# TODO categories_to_remove ?
|
27
28
|
#
|
28
29
|
# It is also possible to define the tokenizer and weights like so.
|
29
30
|
# Example:
|
@@ -57,9 +58,13 @@ class Search
|
|
57
58
|
options && Tokenizers::Query.new(options)
|
58
59
|
end
|
59
60
|
end
|
61
|
+
|
62
|
+
# Returns the tokenizer if set or if not, the query tokenizer.
|
63
|
+
#
|
60
64
|
def tokenizer
|
61
65
|
@tokenizer || Tokenizers::Query.default
|
62
66
|
end
|
67
|
+
|
63
68
|
# TODO Doc. Spec.
|
64
69
|
#
|
65
70
|
# Example:
|
data/lib/picky/sources/base.rb
CHANGED
@@ -18,7 +18,7 @@ module Sources
|
|
18
18
|
# A source has 1 mandatory and 2 optional methods:
|
19
19
|
# * connect_backend (_optional_): called once for each type/category pair.
|
20
20
|
# * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
|
21
|
-
# * take_snapshot (_optional_): called once for each
|
21
|
+
# * take_snapshot (_optional_): called once for each index or category (if indexing a single category).
|
22
22
|
#
|
23
23
|
# This base class "implements" all these methods, but they don't do anything.
|
24
24
|
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
@@ -56,7 +56,9 @@ module Sources
|
|
56
56
|
|
57
57
|
# Used to take a snapshot of your data if it is fast changing.
|
58
58
|
#
|
59
|
-
# Called once for each
|
59
|
+
# Called once for each index before harvesting.
|
60
|
+
# If it has been called on a source already by an index,
|
61
|
+
# it won't be called again for a category inside that index.
|
60
62
|
#
|
61
63
|
# Example:
|
62
64
|
# * In a DB source, a table based on the source's select statement is created.
|
@@ -65,6 +67,23 @@ module Sources
|
|
65
67
|
|
66
68
|
end
|
67
69
|
|
70
|
+
# Takes a snapshot unless one has already been taken (e.g. by an enclosing call), then yields to the given block.
|
71
|
+
#
|
72
|
+
# Example:
|
73
|
+
# * In a DB source, a table based on the source's select statement is created.
|
74
|
+
#
|
75
|
+
def with_snapshot index
|
76
|
+
connect_backend
|
77
|
+
@snapshot_taken ||= 0
|
78
|
+
if @snapshot_taken.zero?
|
79
|
+
timed_exclaim %Q{"#{index.identifier}": Taking snapshot of source data (if supported).}
|
80
|
+
take_snapshot index
|
81
|
+
end
|
82
|
+
@snapshot_taken += 1
|
83
|
+
yield
|
84
|
+
@snapshot_taken -= 1
|
85
|
+
end
|
86
|
+
|
68
87
|
end
|
69
88
|
|
70
89
|
end
|
data/lib/picky/sources/db.rb
CHANGED
@@ -85,8 +85,6 @@ module Sources
|
|
85
85
|
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
86
86
|
#
|
87
87
|
def take_snapshot index
|
88
|
-
connect_backend
|
89
|
-
|
90
88
|
origin = snapshot_table_name index.name
|
91
89
|
on_database = database.connection
|
92
90
|
|
@@ -110,8 +108,6 @@ module Sources
|
|
110
108
|
# Counts all the entries that are used for the index.
|
111
109
|
#
|
112
110
|
def count index_name
|
113
|
-
connect_backend
|
114
|
-
|
115
111
|
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
116
112
|
end
|
117
113
|
|
@@ -124,8 +120,6 @@ module Sources
|
|
124
120
|
# Harvests the data to index in chunks.
|
125
121
|
#
|
126
122
|
def harvest category, &block
|
127
|
-
connect_backend
|
128
|
-
|
129
123
|
(0..count(category.index_name)).step(chunksize) do |offset|
|
130
124
|
get_data category, offset, &block
|
131
125
|
end
|
@@ -134,7 +128,6 @@ module Sources
|
|
134
128
|
# Gets the data from the backend.
|
135
129
|
#
|
136
130
|
def get_data category, offset, &block # :nodoc:
|
137
|
-
|
138
131
|
select_statement = harvest_statement_with_offset category, offset
|
139
132
|
|
140
133
|
# TODO Rewrite ASAP.
|
data/lib/picky/statistics.rb
CHANGED
@@ -5,8 +5,8 @@
|
|
5
5
|
#
|
6
6
|
class Statistics # :nodoc:all
|
7
7
|
|
8
|
-
def
|
9
|
-
@
|
8
|
+
def initialize
|
9
|
+
@indexes = ["\033[1mIndexes analysis\033[m:"]
|
10
10
|
end
|
11
11
|
|
12
12
|
def preamble
|
@@ -28,18 +28,15 @@ PREAMBLE
|
|
28
28
|
|
29
29
|
# Gathers information about the indexes.
|
30
30
|
#
|
31
|
-
def analyze
|
32
|
-
|
33
|
-
|
34
|
-
@indexes = ["\033[1mIndexes analysis\033[m:"]
|
35
|
-
Indexes.analyze.each_pair do |name, index|
|
31
|
+
def analyze object
|
32
|
+
object.each_category do |category|
|
36
33
|
@indexes << <<-ANALYSIS
|
37
|
-
#{"#{
|
38
|
-
#{"
|
39
|
-
#{"
|
34
|
+
#{"#{category.index_name}".indented_to_s}\n
|
35
|
+
#{"#{category.name}".indented_to_s(4)}\n
|
36
|
+
#{"exact\n#{Analyzer.new.analyze(category.indexed_exact).indented_to_s}".indented_to_s(6)}\n
|
37
|
+
#{"partial\n#{Analyzer.new.analyze(category.indexed_partial).indented_to_s}".indented_to_s(6)}
|
40
38
|
ANALYSIS
|
41
39
|
end
|
42
|
-
@indexes = @indexes.join "\n"
|
43
40
|
end
|
44
41
|
|
45
42
|
# Outputs all gathered statistics.
|
@@ -49,7 +46,7 @@ ANALYSIS
|
|
49
46
|
|
50
47
|
Picky Configuration:
|
51
48
|
|
52
|
-
#{[@preamble, @application, @indexes].compact.join("\n")}
|
49
|
+
#{[@preamble, @application, @indexes.join("\n")].compact.join("\n")}
|
53
50
|
STATS
|
54
51
|
end
|
55
52
|
|
data/lib/tasks/checks.rake
CHANGED
@@ -1,11 +1,13 @@
|
|
1
1
|
# Checks to help the user.
|
2
2
|
#
|
3
|
-
|
3
|
+
desc 'Checks if index files are small/missing (index, category optional).'
|
4
|
+
task :check, [:index, :category] => :application do |_, options|
|
5
|
+
index, category = options.index, options.category
|
4
6
|
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
end
|
7
|
+
specific = Indexes
|
8
|
+
specific = specific[index] if index
|
9
|
+
specific = specific[category] if category
|
10
|
+
specific.check
|
10
11
|
|
12
|
+
puts "All checked indexes look ok."
|
11
13
|
end
|
data/lib/tasks/index.rake
CHANGED
@@ -1,32 +1,26 @@
|
|
1
1
|
# Indexing tasks.
|
2
2
|
#
|
3
|
+
desc "Generate the index (index, category optional)."
|
4
|
+
task :index, [:index, :category] => :application do |_, options|
|
5
|
+
index, category = options.index, options.category
|
6
|
+
|
7
|
+
specific = Indexes
|
8
|
+
specific = specific[index] if index
|
9
|
+
specific = specific[category] if category
|
10
|
+
specific.index
|
11
|
+
end
|
12
|
+
|
3
13
|
namespace :index do
|
4
14
|
|
5
|
-
|
15
|
+
# Advanced usage.
|
16
|
+
#
|
17
|
+
# desc "Takes a snapshot, indexes, and caches in random order."
|
6
18
|
task :randomly => :application do
|
7
19
|
Indexes.index true
|
8
20
|
end
|
9
|
-
desc "Takes a snapshot, indexes, and caches in order given."
|
21
|
+
# desc "Takes a snapshot, indexes, and caches in order given."
|
10
22
|
task :ordered => :application do
|
11
23
|
Indexes.index false
|
12
24
|
end
|
13
25
|
|
14
|
-
# desc "Generates the index snapshots."
|
15
|
-
#
|
16
|
-
# Note: Hidden since it is only needed by pro users.
|
17
|
-
#
|
18
|
-
# desc "Generate the data snapshots (intermediate table on a DB source)"
|
19
|
-
task :generate_snapshots => :application do
|
20
|
-
Indexes.take_snapshot
|
21
|
-
end
|
22
|
-
|
23
|
-
desc "Generates a specific index from index snapshots (category optional)."
|
24
|
-
task :specific, [:index, :category] => :application do |_, options|
|
25
|
-
index, category = options.index, options.category
|
26
|
-
|
27
|
-
specific = Indexes[index]
|
28
|
-
specific = specific[category] if category
|
29
|
-
specific.index
|
30
|
-
end
|
31
|
-
|
32
26
|
end
|
data/lib/tasks/server.rake
CHANGED
@@ -1,5 +1,14 @@
|
|
1
1
|
# Server tasks, like starting/stopping/restarting.
|
2
2
|
#
|
3
|
+
desc "Start the server."
|
4
|
+
task :start do
|
5
|
+
Rake::Task[:'server:start'].invoke
|
6
|
+
end
|
7
|
+
desc "Stop the server."
|
8
|
+
task :stop do
|
9
|
+
Rake::Task[:'server:stop'].invoke
|
10
|
+
end
|
11
|
+
|
3
12
|
namespace :server do
|
4
13
|
|
5
14
|
# desc "Start the unicorns. (Wehee!)"
|
@@ -30,8 +39,15 @@ namespace :server do
|
|
30
39
|
end
|
31
40
|
|
32
41
|
def current_pid
|
33
|
-
|
34
|
-
pid
|
42
|
+
pidfile = 'tmp/pids/unicorn.pid'
|
43
|
+
pid = `cat #{File.join(PICKY_ROOT, pidfile)}`
|
44
|
+
if pid.blank?
|
45
|
+
puts
|
46
|
+
puts "No server running (no #{pidfile} found)."
|
47
|
+
puts
|
48
|
+
else
|
49
|
+
pid.chomp
|
50
|
+
end
|
35
51
|
end
|
36
52
|
|
37
53
|
end
|
data/lib/tasks/statistics.rake
CHANGED
@@ -1,21 +1,34 @@
|
|
1
1
|
# Statistics tasks.
|
2
2
|
#
|
3
|
+
desc "Analyzes indexes (index, category optional)."
|
4
|
+
task :analyze, [:index, :category] => :'stats:prepare' do |_, options|
|
5
|
+
index, category = options.index, options.category
|
6
|
+
|
7
|
+
specific = Indexes
|
8
|
+
specific = specific[index] if index
|
9
|
+
specific = specific[category] if category
|
10
|
+
|
11
|
+
statistics = Statistics.new
|
12
|
+
|
13
|
+
begin
|
14
|
+
statistics.analyze specific
|
15
|
+
rescue StandardError
|
16
|
+
puts "\n\033[31mNote: rake analyze needs prepared indexes. Run rake index first.\033[m\n\n"
|
17
|
+
raise
|
18
|
+
end
|
19
|
+
|
20
|
+
puts statistics
|
21
|
+
end
|
22
|
+
|
23
|
+
task :stats => :'stats:prepare' do
|
24
|
+
stats = Statistics.new
|
25
|
+
puts stats.application
|
26
|
+
end
|
27
|
+
|
3
28
|
namespace :stats do
|
29
|
+
|
4
30
|
task :prepare => :application do
|
5
31
|
require File.expand_path('../../picky/statistics', __FILE__)
|
6
|
-
statistics = Statistics.instance
|
7
|
-
end
|
8
|
-
task :app => :prepare do
|
9
|
-
Statistics.instance.application
|
10
|
-
puts Statistics.instance
|
11
|
-
end
|
12
|
-
task :analyze => :prepare do
|
13
|
-
begin
|
14
|
-
Statistics.instance.analyze
|
15
|
-
rescue StandardError
|
16
|
-
puts "\n\033[31mNote: rake analyze needs prepared indexes. Run rake index first.\033[m\n\n"
|
17
|
-
raise
|
18
|
-
end
|
19
|
-
puts Statistics.instance
|
20
32
|
end
|
33
|
+
|
21
34
|
end
|
data/lib/tasks/todo.rake
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
desc "Finds where Picky still needs input from you."
|
2
2
|
task :"to#{}do" do
|
3
|
-
if system "grep -e '
|
3
|
+
if system "grep -e 'TO#{}DO.*' -n --color=always -R *"
|
4
4
|
puts "Picky needs a bit of input from you there. Thanks."
|
5
5
|
else
|
6
|
-
puts "Picky seems to be fine (no
|
6
|
+
puts "Picky seems to be fine (no TO#{}DOs found)."
|
7
7
|
end
|
8
8
|
end
|