picky 1.5.2 → 1.5.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
data/lib/picky/query/base.rb
CHANGED
@@ -20,12 +20,13 @@ module Query
|
|
20
20
|
# Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
|
21
21
|
#
|
22
22
|
class Base
|
23
|
-
|
23
|
+
|
24
24
|
include Helpers::Measuring
|
25
|
-
|
25
|
+
|
26
|
+
attr_reader :indexes
|
26
27
|
attr_writer :tokenizer, :identifiers_to_remove
|
27
28
|
attr_accessor :reduce_to_amount, :weights
|
28
|
-
|
29
|
+
|
29
30
|
# Takes:
|
30
31
|
# * A number of indexes
|
31
32
|
# * Options hash (optional) with:
|
@@ -40,7 +41,7 @@ module Query
|
|
40
41
|
weights = options[:weights] || Weights.new
|
41
42
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
42
43
|
end
|
43
|
-
|
44
|
+
|
44
45
|
# Returns the right combinations strategy for
|
45
46
|
# a number of query indexes.
|
46
47
|
#
|
@@ -73,7 +74,7 @@ module Query
|
|
73
74
|
def raise_different index_types
|
74
75
|
raise DifferentTypesError.new(index_types)
|
75
76
|
end
|
76
|
-
|
77
|
+
|
77
78
|
# This is the main entry point for a query.
|
78
79
|
# Use this in specs and also for running queries.
|
79
80
|
#
|
@@ -86,22 +87,22 @@ module Query
|
|
86
87
|
def search_with_text text, offset = 0
|
87
88
|
search tokenized(text), offset
|
88
89
|
end
|
89
|
-
|
90
|
+
|
90
91
|
# Runs the actual search using Query::Tokens.
|
91
92
|
#
|
92
93
|
# Note: Internal method, use #search_with_text.
|
93
94
|
#
|
94
95
|
def search tokens, offset = 0
|
95
96
|
results = nil
|
96
|
-
|
97
|
+
|
97
98
|
duration = timed do
|
98
|
-
results = execute
|
99
|
+
results = execute tokens, offset
|
99
100
|
end
|
100
101
|
results.duration = duration.round 6
|
101
|
-
|
102
|
+
|
102
103
|
results
|
103
104
|
end
|
104
|
-
|
105
|
+
|
105
106
|
# Execute a search using Query::Tokens.
|
106
107
|
#
|
107
108
|
# Note: Internal method, use #search_with_text.
|
@@ -109,16 +110,7 @@ module Query
|
|
109
110
|
def execute tokens, offset
|
110
111
|
result_type.from offset, sorted_allocations(tokens)
|
111
112
|
end
|
112
|
-
|
113
|
-
# Returns an empty result with default values.
|
114
|
-
#
|
115
|
-
# Parameters:
|
116
|
-
# * offset = 0: _optional_ The offset to use for the empty results.
|
117
|
-
#
|
118
|
-
def empty_results offset = 0
|
119
|
-
result_type.new offset
|
120
|
-
end
|
121
|
-
|
113
|
+
|
122
114
|
# Delegates the tokenizing to the query tokenizer.
|
123
115
|
#
|
124
116
|
# Parameters:
|
@@ -127,7 +119,7 @@ module Query
|
|
127
119
|
def tokenized text
|
128
120
|
@tokenizer.tokenize text
|
129
121
|
end
|
130
|
-
|
122
|
+
|
131
123
|
# Gets sorted allocations for the tokens.
|
132
124
|
#
|
133
125
|
# This generates the possible allocations, sorted.
|
@@ -144,27 +136,27 @@ module Query
|
|
144
136
|
# TODO uniq, score, sort in there
|
145
137
|
#
|
146
138
|
allocations = @indexes.allocations_for tokens
|
147
|
-
|
139
|
+
|
148
140
|
# Callbacks.
|
149
141
|
#
|
150
142
|
# TODO Reduce before sort?
|
151
143
|
#
|
152
144
|
reduce allocations
|
153
145
|
remove_from allocations
|
154
|
-
|
146
|
+
|
155
147
|
# Remove double allocations.
|
156
148
|
#
|
157
149
|
allocations.uniq
|
158
|
-
|
150
|
+
|
159
151
|
# Score the allocations using weights as bias.
|
160
152
|
#
|
161
153
|
allocations.calculate_score weights
|
162
|
-
|
154
|
+
|
163
155
|
# Sort the allocations.
|
164
156
|
# (allocations are sorted according to score, highest to lowest)
|
165
157
|
#
|
166
158
|
allocations.sort
|
167
|
-
|
159
|
+
|
168
160
|
# Return the allocations.
|
169
161
|
#
|
170
162
|
allocations
|
@@ -172,7 +164,7 @@ module Query
|
|
172
164
|
def reduce allocations # :nodoc:
|
173
165
|
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
174
166
|
end
|
175
|
-
|
167
|
+
|
176
168
|
#
|
177
169
|
#
|
178
170
|
def remove_from allocations # :nodoc:
|
@@ -183,12 +175,14 @@ module Query
|
|
183
175
|
def identifiers_to_remove # :nodoc:
|
184
176
|
@identifiers_to_remove ||= []
|
185
177
|
end
|
186
|
-
|
178
|
+
|
187
179
|
# Display some nice information for the user.
|
188
180
|
#
|
189
181
|
def to_s
|
190
|
-
s = "#{self.class}"
|
182
|
+
s = "#{self.class}("
|
183
|
+
s << @indexes.indexes.map(&:name).join(', ')
|
191
184
|
s << ", weights: #{@weights}" unless @weights.empty?
|
185
|
+
s << ")"
|
192
186
|
s
|
193
187
|
end
|
194
188
|
|
data/lib/picky/sources/base.rb
CHANGED
@@ -7,12 +7,12 @@
|
|
7
7
|
# * Delicious (http://del.icio.us, online bookmarking service)
|
8
8
|
# See also:
|
9
9
|
# http://github.com/floere/picky/wiki/Sources-Configuration
|
10
|
-
#
|
10
|
+
#
|
11
11
|
# Don't worry if your source isn't here. Adding your own is easy:
|
12
12
|
# http://github.com/floere/picky/wiki/Contributing-sources
|
13
13
|
#
|
14
14
|
module Sources
|
15
|
-
|
15
|
+
|
16
16
|
# Sources are where your data comes from.
|
17
17
|
#
|
18
18
|
# A source has 1 mandatory and 2 optional methods:
|
@@ -24,9 +24,9 @@ module Sources
|
|
24
24
|
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
25
25
|
#
|
26
26
|
class Base
|
27
|
-
|
27
|
+
|
28
28
|
attr_reader :key_format
|
29
|
-
|
29
|
+
|
30
30
|
# Connect to the backend.
|
31
31
|
#
|
32
32
|
# Called once per index/category combination before harvesting.
|
@@ -37,9 +37,9 @@ module Sources
|
|
37
37
|
# * We open an file with data.
|
38
38
|
#
|
39
39
|
def connect_backend
|
40
|
-
|
40
|
+
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
# Called by the indexer when gathering data.
|
44
44
|
#
|
45
45
|
# Yields the data (id, text for id) for the given type and category.
|
@@ -51,7 +51,7 @@ module Sources
|
|
51
51
|
def harvest index, category # :yields: id, text_for_id
|
52
52
|
# This concrete implementation yields "nothing", override in subclasses.
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
# Used to take a snapshot of your data if it is fast changing.
|
56
56
|
#
|
57
57
|
# Called once for each type before harvesting.
|
@@ -60,9 +60,9 @@ module Sources
|
|
60
60
|
# * In a DB source, a table based on the source's select statement is created.
|
61
61
|
#
|
62
62
|
def take_snapshot index
|
63
|
-
|
63
|
+
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
end
|
67
|
-
|
67
|
+
|
68
68
|
end
|
data/lib/picky/sources/couch.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Raised when a Couch source is instantiated without a file.
|
4
4
|
#
|
5
5
|
# Example:
|
6
6
|
# Sources::Couch.new(:column1, :column2) # without file option
|
7
7
|
#
|
8
8
|
class NoCouchDBGiven < StandardError; end
|
9
|
-
|
9
|
+
|
10
10
|
# A Couch database source.
|
11
11
|
#
|
12
12
|
# Options:
|
@@ -19,28 +19,32 @@ module Sources
|
|
19
19
|
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
20
20
|
#
|
21
21
|
class Couch < Base
|
22
|
-
|
22
|
+
|
23
23
|
#
|
24
24
|
#
|
25
25
|
def initialize *category_names, options
|
26
26
|
check_gem
|
27
|
-
|
27
|
+
|
28
28
|
Hash === options && options[:url] || raise_no_db_given(category_names)
|
29
29
|
@db = RestClient::Resource.new options.delete(:url), options
|
30
30
|
end
|
31
|
-
|
32
|
-
|
31
|
+
|
32
|
+
def to_s
|
33
|
+
self.class.name
|
34
|
+
end
|
35
|
+
|
36
|
+
# Default key format method for couch DB is to_sym.
|
33
37
|
#
|
34
38
|
def key_format
|
35
39
|
:to_sym
|
36
40
|
end
|
37
|
-
|
41
|
+
|
38
42
|
# Tries to require the rest_client gem.
|
39
43
|
#
|
40
44
|
def check_gem # :nodoc:
|
41
45
|
require 'rest_client'
|
42
46
|
rescue LoadError
|
43
|
-
|
47
|
+
warn_gem_missing 'rest-client', 'the CouchDB source'
|
44
48
|
exit 1
|
45
49
|
end
|
46
50
|
|
@@ -55,14 +59,14 @@ module Sources
|
|
55
59
|
yield doc[@@id_key], doc[category_name] || next
|
56
60
|
end
|
57
61
|
end
|
58
|
-
|
62
|
+
|
59
63
|
def get_data &block # :nodoc:
|
60
64
|
resp = @db['_all_docs?include_docs=true'].get
|
61
65
|
JSON.parse(resp)['rows'].
|
62
66
|
map{|row| row['doc']}.
|
63
67
|
each &block
|
64
68
|
end
|
65
|
-
|
69
|
+
|
66
70
|
def raise_no_db_given category_names # :nodoc:
|
67
71
|
raise NoCouchDBGiven.new(category_names.join(', '))
|
68
72
|
end
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Raised when a CSV source is instantiated without a file.
|
4
4
|
#
|
5
5
|
# Example:
|
6
6
|
# Sources::CSV.new(:column1, :column2) # without file option
|
7
7
|
#
|
8
8
|
class NoCSVFileGiven < StandardError; end
|
9
|
-
|
9
|
+
|
10
10
|
# Describes a CSV source, a file with comma separated values in it.
|
11
|
-
#
|
11
|
+
#
|
12
12
|
# The first column is implicitly assumed to be the id column.
|
13
13
|
#
|
14
14
|
# It takes the same options as the Ruby 1.9 CSV class.
|
@@ -19,36 +19,43 @@ module Sources
|
|
19
19
|
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
20
20
|
#
|
21
21
|
class CSV < Base
|
22
|
-
|
22
|
+
|
23
23
|
# The CSV file's path, relative to PICKY_ROOT.
|
24
24
|
#
|
25
25
|
attr_reader :file_name
|
26
|
-
|
26
|
+
|
27
27
|
# The options that were passed into #new.
|
28
28
|
#
|
29
29
|
attr_reader :csv_options, :key_format
|
30
|
-
|
30
|
+
|
31
31
|
# The data category names.
|
32
32
|
#
|
33
33
|
attr_reader :category_names
|
34
|
-
|
34
|
+
|
35
35
|
def initialize *category_names, options
|
36
36
|
require 'csv'
|
37
37
|
@category_names = category_names
|
38
|
-
|
38
|
+
|
39
39
|
@csv_options = Hash === options && options || {}
|
40
40
|
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
41
|
-
|
41
|
+
|
42
42
|
key_format = options.delete :key_format
|
43
43
|
@key_format = key_format && key_format.to_sym || :to_i
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
|
+
def to_s
|
47
|
+
parameters = category_names
|
48
|
+
parameters << { file: file_name }
|
49
|
+
parameters << csv_options unless csv_options.empty?
|
50
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
51
|
+
end
|
52
|
+
|
46
53
|
# Raises a NoCSVFileGiven exception.
|
47
54
|
#
|
48
55
|
def raise_no_file_given category_names # :nodoc:
|
49
56
|
raise NoCSVFileGiven.new(category_names.join(', '))
|
50
57
|
end
|
51
|
-
|
58
|
+
|
52
59
|
# Harvests the data to index.
|
53
60
|
#
|
54
61
|
def harvest _, category
|
@@ -61,13 +68,13 @@ module Sources
|
|
61
68
|
yield indexed_id, text
|
62
69
|
end
|
63
70
|
end
|
64
|
-
|
71
|
+
|
65
72
|
#
|
66
73
|
#
|
67
74
|
def get_data &block # :nodoc:
|
68
75
|
::CSV.foreach file_name, csv_options, &block
|
69
76
|
end
|
70
|
-
|
77
|
+
|
71
78
|
end
|
72
|
-
|
79
|
+
|
73
80
|
end
|
data/lib/picky/sources/db.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Describes a database source. Needs a SELECT statement
|
4
4
|
# (with id in it), and a file option or the options from an AR config file.
|
5
5
|
#
|
@@ -15,27 +15,33 @@ module Sources
|
|
15
15
|
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
|
16
16
|
#
|
17
17
|
class DB < Base
|
18
|
-
|
18
|
+
|
19
19
|
# The select statement that was passed in.
|
20
20
|
#
|
21
21
|
attr_reader :select_statement
|
22
|
-
|
22
|
+
|
23
23
|
# The database adapter.
|
24
24
|
#
|
25
25
|
attr_reader :database
|
26
|
-
|
26
|
+
|
27
27
|
# The database connection options that were either passed in or loaded from the given file.
|
28
28
|
#
|
29
|
-
attr_reader :connection_options
|
30
|
-
|
29
|
+
attr_reader :connection_options, :options
|
30
|
+
|
31
31
|
@@traversal_id = :__picky_id
|
32
|
-
|
32
|
+
|
33
33
|
def initialize select_statement, options = { file: 'app/db.yml' }
|
34
34
|
@select_statement = select_statement
|
35
35
|
@database = create_database_adapter
|
36
36
|
@options = options
|
37
37
|
end
|
38
|
-
|
38
|
+
|
39
|
+
def to_s
|
40
|
+
parameters = [select_statement.inspect]
|
41
|
+
parameters << options unless options.empty?
|
42
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
43
|
+
end
|
44
|
+
|
39
45
|
# Creates a database adapter for use with this source.
|
40
46
|
def create_database_adapter # :nodoc:
|
41
47
|
# TODO Do not use ActiveRecord directly.
|
@@ -46,7 +52,7 @@ module Sources
|
|
46
52
|
adapter_class.abstract_class = true
|
47
53
|
adapter_class
|
48
54
|
end
|
49
|
-
|
55
|
+
|
50
56
|
# Configure the backend.
|
51
57
|
#
|
52
58
|
# Options:
|
@@ -63,7 +69,7 @@ module Sources
|
|
63
69
|
end
|
64
70
|
self
|
65
71
|
end
|
66
|
-
|
72
|
+
|
67
73
|
# Connect the backend.
|
68
74
|
#
|
69
75
|
# Will raise unless connection options have been given.
|
@@ -73,64 +79,64 @@ module Sources
|
|
73
79
|
raise "Database backend not configured" unless connection_options
|
74
80
|
database.establish_connection connection_options
|
75
81
|
end
|
76
|
-
|
82
|
+
|
77
83
|
# Take a snapshot of the data.
|
78
84
|
#
|
79
85
|
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
80
86
|
#
|
81
87
|
def take_snapshot index
|
82
88
|
connect_backend
|
83
|
-
|
89
|
+
|
84
90
|
origin = snapshot_table_name index
|
85
91
|
on_database = database.connection
|
86
|
-
|
92
|
+
|
87
93
|
# Drop the table if it exists.
|
88
94
|
#
|
89
95
|
on_database.drop_table origin if on_database.table_exists?(origin)
|
90
|
-
|
96
|
+
|
91
97
|
# The adapters currently do not support this.
|
92
98
|
#
|
93
99
|
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
94
|
-
|
100
|
+
|
95
101
|
# Add a column that Picky uses to traverse the table's entries.
|
96
102
|
#
|
97
103
|
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
|
98
|
-
|
104
|
+
|
99
105
|
# Execute any special queries this index needs executed.
|
100
106
|
#
|
101
107
|
on_database.execute index.after_indexing if index.after_indexing
|
102
108
|
end
|
103
|
-
|
109
|
+
|
104
110
|
# Counts all the entries that are used for the index.
|
105
111
|
#
|
106
112
|
def count index
|
107
113
|
connect_backend
|
108
|
-
|
114
|
+
|
109
115
|
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
|
110
116
|
end
|
111
|
-
|
117
|
+
|
112
118
|
# The name of the snapshot table created by Picky.
|
113
119
|
#
|
114
120
|
def snapshot_table_name index
|
115
121
|
"picky_#{index.name}_index"
|
116
122
|
end
|
117
|
-
|
123
|
+
|
118
124
|
# Harvests the data to index in chunks.
|
119
125
|
#
|
120
126
|
def harvest index, category, &block
|
121
127
|
connect_backend
|
122
|
-
|
128
|
+
|
123
129
|
(0..count(index)).step(chunksize) do |offset|
|
124
130
|
get_data index, category, offset, &block
|
125
131
|
end
|
126
132
|
end
|
127
|
-
|
133
|
+
|
128
134
|
# Gets the data from the backend.
|
129
135
|
#
|
130
136
|
def get_data index, category, offset, &block # :nodoc:
|
131
|
-
|
137
|
+
|
132
138
|
select_statement = harvest_statement_with_offset index, category, offset
|
133
|
-
|
139
|
+
|
134
140
|
# TODO Rewrite ASAP.
|
135
141
|
#
|
136
142
|
if database.connection.adapter_name == "PostgreSQL"
|
@@ -146,29 +152,29 @@ module Sources
|
|
146
152
|
end
|
147
153
|
end
|
148
154
|
end
|
149
|
-
|
155
|
+
|
150
156
|
# Builds a harvest statement for getting data to index.
|
151
157
|
#
|
152
158
|
def harvest_statement_with_offset index, category, offset
|
153
159
|
statement = harvest_statement index, category
|
154
|
-
|
160
|
+
|
155
161
|
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
156
|
-
|
162
|
+
|
157
163
|
"#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
|
158
164
|
end
|
159
|
-
|
165
|
+
|
160
166
|
# The harvest statement used to pull data from the snapshot table.
|
161
167
|
#
|
162
168
|
def harvest_statement index, category
|
163
169
|
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
|
164
170
|
end
|
165
|
-
|
171
|
+
|
166
172
|
# The amount of records that are loaded each chunk.
|
167
173
|
#
|
168
174
|
def chunksize
|
169
175
|
25_000
|
170
176
|
end
|
171
|
-
|
177
|
+
|
172
178
|
end
|
173
|
-
|
179
|
+
|
174
180
|
end
|
@@ -1,5 +1,5 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Describes a Delicious (http://deli.cio.us) source.
|
4
4
|
#
|
5
5
|
# This source has a fixed set of categories:
|
@@ -11,7 +11,7 @@ module Sources
|
|
11
11
|
# Sources::CSV.new('usrnam', 'paswrd')
|
12
12
|
#
|
13
13
|
class Delicious < Base
|
14
|
-
|
14
|
+
|
15
15
|
def initialize username, password
|
16
16
|
check_gem
|
17
17
|
@username = username
|
@@ -20,21 +20,24 @@ module Sources
|
|
20
20
|
def check_gem # :nodoc:
|
21
21
|
require 'www/delicious'
|
22
22
|
rescue LoadError
|
23
|
-
|
23
|
+
warn_gem_missing 'www-delicious', 'the delicious source'
|
24
24
|
exit 1
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
|
+
def to_s
|
28
|
+
self.class.name
|
29
|
+
end
|
30
|
+
|
27
31
|
# Harvests the data to index.
|
28
32
|
#
|
29
33
|
def harvest _, category
|
30
34
|
get_data do |indexed_id, data|
|
31
35
|
text = data[category.from]
|
32
36
|
next unless text
|
33
|
-
text.force_encoding 'utf-8' # TODO Still needed?
|
34
37
|
yield indexed_id, text
|
35
38
|
end
|
36
39
|
end
|
37
|
-
|
40
|
+
|
38
41
|
#
|
39
42
|
#
|
40
43
|
def get_data # :nodoc:
|
@@ -50,7 +53,7 @@ module Sources
|
|
50
53
|
yield @generated_id, data
|
51
54
|
end
|
52
55
|
end
|
53
|
-
|
56
|
+
|
54
57
|
end
|
55
|
-
|
58
|
+
|
56
59
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
|
4
|
+
# Gathers different statistics
|
5
|
+
# when methods are called.
|
6
|
+
#
|
7
|
+
# Can be output using to_s.
|
8
|
+
#
|
9
|
+
class Statistics
|
10
|
+
|
11
|
+
def self.instance
|
12
|
+
@statistics ||= new
|
13
|
+
end
|
14
|
+
|
15
|
+
def preamble
|
16
|
+
loc = lines_of_code File.open('app/application.rb').read
|
17
|
+
|
18
|
+
@preamble ||= <<-PREAMBLE
|
19
|
+
\033[1mApplication(s)\033[m
|
20
|
+
Definition LOC: #{"%4d" % loc}
|
21
|
+
Indexes defined: #{"%4d" % Indexes.size}
|
22
|
+
PREAMBLE
|
23
|
+
end
|
24
|
+
|
25
|
+
# Gathers information about the application.
|
26
|
+
#
|
27
|
+
def application
|
28
|
+
preamble
|
29
|
+
@application = Application.apps.map &:indented_to_s
|
30
|
+
end
|
31
|
+
|
32
|
+
# Gathers information about the indexes.
|
33
|
+
#
|
34
|
+
def analyze
|
35
|
+
preamble
|
36
|
+
|
37
|
+
@indexes = ["\033[1mIndexes analysis\033[m:"]
|
38
|
+
Indexes.analyze.each_pair do |name, index|
|
39
|
+
@indexes << <<-ANALYSIS
|
40
|
+
#{"#{name}:".indented_to_s}:
|
41
|
+
#{"exact:\n#{index[:exact].indented_to_s}".indented_to_s(4)}
|
42
|
+
#{"partial*:\n#{index[:partial].indented_to_s}".indented_to_s(4)}
|
43
|
+
ANALYSIS
|
44
|
+
end
|
45
|
+
@indexes = @indexes.join "\n"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Outputs all gathered statistics.
|
49
|
+
#
|
50
|
+
def to_s
|
51
|
+
<<-STATS
|
52
|
+
|
53
|
+
Picky Configuration:
|
54
|
+
|
55
|
+
#{[@preamble, @application, @indexes].compact.join("\n")}
|
56
|
+
STATS
|
57
|
+
end
|
58
|
+
|
59
|
+
# Internal methods.
|
60
|
+
#
|
61
|
+
|
62
|
+
def lines_of_code text
|
63
|
+
text.scan(/^\s*[^#\s].*$/).size
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
data/lib/tasks/application.rake
CHANGED