picky 1.5.2 → 1.5.3
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/analyzer.rb +154 -0
- data/lib/picky/application.rb +53 -33
- data/lib/picky/character_substituters/west_european.rb +10 -6
- data/lib/picky/cli.rb +18 -18
- data/lib/picky/index/base.rb +44 -13
- data/lib/picky/index_bundle.rb +13 -4
- data/lib/picky/indexed/indexes.rb +26 -10
- data/lib/picky/indexing/indexes.rb +26 -24
- data/lib/picky/interfaces/live_parameters.rb +23 -16
- data/lib/picky/internals/extensions/object.rb +13 -6
- data/lib/picky/internals/frontend_adapters/rack.rb +30 -34
- data/lib/picky/internals/index/backend.rb +1 -2
- data/lib/picky/internals/index/file/basic.rb +18 -14
- data/lib/picky/internals/index/files.rb +16 -6
- data/lib/picky/internals/index/redis/basic.rb +12 -5
- data/lib/picky/internals/index/redis.rb +2 -2
- data/lib/picky/internals/indexed/bundle/base.rb +58 -14
- data/lib/picky/internals/indexed/bundle/memory.rb +40 -14
- data/lib/picky/internals/indexed/bundle/redis.rb +9 -30
- data/lib/picky/internals/indexed/categories.rb +19 -14
- data/lib/picky/internals/indexed/category.rb +44 -20
- data/lib/picky/internals/indexed/index.rb +23 -13
- data/lib/picky/internals/indexed/wrappers/bundle/wrapper.rb +27 -9
- data/lib/picky/internals/indexers/serial.rb +1 -1
- data/lib/picky/internals/indexing/bundle/base.rb +28 -28
- data/lib/picky/internals/indexing/bundle/memory.rb +14 -7
- data/lib/picky/internals/indexing/categories.rb +15 -11
- data/lib/picky/internals/indexing/category.rb +30 -20
- data/lib/picky/internals/indexing/index.rb +22 -14
- data/lib/picky/internals/query/allocations.rb +0 -15
- data/lib/picky/internals/query/combinations/base.rb +0 -4
- data/lib/picky/internals/query/combinations/redis.rb +19 -8
- data/lib/picky/internals/query/indexes.rb +3 -6
- data/lib/picky/internals/query/token.rb +0 -4
- data/lib/picky/internals/query/weights.rb +2 -11
- data/lib/picky/internals/results/base.rb +3 -10
- data/lib/picky/internals/tokenizers/base.rb +64 -28
- data/lib/picky/internals/tokenizers/index.rb +8 -8
- data/lib/picky/loader.rb +59 -53
- data/lib/picky/query/base.rb +23 -29
- data/lib/picky/sources/base.rb +10 -10
- data/lib/picky/sources/couch.rb +14 -10
- data/lib/picky/sources/csv.rb +21 -14
- data/lib/picky/sources/db.rb +37 -31
- data/lib/picky/sources/delicious.rb +11 -8
- data/lib/picky/sources/wrappers/base.rb +3 -1
- data/lib/picky/statistics.rb +66 -0
- data/lib/tasks/application.rake +3 -0
- data/lib/tasks/checks.rake +11 -0
- data/lib/tasks/framework.rake +3 -0
- data/lib/tasks/index.rake +9 -11
- data/lib/tasks/routes.rake +3 -2
- data/lib/tasks/shortcuts.rake +17 -5
- data/lib/tasks/statistics.rake +20 -12
- data/lib/tasks/try.rake +14 -14
- data/spec/lib/application_spec.rb +3 -3
- data/spec/lib/index/base_spec.rb +25 -3
- data/spec/lib/internals/extensions/object_spec.rb +46 -20
- data/spec/lib/internals/frontend_adapters/rack_spec.rb +3 -3
- data/spec/lib/internals/index/redis/basic_spec.rb +67 -0
- data/spec/lib/internals/indexers/serial_spec.rb +1 -1
- data/spec/lib/internals/results/base_spec.rb +0 -12
- data/spec/lib/internals/tokenizers/base_spec.rb +49 -1
- data/spec/lib/query/allocations_spec.rb +0 -56
- data/spec/lib/query/base_spec.rb +25 -21
- data/spec/lib/query/combinations/redis_spec.rb +6 -1
- data/spec/lib/sources/delicious_spec.rb +2 -2
- data/spec/lib/statistics_spec.rb +31 -0
- metadata +9 -2
data/lib/picky/query/base.rb
CHANGED
@@ -20,12 +20,13 @@ module Query
|
|
20
20
|
# Not directly instantiated. However, its methods are used by its subclasses, Full and Live.
|
21
21
|
#
|
22
22
|
class Base
|
23
|
-
|
23
|
+
|
24
24
|
include Helpers::Measuring
|
25
|
-
|
25
|
+
|
26
|
+
attr_reader :indexes
|
26
27
|
attr_writer :tokenizer, :identifiers_to_remove
|
27
28
|
attr_accessor :reduce_to_amount, :weights
|
28
|
-
|
29
|
+
|
29
30
|
# Takes:
|
30
31
|
# * A number of indexes
|
31
32
|
# * Options hash (optional) with:
|
@@ -40,7 +41,7 @@ module Query
|
|
40
41
|
weights = options[:weights] || Weights.new
|
41
42
|
@weights = Hash === weights ? Weights.new(weights) : weights
|
42
43
|
end
|
43
|
-
|
44
|
+
|
44
45
|
# Returns the right combinations strategy for
|
45
46
|
# a number of query indexes.
|
46
47
|
#
|
@@ -73,7 +74,7 @@ module Query
|
|
73
74
|
def raise_different index_types
|
74
75
|
raise DifferentTypesError.new(index_types)
|
75
76
|
end
|
76
|
-
|
77
|
+
|
77
78
|
# This is the main entry point for a query.
|
78
79
|
# Use this in specs and also for running queries.
|
79
80
|
#
|
@@ -86,22 +87,22 @@ module Query
|
|
86
87
|
def search_with_text text, offset = 0
|
87
88
|
search tokenized(text), offset
|
88
89
|
end
|
89
|
-
|
90
|
+
|
90
91
|
# Runs the actual search using Query::Tokens.
|
91
92
|
#
|
92
93
|
# Note: Internal method, use #search_with_text.
|
93
94
|
#
|
94
95
|
def search tokens, offset = 0
|
95
96
|
results = nil
|
96
|
-
|
97
|
+
|
97
98
|
duration = timed do
|
98
|
-
results = execute
|
99
|
+
results = execute tokens, offset
|
99
100
|
end
|
100
101
|
results.duration = duration.round 6
|
101
|
-
|
102
|
+
|
102
103
|
results
|
103
104
|
end
|
104
|
-
|
105
|
+
|
105
106
|
# Execute a search using Query::Tokens.
|
106
107
|
#
|
107
108
|
# Note: Internal method, use #search_with_text.
|
@@ -109,16 +110,7 @@ module Query
|
|
109
110
|
def execute tokens, offset
|
110
111
|
result_type.from offset, sorted_allocations(tokens)
|
111
112
|
end
|
112
|
-
|
113
|
-
# Returns an empty result with default values.
|
114
|
-
#
|
115
|
-
# Parameters:
|
116
|
-
# * offset = 0: _optional_ The offset to use for the empty results.
|
117
|
-
#
|
118
|
-
def empty_results offset = 0
|
119
|
-
result_type.new offset
|
120
|
-
end
|
121
|
-
|
113
|
+
|
122
114
|
# Delegates the tokenizing to the query tokenizer.
|
123
115
|
#
|
124
116
|
# Parameters:
|
@@ -127,7 +119,7 @@ module Query
|
|
127
119
|
def tokenized text
|
128
120
|
@tokenizer.tokenize text
|
129
121
|
end
|
130
|
-
|
122
|
+
|
131
123
|
# Gets sorted allocations for the tokens.
|
132
124
|
#
|
133
125
|
# This generates the possible allocations, sorted.
|
@@ -144,27 +136,27 @@ module Query
|
|
144
136
|
# TODO uniq, score, sort in there
|
145
137
|
#
|
146
138
|
allocations = @indexes.allocations_for tokens
|
147
|
-
|
139
|
+
|
148
140
|
# Callbacks.
|
149
141
|
#
|
150
142
|
# TODO Reduce before sort?
|
151
143
|
#
|
152
144
|
reduce allocations
|
153
145
|
remove_from allocations
|
154
|
-
|
146
|
+
|
155
147
|
# Remove double allocations.
|
156
148
|
#
|
157
149
|
allocations.uniq
|
158
|
-
|
150
|
+
|
159
151
|
# Score the allocations using weights as bias.
|
160
152
|
#
|
161
153
|
allocations.calculate_score weights
|
162
|
-
|
154
|
+
|
163
155
|
# Sort the allocations.
|
164
156
|
# (allocations are sorted according to score, highest to lowest)
|
165
157
|
#
|
166
158
|
allocations.sort
|
167
|
-
|
159
|
+
|
168
160
|
# Return the allocations.
|
169
161
|
#
|
170
162
|
allocations
|
@@ -172,7 +164,7 @@ module Query
|
|
172
164
|
def reduce allocations # :nodoc:
|
173
165
|
allocations.reduce_to reduce_to_amount if reduce_to_amount
|
174
166
|
end
|
175
|
-
|
167
|
+
|
176
168
|
#
|
177
169
|
#
|
178
170
|
def remove_from allocations # :nodoc:
|
@@ -183,12 +175,14 @@ module Query
|
|
183
175
|
def identifiers_to_remove # :nodoc:
|
184
176
|
@identifiers_to_remove ||= []
|
185
177
|
end
|
186
|
-
|
178
|
+
|
187
179
|
# Display some nice information for the user.
|
188
180
|
#
|
189
181
|
def to_s
|
190
|
-
s = "#{self.class}"
|
182
|
+
s = "#{self.class}("
|
183
|
+
s << @indexes.indexes.map(&:name).join(', ')
|
191
184
|
s << ", weights: #{@weights}" unless @weights.empty?
|
185
|
+
s << ")"
|
192
186
|
s
|
193
187
|
end
|
194
188
|
|
data/lib/picky/sources/base.rb
CHANGED
@@ -7,12 +7,12 @@
|
|
7
7
|
# * Delicious (http://del.icio.us, online bookmarking service)
|
8
8
|
# See also:
|
9
9
|
# http://github.com/floere/picky/wiki/Sources-Configuration
|
10
|
-
#
|
10
|
+
#
|
11
11
|
# Don't worry if your source isn't here. Adding your own is easy:
|
12
12
|
# http://github.com/floere/picky/wiki/Contributing-sources
|
13
13
|
#
|
14
14
|
module Sources
|
15
|
-
|
15
|
+
|
16
16
|
# Sources are where your data comes from.
|
17
17
|
#
|
18
18
|
# A source has 1 mandatory and 2 optional methods:
|
@@ -24,9 +24,9 @@ module Sources
|
|
24
24
|
# Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
|
25
25
|
#
|
26
26
|
class Base
|
27
|
-
|
27
|
+
|
28
28
|
attr_reader :key_format
|
29
|
-
|
29
|
+
|
30
30
|
# Connect to the backend.
|
31
31
|
#
|
32
32
|
# Called once per index/category combination before harvesting.
|
@@ -37,9 +37,9 @@ module Sources
|
|
37
37
|
# * We open a file with data.
|
38
38
|
#
|
39
39
|
def connect_backend
|
40
|
-
|
40
|
+
|
41
41
|
end
|
42
|
-
|
42
|
+
|
43
43
|
# Called by the indexer when gathering data.
|
44
44
|
#
|
45
45
|
# Yields the data (id, text for id) for the given type and category.
|
@@ -51,7 +51,7 @@ module Sources
|
|
51
51
|
def harvest index, category # :yields: id, text_for_id
|
52
52
|
# This concrete implementation yields "nothing", override in subclasses.
|
53
53
|
end
|
54
|
-
|
54
|
+
|
55
55
|
# Used to take a snapshot of your data if it is fast changing.
|
56
56
|
#
|
57
57
|
# Called once for each type before harvesting.
|
@@ -60,9 +60,9 @@ module Sources
|
|
60
60
|
# * In a DB source, a table based on the source's select statement is created.
|
61
61
|
#
|
62
62
|
def take_snapshot index
|
63
|
-
|
63
|
+
|
64
64
|
end
|
65
|
-
|
65
|
+
|
66
66
|
end
|
67
|
-
|
67
|
+
|
68
68
|
end
|
data/lib/picky/sources/couch.rb
CHANGED
@@ -1,12 +1,12 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Raised when a Couch source is instantiated without a url.
|
4
4
|
#
|
5
5
|
# Example:
|
6
6
|
# Sources::Couch.new(:column1, :column2) # without url option
|
7
7
|
#
|
8
8
|
class NoCouchDBGiven < StandardError; end
|
9
|
-
|
9
|
+
|
10
10
|
# A Couch database source.
|
11
11
|
#
|
12
12
|
# Options:
|
@@ -19,28 +19,32 @@ module Sources
|
|
19
19
|
# Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
|
20
20
|
#
|
21
21
|
class Couch < Base
|
22
|
-
|
22
|
+
|
23
23
|
#
|
24
24
|
#
|
25
25
|
def initialize *category_names, options
|
26
26
|
check_gem
|
27
|
-
|
27
|
+
|
28
28
|
Hash === options && options[:url] || raise_no_db_given(category_names)
|
29
29
|
@db = RestClient::Resource.new options.delete(:url), options
|
30
30
|
end
|
31
|
-
|
32
|
-
|
31
|
+
|
32
|
+
def to_s
|
33
|
+
self.class.name
|
34
|
+
end
|
35
|
+
|
36
|
+
# Default key format method for couch DB is to_sym.
|
33
37
|
#
|
34
38
|
def key_format
|
35
39
|
:to_sym
|
36
40
|
end
|
37
|
-
|
41
|
+
|
38
42
|
# Tries to require the rest_client gem.
|
39
43
|
#
|
40
44
|
def check_gem # :nodoc:
|
41
45
|
require 'rest_client'
|
42
46
|
rescue LoadError
|
43
|
-
|
47
|
+
warn_gem_missing 'rest-client', 'the CouchDB source'
|
44
48
|
exit 1
|
45
49
|
end
|
46
50
|
|
@@ -55,14 +59,14 @@ module Sources
|
|
55
59
|
yield doc[@@id_key], doc[category_name] || next
|
56
60
|
end
|
57
61
|
end
|
58
|
-
|
62
|
+
|
59
63
|
def get_data &block # :nodoc:
|
60
64
|
resp = @db['_all_docs?include_docs=true'].get
|
61
65
|
JSON.parse(resp)['rows'].
|
62
66
|
map{|row| row['doc']}.
|
63
67
|
each &block
|
64
68
|
end
|
65
|
-
|
69
|
+
|
66
70
|
def raise_no_db_given category_names # :nodoc:
|
67
71
|
raise NoCouchDBGiven.new(category_names.join(', '))
|
68
72
|
end
|
data/lib/picky/sources/csv.rb
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Raised when a CSV source is instantiated without a file.
|
4
4
|
#
|
5
5
|
# Example:
|
6
6
|
# Sources::CSV.new(:column1, :column2) # without file option
|
7
7
|
#
|
8
8
|
class NoCSVFileGiven < StandardError; end
|
9
|
-
|
9
|
+
|
10
10
|
# Describes a CSV source, a file with comma separated values in it.
|
11
|
-
#
|
11
|
+
#
|
12
12
|
# The first column is implicitly assumed to be the id column.
|
13
13
|
#
|
14
14
|
# It takes the same options as the Ruby 1.9 CSV class.
|
@@ -19,36 +19,43 @@ module Sources
|
|
19
19
|
# Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
|
20
20
|
#
|
21
21
|
class CSV < Base
|
22
|
-
|
22
|
+
|
23
23
|
# The CSV file's path, relative to PICKY_ROOT.
|
24
24
|
#
|
25
25
|
attr_reader :file_name
|
26
|
-
|
26
|
+
|
27
27
|
# The options that were passed into #new.
|
28
28
|
#
|
29
29
|
attr_reader :csv_options, :key_format
|
30
|
-
|
30
|
+
|
31
31
|
# The data category names.
|
32
32
|
#
|
33
33
|
attr_reader :category_names
|
34
|
-
|
34
|
+
|
35
35
|
def initialize *category_names, options
|
36
36
|
require 'csv'
|
37
37
|
@category_names = category_names
|
38
|
-
|
38
|
+
|
39
39
|
@csv_options = Hash === options && options || {}
|
40
40
|
@file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
|
41
|
-
|
41
|
+
|
42
42
|
key_format = options.delete :key_format
|
43
43
|
@key_format = key_format && key_format.to_sym || :to_i
|
44
44
|
end
|
45
|
-
|
45
|
+
|
46
|
+
def to_s
|
47
|
+
parameters = category_names
|
48
|
+
parameters << { file: file_name }
|
49
|
+
parameters << csv_options unless csv_options.empty?
|
50
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
51
|
+
end
|
52
|
+
|
46
53
|
# Raises a NoCSVFileGiven exception.
|
47
54
|
#
|
48
55
|
def raise_no_file_given category_names # :nodoc:
|
49
56
|
raise NoCSVFileGiven.new(category_names.join(', '))
|
50
57
|
end
|
51
|
-
|
58
|
+
|
52
59
|
# Harvests the data to index.
|
53
60
|
#
|
54
61
|
def harvest _, category
|
@@ -61,13 +68,13 @@ module Sources
|
|
61
68
|
yield indexed_id, text
|
62
69
|
end
|
63
70
|
end
|
64
|
-
|
71
|
+
|
65
72
|
#
|
66
73
|
#
|
67
74
|
def get_data &block # :nodoc:
|
68
75
|
::CSV.foreach file_name, csv_options, &block
|
69
76
|
end
|
70
|
-
|
77
|
+
|
71
78
|
end
|
72
|
-
|
79
|
+
|
73
80
|
end
|
data/lib/picky/sources/db.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Describes a database source. Needs a SELECT statement
|
4
4
|
# (with id in it), and a file option or the options from an AR config file.
|
5
5
|
#
|
@@ -15,27 +15,33 @@ module Sources
|
|
15
15
|
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
|
16
16
|
#
|
17
17
|
class DB < Base
|
18
|
-
|
18
|
+
|
19
19
|
# The select statement that was passed in.
|
20
20
|
#
|
21
21
|
attr_reader :select_statement
|
22
|
-
|
22
|
+
|
23
23
|
# The database adapter.
|
24
24
|
#
|
25
25
|
attr_reader :database
|
26
|
-
|
26
|
+
|
27
27
|
# The database connection options that were either passed in or loaded from the given file.
|
28
28
|
#
|
29
|
-
attr_reader :connection_options
|
30
|
-
|
29
|
+
attr_reader :connection_options, :options
|
30
|
+
|
31
31
|
@@traversal_id = :__picky_id
|
32
|
-
|
32
|
+
|
33
33
|
def initialize select_statement, options = { file: 'app/db.yml' }
|
34
34
|
@select_statement = select_statement
|
35
35
|
@database = create_database_adapter
|
36
36
|
@options = options
|
37
37
|
end
|
38
|
-
|
38
|
+
|
39
|
+
def to_s
|
40
|
+
parameters = [select_statement.inspect]
|
41
|
+
parameters << options unless options.empty?
|
42
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
43
|
+
end
|
44
|
+
|
39
45
|
# Creates a database adapter for use with this source.
|
40
46
|
def create_database_adapter # :nodoc:
|
41
47
|
# TODO Do not use ActiveRecord directly.
|
@@ -46,7 +52,7 @@ module Sources
|
|
46
52
|
adapter_class.abstract_class = true
|
47
53
|
adapter_class
|
48
54
|
end
|
49
|
-
|
55
|
+
|
50
56
|
# Configure the backend.
|
51
57
|
#
|
52
58
|
# Options:
|
@@ -63,7 +69,7 @@ module Sources
|
|
63
69
|
end
|
64
70
|
self
|
65
71
|
end
|
66
|
-
|
72
|
+
|
67
73
|
# Connect the backend.
|
68
74
|
#
|
69
75
|
# Will raise unless connection options have been given.
|
@@ -73,64 +79,64 @@ module Sources
|
|
73
79
|
raise "Database backend not configured" unless connection_options
|
74
80
|
database.establish_connection connection_options
|
75
81
|
end
|
76
|
-
|
82
|
+
|
77
83
|
# Take a snapshot of the data.
|
78
84
|
#
|
79
85
|
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
80
86
|
#
|
81
87
|
def take_snapshot index
|
82
88
|
connect_backend
|
83
|
-
|
89
|
+
|
84
90
|
origin = snapshot_table_name index
|
85
91
|
on_database = database.connection
|
86
|
-
|
92
|
+
|
87
93
|
# Drop the table if it exists.
|
88
94
|
#
|
89
95
|
on_database.drop_table origin if on_database.table_exists?(origin)
|
90
|
-
|
96
|
+
|
91
97
|
# The adapters currently do not support this.
|
92
98
|
#
|
93
99
|
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
94
|
-
|
100
|
+
|
95
101
|
# Add a column that Picky uses to traverse the table's entries.
|
96
102
|
#
|
97
103
|
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
|
98
|
-
|
104
|
+
|
99
105
|
# Execute any special queries this index needs executed.
|
100
106
|
#
|
101
107
|
on_database.execute index.after_indexing if index.after_indexing
|
102
108
|
end
|
103
|
-
|
109
|
+
|
104
110
|
# Counts all the entries that are used for the index.
|
105
111
|
#
|
106
112
|
def count index
|
107
113
|
connect_backend
|
108
|
-
|
114
|
+
|
109
115
|
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index)}").to_i
|
110
116
|
end
|
111
|
-
|
117
|
+
|
112
118
|
# The name of the snapshot table created by Picky.
|
113
119
|
#
|
114
120
|
def snapshot_table_name index
|
115
121
|
"picky_#{index.name}_index"
|
116
122
|
end
|
117
|
-
|
123
|
+
|
118
124
|
# Harvests the data to index in chunks.
|
119
125
|
#
|
120
126
|
def harvest index, category, &block
|
121
127
|
connect_backend
|
122
|
-
|
128
|
+
|
123
129
|
(0..count(index)).step(chunksize) do |offset|
|
124
130
|
get_data index, category, offset, &block
|
125
131
|
end
|
126
132
|
end
|
127
|
-
|
133
|
+
|
128
134
|
# Gets the data from the backend.
|
129
135
|
#
|
130
136
|
def get_data index, category, offset, &block # :nodoc:
|
131
|
-
|
137
|
+
|
132
138
|
select_statement = harvest_statement_with_offset index, category, offset
|
133
|
-
|
139
|
+
|
134
140
|
# TODO Rewrite ASAP.
|
135
141
|
#
|
136
142
|
if database.connection.adapter_name == "PostgreSQL"
|
@@ -146,29 +152,29 @@ module Sources
|
|
146
152
|
end
|
147
153
|
end
|
148
154
|
end
|
149
|
-
|
155
|
+
|
150
156
|
# Builds a harvest statement for getting data to index.
|
151
157
|
#
|
152
158
|
def harvest_statement_with_offset index, category, offset
|
153
159
|
statement = harvest_statement index, category
|
154
|
-
|
160
|
+
|
155
161
|
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
156
|
-
|
162
|
+
|
157
163
|
"#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
|
158
164
|
end
|
159
|
-
|
165
|
+
|
160
166
|
# The harvest statement used to pull data from the snapshot table.
|
161
167
|
#
|
162
168
|
def harvest_statement index, category
|
163
169
|
"SELECT id, #{category.from} FROM #{snapshot_table_name(index)} st"
|
164
170
|
end
|
165
|
-
|
171
|
+
|
166
172
|
# The number of records that are loaded in each chunk.
|
167
173
|
#
|
168
174
|
def chunksize
|
169
175
|
25_000
|
170
176
|
end
|
171
|
-
|
177
|
+
|
172
178
|
end
|
173
|
-
|
179
|
+
|
174
180
|
end
|
data/lib/picky/sources/delicious.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
module Sources
|
2
|
-
|
2
|
+
|
3
3
|
# Describes a Delicious (http://del.icio.us) source.
|
4
4
|
#
|
5
5
|
# This source has a fixed set of categories:
|
@@ -11,7 +11,7 @@ module Sources
|
|
11
11
|
# Sources::Delicious.new('usrnam', 'paswrd')
|
12
12
|
#
|
13
13
|
class Delicious < Base
|
14
|
-
|
14
|
+
|
15
15
|
def initialize username, password
|
16
16
|
check_gem
|
17
17
|
@username = username
|
@@ -20,21 +20,24 @@ module Sources
|
|
20
20
|
def check_gem # :nodoc:
|
21
21
|
require 'www/delicious'
|
22
22
|
rescue LoadError
|
23
|
-
|
23
|
+
warn_gem_missing 'www-delicious', 'the delicious source'
|
24
24
|
exit 1
|
25
25
|
end
|
26
|
-
|
26
|
+
|
27
|
+
def to_s
|
28
|
+
self.class.name
|
29
|
+
end
|
30
|
+
|
27
31
|
# Harvests the data to index.
|
28
32
|
#
|
29
33
|
def harvest _, category
|
30
34
|
get_data do |indexed_id, data|
|
31
35
|
text = data[category.from]
|
32
36
|
next unless text
|
33
|
-
text.force_encoding 'utf-8' # TODO Still needed?
|
34
37
|
yield indexed_id, text
|
35
38
|
end
|
36
39
|
end
|
37
|
-
|
40
|
+
|
38
41
|
#
|
39
42
|
#
|
40
43
|
def get_data # :nodoc:
|
@@ -50,7 +53,7 @@ module Sources
|
|
50
53
|
yield @generated_id, data
|
51
54
|
end
|
52
55
|
end
|
53
|
-
|
56
|
+
|
54
57
|
end
|
55
|
-
|
58
|
+
|
56
59
|
end
|
@@ -0,0 +1,66 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
#
|
3
|
+
|
4
|
+
# Gathers different statistics
|
5
|
+
# when methods are called.
|
6
|
+
#
|
7
|
+
# Can be output using to_s.
|
8
|
+
#
|
9
|
+
class Statistics
|
10
|
+
|
11
|
+
def self.instance
|
12
|
+
@statistics ||= new
|
13
|
+
end
|
14
|
+
|
15
|
+
def preamble
|
16
|
+
loc = lines_of_code File.open('app/application.rb').read
|
17
|
+
|
18
|
+
@preamble ||= <<-PREAMBLE
|
19
|
+
\033[1mApplication(s)\033[m
|
20
|
+
Definition LOC: #{"%4d" % loc}
|
21
|
+
Indexes defined: #{"%4d" % Indexes.size}
|
22
|
+
PREAMBLE
|
23
|
+
end
|
24
|
+
|
25
|
+
# Gathers information about the application.
|
26
|
+
#
|
27
|
+
def application
|
28
|
+
preamble
|
29
|
+
@application = Application.apps.map &:indented_to_s
|
30
|
+
end
|
31
|
+
|
32
|
+
# Gathers information about the indexes.
|
33
|
+
#
|
34
|
+
def analyze
|
35
|
+
preamble
|
36
|
+
|
37
|
+
@indexes = ["\033[1mIndexes analysis\033[m:"]
|
38
|
+
Indexes.analyze.each_pair do |name, index|
|
39
|
+
@indexes << <<-ANALYSIS
|
40
|
+
#{"#{name}:".indented_to_s}:
|
41
|
+
#{"exact:\n#{index[:exact].indented_to_s}".indented_to_s(4)}
|
42
|
+
#{"partial*:\n#{index[:partial].indented_to_s}".indented_to_s(4)}
|
43
|
+
ANALYSIS
|
44
|
+
end
|
45
|
+
@indexes = @indexes.join "\n"
|
46
|
+
end
|
47
|
+
|
48
|
+
# Outputs all gathered statistics.
|
49
|
+
#
|
50
|
+
def to_s
|
51
|
+
<<-STATS
|
52
|
+
|
53
|
+
Picky Configuration:
|
54
|
+
|
55
|
+
#{[@preamble, @application, @indexes].compact.join("\n")}
|
56
|
+
STATS
|
57
|
+
end
|
58
|
+
|
59
|
+
# Internal methods.
|
60
|
+
#
|
61
|
+
|
62
|
+
def lines_of_code text
|
63
|
+
text.scan(/^\s*[^#\s].*$/).size
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
data/lib/tasks/application.rake
CHANGED