picky 2.7.0 → 3.0.0.pre1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/sources/db.rb
CHANGED
@@ -1,171 +1,175 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
# Describes a database source. Needs a SELECT statement
|
4
|
-
# (with id in it), and a file option or the options from an AR config file.
|
5
|
-
#
|
6
|
-
# The select statement can be as complicated as you want,
|
7
|
-
# as long as it has an id in it and as long as it can be
|
8
|
-
# used in a CREATE TABLE AS statement.
|
9
|
-
# (working on that last one)
|
10
|
-
#
|
11
|
-
# Examples:
|
12
|
-
# Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
|
13
|
-
# Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
|
14
|
-
# Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
|
15
|
-
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
|
16
|
-
#
|
17
|
-
class DB < Base
|
18
|
-
|
19
|
-
# The select statement that was passed in.
|
20
|
-
#
|
21
|
-
attr_reader :select_statement
|
1
|
+
module Picky
|
22
2
|
|
23
|
-
|
24
|
-
#
|
25
|
-
attr_reader :database
|
3
|
+
module Sources
|
26
4
|
|
27
|
-
#
|
5
|
+
# Describes a database source. Needs a SELECT statement
|
6
|
+
# (with id in it), and a file option or the options from an AR config file.
|
28
7
|
#
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
8
|
+
# The select statement can be as complicated as you want,
|
9
|
+
# as long as it has an id in it and as long as it can be
|
10
|
+
# used in a CREATE TABLE AS statement.
|
11
|
+
# (working on that last one)
|
12
|
+
#
|
13
|
+
# Examples:
|
14
|
+
# Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
|
15
|
+
# Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
|
16
|
+
# Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
|
17
|
+
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
|
18
|
+
#
|
19
|
+
class DB < Base
|
38
20
|
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
43
|
-
end
|
21
|
+
# The select statement that was passed in.
|
22
|
+
#
|
23
|
+
attr_reader :select_statement
|
44
24
|
|
45
|
-
|
46
|
-
def create_database_adapter # :nodoc:
|
47
|
-
# TODO Do not use ActiveRecord directly.
|
25
|
+
# The database adapter.
|
48
26
|
#
|
49
|
-
|
27
|
+
attr_reader :database
|
28
|
+
|
29
|
+
# The database connection options that were either passed in or loaded from the given file.
|
50
30
|
#
|
51
|
-
|
52
|
-
adapter_class.abstract_class = true
|
53
|
-
adapter_class
|
54
|
-
end
|
31
|
+
attr_reader :connection_options, :options
|
55
32
|
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
# * The configuration as a hash.
|
63
|
-
#
|
64
|
-
def configure options # :nodoc:
|
65
|
-
@connection_options = if filename = options[:file]
|
66
|
-
File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
|
67
|
-
else
|
68
|
-
options
|
33
|
+
@@traversal_id = :__picky_id
|
34
|
+
|
35
|
+
def initialize select_statement, options = { file: 'app/db.yml' }
|
36
|
+
@select_statement = select_statement
|
37
|
+
@database = create_database_adapter
|
38
|
+
@options = options
|
69
39
|
end
|
70
|
-
self
|
71
|
-
end
|
72
40
|
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
configure @options
|
79
|
-
raise "Database backend not configured" unless connection_options
|
80
|
-
database.establish_connection connection_options
|
81
|
-
end
|
41
|
+
def to_s
|
42
|
+
parameters = [select_statement.inspect]
|
43
|
+
parameters << options unless options.empty?
|
44
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
45
|
+
end
|
82
46
|
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
47
|
+
# Creates a database adapter for use with this source.
|
48
|
+
def create_database_adapter # :nodoc:
|
49
|
+
# TODO Do not use ActiveRecord directly. Use set_table_name etc.
|
50
|
+
#
|
51
|
+
adapter_class = Class.new ActiveRecord::Base
|
52
|
+
adapter_class.abstract_class = true
|
53
|
+
adapter_class
|
54
|
+
end
|
90
55
|
|
91
|
-
#
|
56
|
+
# Configure the backend.
|
92
57
|
#
|
93
|
-
|
94
|
-
|
95
|
-
#
|
58
|
+
# Options:
|
59
|
+
# Either
|
60
|
+
# * file => 'some/filename.yml' # With an active record configuration.
|
61
|
+
# Or
|
62
|
+
# * The configuration as a hash.
|
96
63
|
#
|
97
|
-
|
64
|
+
def configure options # :nodoc:
|
65
|
+
@connection_options = if filename = options[:file]
|
66
|
+
File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
|
67
|
+
else
|
68
|
+
options
|
69
|
+
end
|
70
|
+
self
|
71
|
+
end
|
98
72
|
|
99
|
-
#
|
73
|
+
# Connect the backend.
|
100
74
|
#
|
101
|
-
|
75
|
+
# Will raise unless connection options have been given.
|
76
|
+
#
|
77
|
+
def connect_backend
|
78
|
+
configure @options
|
79
|
+
raise "Database backend not configured" unless connection_options
|
80
|
+
database.establish_connection connection_options
|
81
|
+
end
|
102
82
|
|
103
|
-
#
|
83
|
+
# Take a snapshot of the data.
|
104
84
|
#
|
105
|
-
|
106
|
-
|
85
|
+
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
86
|
+
#
|
87
|
+
def take_snapshot index
|
88
|
+
timed_exclaim %Q{"#{index.identifier}": Taking snapshot of database data.}
|
107
89
|
|
108
|
-
|
109
|
-
|
110
|
-
def count index_name
|
111
|
-
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
112
|
-
end
|
90
|
+
origin = snapshot_table_name index.name
|
91
|
+
on_database = database.connection
|
113
92
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
"picky_#{index_name}_index"
|
118
|
-
end
|
93
|
+
# Drop the table if it exists.
|
94
|
+
#
|
95
|
+
on_database.drop_table origin if on_database.table_exists?(origin)
|
119
96
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
97
|
+
# The adapters currently do not support this.
|
98
|
+
#
|
99
|
+
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
100
|
+
|
101
|
+
# Add a column that Picky uses to traverse the table's entries.
|
102
|
+
#
|
103
|
+
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
|
104
|
+
|
105
|
+
# Execute any special queries this index needs executed.
|
106
|
+
#
|
107
|
+
on_database.execute index.after_indexing if index.after_indexing
|
125
108
|
end
|
126
|
-
end
|
127
109
|
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
110
|
+
# Counts all the entries that are used for the index.
|
111
|
+
#
|
112
|
+
def count index_name
|
113
|
+
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
114
|
+
end
|
132
115
|
|
133
|
-
#
|
116
|
+
# The name of the snapshot table created by Picky.
|
134
117
|
#
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
140
|
-
|
118
|
+
def snapshot_table_name index_name
|
119
|
+
"picky_#{index_name}_index"
|
120
|
+
end
|
121
|
+
|
122
|
+
# Harvests the data to index in chunks.
|
123
|
+
#
|
124
|
+
def harvest category, &block
|
125
|
+
(0..count(category.index_name)).step(chunksize) do |offset|
|
126
|
+
get_data category, offset, &block
|
141
127
|
end
|
142
|
-
|
143
|
-
|
144
|
-
|
128
|
+
end
|
129
|
+
|
130
|
+
# Gets the data from the backend.
|
131
|
+
#
|
132
|
+
def get_data category, offset, &block # :nodoc:
|
133
|
+
select_statement = harvest_statement_with_offset category, offset
|
134
|
+
|
135
|
+
# TODO Rewrite ASAP.
|
136
|
+
#
|
137
|
+
if database.connection.adapter_name == "PostgreSQL"
|
138
|
+
id_key = 'id'
|
139
|
+
text_key = category.from.to_s
|
140
|
+
database.connection.execute(select_statement).each do |hash|
|
141
|
+
id, text = hash.values_at id_key, text_key
|
142
|
+
yield id, text if text
|
143
|
+
end
|
144
|
+
else
|
145
|
+
database.connection.execute(select_statement).each do |id, text|
|
146
|
+
yield id, text if text
|
147
|
+
end
|
145
148
|
end
|
146
149
|
end
|
147
|
-
end
|
148
150
|
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
151
|
+
# Builds a harvest statement for getting data to index.
|
152
|
+
#
|
153
|
+
def harvest_statement_with_offset category, offset
|
154
|
+
statement = harvest_statement category
|
153
155
|
|
154
|
-
|
156
|
+
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
155
157
|
|
156
|
-
|
157
|
-
|
158
|
+
"#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
|
159
|
+
end
|
158
160
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
161
|
+
# The harvest statement used to pull data from the snapshot table.
|
162
|
+
#
|
163
|
+
def harvest_statement category
|
164
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
|
165
|
+
end
|
166
|
+
|
167
|
+
# The amount of records that are loaded each chunk.
|
168
|
+
#
|
169
|
+
def chunksize
|
170
|
+
25_000
|
171
|
+
end
|
164
172
|
|
165
|
-
# The amount of records that are loaded each chunk.
|
166
|
-
#
|
167
|
-
def chunksize
|
168
|
-
25_000
|
169
173
|
end
|
170
174
|
|
171
175
|
end
|
@@ -1,57 +1,61 @@
|
|
1
|
-
module
|
2
|
-
|
3
|
-
# Describes a Delicious (http://deli.cio.us) source.
|
4
|
-
#
|
5
|
-
# This source has a fixed set of categories:
|
6
|
-
# * title
|
7
|
-
# * tags
|
8
|
-
# * url
|
9
|
-
#
|
10
|
-
# Examples:
|
11
|
-
# Sources::CSV.new('usrnam', 'paswrd')
|
12
|
-
#
|
13
|
-
class Delicious < Base
|
14
|
-
|
15
|
-
def initialize username, password
|
16
|
-
check_gem
|
17
|
-
@username = username
|
18
|
-
@password = password
|
19
|
-
end
|
20
|
-
def check_gem # :nodoc:
|
21
|
-
require 'www/delicious'
|
22
|
-
rescue LoadError
|
23
|
-
warn_gem_missing 'www-delicious', 'the delicious source'
|
24
|
-
exit 1
|
25
|
-
end
|
1
|
+
module Picky
|
26
2
|
|
27
|
-
|
28
|
-
"#{self.class.name}(#{@username})"
|
29
|
-
end
|
3
|
+
module Sources
|
30
4
|
|
31
|
-
#
|
5
|
+
# Describes a Delicious (http://del.icio.us) source.
|
32
6
|
#
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
yield indexed_id, text
|
38
|
-
end
|
39
|
-
end
|
40
|
-
|
7
|
+
# This source has a fixed set of categories:
|
8
|
+
# * title
|
9
|
+
# * tags
|
10
|
+
# * url
|
41
11
|
#
|
12
|
+
# Examples:
|
13
|
+
# Sources::Delicious.new('usrnam', 'paswrd')
|
42
14
|
#
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
15
|
+
class Delicious < Base
|
16
|
+
|
17
|
+
def initialize username, password
|
18
|
+
check_gem
|
19
|
+
@username = username
|
20
|
+
@password = password
|
21
|
+
end
|
22
|
+
def check_gem # :nodoc:
|
23
|
+
require 'www/delicious'
|
24
|
+
rescue LoadError
|
25
|
+
warn_gem_missing 'www-delicious', 'the delicious source'
|
26
|
+
exit 1
|
54
27
|
end
|
28
|
+
|
29
|
+
def to_s
|
30
|
+
"#{self.class.name}(#{@username})"
|
31
|
+
end
|
32
|
+
|
33
|
+
# Harvests the data to index.
|
34
|
+
#
|
35
|
+
def harvest category
|
36
|
+
get_data do |indexed_id, data|
|
37
|
+
text = data[category.from]
|
38
|
+
next unless text
|
39
|
+
yield indexed_id, text
|
40
|
+
end
|
41
|
+
end
|
42
|
+
|
43
|
+
#
|
44
|
+
#
|
45
|
+
def get_data # :nodoc:
|
46
|
+
@generated_id ||= 0
|
47
|
+
@posts ||= WWW::Delicious.new(@username, @password).posts_recent(count: 100)
|
48
|
+
@posts.each do |post|
|
49
|
+
data = {
|
50
|
+
title: post.title,
|
51
|
+
tags: post.tags.join(' '),
|
52
|
+
url: post.url.to_s
|
53
|
+
}
|
54
|
+
@generated_id += 1
|
55
|
+
yield @generated_id, data
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
55
59
|
end
|
56
60
|
|
57
61
|
end
|
data/lib/picky/sources/mongo.rb
CHANGED
@@ -1,75 +1,80 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
#
|
5
|
-
# Important!
|
6
|
-
# You have to start your mongodb with --rest in order to use
|
7
|
-
# the rest / http interface
|
8
|
-
#
|
9
|
-
class NoMongoDBGiven < StandardError; end
|
3
|
+
module Sources
|
10
4
|
|
11
|
-
|
12
|
-
#
|
13
|
-
# A Mongo database source.
|
14
|
-
#
|
15
|
-
# Options:
|
16
|
-
# * url, db
|
17
|
-
# Example:
|
18
|
-
# Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
|
19
|
-
# Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
|
20
|
-
#
|
21
|
-
# and all the options of a <tt>RestClient::Resource</tt>.
|
22
|
-
# See http://github.com/archiloque/rest-client.
|
23
|
-
#
|
24
|
-
class Mongo < Base
|
25
|
-
@@id_key = '_id'
|
5
|
+
# Important note: We're not sure if this works already.
|
26
6
|
#
|
7
|
+
# A Mongo database source.
|
27
8
|
#
|
28
|
-
|
29
|
-
|
9
|
+
# Options:
|
10
|
+
# * url, db
|
11
|
+
# Example:
|
12
|
+
# Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
|
13
|
+
# Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
|
14
|
+
#
|
15
|
+
# and all the options of a <tt>RestClient::Resource</tt>.
|
16
|
+
# See http://github.com/archiloque/rest-client.
|
17
|
+
#
|
18
|
+
class Mongo < Base
|
30
19
|
|
31
|
-
|
32
|
-
|
33
|
-
|
20
|
+
# Raised when a Mongo source is instantiated without a valid uri.
|
21
|
+
#
|
22
|
+
# Important!
|
23
|
+
# You have to start your mongodb with --rest in order to use
|
24
|
+
# the rest / http interface
|
25
|
+
#
|
26
|
+
class NoDBGiven < StandardError; end
|
34
27
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
28
|
+
@@id_key = '_id'
|
29
|
+
#
|
30
|
+
#
|
31
|
+
def initialize *category_names, options
|
32
|
+
check_gem
|
39
33
|
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
require 'rest_client'
|
44
|
-
rescue LoadError
|
45
|
-
warn_gem_missing 'rest-client', 'the MongoDB source'
|
46
|
-
exit 1
|
47
|
-
end
|
34
|
+
unless options[:url] && options[:db]
|
35
|
+
raise_no_db_given(category_names)
|
36
|
+
end
|
48
37
|
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
# to return only 15 entries
|
54
|
-
#
|
55
|
-
def harvest category
|
56
|
-
collection = (category.from || category.index_name).to_s
|
57
|
-
resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
|
58
|
-
JSON.parse(resp)['rows'].each do |row|
|
59
|
-
text = row[collection].to_s
|
60
|
-
next unless text
|
61
|
-
index_key = row.delete(@@id_key) # TODO Still works, I removed .values
|
62
|
-
yield index_key, text
|
63
|
-
end
|
64
|
-
end
|
38
|
+
@db = RestClient::Resource.new options.delete(:url), options
|
39
|
+
@database = options.delete(:db)
|
40
|
+
@key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
|
41
|
+
end
|
65
42
|
|
66
|
-
|
67
|
-
|
68
|
-
|
43
|
+
# Tries to require the rest_client gem.
|
44
|
+
#
|
45
|
+
def check_gem # :nodoc:
|
46
|
+
require 'rest_client'
|
47
|
+
rescue LoadError
|
48
|
+
warn_gem_missing 'rest-client', 'the MongoDB source'
|
49
|
+
exit 1
|
50
|
+
end
|
69
51
|
|
70
|
-
|
71
|
-
|
72
|
-
|
52
|
+
# Fetches the data, @limit=0 will return all records
|
53
|
+
#
|
54
|
+
# Limit is set to 0 by default - all collection entries will be sent
|
55
|
+
# If you want to limit the results, set it to any other number, e.g. limit=15
|
56
|
+
# to return only 15 entries
|
57
|
+
#
|
58
|
+
def harvest category
|
59
|
+
collection = (category.from || category.index_name).to_s
|
60
|
+
resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
|
61
|
+
JSON.parse(resp)['rows'].each do |row|
|
62
|
+
text = row[collection].to_s
|
63
|
+
next unless text
|
64
|
+
index_key = row.delete(@@id_key) # TODO Still works, I removed .values
|
65
|
+
yield index_key, text
|
66
|
+
end
|
67
|
+
end
|
73
68
|
|
69
|
+
def raise_no_db_given category_names # :nodoc:
|
70
|
+
raise NoDBGiven.new(category_names.join(', '))
|
71
|
+
end
|
72
|
+
|
73
|
+
def to_s
|
74
|
+
self.class.name
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
74
78
|
end
|
75
|
-
|
79
|
+
|
80
|
+
end
|
@@ -1,24 +1,28 @@
|
|
1
|
-
module
|
1
|
+
module Picky
|
2
2
|
|
3
|
-
|
4
|
-
#
|
5
|
-
# For example if you want to normalize data.
|
6
|
-
#
|
7
|
-
module Wrappers # :nodoc:all
|
3
|
+
module Sources
|
8
4
|
|
9
|
-
|
5
|
+
# Source wrappers can be used to rewrite data before it goes into the index.
|
6
|
+
#
|
7
|
+
# For example if you want to normalize data.
|
8
|
+
#
|
9
|
+
module Wrappers # :nodoc:all
|
10
10
|
|
11
|
-
|
11
|
+
class Base
|
12
12
|
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
13
|
+
attr_reader :source
|
14
|
+
|
15
|
+
# Wraps a source.
|
16
|
+
#
|
17
|
+
def initialize source
|
18
|
+
@source = source
|
19
|
+
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
|
21
|
+
# Default is delegation for all methods
|
22
|
+
#
|
23
|
+
delegate :harvest, :connect_backend, :take_snapshot, :key_format, :to => :source
|
24
|
+
|
25
|
+
end
|
22
26
|
|
23
27
|
end
|
24
28
|
|