picky 2.7.0 → 3.0.0.pre1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/picky/adapters/rack/base.rb +20 -16
- data/lib/picky/adapters/rack/live_parameters.rb +28 -24
- data/lib/picky/adapters/rack/search.rb +67 -0
- data/lib/picky/adapters/rack.rb +27 -23
- data/lib/picky/application.rb +246 -236
- data/lib/picky/backend/base.rb +115 -119
- data/lib/picky/backend/file/basic.rb +102 -98
- data/lib/picky/backend/file/json.rb +27 -23
- data/lib/picky/backend/file/marshal.rb +32 -28
- data/lib/picky/backend/file/text.rb +45 -41
- data/lib/picky/backend/files.rb +19 -15
- data/lib/picky/backend/redis/basic.rb +76 -72
- data/lib/picky/backend/redis/list_hash.rb +40 -36
- data/lib/picky/backend/redis/string_hash.rb +30 -26
- data/lib/picky/backend/redis.rb +32 -28
- data/lib/picky/bundle.rb +82 -57
- data/lib/{bundling.rb → picky/bundling.rb} +0 -0
- data/lib/picky/calculations/location.rb +51 -47
- data/lib/picky/categories.rb +60 -56
- data/lib/picky/categories_indexed.rb +73 -82
- data/lib/picky/categories_indexing.rb +12 -8
- data/lib/picky/category.rb +109 -120
- data/lib/picky/category_indexed.rb +39 -41
- data/lib/picky/category_indexing.rb +123 -125
- data/lib/picky/character_substituters/west_european.rb +32 -26
- data/lib/{constants.rb → picky/constants.rb} +0 -0
- data/lib/picky/cores.rb +96 -92
- data/lib/{deployment.rb → picky/deployment.rb} +0 -0
- data/lib/picky/frontend_adapters/rack.rb +133 -118
- data/lib/picky/generators/aliases.rb +5 -3
- data/lib/picky/generators/base.rb +11 -7
- data/lib/picky/generators/partial/default.rb +7 -3
- data/lib/picky/generators/partial/none.rb +24 -20
- data/lib/picky/generators/partial/strategy.rb +20 -16
- data/lib/picky/generators/partial/substring.rb +94 -90
- data/lib/picky/generators/partial_generator.rb +11 -7
- data/lib/picky/generators/similarity/default.rb +9 -5
- data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
- data/lib/picky/generators/similarity/metaphone.rb +20 -16
- data/lib/picky/generators/similarity/none.rb +23 -19
- data/lib/picky/generators/similarity/phonetic.rb +49 -45
- data/lib/picky/generators/similarity/soundex.rb +20 -16
- data/lib/picky/generators/similarity/strategy.rb +10 -6
- data/lib/picky/generators/similarity_generator.rb +11 -7
- data/lib/picky/generators/strategy.rb +14 -10
- data/lib/picky/generators/weights/default.rb +9 -5
- data/lib/picky/generators/weights/logarithmic.rb +30 -26
- data/lib/picky/generators/weights/strategy.rb +10 -6
- data/lib/picky/generators/weights_generator.rb +11 -7
- data/lib/picky/helpers/measuring.rb +20 -16
- data/lib/picky/indexed/bundle/base.rb +39 -37
- data/lib/picky/indexed/bundle/memory.rb +68 -64
- data/lib/picky/indexed/bundle/redis.rb +73 -69
- data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
- data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
- data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
- data/lib/picky/indexed/wrappers/category/location.rb +17 -13
- data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
- data/lib/picky/indexers/base.rb +26 -22
- data/lib/picky/indexers/parallel.rb +62 -58
- data/lib/picky/indexers/serial.rb +41 -37
- data/lib/picky/indexes/index.rb +400 -0
- data/lib/picky/indexes/index_indexed.rb +24 -0
- data/lib/picky/indexes/index_indexing.rb +138 -0
- data/lib/picky/indexes/memory.rb +20 -0
- data/lib/picky/indexes/redis.rb +20 -0
- data/lib/picky/indexes.rb +68 -61
- data/lib/picky/indexes_indexed.rb +16 -12
- data/lib/picky/indexes_indexing.rb +41 -37
- data/lib/picky/indexing/bundle/base.rb +216 -205
- data/lib/picky/indexing/bundle/memory.rb +16 -11
- data/lib/picky/indexing/bundle/redis.rb +14 -12
- data/lib/picky/indexing/wrappers/category/location.rb +17 -13
- data/lib/picky/interfaces/live_parameters.rb +159 -154
- data/lib/picky/loader.rb +267 -304
- data/lib/picky/loggers/search.rb +20 -13
- data/lib/picky/no_source_specified_exception.rb +7 -3
- data/lib/picky/performant.rb +6 -2
- data/lib/picky/query/allocation.rb +71 -67
- data/lib/picky/query/allocations.rb +99 -94
- data/lib/picky/query/combination.rb +70 -66
- data/lib/picky/query/combinations/base.rb +56 -52
- data/lib/picky/query/combinations/memory.rb +36 -32
- data/lib/picky/query/combinations/redis.rb +66 -62
- data/lib/picky/query/indexes.rb +175 -160
- data/lib/picky/query/qualifier_category_mapper.rb +43 -0
- data/lib/picky/query/token.rb +165 -172
- data/lib/picky/query/tokens.rb +86 -82
- data/lib/picky/query/weights.rb +44 -48
- data/lib/picky/query.rb +5 -1
- data/lib/picky/rack/harakiri.rb +51 -47
- data/lib/picky/results.rb +81 -77
- data/lib/picky/search.rb +169 -158
- data/lib/picky/sinatra.rb +34 -0
- data/lib/picky/sources/base.rb +73 -70
- data/lib/picky/sources/couch.rb +61 -57
- data/lib/picky/sources/csv.rb +68 -64
- data/lib/picky/sources/db.rb +139 -135
- data/lib/picky/sources/delicious.rb +52 -48
- data/lib/picky/sources/mongo.rb +68 -63
- data/lib/picky/sources/wrappers/base.rb +20 -16
- data/lib/picky/sources/wrappers/location.rb +37 -33
- data/lib/picky/statistics.rb +46 -43
- data/lib/picky/tasks.rb +3 -0
- data/lib/picky/tokenizers/base.rb +192 -187
- data/lib/picky/tokenizers/index.rb +25 -21
- data/lib/picky/tokenizers/location.rb +33 -29
- data/lib/picky/tokenizers/query.rb +49 -43
- data/lib/picky.rb +21 -13
- data/lib/tasks/application.rake +1 -1
- data/lib/tasks/index.rake +3 -3
- data/lib/tasks/routes.rake +1 -1
- data/lib/tasks/server.rake +1 -1
- data/spec/lib/adapters/rack/base_spec.rb +1 -1
- data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
- data/spec/lib/adapters/rack/query_spec.rb +1 -1
- data/spec/lib/application_spec.rb +39 -32
- data/spec/lib/backend/file/basic_spec.rb +2 -2
- data/spec/lib/backend/file/json_spec.rb +2 -2
- data/spec/lib/backend/file/marshal_spec.rb +2 -2
- data/spec/lib/backend/file/text_spec.rb +1 -1
- data/spec/lib/backend/files_spec.rb +14 -24
- data/spec/lib/backend/redis/basic_spec.rb +2 -2
- data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
- data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
- data/spec/lib/backend/redis_spec.rb +20 -13
- data/spec/lib/calculations/location_spec.rb +1 -1
- data/spec/lib/categories_indexed_spec.rb +16 -34
- data/spec/lib/category_indexed_spec.rb +9 -27
- data/spec/lib/category_indexing_spec.rb +2 -3
- data/spec/lib/category_spec.rb +10 -10
- data/spec/lib/character_substituters/west_european_spec.rb +6 -5
- data/spec/lib/cores_spec.rb +17 -17
- data/spec/lib/extensions/symbol_spec.rb +15 -1
- data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
- data/spec/lib/generators/aliases_spec.rb +3 -3
- data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
- data/spec/lib/generators/partial/default_spec.rb +3 -3
- data/spec/lib/generators/partial/none_spec.rb +2 -2
- data/spec/lib/generators/partial/substring_spec.rb +1 -1
- data/spec/lib/generators/partial_generator_spec.rb +3 -3
- data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
- data/spec/lib/generators/similarity/none_spec.rb +1 -1
- data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
- data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
- data/spec/lib/generators/similarity_generator_spec.rb +2 -2
- data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
- data/spec/lib/generators/weights_generator_spec.rb +1 -1
- data/spec/lib/helpers/measuring_spec.rb +2 -2
- data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
- data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
- data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
- data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
- data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
- data/spec/lib/indexers/base_spec.rb +1 -1
- data/spec/lib/indexers/parallel_spec.rb +1 -1
- data/spec/lib/indexers/serial_spec.rb +1 -1
- data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
- data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
- data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
- data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
- data/spec/lib/indexes_class_spec.rb +2 -2
- data/spec/lib/indexes_indexed_spec.rb +1 -1
- data/spec/lib/indexes_indexing_spec.rb +1 -1
- data/spec/lib/indexes_spec.rb +1 -1
- data/spec/lib/indexing/bundle/base_spec.rb +7 -5
- data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
- data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
- data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
- data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
- data/spec/lib/loader_spec.rb +17 -19
- data/spec/lib/loggers/search_spec.rb +2 -2
- data/spec/lib/query/allocation_spec.rb +1 -1
- data/spec/lib/query/allocations_spec.rb +1 -1
- data/spec/lib/query/combination_spec.rb +4 -4
- data/spec/lib/query/combinations/base_spec.rb +1 -1
- data/spec/lib/query/combinations/memory_spec.rb +1 -1
- data/spec/lib/query/combinations/redis_spec.rb +1 -1
- data/spec/lib/query/indexes_spec.rb +7 -2
- data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
- data/spec/lib/query/token_spec.rb +32 -53
- data/spec/lib/query/tokens_spec.rb +30 -35
- data/spec/lib/query/weights_spec.rb +16 -16
- data/spec/lib/rack/harakiri_spec.rb +5 -5
- data/spec/lib/results_spec.rb +1 -1
- data/spec/lib/search_spec.rb +24 -22
- data/spec/lib/sinatra_spec.rb +36 -0
- data/spec/lib/sources/base_spec.rb +1 -1
- data/spec/lib/sources/couch_spec.rb +9 -9
- data/spec/lib/sources/csv_spec.rb +7 -7
- data/spec/lib/sources/db_spec.rb +2 -2
- data/spec/lib/sources/delicious_spec.rb +5 -5
- data/spec/lib/sources/mongo_spec.rb +7 -7
- data/spec/lib/sources/wrappers/base_spec.rb +2 -2
- data/spec/lib/sources/wrappers/location_spec.rb +1 -1
- data/spec/lib/statistics_spec.rb +1 -1
- data/spec/lib/tokenizers/base_spec.rb +2 -2
- data/spec/lib/tokenizers/index_spec.rb +1 -1
- data/spec/lib/tokenizers/query_spec.rb +1 -1
- metadata +30 -30
- data/lib/picky/adapters/rack/query.rb +0 -65
- data/lib/picky/index/base.rb +0 -409
- data/lib/picky/index/base_indexed.rb +0 -29
- data/lib/picky/index/base_indexing.rb +0 -127
- data/lib/picky/index/memory.rb +0 -16
- data/lib/picky/index/redis.rb +0 -16
- data/lib/picky/query/qualifiers.rb +0 -76
- data/lib/picky/query/solr.rb +0 -60
- data/lib/picky/signals.rb +0 -8
- data/lib/picky-tasks.rb +0 -6
- data/lib/tasks/spec.rake +0 -11
- data/spec/lib/query/qualifiers_spec.rb +0 -31
data/lib/picky/sources/db.rb
CHANGED
|
@@ -1,171 +1,175 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
# Describes a database source. Needs a SELECT statement
|
|
4
|
-
# (with id in it), and a file option or the options from an AR config file.
|
|
5
|
-
#
|
|
6
|
-
# The select statement can be as complicated as you want,
|
|
7
|
-
# as long as it has an id in it and as long as it can be
|
|
8
|
-
# used in a CREATE TABLE AS statement.
|
|
9
|
-
# (working on that last one)
|
|
10
|
-
#
|
|
11
|
-
# Examples:
|
|
12
|
-
# Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
|
|
13
|
-
# Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
|
|
14
|
-
# Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
|
|
15
|
-
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
|
|
16
|
-
#
|
|
17
|
-
class DB < Base
|
|
18
|
-
|
|
19
|
-
# The select statement that was passed in.
|
|
20
|
-
#
|
|
21
|
-
attr_reader :select_statement
|
|
1
|
+
module Picky
|
|
22
2
|
|
|
23
|
-
|
|
24
|
-
#
|
|
25
|
-
attr_reader :database
|
|
3
|
+
module Sources
|
|
26
4
|
|
|
27
|
-
#
|
|
5
|
+
# Describes a database source. Needs a SELECT statement
|
|
6
|
+
# (with id in it), and a file option or the options from an AR config file.
|
|
28
7
|
#
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
8
|
+
# The select statement can be as complicated as you want,
|
|
9
|
+
# as long as it has an id in it and as long as it can be
|
|
10
|
+
# used in a CREATE TABLE AS statement.
|
|
11
|
+
# (working on that last one)
|
|
12
|
+
#
|
|
13
|
+
# Examples:
|
|
14
|
+
# Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
|
|
15
|
+
# Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
|
|
16
|
+
# Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
|
|
17
|
+
# Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
|
|
18
|
+
#
|
|
19
|
+
class DB < Base
|
|
38
20
|
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
|
43
|
-
end
|
|
21
|
+
# The select statement that was passed in.
|
|
22
|
+
#
|
|
23
|
+
attr_reader :select_statement
|
|
44
24
|
|
|
45
|
-
|
|
46
|
-
def create_database_adapter # :nodoc:
|
|
47
|
-
# TODO Do not use ActiveRecord directly.
|
|
25
|
+
# The database adapter.
|
|
48
26
|
#
|
|
49
|
-
|
|
27
|
+
attr_reader :database
|
|
28
|
+
|
|
29
|
+
# The database connection options that were either passed in or loaded from the given file.
|
|
50
30
|
#
|
|
51
|
-
|
|
52
|
-
adapter_class.abstract_class = true
|
|
53
|
-
adapter_class
|
|
54
|
-
end
|
|
31
|
+
attr_reader :connection_options, :options
|
|
55
32
|
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
# * The configuration as a hash.
|
|
63
|
-
#
|
|
64
|
-
def configure options # :nodoc:
|
|
65
|
-
@connection_options = if filename = options[:file]
|
|
66
|
-
File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
|
|
67
|
-
else
|
|
68
|
-
options
|
|
33
|
+
@@traversal_id = :__picky_id
|
|
34
|
+
|
|
35
|
+
def initialize select_statement, options = { file: 'app/db.yml' }
|
|
36
|
+
@select_statement = select_statement
|
|
37
|
+
@database = create_database_adapter
|
|
38
|
+
@options = options
|
|
69
39
|
end
|
|
70
|
-
self
|
|
71
|
-
end
|
|
72
40
|
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
configure @options
|
|
79
|
-
raise "Database backend not configured" unless connection_options
|
|
80
|
-
database.establish_connection connection_options
|
|
81
|
-
end
|
|
41
|
+
def to_s
|
|
42
|
+
parameters = [select_statement.inspect]
|
|
43
|
+
parameters << options unless options.empty?
|
|
44
|
+
%Q{#{self.class.name}(#{parameters.join(', ')})}
|
|
45
|
+
end
|
|
82
46
|
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
47
|
+
# Creates a database adapter for use with this source.
|
|
48
|
+
def create_database_adapter # :nodoc:
|
|
49
|
+
# TODO Do not use ActiveRecord directly. Use set_table_name etc.
|
|
50
|
+
#
|
|
51
|
+
adapter_class = Class.new ActiveRecord::Base
|
|
52
|
+
adapter_class.abstract_class = true
|
|
53
|
+
adapter_class
|
|
54
|
+
end
|
|
90
55
|
|
|
91
|
-
#
|
|
56
|
+
# Configure the backend.
|
|
92
57
|
#
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
#
|
|
58
|
+
# Options:
|
|
59
|
+
# Either
|
|
60
|
+
# * file => 'some/filename.yml' # With an active record configuration.
|
|
61
|
+
# Or
|
|
62
|
+
# * The configuration as a hash.
|
|
96
63
|
#
|
|
97
|
-
|
|
64
|
+
def configure options # :nodoc:
|
|
65
|
+
@connection_options = if filename = options[:file]
|
|
66
|
+
File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
|
|
67
|
+
else
|
|
68
|
+
options
|
|
69
|
+
end
|
|
70
|
+
self
|
|
71
|
+
end
|
|
98
72
|
|
|
99
|
-
#
|
|
73
|
+
# Connect the backend.
|
|
100
74
|
#
|
|
101
|
-
|
|
75
|
+
# Will raise unless connection options have been given.
|
|
76
|
+
#
|
|
77
|
+
def connect_backend
|
|
78
|
+
configure @options
|
|
79
|
+
raise "Database backend not configured" unless connection_options
|
|
80
|
+
database.establish_connection connection_options
|
|
81
|
+
end
|
|
102
82
|
|
|
103
|
-
#
|
|
83
|
+
# Take a snapshot of the data.
|
|
104
84
|
#
|
|
105
|
-
|
|
106
|
-
|
|
85
|
+
# Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
|
|
86
|
+
#
|
|
87
|
+
def take_snapshot index
|
|
88
|
+
timed_exclaim %Q{"#{index.identifier}": Taking snapshot of database data.}
|
|
107
89
|
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
def count index_name
|
|
111
|
-
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
|
112
|
-
end
|
|
90
|
+
origin = snapshot_table_name index.name
|
|
91
|
+
on_database = database.connection
|
|
113
92
|
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
"picky_#{index_name}_index"
|
|
118
|
-
end
|
|
93
|
+
# Drop the table if it exists.
|
|
94
|
+
#
|
|
95
|
+
on_database.drop_table origin if on_database.table_exists?(origin)
|
|
119
96
|
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
97
|
+
# The adapters currently do not support this.
|
|
98
|
+
#
|
|
99
|
+
on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
|
|
100
|
+
|
|
101
|
+
# Add a column that Picky uses to traverse the table's entries.
|
|
102
|
+
#
|
|
103
|
+
on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
|
|
104
|
+
|
|
105
|
+
# Execute any special queries this index needs executed.
|
|
106
|
+
#
|
|
107
|
+
on_database.execute index.after_indexing if index.after_indexing
|
|
125
108
|
end
|
|
126
|
-
end
|
|
127
109
|
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
|
|
110
|
+
# Counts all the entries that are used for the index.
|
|
111
|
+
#
|
|
112
|
+
def count index_name
|
|
113
|
+
database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
|
|
114
|
+
end
|
|
132
115
|
|
|
133
|
-
#
|
|
116
|
+
# The name of the snapshot table created by Picky.
|
|
134
117
|
#
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
118
|
+
def snapshot_table_name index_name
|
|
119
|
+
"picky_#{index_name}_index"
|
|
120
|
+
end
|
|
121
|
+
|
|
122
|
+
# Harvests the data to index in chunks.
|
|
123
|
+
#
|
|
124
|
+
def harvest category, &block
|
|
125
|
+
(0..count(category.index_name)).step(chunksize) do |offset|
|
|
126
|
+
get_data category, offset, &block
|
|
141
127
|
end
|
|
142
|
-
|
|
143
|
-
|
|
144
|
-
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
# Gets the data from the backend.
|
|
131
|
+
#
|
|
132
|
+
def get_data category, offset, &block # :nodoc:
|
|
133
|
+
select_statement = harvest_statement_with_offset category, offset
|
|
134
|
+
|
|
135
|
+
# TODO Rewrite ASAP.
|
|
136
|
+
#
|
|
137
|
+
if database.connection.adapter_name == "PostgreSQL"
|
|
138
|
+
id_key = 'id'
|
|
139
|
+
text_key = category.from.to_s
|
|
140
|
+
database.connection.execute(select_statement).each do |hash|
|
|
141
|
+
id, text = hash.values_at id_key, text_key
|
|
142
|
+
yield id, text if text
|
|
143
|
+
end
|
|
144
|
+
else
|
|
145
|
+
database.connection.execute(select_statement).each do |id, text|
|
|
146
|
+
yield id, text if text
|
|
147
|
+
end
|
|
145
148
|
end
|
|
146
149
|
end
|
|
147
|
-
end
|
|
148
150
|
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
151
|
+
# Builds a harvest statement for getting data to index.
|
|
152
|
+
#
|
|
153
|
+
def harvest_statement_with_offset category, offset
|
|
154
|
+
statement = harvest_statement category
|
|
153
155
|
|
|
154
|
-
|
|
156
|
+
statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
|
|
155
157
|
|
|
156
|
-
|
|
157
|
-
|
|
158
|
+
"#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
|
|
159
|
+
end
|
|
158
160
|
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
161
|
+
# The harvest statement used to pull data from the snapshot table.
|
|
162
|
+
#
|
|
163
|
+
def harvest_statement category
|
|
164
|
+
"SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
|
|
165
|
+
end
|
|
166
|
+
|
|
167
|
+
# The amount of records that are loaded each chunk.
|
|
168
|
+
#
|
|
169
|
+
def chunksize
|
|
170
|
+
25_000
|
|
171
|
+
end
|
|
164
172
|
|
|
165
|
-
# The amount of records that are loaded each chunk.
|
|
166
|
-
#
|
|
167
|
-
def chunksize
|
|
168
|
-
25_000
|
|
169
173
|
end
|
|
170
174
|
|
|
171
175
|
end
|
|
@@ -1,57 +1,61 @@
|
|
|
1
|
-
module
|
|
2
|
-
|
|
3
|
-
# Describes a Delicious (http://deli.cio.us) source.
|
|
4
|
-
#
|
|
5
|
-
# This source has a fixed set of categories:
|
|
6
|
-
# * title
|
|
7
|
-
# * tags
|
|
8
|
-
# * url
|
|
9
|
-
#
|
|
10
|
-
# Examples:
|
|
11
|
-
# Sources::CSV.new('usrnam', 'paswrd')
|
|
12
|
-
#
|
|
13
|
-
class Delicious < Base
|
|
14
|
-
|
|
15
|
-
def initialize username, password
|
|
16
|
-
check_gem
|
|
17
|
-
@username = username
|
|
18
|
-
@password = password
|
|
19
|
-
end
|
|
20
|
-
def check_gem # :nodoc:
|
|
21
|
-
require 'www/delicious'
|
|
22
|
-
rescue LoadError
|
|
23
|
-
warn_gem_missing 'www-delicious', 'the delicious source'
|
|
24
|
-
exit 1
|
|
25
|
-
end
|
|
1
|
+
module Picky
|
|
26
2
|
|
|
27
|
-
|
|
28
|
-
"#{self.class.name}(#{@username})"
|
|
29
|
-
end
|
|
3
|
+
module Sources
|
|
30
4
|
|
|
31
|
-
#
|
|
5
|
+
# Describes a Delicious (http://del.icio.us) source.
|
|
32
6
|
#
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
yield indexed_id, text
|
|
38
|
-
end
|
|
39
|
-
end
|
|
40
|
-
|
|
7
|
+
# This source has a fixed set of categories:
|
|
8
|
+
# * title
|
|
9
|
+
# * tags
|
|
10
|
+
# * url
|
|
41
11
|
#
|
|
12
|
+
# Examples:
|
|
13
|
+
# Sources::Delicious.new('usrnam', 'paswrd')
|
|
42
14
|
#
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
15
|
+
class Delicious < Base
|
|
16
|
+
|
|
17
|
+
def initialize username, password
|
|
18
|
+
check_gem
|
|
19
|
+
@username = username
|
|
20
|
+
@password = password
|
|
21
|
+
end
|
|
22
|
+
def check_gem # :nodoc:
|
|
23
|
+
require 'www/delicious'
|
|
24
|
+
rescue LoadError
|
|
25
|
+
warn_gem_missing 'www-delicious', 'the delicious source'
|
|
26
|
+
exit 1
|
|
54
27
|
end
|
|
28
|
+
|
|
29
|
+
def to_s
|
|
30
|
+
"#{self.class.name}(#{@username})"
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
# Harvests the data to index.
|
|
34
|
+
#
|
|
35
|
+
def harvest category
|
|
36
|
+
get_data do |indexed_id, data|
|
|
37
|
+
text = data[category.from]
|
|
38
|
+
next unless text
|
|
39
|
+
yield indexed_id, text
|
|
40
|
+
end
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
#
|
|
44
|
+
#
|
|
45
|
+
def get_data # :nodoc:
|
|
46
|
+
@generated_id ||= 0
|
|
47
|
+
@posts ||= WWW::Delicious.new(@username, @password).posts_recent(count: 100)
|
|
48
|
+
@posts.each do |post|
|
|
49
|
+
data = {
|
|
50
|
+
title: post.title,
|
|
51
|
+
tags: post.tags.join(' '),
|
|
52
|
+
url: post.url.to_s
|
|
53
|
+
}
|
|
54
|
+
@generated_id += 1
|
|
55
|
+
yield @generated_id, data
|
|
56
|
+
end
|
|
57
|
+
end
|
|
58
|
+
|
|
55
59
|
end
|
|
56
60
|
|
|
57
61
|
end
|
data/lib/picky/sources/mongo.rb
CHANGED
|
@@ -1,75 +1,80 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# Important!
|
|
6
|
-
# You have to start your mongodb with --rest in order to use
|
|
7
|
-
# the rest / http interface
|
|
8
|
-
#
|
|
9
|
-
class NoMongoDBGiven < StandardError; end
|
|
3
|
+
module Sources
|
|
10
4
|
|
|
11
|
-
|
|
12
|
-
#
|
|
13
|
-
# A Mongo database source.
|
|
14
|
-
#
|
|
15
|
-
# Options:
|
|
16
|
-
# * url, db
|
|
17
|
-
# Example:
|
|
18
|
-
# Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
|
|
19
|
-
# Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
|
|
20
|
-
#
|
|
21
|
-
# and all the options of a <tt>RestClient::Resource</tt>.
|
|
22
|
-
# See http://github.com/archiloque/rest-client.
|
|
23
|
-
#
|
|
24
|
-
class Mongo < Base
|
|
25
|
-
@@id_key = '_id'
|
|
5
|
+
# Important note: We're not sure if this works already.
|
|
26
6
|
#
|
|
7
|
+
# A Mongo database source.
|
|
27
8
|
#
|
|
28
|
-
|
|
29
|
-
|
|
9
|
+
# Options:
|
|
10
|
+
# * url, db
|
|
11
|
+
# Example:
|
|
12
|
+
# Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
|
|
13
|
+
# Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
|
|
14
|
+
#
|
|
15
|
+
# and all the options of a <tt>RestClient::Resource</tt>.
|
|
16
|
+
# See http://github.com/archiloque/rest-client.
|
|
17
|
+
#
|
|
18
|
+
class Mongo < Base
|
|
30
19
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
20
|
+
# Raised when a Mongo source is instantiated without a valid uri.
|
|
21
|
+
#
|
|
22
|
+
# Important!
|
|
23
|
+
# You have to start your mongodb with --rest in order to use
|
|
24
|
+
# the rest / http interface
|
|
25
|
+
#
|
|
26
|
+
class NoDBGiven < StandardError; end
|
|
34
27
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
28
|
+
@@id_key = '_id'
|
|
29
|
+
#
|
|
30
|
+
#
|
|
31
|
+
def initialize *category_names, options
|
|
32
|
+
check_gem
|
|
39
33
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
require 'rest_client'
|
|
44
|
-
rescue LoadError
|
|
45
|
-
warn_gem_missing 'rest-client', 'the MongoDB source'
|
|
46
|
-
exit 1
|
|
47
|
-
end
|
|
34
|
+
unless options[:url] && options[:db]
|
|
35
|
+
raise_no_db_given(category_names)
|
|
36
|
+
end
|
|
48
37
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
# to return only 15 entries
|
|
54
|
-
#
|
|
55
|
-
def harvest category
|
|
56
|
-
collection = (category.from || category.index_name).to_s
|
|
57
|
-
resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
|
|
58
|
-
JSON.parse(resp)['rows'].each do |row|
|
|
59
|
-
text = row[collection].to_s
|
|
60
|
-
next unless text
|
|
61
|
-
index_key = row.delete(@@id_key) # TODO Still works, I removed .values
|
|
62
|
-
yield index_key, text
|
|
63
|
-
end
|
|
64
|
-
end
|
|
38
|
+
@db = RestClient::Resource.new options.delete(:url), options
|
|
39
|
+
@database = options.delete(:db)
|
|
40
|
+
@key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
|
|
41
|
+
end
|
|
65
42
|
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
|
|
43
|
+
# Tries to require the rest_client gem.
|
|
44
|
+
#
|
|
45
|
+
def check_gem # :nodoc:
|
|
46
|
+
require 'rest_client'
|
|
47
|
+
rescue LoadError
|
|
48
|
+
warn_gem_missing 'rest-client', 'the MongoDB source'
|
|
49
|
+
exit 1
|
|
50
|
+
end
|
|
69
51
|
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
52
|
+
# Fetches the data, @limit=0 will return all records
|
|
53
|
+
#
|
|
54
|
+
# Limit is set to 0 by default - all collection entries will be sent
|
|
55
|
+
# If you want to limit the results, set it to any other number, e.g. limit=15
|
|
56
|
+
# to return only 15 entries
|
|
57
|
+
#
|
|
58
|
+
def harvest category
|
|
59
|
+
collection = (category.from || category.index_name).to_s
|
|
60
|
+
resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
|
|
61
|
+
JSON.parse(resp)['rows'].each do |row|
|
|
62
|
+
text = row[collection].to_s
|
|
63
|
+
next unless text
|
|
64
|
+
index_key = row.delete(@@id_key) # TODO Still works, I removed .values
|
|
65
|
+
yield index_key, text
|
|
66
|
+
end
|
|
67
|
+
end
|
|
73
68
|
|
|
69
|
+
def raise_no_db_given category_names # :nodoc:
|
|
70
|
+
raise NoDBGiven.new(category_names.join(', '))
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
def to_s
|
|
74
|
+
self.class.name
|
|
75
|
+
end
|
|
76
|
+
|
|
77
|
+
end
|
|
74
78
|
end
|
|
75
|
-
|
|
79
|
+
|
|
80
|
+
end
|
|
@@ -1,24 +1,28 @@
|
|
|
1
|
-
module
|
|
1
|
+
module Picky
|
|
2
2
|
|
|
3
|
-
|
|
4
|
-
#
|
|
5
|
-
# For example if you want to normalize data.
|
|
6
|
-
#
|
|
7
|
-
module Wrappers # :nodoc:all
|
|
3
|
+
module Sources
|
|
8
4
|
|
|
9
|
-
|
|
5
|
+
# Source wrappers can be used to rewrite data before it goes into the index.
|
|
6
|
+
#
|
|
7
|
+
# For example if you want to normalize data.
|
|
8
|
+
#
|
|
9
|
+
module Wrappers # :nodoc:all
|
|
10
10
|
|
|
11
|
-
|
|
11
|
+
class Base
|
|
12
12
|
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
13
|
+
attr_reader :source
|
|
14
|
+
|
|
15
|
+
# Wraps an indexing category.
|
|
16
|
+
#
|
|
17
|
+
def initialize source
|
|
18
|
+
@source = source
|
|
19
|
+
end
|
|
18
20
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
21
|
+
# Default is delegation for all methods
|
|
22
|
+
#
|
|
23
|
+
delegate :harvest, :connect_backend, :take_snapshot, :key_format, :to => :source
|
|
24
|
+
|
|
25
|
+
end
|
|
22
26
|
|
|
23
27
|
end
|
|
24
28
|
|