picky 2.7.0 → 3.0.0.pre1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,171 +1,175 @@
1
- module Sources
2
-
3
- # Describes a database source. Needs a SELECT statement
4
- # (with id in it), and a file option or the options from an AR config file.
5
- #
6
- # The select statement can be as complicated as you want,
7
- # as long as it has an id in it and as long as it can be
8
- # used in a CREATE TABLE AS statement.
9
- # (working on that last one)
10
- #
11
- # Examples:
12
- # Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
13
- # Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
14
- # Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
15
- # Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
16
- #
17
- class DB < Base
18
-
19
- # The select statement that was passed in.
20
- #
21
- attr_reader :select_statement
1
+ module Picky
22
2
 
23
- # The database adapter.
24
- #
25
- attr_reader :database
3
+ module Sources
26
4
 
27
- # The database connection options that were either passed in or loaded from the given file.
5
+ # Describes a database source. Needs a SELECT statement
6
+ # (with id in it), and a file option or the options from an AR config file.
28
7
  #
29
- attr_reader :connection_options, :options
30
-
31
- @@traversal_id = :__picky_id
32
-
33
- def initialize select_statement, options = { file: 'app/db.yml' }
34
- @select_statement = select_statement
35
- @database = create_database_adapter
36
- @options = options
37
- end
8
+ # The select statement can be as complicated as you want,
9
+ # as long as it has an id in it and as long as it can be
10
+ # used in a CREATE TABLE AS statement.
11
+ # (working on that last one)
12
+ #
13
+ # Examples:
14
+ # Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
15
+ # Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
16
+ # Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
17
+ # Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
18
+ #
19
+ class DB < Base
38
20
 
39
- def to_s
40
- parameters = [select_statement.inspect]
41
- parameters << options unless options.empty?
42
- %Q{#{self.class.name}(#{parameters.join(', ')})}
43
- end
21
+ # The select statement that was passed in.
22
+ #
23
+ attr_reader :select_statement
44
24
 
45
- # Creates a database adapter for use with this source.
46
- def create_database_adapter # :nodoc:
47
- # TODO Do not use ActiveRecord directly.
25
+ # The database adapter.
48
26
  #
49
- # TODO Use set_table_name etc.
27
+ attr_reader :database
28
+
29
+ # The database connection options that were either passed in or loaded from the given file.
50
30
  #
51
- adapter_class = Class.new ActiveRecord::Base
52
- adapter_class.abstract_class = true
53
- adapter_class
54
- end
31
+ attr_reader :connection_options, :options
55
32
 
56
- # Configure the backend.
57
- #
58
- # Options:
59
- # Either
60
- # * file => 'some/filename.yml' # With an active record configuration.
61
- # Or
62
- # * The configuration as a hash.
63
- #
64
- def configure options # :nodoc:
65
- @connection_options = if filename = options[:file]
66
- File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
67
- else
68
- options
33
+ @@traversal_id = :__picky_id
34
+
35
+ def initialize select_statement, options = { file: 'app/db.yml' }
36
+ @select_statement = select_statement
37
+ @database = create_database_adapter
38
+ @options = options
69
39
  end
70
- self
71
- end
72
40
 
73
- # Connect the backend.
74
- #
75
- # Will raise unless connection options have been given.
76
- #
77
- def connect_backend
78
- configure @options
79
- raise "Database backend not configured" unless connection_options
80
- database.establish_connection connection_options
81
- end
41
+ def to_s
42
+ parameters = [select_statement.inspect]
43
+ parameters << options unless options.empty?
44
+ %Q{#{self.class.name}(#{parameters.join(', ')})}
45
+ end
82
46
 
83
- # Take a snapshot of the data.
84
- #
85
- # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
86
- #
87
- def take_snapshot index
88
- origin = snapshot_table_name index.name
89
- on_database = database.connection
47
+ # Creates a database adapter for use with this source.
48
+ def create_database_adapter # :nodoc:
49
+ # TODO Do not use ActiveRecord directly. Use set_table_name etc.
50
+ #
51
+ adapter_class = Class.new ActiveRecord::Base
52
+ adapter_class.abstract_class = true
53
+ adapter_class
54
+ end
90
55
 
91
- # Drop the table if it exists.
56
+ # Configure the backend.
92
57
  #
93
- on_database.drop_table origin if on_database.table_exists?(origin)
94
-
95
- # The adapters currently do not support this.
58
+ # Options:
59
+ # Either
60
+ # * file => 'some/filename.yml' # With an active record configuration.
61
+ # Or
62
+ # * The configuration as a hash.
96
63
  #
97
- on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
64
+ def configure options # :nodoc:
65
+ @connection_options = if filename = options[:file]
66
+ File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
67
+ else
68
+ options
69
+ end
70
+ self
71
+ end
98
72
 
99
- # Add a column that Picky uses to traverse the table's entries.
73
+ # Connect the backend.
100
74
  #
101
- on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
75
+ # Will raise unless connection options have been given.
76
+ #
77
+ def connect_backend
78
+ configure @options
79
+ raise "Database backend not configured" unless connection_options
80
+ database.establish_connection connection_options
81
+ end
102
82
 
103
- # Execute any special queries this index needs executed.
83
+ # Take a snapshot of the data.
104
84
  #
105
- on_database.execute index.after_indexing if index.after_indexing
106
- end
85
+ # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
86
+ #
87
+ def take_snapshot index
88
+ timed_exclaim %Q{"#{index.identifier}": Taking snapshot of database data.}
107
89
 
108
- # Counts all the entries that are used for the index.
109
- #
110
- def count index_name
111
- database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
112
- end
90
+ origin = snapshot_table_name index.name
91
+ on_database = database.connection
113
92
 
114
- # The name of the snapshot table created by Picky.
115
- #
116
- def snapshot_table_name index_name
117
- "picky_#{index_name}_index"
118
- end
93
+ # Drop the table if it exists.
94
+ #
95
+ on_database.drop_table origin if on_database.table_exists?(origin)
119
96
 
120
- # Harvests the data to index in chunks.
121
- #
122
- def harvest category, &block
123
- (0..count(category.index_name)).step(chunksize) do |offset|
124
- get_data category, offset, &block
97
+ # The adapters currently do not support this.
98
+ #
99
+ on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
100
+
101
+ # Add a column that Picky uses to traverse the table's entries.
102
+ #
103
+ on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
104
+
105
+ # Execute any special queries this index needs executed.
106
+ #
107
+ on_database.execute index.after_indexing if index.after_indexing
125
108
  end
126
- end
127
109
 
128
- # Gets the data from the backend.
129
- #
130
- def get_data category, offset, &block # :nodoc:
131
- select_statement = harvest_statement_with_offset category, offset
110
+ # Counts all the entries that are used for the index.
111
+ #
112
+ def count index_name
113
+ database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
114
+ end
132
115
 
133
- # TODO Rewrite ASAP.
116
+ # The name of the snapshot table created by Picky.
134
117
  #
135
- if database.connection.adapter_name == "PostgreSQL"
136
- id_key = 'id'
137
- text_key = category.from.to_s
138
- database.connection.execute(select_statement).each do |hash|
139
- id, text = hash.values_at id_key, text_key
140
- yield id, text if text
118
+ def snapshot_table_name index_name
119
+ "picky_#{index_name}_index"
120
+ end
121
+
122
+ # Harvests the data to index in chunks.
123
+ #
124
+ def harvest category, &block
125
+ (0..count(category.index_name)).step(chunksize) do |offset|
126
+ get_data category, offset, &block
141
127
  end
142
- else
143
- database.connection.execute(select_statement).each do |id, text|
144
- yield id, text if text
128
+ end
129
+
130
+ # Gets the data from the backend.
131
+ #
132
+ def get_data category, offset, &block # :nodoc:
133
+ select_statement = harvest_statement_with_offset category, offset
134
+
135
+ # TODO Rewrite ASAP.
136
+ #
137
+ if database.connection.adapter_name == "PostgreSQL"
138
+ id_key = 'id'
139
+ text_key = category.from.to_s
140
+ database.connection.execute(select_statement).each do |hash|
141
+ id, text = hash.values_at id_key, text_key
142
+ yield id, text if text
143
+ end
144
+ else
145
+ database.connection.execute(select_statement).each do |id, text|
146
+ yield id, text if text
147
+ end
145
148
  end
146
149
  end
147
- end
148
150
 
149
- # Builds a harvest statement for getting data to index.
150
- #
151
- def harvest_statement_with_offset category, offset
152
- statement = harvest_statement category
151
+ # Builds a harvest statement for getting data to index.
152
+ #
153
+ def harvest_statement_with_offset category, offset
154
+ statement = harvest_statement category
153
155
 
154
- statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
156
+ statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
155
157
 
156
- "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
157
- end
158
+ "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
159
+ end
158
160
 
159
- # The harvest statement used to pull data from the snapshot table.
160
- #
161
- def harvest_statement category
162
- "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
163
- end
161
+ # The harvest statement used to pull data from the snapshot table.
162
+ #
163
+ def harvest_statement category
164
+ "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
165
+ end
166
+
167
+ # The number of records that are loaded per chunk.
168
+ #
169
+ def chunksize
170
+ 25_000
171
+ end
164
172
 
165
- # The amount of records that are loaded each chunk.
166
- #
167
- def chunksize
168
- 25_000
169
173
  end
170
174
 
171
175
  end
@@ -1,57 +1,61 @@
1
- module Sources
2
-
3
- # Describes a Delicious (http://deli.cio.us) source.
4
- #
5
- # This source has a fixed set of categories:
6
- # * title
7
- # * tags
8
- # * url
9
- #
10
- # Examples:
11
- # Sources::CSV.new('usrnam', 'paswrd')
12
- #
13
- class Delicious < Base
14
-
15
- def initialize username, password
16
- check_gem
17
- @username = username
18
- @password = password
19
- end
20
- def check_gem # :nodoc:
21
- require 'www/delicious'
22
- rescue LoadError
23
- warn_gem_missing 'www-delicious', 'the delicious source'
24
- exit 1
25
- end
1
+ module Picky
26
2
 
27
- def to_s
28
- "#{self.class.name}(#{@username})"
29
- end
3
+ module Sources
30
4
 
31
- # Harvests the data to index.
5
+ # Describes a Delicious (http://del.icio.us) source.
32
6
  #
33
- def harvest category
34
- get_data do |indexed_id, data|
35
- text = data[category.from]
36
- next unless text
37
- yield indexed_id, text
38
- end
39
- end
40
-
7
+ # This source has a fixed set of categories:
8
+ # * title
9
+ # * tags
10
+ # * url
41
11
  #
12
+ # Examples:
13
+ # Sources::Delicious.new('usrnam', 'paswrd')
42
14
  #
43
- def get_data # :nodoc:
44
- @generated_id ||= 0
45
- @posts ||= WWW::Delicious.new(@username, @password).posts_recent(count: 100)
46
- @posts.each do |post|
47
- data = {
48
- title: post.title,
49
- tags: post.tags.join(' '),
50
- url: post.url.to_s
51
- }
52
- @generated_id += 1
53
- yield @generated_id, data
15
+ class Delicious < Base
16
+
17
+ def initialize username, password
18
+ check_gem
19
+ @username = username
20
+ @password = password
21
+ end
22
+ def check_gem # :nodoc:
23
+ require 'www/delicious'
24
+ rescue LoadError
25
+ warn_gem_missing 'www-delicious', 'the delicious source'
26
+ exit 1
54
27
  end
28
+
29
+ def to_s
30
+ "#{self.class.name}(#{@username})"
31
+ end
32
+
33
+ # Harvests the data to index.
34
+ #
35
+ def harvest category
36
+ get_data do |indexed_id, data|
37
+ text = data[category.from]
38
+ next unless text
39
+ yield indexed_id, text
40
+ end
41
+ end
42
+
43
+ #
44
+ #
45
+ def get_data # :nodoc:
46
+ @generated_id ||= 0
47
+ @posts ||= WWW::Delicious.new(@username, @password).posts_recent(count: 100)
48
+ @posts.each do |post|
49
+ data = {
50
+ title: post.title,
51
+ tags: post.tags.join(' '),
52
+ url: post.url.to_s
53
+ }
54
+ @generated_id += 1
55
+ yield @generated_id, data
56
+ end
57
+ end
58
+
55
59
  end
56
60
 
57
61
  end
@@ -1,75 +1,80 @@
1
- module Sources
1
+ module Picky
2
2
 
3
- # Raised when a Mongo source is instantiated without a valid uri.
4
- #
5
- # Important!
6
- # You have to start your mongodb with --rest in order to use
7
- # the rest / http interface
8
- #
9
- class NoMongoDBGiven < StandardError; end
3
+ module Sources
10
4
 
11
- # Important note: We're not sure if this works already.
12
- #
13
- # A Mongo database source.
14
- #
15
- # Options:
16
- # * url, db
17
- # Example:
18
- # Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
19
- # Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
20
- #
21
- # and all the options of a <tt>RestClient::Resource</tt>.
22
- # See http://github.com/archiloque/rest-client.
23
- #
24
- class Mongo < Base
25
- @@id_key = '_id'
5
+ # Important note: We're not sure if this works already.
26
6
  #
7
+ # A Mongo database source.
27
8
  #
28
- def initialize *category_names, options
29
- check_gem
9
+ # Options:
10
+ # * url, db
11
+ # Example:
12
+ # Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
13
+ # Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
14
+ #
15
+ # and all the options of a <tt>RestClient::Resource</tt>.
16
+ # See http://github.com/archiloque/rest-client.
17
+ #
18
+ class Mongo < Base
30
19
 
31
- unless options[:url] && options[:db]
32
- raise_no_db_given(category_names)
33
- end
20
+ # Raised when a Mongo source is instantiated without a valid uri.
21
+ #
22
+ # Important!
23
+ # You have to start your mongodb with --rest in order to use
24
+ # the rest / http interface
25
+ #
26
+ class NoDBGiven < StandardError; end
34
27
 
35
- @db = RestClient::Resource.new options.delete(:url), options
36
- @database = options.delete(:db)
37
- @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
38
- end
28
+ @@id_key = '_id'
29
+ #
30
+ #
31
+ def initialize *category_names, options
32
+ check_gem
39
33
 
40
- # Tries to require the rest_client gem.
41
- #
42
- def check_gem # :nodoc:
43
- require 'rest_client'
44
- rescue LoadError
45
- warn_gem_missing 'rest-client', 'the MongoDB source'
46
- exit 1
47
- end
34
+ unless options[:url] && options[:db]
35
+ raise_no_db_given(category_names)
36
+ end
48
37
 
49
- # Fetches the data, @limit=0 will return all records
50
- #
51
- # Limit is set to 0 by default - all collection entries will be send
52
- # If want to limit the results, set to to any other number, e.g. limit=15
53
- # to return only 15 entries
54
- #
55
- def harvest category
56
- collection = (category.from || category.index_name).to_s
57
- resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
58
- JSON.parse(resp)['rows'].each do |row|
59
- text = row[collection].to_s
60
- next unless text
61
- index_key = row.delete(@@id_key) # TODO Still works, I removed .values
62
- yield index_key, text
63
- end
64
- end
38
+ @db = RestClient::Resource.new options.delete(:url), options
39
+ @database = options.delete(:db)
40
+ @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
41
+ end
65
42
 
66
- def raise_no_db_given category_names # :nodoc:
67
- raise NoMongoDBGiven.new(category_names.join(', '))
68
- end
43
+ # Tries to require the rest_client gem.
44
+ #
45
+ def check_gem # :nodoc:
46
+ require 'rest_client'
47
+ rescue LoadError
48
+ warn_gem_missing 'rest-client', 'the MongoDB source'
49
+ exit 1
50
+ end
69
51
 
70
- def to_s
71
- self.class.name
72
- end
52
+ # Fetches the data, @limit=0 will return all records
53
+ #
54
+ # Limit is set to 0 by default - all collection entries will be sent.
55
+ # If you want to limit the results, set it to any other number, e.g. limit=15
56
+ # to return only 15 entries
57
+ #
58
+ def harvest category
59
+ collection = (category.from || category.index_name).to_s
60
+ resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
61
+ JSON.parse(resp)['rows'].each do |row|
62
+ text = row[collection].to_s
63
+ next unless text
64
+ index_key = row.delete(@@id_key) # TODO Still works, I removed .values
65
+ yield index_key, text
66
+ end
67
+ end
73
68
 
69
+ def raise_no_db_given category_names # :nodoc:
70
+ raise NoDBGiven.new(category_names.join(', '))
71
+ end
72
+
73
+ def to_s
74
+ self.class.name
75
+ end
76
+
77
+ end
74
78
  end
75
- end
79
+
80
+ end
@@ -1,24 +1,28 @@
1
- module Sources
1
+ module Picky
2
2
 
3
- # Source wrappers can be used to rewrite data before it goes into the index.
4
- #
5
- # For example if you want to normalize data.
6
- #
7
- module Wrappers # :nodoc:all
3
+ module Sources
8
4
 
9
- class Base
5
+ # Source wrappers can be used to rewrite data before it goes into the index.
6
+ #
7
+ # For example if you want to normalize data.
8
+ #
9
+ module Wrappers # :nodoc:all
10
10
 
11
- attr_reader :source
11
+ class Base
12
12
 
13
- # Wraps an indexing category.
14
- #
15
- def initialize source
16
- @source = source
17
- end
13
+ attr_reader :source
14
+
15
+ # Wraps a source.
16
+ #
17
+ def initialize source
18
+ @source = source
19
+ end
18
20
 
19
- # Default is delegation for all methods
20
- #
21
- delegate :harvest, :connect_backend, :take_snapshot, :key_format, :to => :source
21
+ # Default is delegation for all methods
22
+ #
23
+ delegate :harvest, :connect_backend, :take_snapshot, :key_format, :to => :source
24
+
25
+ end
22
26
 
23
27
  end
24
28