picky 2.7.0 → 3.0.0.pre1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (213) hide show
  1. data/lib/picky/adapters/rack/base.rb +20 -16
  2. data/lib/picky/adapters/rack/live_parameters.rb +28 -24
  3. data/lib/picky/adapters/rack/search.rb +67 -0
  4. data/lib/picky/adapters/rack.rb +27 -23
  5. data/lib/picky/application.rb +246 -236
  6. data/lib/picky/backend/base.rb +115 -119
  7. data/lib/picky/backend/file/basic.rb +102 -98
  8. data/lib/picky/backend/file/json.rb +27 -23
  9. data/lib/picky/backend/file/marshal.rb +32 -28
  10. data/lib/picky/backend/file/text.rb +45 -41
  11. data/lib/picky/backend/files.rb +19 -15
  12. data/lib/picky/backend/redis/basic.rb +76 -72
  13. data/lib/picky/backend/redis/list_hash.rb +40 -36
  14. data/lib/picky/backend/redis/string_hash.rb +30 -26
  15. data/lib/picky/backend/redis.rb +32 -28
  16. data/lib/picky/bundle.rb +82 -57
  17. data/lib/{bundling.rb → picky/bundling.rb} +0 -0
  18. data/lib/picky/calculations/location.rb +51 -47
  19. data/lib/picky/categories.rb +60 -56
  20. data/lib/picky/categories_indexed.rb +73 -82
  21. data/lib/picky/categories_indexing.rb +12 -8
  22. data/lib/picky/category.rb +109 -120
  23. data/lib/picky/category_indexed.rb +39 -41
  24. data/lib/picky/category_indexing.rb +123 -125
  25. data/lib/picky/character_substituters/west_european.rb +32 -26
  26. data/lib/{constants.rb → picky/constants.rb} +0 -0
  27. data/lib/picky/cores.rb +96 -92
  28. data/lib/{deployment.rb → picky/deployment.rb} +0 -0
  29. data/lib/picky/frontend_adapters/rack.rb +133 -118
  30. data/lib/picky/generators/aliases.rb +5 -3
  31. data/lib/picky/generators/base.rb +11 -7
  32. data/lib/picky/generators/partial/default.rb +7 -3
  33. data/lib/picky/generators/partial/none.rb +24 -20
  34. data/lib/picky/generators/partial/strategy.rb +20 -16
  35. data/lib/picky/generators/partial/substring.rb +94 -90
  36. data/lib/picky/generators/partial_generator.rb +11 -7
  37. data/lib/picky/generators/similarity/default.rb +9 -5
  38. data/lib/picky/generators/similarity/double_metaphone.rb +20 -16
  39. data/lib/picky/generators/similarity/metaphone.rb +20 -16
  40. data/lib/picky/generators/similarity/none.rb +23 -19
  41. data/lib/picky/generators/similarity/phonetic.rb +49 -45
  42. data/lib/picky/generators/similarity/soundex.rb +20 -16
  43. data/lib/picky/generators/similarity/strategy.rb +10 -6
  44. data/lib/picky/generators/similarity_generator.rb +11 -7
  45. data/lib/picky/generators/strategy.rb +14 -10
  46. data/lib/picky/generators/weights/default.rb +9 -5
  47. data/lib/picky/generators/weights/logarithmic.rb +30 -26
  48. data/lib/picky/generators/weights/strategy.rb +10 -6
  49. data/lib/picky/generators/weights_generator.rb +11 -7
  50. data/lib/picky/helpers/measuring.rb +20 -16
  51. data/lib/picky/indexed/bundle/base.rb +39 -37
  52. data/lib/picky/indexed/bundle/memory.rb +68 -64
  53. data/lib/picky/indexed/bundle/redis.rb +73 -69
  54. data/lib/picky/indexed/wrappers/bundle/calculation.rb +26 -22
  55. data/lib/picky/indexed/wrappers/bundle/location.rb +30 -26
  56. data/lib/picky/indexed/wrappers/bundle/wrapper.rb +36 -32
  57. data/lib/picky/indexed/wrappers/category/location.rb +17 -13
  58. data/lib/picky/indexed/wrappers/exact_first.rb +46 -42
  59. data/lib/picky/indexers/base.rb +26 -22
  60. data/lib/picky/indexers/parallel.rb +62 -58
  61. data/lib/picky/indexers/serial.rb +41 -37
  62. data/lib/picky/indexes/index.rb +400 -0
  63. data/lib/picky/indexes/index_indexed.rb +24 -0
  64. data/lib/picky/indexes/index_indexing.rb +138 -0
  65. data/lib/picky/indexes/memory.rb +20 -0
  66. data/lib/picky/indexes/redis.rb +20 -0
  67. data/lib/picky/indexes.rb +68 -61
  68. data/lib/picky/indexes_indexed.rb +16 -12
  69. data/lib/picky/indexes_indexing.rb +41 -37
  70. data/lib/picky/indexing/bundle/base.rb +216 -205
  71. data/lib/picky/indexing/bundle/memory.rb +16 -11
  72. data/lib/picky/indexing/bundle/redis.rb +14 -12
  73. data/lib/picky/indexing/wrappers/category/location.rb +17 -13
  74. data/lib/picky/interfaces/live_parameters.rb +159 -154
  75. data/lib/picky/loader.rb +267 -304
  76. data/lib/picky/loggers/search.rb +20 -13
  77. data/lib/picky/no_source_specified_exception.rb +7 -3
  78. data/lib/picky/performant.rb +6 -2
  79. data/lib/picky/query/allocation.rb +71 -67
  80. data/lib/picky/query/allocations.rb +99 -94
  81. data/lib/picky/query/combination.rb +70 -66
  82. data/lib/picky/query/combinations/base.rb +56 -52
  83. data/lib/picky/query/combinations/memory.rb +36 -32
  84. data/lib/picky/query/combinations/redis.rb +66 -62
  85. data/lib/picky/query/indexes.rb +175 -160
  86. data/lib/picky/query/qualifier_category_mapper.rb +43 -0
  87. data/lib/picky/query/token.rb +165 -172
  88. data/lib/picky/query/tokens.rb +86 -82
  89. data/lib/picky/query/weights.rb +44 -48
  90. data/lib/picky/query.rb +5 -1
  91. data/lib/picky/rack/harakiri.rb +51 -47
  92. data/lib/picky/results.rb +81 -77
  93. data/lib/picky/search.rb +169 -158
  94. data/lib/picky/sinatra.rb +34 -0
  95. data/lib/picky/sources/base.rb +73 -70
  96. data/lib/picky/sources/couch.rb +61 -57
  97. data/lib/picky/sources/csv.rb +68 -64
  98. data/lib/picky/sources/db.rb +139 -135
  99. data/lib/picky/sources/delicious.rb +52 -48
  100. data/lib/picky/sources/mongo.rb +68 -63
  101. data/lib/picky/sources/wrappers/base.rb +20 -16
  102. data/lib/picky/sources/wrappers/location.rb +37 -33
  103. data/lib/picky/statistics.rb +46 -43
  104. data/lib/picky/tasks.rb +3 -0
  105. data/lib/picky/tokenizers/base.rb +192 -187
  106. data/lib/picky/tokenizers/index.rb +25 -21
  107. data/lib/picky/tokenizers/location.rb +33 -29
  108. data/lib/picky/tokenizers/query.rb +49 -43
  109. data/lib/picky.rb +21 -13
  110. data/lib/tasks/application.rake +1 -1
  111. data/lib/tasks/index.rake +3 -3
  112. data/lib/tasks/routes.rake +1 -1
  113. data/lib/tasks/server.rake +1 -1
  114. data/spec/lib/adapters/rack/base_spec.rb +1 -1
  115. data/spec/lib/adapters/rack/live_parameters_spec.rb +1 -1
  116. data/spec/lib/adapters/rack/query_spec.rb +1 -1
  117. data/spec/lib/application_spec.rb +39 -32
  118. data/spec/lib/backend/file/basic_spec.rb +2 -2
  119. data/spec/lib/backend/file/json_spec.rb +2 -2
  120. data/spec/lib/backend/file/marshal_spec.rb +2 -2
  121. data/spec/lib/backend/file/text_spec.rb +1 -1
  122. data/spec/lib/backend/files_spec.rb +14 -24
  123. data/spec/lib/backend/redis/basic_spec.rb +2 -2
  124. data/spec/lib/backend/redis/list_hash_spec.rb +3 -3
  125. data/spec/lib/backend/redis/string_hash_spec.rb +3 -3
  126. data/spec/lib/backend/redis_spec.rb +20 -13
  127. data/spec/lib/calculations/location_spec.rb +1 -1
  128. data/spec/lib/categories_indexed_spec.rb +16 -34
  129. data/spec/lib/category_indexed_spec.rb +9 -27
  130. data/spec/lib/category_indexing_spec.rb +2 -3
  131. data/spec/lib/category_spec.rb +10 -10
  132. data/spec/lib/character_substituters/west_european_spec.rb +6 -5
  133. data/spec/lib/cores_spec.rb +17 -17
  134. data/spec/lib/extensions/symbol_spec.rb +15 -1
  135. data/spec/lib/frontend_adapters/rack_spec.rb +20 -20
  136. data/spec/lib/generators/aliases_spec.rb +3 -3
  137. data/spec/lib/generators/cacher_strategy_spec.rb +1 -1
  138. data/spec/lib/generators/partial/default_spec.rb +3 -3
  139. data/spec/lib/generators/partial/none_spec.rb +2 -2
  140. data/spec/lib/generators/partial/substring_spec.rb +1 -1
  141. data/spec/lib/generators/partial_generator_spec.rb +3 -3
  142. data/spec/lib/generators/similarity/double_metaphone_spec.rb +1 -1
  143. data/spec/lib/generators/similarity/metaphone_spec.rb +1 -1
  144. data/spec/lib/generators/similarity/none_spec.rb +1 -1
  145. data/spec/lib/generators/similarity/phonetic_spec.rb +1 -1
  146. data/spec/lib/generators/similarity/soundex_spec.rb +1 -1
  147. data/spec/lib/generators/similarity_generator_spec.rb +2 -2
  148. data/spec/lib/generators/weights/logarithmic_spec.rb +1 -1
  149. data/spec/lib/generators/weights_generator_spec.rb +1 -1
  150. data/spec/lib/helpers/measuring_spec.rb +2 -2
  151. data/spec/lib/indexed/bundle/memory_spec.rb +6 -6
  152. data/spec/lib/indexed/bundle/redis_spec.rb +4 -4
  153. data/spec/lib/indexed/wrappers/bundle/calculation_spec.rb +2 -3
  154. data/spec/lib/indexed/wrappers/bundle/wrapper_spec.rb +2 -2
  155. data/spec/lib/indexed/wrappers/exact_first_spec.rb +5 -5
  156. data/spec/lib/indexers/base_spec.rb +1 -1
  157. data/spec/lib/indexers/parallel_spec.rb +1 -1
  158. data/spec/lib/indexers/serial_spec.rb +1 -1
  159. data/spec/lib/{index/base_indexed_spec.rb → indexes/index_indexed_spec.rb} +3 -3
  160. data/spec/lib/{index/base_indexing_spec.rb → indexes/index_indexing_spec.rb} +19 -2
  161. data/spec/lib/{index/base_spec.rb → indexes/index_spec.rb} +6 -25
  162. data/spec/lib/{index → indexes}/redis_spec.rb +1 -1
  163. data/spec/lib/indexes_class_spec.rb +2 -2
  164. data/spec/lib/indexes_indexed_spec.rb +1 -1
  165. data/spec/lib/indexes_indexing_spec.rb +1 -1
  166. data/spec/lib/indexes_spec.rb +1 -1
  167. data/spec/lib/indexing/bundle/base_spec.rb +7 -5
  168. data/spec/lib/indexing/bundle/memory_partial_generation_speed_spec.rb +4 -4
  169. data/spec/lib/indexing/bundle/memory_spec.rb +15 -15
  170. data/spec/lib/indexing/bundle/redis_spec.rb +9 -9
  171. data/spec/lib/interfaces/live_parameters_spec.rb +5 -5
  172. data/spec/lib/loader_spec.rb +17 -19
  173. data/spec/lib/loggers/search_spec.rb +2 -2
  174. data/spec/lib/query/allocation_spec.rb +1 -1
  175. data/spec/lib/query/allocations_spec.rb +1 -1
  176. data/spec/lib/query/combination_spec.rb +4 -4
  177. data/spec/lib/query/combinations/base_spec.rb +1 -1
  178. data/spec/lib/query/combinations/memory_spec.rb +1 -1
  179. data/spec/lib/query/combinations/redis_spec.rb +1 -1
  180. data/spec/lib/query/indexes_spec.rb +7 -2
  181. data/spec/lib/query/qualifier_category_mapper_spec.rb +34 -0
  182. data/spec/lib/query/token_spec.rb +32 -53
  183. data/spec/lib/query/tokens_spec.rb +30 -35
  184. data/spec/lib/query/weights_spec.rb +16 -16
  185. data/spec/lib/rack/harakiri_spec.rb +5 -5
  186. data/spec/lib/results_spec.rb +1 -1
  187. data/spec/lib/search_spec.rb +24 -22
  188. data/spec/lib/sinatra_spec.rb +36 -0
  189. data/spec/lib/sources/base_spec.rb +1 -1
  190. data/spec/lib/sources/couch_spec.rb +9 -9
  191. data/spec/lib/sources/csv_spec.rb +7 -7
  192. data/spec/lib/sources/db_spec.rb +2 -2
  193. data/spec/lib/sources/delicious_spec.rb +5 -5
  194. data/spec/lib/sources/mongo_spec.rb +7 -7
  195. data/spec/lib/sources/wrappers/base_spec.rb +2 -2
  196. data/spec/lib/sources/wrappers/location_spec.rb +1 -1
  197. data/spec/lib/statistics_spec.rb +1 -1
  198. data/spec/lib/tokenizers/base_spec.rb +2 -2
  199. data/spec/lib/tokenizers/index_spec.rb +1 -1
  200. data/spec/lib/tokenizers/query_spec.rb +1 -1
  201. metadata +30 -30
  202. data/lib/picky/adapters/rack/query.rb +0 -65
  203. data/lib/picky/index/base.rb +0 -409
  204. data/lib/picky/index/base_indexed.rb +0 -29
  205. data/lib/picky/index/base_indexing.rb +0 -127
  206. data/lib/picky/index/memory.rb +0 -16
  207. data/lib/picky/index/redis.rb +0 -16
  208. data/lib/picky/query/qualifiers.rb +0 -76
  209. data/lib/picky/query/solr.rb +0 -60
  210. data/lib/picky/signals.rb +0 -8
  211. data/lib/picky-tasks.rb +0 -6
  212. data/lib/tasks/spec.rake +0 -11
  213. data/spec/lib/query/qualifiers_spec.rb +0 -31
@@ -1,171 +1,175 @@
1
- module Sources
2
-
3
- # Describes a database source. Needs a SELECT statement
4
- # (with id in it), and a file option or the options from an AR config file.
5
- #
6
- # The select statement can be as complicated as you want,
7
- # as long as it has an id in it and as long as it can be
8
- # used in a CREATE TABLE AS statement.
9
- # (working on that last one)
10
- #
11
- # Examples:
12
- # Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
13
- # Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
14
- # Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
15
- # Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
16
- #
17
- class DB < Base
18
-
19
- # The select statement that was passed in.
20
- #
21
- attr_reader :select_statement
1
+ module Picky
22
2
 
23
- # The database adapter.
24
- #
25
- attr_reader :database
3
+ module Sources
26
4
 
27
- # The database connection options that were either passed in or loaded from the given file.
5
+ # Describes a database source. Needs a SELECT statement
6
+ # (with id in it), and a file option or the options from an AR config file.
28
7
  #
29
- attr_reader :connection_options, :options
30
-
31
- @@traversal_id = :__picky_id
32
-
33
- def initialize select_statement, options = { file: 'app/db.yml' }
34
- @select_statement = select_statement
35
- @database = create_database_adapter
36
- @options = options
37
- end
8
+ # The select statement can be as complicated as you want,
9
+ # as long as it has an id in it and as long as it can be
10
+ # used in a CREATE TABLE AS statement.
11
+ # (working on that last one)
12
+ #
13
+ # Examples:
14
+ # Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
15
+ # Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
16
+ # Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
17
+ # Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
18
+ #
19
+ class DB < Base
38
20
 
39
- def to_s
40
- parameters = [select_statement.inspect]
41
- parameters << options unless options.empty?
42
- %Q{#{self.class.name}(#{parameters.join(', ')})}
43
- end
21
+ # The select statement that was passed in.
22
+ #
23
+ attr_reader :select_statement
44
24
 
45
- # Creates a database adapter for use with this source.
46
- def create_database_adapter # :nodoc:
47
- # TODO Do not use ActiveRecord directly.
25
+ # The database adapter.
48
26
  #
49
- # TODO Use set_table_name etc.
27
+ attr_reader :database
28
+
29
+ # The database connection options that were either passed in or loaded from the given file.
50
30
  #
51
- adapter_class = Class.new ActiveRecord::Base
52
- adapter_class.abstract_class = true
53
- adapter_class
54
- end
31
+ attr_reader :connection_options, :options
55
32
 
56
- # Configure the backend.
57
- #
58
- # Options:
59
- # Either
60
- # * file => 'some/filename.yml' # With an active record configuration.
61
- # Or
62
- # * The configuration as a hash.
63
- #
64
- def configure options # :nodoc:
65
- @connection_options = if filename = options[:file]
66
- File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
67
- else
68
- options
33
+ @@traversal_id = :__picky_id
34
+
35
+ def initialize select_statement, options = { file: 'app/db.yml' }
36
+ @select_statement = select_statement
37
+ @database = create_database_adapter
38
+ @options = options
69
39
  end
70
- self
71
- end
72
40
 
73
- # Connect the backend.
74
- #
75
- # Will raise unless connection options have been given.
76
- #
77
- def connect_backend
78
- configure @options
79
- raise "Database backend not configured" unless connection_options
80
- database.establish_connection connection_options
81
- end
41
+ def to_s
42
+ parameters = [select_statement.inspect]
43
+ parameters << options unless options.empty?
44
+ %Q{#{self.class.name}(#{parameters.join(', ')})}
45
+ end
82
46
 
83
- # Take a snapshot of the data.
84
- #
85
- # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
86
- #
87
- def take_snapshot index
88
- origin = snapshot_table_name index.name
89
- on_database = database.connection
47
+ # Creates a database adapter for use with this source.
48
+ def create_database_adapter # :nodoc:
49
+ # TODO Do not use ActiveRecord directly. Use set_table_name etc.
50
+ #
51
+ adapter_class = Class.new ActiveRecord::Base
52
+ adapter_class.abstract_class = true
53
+ adapter_class
54
+ end
90
55
 
91
- # Drop the table if it exists.
56
+ # Configure the backend.
92
57
  #
93
- on_database.drop_table origin if on_database.table_exists?(origin)
94
-
95
- # The adapters currently do not support this.
58
+ # Options:
59
+ # Either
60
+ # * file => 'some/filename.yml' # With an active record configuration.
61
+ # Or
62
+ # * The configuration as a hash.
96
63
  #
97
- on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
64
+ def configure options # :nodoc:
65
+ @connection_options = if filename = options[:file]
66
+ File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
67
+ else
68
+ options
69
+ end
70
+ self
71
+ end
98
72
 
99
- # Add a column that Picky uses to traverse the table's entries.
73
+ # Connect the backend.
100
74
  #
101
- on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
75
+ # Will raise unless connection options have been given.
76
+ #
77
+ def connect_backend
78
+ configure @options
79
+ raise "Database backend not configured" unless connection_options
80
+ database.establish_connection connection_options
81
+ end
102
82
 
103
- # Execute any special queries this index needs executed.
83
+ # Take a snapshot of the data.
104
84
  #
105
- on_database.execute index.after_indexing if index.after_indexing
106
- end
85
+ # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
86
+ #
87
+ def take_snapshot index
88
+ timed_exclaim %Q{"#{index.identifier}": Taking snapshot of database data.}
107
89
 
108
- # Counts all the entries that are used for the index.
109
- #
110
- def count index_name
111
- database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
112
- end
90
+ origin = snapshot_table_name index.name
91
+ on_database = database.connection
113
92
 
114
- # The name of the snapshot table created by Picky.
115
- #
116
- def snapshot_table_name index_name
117
- "picky_#{index_name}_index"
118
- end
93
+ # Drop the table if it exists.
94
+ #
95
+ on_database.drop_table origin if on_database.table_exists?(origin)
119
96
 
120
- # Harvests the data to index in chunks.
121
- #
122
- def harvest category, &block
123
- (0..count(category.index_name)).step(chunksize) do |offset|
124
- get_data category, offset, &block
97
+ # The adapters currently do not support this.
98
+ #
99
+ on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
100
+
101
+ # Add a column that Picky uses to traverse the table's entries.
102
+ #
103
+ on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
104
+
105
+ # Execute any special queries this index needs executed.
106
+ #
107
+ on_database.execute index.after_indexing if index.after_indexing
125
108
  end
126
- end
127
109
 
128
- # Gets the data from the backend.
129
- #
130
- def get_data category, offset, &block # :nodoc:
131
- select_statement = harvest_statement_with_offset category, offset
110
+ # Counts all the entries that are used for the index.
111
+ #
112
+ def count index_name
113
+ database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
114
+ end
132
115
 
133
- # TODO Rewrite ASAP.
116
+ # The name of the snapshot table created by Picky.
134
117
  #
135
- if database.connection.adapter_name == "PostgreSQL"
136
- id_key = 'id'
137
- text_key = category.from.to_s
138
- database.connection.execute(select_statement).each do |hash|
139
- id, text = hash.values_at id_key, text_key
140
- yield id, text if text
118
+ def snapshot_table_name index_name
119
+ "picky_#{index_name}_index"
120
+ end
121
+
122
+ # Harvests the data to index in chunks.
123
+ #
124
+ def harvest category, &block
125
+ (0..count(category.index_name)).step(chunksize) do |offset|
126
+ get_data category, offset, &block
141
127
  end
142
- else
143
- database.connection.execute(select_statement).each do |id, text|
144
- yield id, text if text
128
+ end
129
+
130
+ # Gets the data from the backend.
131
+ #
132
+ def get_data category, offset, &block # :nodoc:
133
+ select_statement = harvest_statement_with_offset category, offset
134
+
135
+ # TODO Rewrite ASAP.
136
+ #
137
+ if database.connection.adapter_name == "PostgreSQL"
138
+ id_key = 'id'
139
+ text_key = category.from.to_s
140
+ database.connection.execute(select_statement).each do |hash|
141
+ id, text = hash.values_at id_key, text_key
142
+ yield id, text if text
143
+ end
144
+ else
145
+ database.connection.execute(select_statement).each do |id, text|
146
+ yield id, text if text
147
+ end
145
148
  end
146
149
  end
147
- end
148
150
 
149
- # Builds a harvest statement for getting data to index.
150
- #
151
- def harvest_statement_with_offset category, offset
152
- statement = harvest_statement category
151
+ # Builds a harvest statement for getting data to index.
152
+ #
153
+ def harvest_statement_with_offset category, offset
154
+ statement = harvest_statement category
153
155
 
154
- statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
156
+ statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
155
157
 
156
- "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
157
- end
158
+ "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
159
+ end
158
160
 
159
- # The harvest statement used to pull data from the snapshot table.
160
- #
161
- def harvest_statement category
162
- "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
163
- end
161
+ # The harvest statement used to pull data from the snapshot table.
162
+ #
163
+ def harvest_statement category
164
+ "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
165
+ end
166
+
167
+ # The number of records that are loaded per chunk.
168
+ #
169
+ def chunksize
170
+ 25_000
171
+ end
164
172
 
165
- # The amount of records that are loaded each chunk.
166
- #
167
- def chunksize
168
- 25_000
169
173
  end
170
174
 
171
175
  end
@@ -1,57 +1,61 @@
1
- module Sources
2
-
3
- # Describes a Delicious (http://deli.cio.us) source.
4
- #
5
- # This source has a fixed set of categories:
6
- # * title
7
- # * tags
8
- # * url
9
- #
10
- # Examples:
11
- # Sources::CSV.new('usrnam', 'paswrd')
12
- #
13
- class Delicious < Base
14
-
15
- def initialize username, password
16
- check_gem
17
- @username = username
18
- @password = password
19
- end
20
- def check_gem # :nodoc:
21
- require 'www/delicious'
22
- rescue LoadError
23
- warn_gem_missing 'www-delicious', 'the delicious source'
24
- exit 1
25
- end
1
+ module Picky
26
2
 
27
- def to_s
28
- "#{self.class.name}(#{@username})"
29
- end
3
+ module Sources
30
4
 
31
- # Harvests the data to index.
5
+ # Describes a Delicious (http://del.icio.us) source.
32
6
  #
33
- def harvest category
34
- get_data do |indexed_id, data|
35
- text = data[category.from]
36
- next unless text
37
- yield indexed_id, text
38
- end
39
- end
40
-
7
+ # This source has a fixed set of categories:
8
+ # * title
9
+ # * tags
10
+ # * url
41
11
  #
12
+ # Examples:
13
+ # Sources::Delicious.new('usrnam', 'paswrd')
42
14
  #
43
- def get_data # :nodoc:
44
- @generated_id ||= 0
45
- @posts ||= WWW::Delicious.new(@username, @password).posts_recent(count: 100)
46
- @posts.each do |post|
47
- data = {
48
- title: post.title,
49
- tags: post.tags.join(' '),
50
- url: post.url.to_s
51
- }
52
- @generated_id += 1
53
- yield @generated_id, data
15
+ class Delicious < Base
16
+
17
+ def initialize username, password
18
+ check_gem
19
+ @username = username
20
+ @password = password
21
+ end
22
+ def check_gem # :nodoc:
23
+ require 'www/delicious'
24
+ rescue LoadError
25
+ warn_gem_missing 'www-delicious', 'the delicious source'
26
+ exit 1
54
27
  end
28
+
29
+ def to_s
30
+ "#{self.class.name}(#{@username})"
31
+ end
32
+
33
+ # Harvests the data to index.
34
+ #
35
+ def harvest category
36
+ get_data do |indexed_id, data|
37
+ text = data[category.from]
38
+ next unless text
39
+ yield indexed_id, text
40
+ end
41
+ end
42
+
43
+ #
44
+ #
45
+ def get_data # :nodoc:
46
+ @generated_id ||= 0
47
+ @posts ||= WWW::Delicious.new(@username, @password).posts_recent(count: 100)
48
+ @posts.each do |post|
49
+ data = {
50
+ title: post.title,
51
+ tags: post.tags.join(' '),
52
+ url: post.url.to_s
53
+ }
54
+ @generated_id += 1
55
+ yield @generated_id, data
56
+ end
57
+ end
58
+
55
59
  end
56
60
 
57
61
  end
@@ -1,75 +1,80 @@
1
- module Sources
1
+ module Picky
2
2
 
3
- # Raised when a Mongo source is instantiated without a valid uri.
4
- #
5
- # Important!
6
- # You have to start your mongodb with --rest in order to use
7
- # the rest / http interface
8
- #
9
- class NoMongoDBGiven < StandardError; end
3
+ module Sources
10
4
 
11
- # Important note: We're not sure if this works already.
12
- #
13
- # A Mongo database source.
14
- #
15
- # Options:
16
- # * url, db
17
- # Example:
18
- # Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
19
- # Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
20
- #
21
- # and all the options of a <tt>RestClient::Resource</tt>.
22
- # See http://github.com/archiloque/rest-client.
23
- #
24
- class Mongo < Base
25
- @@id_key = '_id'
5
+ # Important note: We're not sure if this works already.
26
6
  #
7
+ # A Mongo database source.
27
8
  #
28
- def initialize *category_names, options
29
- check_gem
9
+ # Options:
10
+ # * url, db
11
+ # Example:
12
+ # Sources::Mongo.new(:collection1, :collection2, :url => 'localhost:28017', :db => 'testdatabase')
13
+ # Be sure to escape the URL properly, e.g. # => %23 in the databasename if needed
14
+ #
15
+ # and all the options of a <tt>RestClient::Resource</tt>.
16
+ # See http://github.com/archiloque/rest-client.
17
+ #
18
+ class Mongo < Base
30
19
 
31
- unless options[:url] && options[:db]
32
- raise_no_db_given(category_names)
33
- end
20
+ # Raised when a Mongo source is instantiated without a valid uri.
21
+ #
22
+ # Important!
23
+ # You have to start your mongodb with --rest in order to use
24
+ # the rest / http interface
25
+ #
26
+ class NoDBGiven < StandardError; end
34
27
 
35
- @db = RestClient::Resource.new options.delete(:url), options
36
- @database = options.delete(:db)
37
- @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
38
- end
28
+ @@id_key = '_id'
29
+ #
30
+ #
31
+ def initialize *category_names, options
32
+ check_gem
39
33
 
40
- # Tries to require the rest_client gem.
41
- #
42
- def check_gem # :nodoc:
43
- require 'rest_client'
44
- rescue LoadError
45
- warn_gem_missing 'rest-client', 'the MongoDB source'
46
- exit 1
47
- end
34
+ unless options[:url] && options[:db]
35
+ raise_no_db_given(category_names)
36
+ end
48
37
 
49
- # Fetches the data, @limit=0 will return all records
50
- #
51
- # Limit is set to 0 by default - all collection entries will be send
52
- # If want to limit the results, set to to any other number, e.g. limit=15
53
- # to return only 15 entries
54
- #
55
- def harvest category
56
- collection = (category.from || category.index_name).to_s
57
- resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
58
- JSON.parse(resp)['rows'].each do |row|
59
- text = row[collection].to_s
60
- next unless text
61
- index_key = row.delete(@@id_key) # TODO Still works, I removed .values
62
- yield index_key, text
63
- end
64
- end
38
+ @db = RestClient::Resource.new options.delete(:url), options
39
+ @database = options.delete(:db)
40
+ @key_format = options[:key_format] && options[:key_format].to_sym || :to_sym
41
+ end
65
42
 
66
- def raise_no_db_given category_names # :nodoc:
67
- raise NoMongoDBGiven.new(category_names.join(', '))
68
- end
43
+ # Tries to require the rest_client gem.
44
+ #
45
+ def check_gem # :nodoc:
46
+ require 'rest_client'
47
+ rescue LoadError
48
+ warn_gem_missing 'rest-client', 'the MongoDB source'
49
+ exit 1
50
+ end
69
51
 
70
- def to_s
71
- self.class.name
72
- end
52
+ # Fetches the data, @limit=0 will return all records
53
+ #
54
+ # Limit is set to 0 by default - all collection entries will be sent
55
+ # If you want to limit the results, set it to any other number, e.g. limit=15
56
+ # to return only 15 entries
57
+ #
58
+ def harvest category
59
+ collection = (category.from || category.index_name).to_s
60
+ resp = @db["/#{@database}/#{category.index_name}/?@limit=0"].get
61
+ JSON.parse(resp)['rows'].each do |row|
62
+ text = row[collection].to_s
63
+ next unless text
64
+ index_key = row.delete(@@id_key) # TODO Still works, I removed .values
65
+ yield index_key, text
66
+ end
67
+ end
73
68
 
69
+ def raise_no_db_given category_names # :nodoc:
70
+ raise NoDBGiven.new(category_names.join(', '))
71
+ end
72
+
73
+ def to_s
74
+ self.class.name
75
+ end
76
+
77
+ end
74
78
  end
75
- end
79
+
80
+ end
@@ -1,24 +1,28 @@
1
- module Sources
1
+ module Picky
2
2
 
3
- # Source wrappers can be used to rewrite data before it goes into the index.
4
- #
5
- # For example if you want to normalize data.
6
- #
7
- module Wrappers # :nodoc:all
3
+ module Sources
8
4
 
9
- class Base
5
+ # Source wrappers can be used to rewrite data before it goes into the index.
6
+ #
7
+ # For example if you want to normalize data.
8
+ #
9
+ module Wrappers # :nodoc:all
10
10
 
11
- attr_reader :source
11
+ class Base
12
12
 
13
- # Wraps an indexing category.
14
- #
15
- def initialize source
16
- @source = source
17
- end
13
+ attr_reader :source
14
+
15
+ # Wraps an indexing category.
16
+ #
17
+ def initialize source
18
+ @source = source
19
+ end
18
20
 
19
- # Default is delegation for all methods
20
- #
21
- delegate :harvest, :connect_backend, :take_snapshot, :key_format, :to => :source
21
+ # Default is delegation for all methods
22
+ #
23
+ delegate :harvest, :connect_backend, :take_snapshot, :key_format, :to => :source
24
+
25
+ end
22
26
 
23
27
  end
24
28