picky 4.0.0pre1 → 4.0.0pre2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (112)
  1. data/aux/picky/cli.rb +6 -2
  2. data/lib/picky.rb +10 -8
  3. data/lib/picky/backends/backend.rb +37 -0
  4. data/lib/picky/backends/file.rb +0 -20
  5. data/lib/picky/backends/memory.rb +0 -29
  6. data/lib/picky/backends/redis.rb +74 -15
  7. data/lib/picky/backends/redis/list.rb +1 -1
  8. data/lib/picky/backends/sqlite.rb +0 -27
  9. data/lib/picky/bundle.rb +2 -2
  10. data/lib/picky/bundle_indexed.rb +1 -1
  11. data/lib/picky/bundle_indexing.rb +1 -1
  12. data/lib/picky/categories_indexed.rb +1 -11
  13. data/lib/picky/category.rb +4 -4
  14. data/lib/picky/category/location.rb +25 -0
  15. data/lib/picky/category_realtime.rb +4 -3
  16. data/lib/picky/console.rb +1 -1
  17. data/lib/picky/constants.rb +1 -1
  18. data/lib/picky/ext/maybe_compile.rb +2 -2
  19. data/lib/picky/extensions/object.rb +3 -2
  20. data/lib/picky/generators/aliases.rb +7 -2
  21. data/lib/picky/generators/partial/default.rb +1 -0
  22. data/lib/picky/generators/similarity/default.rb +1 -0
  23. data/lib/picky/generators/similarity/phonetic.rb +13 -2
  24. data/lib/picky/generators/strategy.rb +0 -2
  25. data/lib/picky/generators/weights/constant.rb +1 -2
  26. data/lib/picky/generators/weights/default.rb +1 -0
  27. data/lib/picky/generators/weights/dynamic.rb +1 -1
  28. data/lib/picky/generators/weights/logarithmic.rb +1 -1
  29. data/lib/picky/generators/weights/{runtime.rb → stub.rb} +1 -3
  30. data/lib/picky/index.rb +3 -3
  31. data/lib/picky/index_indexing.rb +0 -2
  32. data/lib/picky/index_realtime.rb +1 -1
  33. data/lib/picky/indexers/base.rb +7 -0
  34. data/lib/picky/indexers/parallel.rb +2 -4
  35. data/lib/picky/indexers/serial.rb +2 -0
  36. data/lib/picky/indexes_indexing.rb +1 -1
  37. data/lib/picky/interfaces/live_parameters/master_child.rb +175 -0
  38. data/lib/picky/interfaces/live_parameters/unicorn.rb +37 -0
  39. data/lib/picky/loader.rb +238 -259
  40. data/lib/picky/query/allocation.rb +19 -10
  41. data/lib/picky/query/combination.rb +7 -1
  42. data/lib/picky/query/combinations.rb +1 -6
  43. data/lib/picky/query/token.rb +26 -36
  44. data/lib/picky/results.rb +18 -17
  45. data/lib/picky/scheduler.rb +2 -1
  46. data/lib/picky/search.rb +1 -1
  47. data/lib/picky/sinatra.rb +6 -6
  48. data/lib/picky/statistics.rb +2 -0
  49. data/lib/picky/tokenizer.rb +8 -8
  50. data/lib/picky/wrappers/bundle/calculation.rb +4 -4
  51. data/lib/picky/wrappers/bundle/location.rb +1 -2
  52. data/lib/tasks/framework.rake +1 -1
  53. data/lib/tasks/statistics.rake +1 -1
  54. data/lib/tasks/try.rake +1 -1
  55. data/lib/tasks/try.rb +1 -1
  56. data/spec/aux/picky/cli_spec.rb +12 -12
  57. data/spec/ext/performant_spec.rb +16 -16
  58. data/spec/functional/backends/file_spec.rb +78 -7
  59. data/spec/functional/backends/memory_spec.rb +78 -7
  60. data/spec/functional/backends/redis_spec.rb +73 -13
  61. data/spec/functional/dynamic_weights_spec.rb +3 -4
  62. data/spec/functional/realtime_spec.rb +2 -2
  63. data/spec/functional/speed_spec.rb +2 -2
  64. data/spec/functional/terminate_early_spec.rb +3 -3
  65. data/spec/lib/analytics_spec.rb +1 -1
  66. data/spec/lib/analyzer_spec.rb +5 -3
  67. data/spec/lib/categories_indexed_spec.rb +38 -20
  68. data/spec/lib/category/location_spec.rb +30 -0
  69. data/spec/lib/character_substituters/west_european_spec.rb +1 -0
  70. data/spec/lib/extensions/hash_spec.rb +6 -5
  71. data/spec/lib/extensions/module_spec.rb +6 -6
  72. data/spec/lib/extensions/object_spec.rb +9 -8
  73. data/spec/lib/extensions/string_spec.rb +1 -1
  74. data/spec/lib/generators/similarity/phonetic_spec.rb +11 -0
  75. data/spec/lib/index_realtime_spec.rb +5 -5
  76. data/spec/lib/interfaces/{live_parameters_spec.rb → live_parameters/master_child_spec.rb} +26 -26
  77. data/spec/lib/interfaces/live_parameters/unicorn_spec.rb +160 -0
  78. data/spec/lib/loader_spec.rb +65 -25
  79. data/spec/lib/query/allocation_spec.rb +25 -22
  80. data/spec/lib/query/combinations_spec.rb +13 -36
  81. data/spec/lib/query/token_spec.rb +144 -131
  82. data/spec/lib/query/tokens_spec.rb +14 -0
  83. data/spec/lib/results_spec.rb +14 -8
  84. data/spec/lib/search_spec.rb +1 -1
  85. data/spec/lib/sinatra_spec.rb +8 -8
  86. metadata +28 -91
  87. data/lib/picky/adapters/rack.rb +0 -34
  88. data/lib/picky/adapters/rack/base.rb +0 -27
  89. data/lib/picky/adapters/rack/live_parameters.rb +0 -37
  90. data/lib/picky/adapters/rack/search.rb +0 -67
  91. data/lib/picky/application.rb +0 -268
  92. data/lib/picky/frontend_adapters/rack.rb +0 -161
  93. data/lib/picky/interfaces/live_parameters.rb +0 -187
  94. data/lib/picky/sources/base.rb +0 -92
  95. data/lib/picky/sources/couch.rb +0 -76
  96. data/lib/picky/sources/csv.rb +0 -83
  97. data/lib/picky/sources/db.rb +0 -189
  98. data/lib/picky/sources/delicious.rb +0 -63
  99. data/lib/picky/sources/mongo.rb +0 -80
  100. data/lib/picky/wrappers/category/location.rb +0 -38
  101. data/lib/tasks/routes.rake +0 -8
  102. data/spec/lib/adapters/rack/base_spec.rb +0 -24
  103. data/spec/lib/adapters/rack/live_parameters_spec.rb +0 -26
  104. data/spec/lib/adapters/rack/query_spec.rb +0 -39
  105. data/spec/lib/application_spec.rb +0 -155
  106. data/spec/lib/frontend_adapters/rack_spec.rb +0 -294
  107. data/spec/lib/sources/base_spec.rb +0 -53
  108. data/spec/lib/sources/couch_spec.rb +0 -114
  109. data/spec/lib/sources/csv_spec.rb +0 -89
  110. data/spec/lib/sources/db_spec.rb +0 -125
  111. data/spec/lib/sources/delicious_spec.rb +0 -94
  112. data/spec/lib/sources/mongo_spec.rb +0 -50
@@ -1,187 +0,0 @@
1
- module Picky
2
-
3
- # This is very optional.
4
- # Only load if the user wants it.
5
- #
6
- module Interfaces
7
-
8
- # This is an interface that provides the user of
9
- # Picky with the possibility to change parameters
10
- # while the Application is running.
11
- #
12
- # Important Note: This will only work in Master/Child configurations.
13
- #
14
- class LiveParameters
15
-
16
- def initialize
17
- @child, @parent = IO.pipe
18
- start_master_process_thread
19
- end
20
-
21
- # This runs a thread that listens to child processes.
22
- #
23
- def start_master_process_thread
24
- # This thread is stopped in the children.
25
- #
26
- Thread.new do
27
- loop do
28
- IO.select([@child], nil, nil, 2) or next
29
- result = @child.gets ';;;'
30
- pid, configuration_hash = eval result
31
- next unless Hash === configuration_hash
32
- next if configuration_hash.empty?
33
- exclaim "Trying to update MASTER configuration."
34
- try_updating_configuration_with configuration_hash
35
- kill_each_worker_except pid
36
-
37
- # Fails hard on an error.
38
- #
39
- end
40
- end
41
- end
42
-
43
- # TODO This needs to be webserver agnostic.
44
- #
45
- def worker_pids
46
- Unicorn::HttpServer::WORKERS.keys
47
- end
48
-
49
- # Taken from Unicorn.
50
- #
51
- def kill_each_worker_except pid
52
- worker_pids.each do |wpid|
53
- next if wpid == pid
54
- kill_worker :KILL, wpid
55
- end
56
- end
57
- def kill_worker signal, wpid
58
- Process.kill signal, wpid
59
- exclaim "Killing worker ##{wpid} with signal #{signal}."
60
- rescue Errno::ESRCH
61
- remove_worker wpid
62
- end
63
- # TODO This needs to be Webserver agnostic.
64
- #
65
- def remove_worker wpid
66
- worker = Unicorn::HttpServer::WORKERS.delete(wpid) and worker.tmp.close rescue nil
67
- end
68
-
69
- # Updates any parameters with the ones given and
70
- # returns the updated params.
71
- #
72
- # The params are a strictly defined hash of:
73
- # * querying_removes_characters: Regexp
74
- # * querying_stopwords: Regexp
75
- # * querying_splits_text_on: Regexp
76
- #
77
- # This first tries to update in the child process,
78
- # and if successful, in the parent process
79
- #
80
- def parameters configuration_hash
81
- close_child
82
- exclaim "Trying to update worker child configuration." unless configuration_hash.empty?
83
- try_updating_configuration_with configuration_hash
84
- write_parent configuration_hash
85
- extract_configuration
86
- rescue CouldNotUpdateConfigurationError => e
87
- # I need to die such that my broken config is never used.
88
- #
89
- exclaim "Child process #{Process.pid} performs harakiri because of broken config."
90
- harakiri
91
- { e.config_key => :ERROR }
92
- end
93
- # Kills itself, but still answers the request honorably.
94
- #
95
- def harakiri
96
- Process.kill :QUIT, Process.pid
97
- end
98
- # Write the parent.
99
- #
100
- # Note: The ;;; is the end marker for the message.
101
- #
102
- def write_parent configuration_hash
103
- @parent.write "#{[Process.pid, configuration_hash]};;;"
104
- end
105
- # Close the child if it isn't yet closed.
106
- #
107
- def close_child
108
- @child.close unless @child.closed?
109
- end
110
-
111
- class CouldNotUpdateConfigurationError < StandardError
112
- attr_reader :config_key
113
- def initialize config_key, message
114
- super message
115
- @config_key = config_key
116
- end
117
- end
118
-
119
- # Tries updating the configuration in the child process or parent process.
120
- #
121
- def try_updating_configuration_with configuration_hash
122
- current_key = nil
123
- begin
124
- configuration_hash.each_pair do |key, new_value|
125
- exclaim " Setting #{key} with #{new_value}."
126
- current_key = key
127
- send :"#{key}=", new_value
128
- end
129
- rescue StandardError => e
130
- # Catch any error and reraise as config error.
131
- #
132
- raise CouldNotUpdateConfigurationError.new current_key, e.message
133
- end
134
- end
135
-
136
- def extract_configuration
137
- {
138
- querying_removes_characters: querying_removes_characters,
139
- querying_stopwords: querying_stopwords,
140
- querying_splits_text_on: querying_splits_text_on
141
- }
142
- end
143
-
144
- # TODO Move to Interface object.
145
- #
146
- def querying_removes_characters
147
- regexp = Tokenizer.query_default.instance_variable_get :@removes_characters_regexp
148
- regexp && regexp.source
149
- end
150
- def querying_removes_characters= new_value
151
- Tokenizer.query_default.instance_variable_set(:@removes_characters_regexp, %r{#{new_value}})
152
- end
153
- def querying_stopwords
154
- regexp = Tokenizer.query_default.instance_variable_get :@remove_stopwords_regexp
155
- regexp && regexp.source
156
- end
157
- def querying_stopwords= new_value
158
- Tokenizer.query_default.instance_variable_set(:@remove_stopwords_regexp, %r{#{new_value}})
159
- end
160
- def querying_splits_text_on
161
- splits = Tokenizer.query_default.instance_variable_get :@splits_text_on
162
- splits && splits.respond_to?(:source) ? splits.source : splits
163
- end
164
- def querying_splits_text_on= new_value
165
- splits = Tokenizer.query_default.instance_variable_get :@splits_text_on
166
- if splits.respond_to?(:source)
167
- Tokenizer.query_default.instance_variable_set(:@splits_text_on, %r{#{new_value}})
168
- else
169
- Tokenizer.query_default.instance_variable_set(:@splits_text_on, new_value)
170
- end
171
- end
172
-
173
- #
174
- #
175
- def to_s
176
- "Suckerfish Live Interface (Use the picky-live gem to introspect)"
177
- end
178
-
179
- end
180
-
181
- end
182
-
183
- # Aka.
184
- #
185
- LiveParameters = Interfaces::LiveParameters
186
-
187
- end
@@ -1,92 +0,0 @@
1
- module Picky
2
-
3
- # = Data Sources
4
- #
5
- # Currently, Picky offers the following Sources:
6
- # * CSV (comma – or other – separated file)
7
- # * Couch (CouchDB, key-value store)
8
- # * DB (Databases, foremost MySQL)
9
- # * Delicious (http://del.icio.us, online bookmarking service)
10
- # See also:
11
- # http://github.com/floere/picky/wiki/Sources-Configuration
12
- #
13
- # Don't worry if your source isn't here. Adding your own is easy:
14
- # http://github.com/floere/picky/wiki/Contributing-sources
15
- #
16
- module Sources
17
-
18
- # Sources are where your data comes from.
19
- #
20
- # A source has 1 mandatory and 2 optional methods:
21
- # * connect_backend (_optional_): called once for each type/category pair.
22
- # * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
23
- # * take_snapshot (_optional_): called once for each index or category (if indexing a single category).
24
- #
25
- # This base class "implements" all these methods, but they don't do anything.
26
- # Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
27
- #
28
- class Base
29
-
30
- attr_reader :key_format
31
-
32
- # Connect to the backend.
33
- #
34
- # Called once per index/category combination before harvesting.
35
- #
36
- # Examples:
37
- # * The DB backend connects the DB adapter.
38
- # * We open a connection to a key value store.
39
- # * We open a file with data.
40
- #
41
- def connect_backend
42
-
43
- end
44
-
45
- # Called by the indexer when gathering data.
46
- #
47
- # Yields the data (id, text for id) for the given category.
48
- #
49
- # When implementing or overriding your own,
50
- # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
51
- # for the given type symbol and category symbol.
52
- #
53
- # Note: Since harvest needs to be implemented, it has no default implementation.
54
- #
55
- # def harvest category # :yields: id, text_for_id
56
- #
57
- # end
58
-
59
- # Used to take a snapshot of your data if it is fast changing.
60
- #
61
- # Called once for each index before harvesting.
62
- # If it has been called on a source already by an index,
63
- # it won't be called again for a category inside that index.
64
- #
65
- # Example:
66
- # * In a DB source, a table based on the source's select statement is created.
67
- #
68
- def take_snapshot index
69
-
70
- end
71
-
72
- # Used to check if a snapshot has been done already.
73
- #
74
- # Example:
75
- # * In a DB source, a table based on the source's select statement is created.
76
- #
77
- def with_snapshot index
78
- connect_backend
79
- @snapshot_taken ||= 0
80
- if @snapshot_taken.zero?
81
- take_snapshot index
82
- end
83
- @snapshot_taken += 1
84
- yield
85
- @snapshot_taken -= 1
86
- end
87
-
88
- end
89
-
90
- end
91
-
92
- end
@@ -1,76 +0,0 @@
1
- module Picky
2
-
3
- module Sources
4
-
5
- # A Couch database source.
6
- #
7
- # Options:
8
- # * url
9
- # and all the options of a <tt>RestClient::Resource</tt>.
10
- # See http://github.com/archiloque/rest-client.
11
- #
12
- # Examples:
13
- # Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
14
- # Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
15
- #
16
- class Couch < Base
17
-
18
- # Raised when a Couch source is instantiated without a url.
19
- #
20
- # Example:
21
- # Picky::Sources::Couch.new(:column1, :column2) # without url option
22
- #
23
- class NoDBGiven < StandardError; end
24
-
25
- #
26
- #
27
- def initialize *category_names, options
28
- check_gem
29
-
30
- Hash === options && options[:url] || raise_no_db_given(category_names)
31
-
32
- @db = RestClient::Resource.new options.delete(:url), options
33
-
34
- key_format = options.delete :key_format
35
- @key_format = key_format && key_format.intern || :to_s
36
- end
37
-
38
- def to_s
39
- self.class.name
40
- end
41
-
42
- # Tries to require the rest_client gem.
43
- #
44
- def check_gem # :nodoc:
45
- require 'rest_client'
46
- rescue LoadError
47
- warn_gem_missing 'rest-client', 'the CouchDB source'
48
- exit 1
49
- end
50
-
51
- # Harvests the data to index.
52
- #
53
- # See important note, above.
54
- #
55
- @@id_key = '_id'
56
- def harvest category
57
- category_name = category.from.to_s
58
- get_data do |doc|
59
- yield doc[@@id_key], doc[category_name] || next
60
- end
61
- end
62
-
63
- def get_data &block # :nodoc:
64
- resp = @db['_all_docs?include_docs=true'].get
65
- JSON.parse(resp)['rows'].
66
- map{|row| row['doc']}.
67
- each &block
68
- end
69
-
70
- def raise_no_db_given category_names # :nodoc:
71
- raise NoDBGiven.new(category_names.join(', '))
72
- end
73
- end
74
- end
75
-
76
- end
@@ -1,83 +0,0 @@
1
- module Picky
2
-
3
- module Sources
4
-
5
- # Describes a CSV source, a file with comma separated values in it.
6
- #
7
- # The first column is implicitly assumed to be the id column.
8
- #
9
- # It takes the same options as the Ruby 1.9 CSV class.
10
- #
11
- # Examples:
12
- # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
13
- # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
14
- # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
15
- #
16
- class CSV < Base
17
-
18
- # Raised when a CSV source is instantiated without a file.
19
- #
20
- # Example:
21
- # Sources::CSV.new(:column1, :column2) # without file option
22
- #
23
- class NoFileGiven < StandardError; end
24
-
25
- # The CSV file's path, relative to PICKY_ROOT.
26
- #
27
- attr_reader :file_name
28
-
29
- # The options that were passed into #new.
30
- #
31
- attr_reader :csv_options, :key_format
32
-
33
- # The data category names.
34
- #
35
- attr_reader :category_names
36
-
37
- def initialize *category_names, options
38
- require 'csv'
39
- @category_names = category_names
40
-
41
- @csv_options = Hash === options && options || {}
42
- @file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
43
-
44
- key_format = options.delete :key_format
45
- @key_format = key_format && key_format.intern || :to_i
46
- end
47
-
48
- def to_s
49
- parameters = category_names
50
- parameters << { file: file_name }
51
- parameters << csv_options unless csv_options.empty?
52
- %Q{#{self.class.name}(#{parameters.join(', ')})}
53
- end
54
-
55
- # Raises a NoFileGiven exception.
56
- #
57
- def raise_no_file_given category_names # :nodoc:
58
- raise NoFileGiven.new(category_names.join(', '))
59
- end
60
-
61
- # Harvests the data to index.
62
- #
63
- def harvest category
64
- index = category_names.index category.from
65
- get_data do |indexed_id, *ary|
66
- text = ary[index]
67
- next unless text
68
- text.force_encoding 'utf-8' # TODO Still needed?
69
- yield indexed_id, text
70
- end
71
- end
72
-
73
- #
74
- #
75
- def get_data &block # :nodoc:
76
- ::CSV.foreach file_name, csv_options, &block
77
- end
78
-
79
- end
80
-
81
- end
82
-
83
- end
@@ -1,189 +0,0 @@
1
- module Picky
2
-
3
- module Sources
4
-
5
- # Describes a database source. Needs a SELECT statement
6
- # (with id in it), and a file option or the options from an AR config file.
7
- #
8
- # The select statement can be as complicated as you want,
9
- # as long as it has an id in it and as long as it can be
10
- # used in a CREATE TABLE AS statement.
11
- # (working on that last one)
12
- #
13
- # Examples:
14
- # Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
15
- # Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
16
- # Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
17
- # Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
18
- #
19
- class DB < Base
20
-
21
- # The select statement that was passed in.
22
- #
23
- attr_reader :select_statement
24
-
25
- # The database adapter.
26
- #
27
- attr_reader :database
28
-
29
- # The database connection options that were either passed in or loaded from the given file.
30
- #
31
- attr_reader :connection_options, :options
32
-
33
- @@traversal_id = :__picky_id
34
-
35
- def initialize select_statement, options = { file: 'app/db.yml' }
36
- check_gem
37
-
38
- @select_statement = select_statement
39
- @database = create_database_adapter
40
- @options = options
41
- end
42
-
43
- # Tries to require the active_record gem.
44
- #
45
- def check_gem # :nodoc:
46
- require 'active_record'
47
- rescue LoadError
48
- warn_gem_missing 'active_record', 'the (ActiveRecord) DB source'
49
- exit 1
50
- end
51
-
52
- def to_s
53
- parameters = [select_statement.inspect]
54
- parameters << options unless options.empty?
55
- %Q{#{self.class.name}(#{parameters.join(', ')})}
56
- end
57
-
58
- # Creates a database adapter for use with this source.
59
- #
60
- def create_database_adapter # :nodoc:
61
- # THINK Do not use ActiveRecord directly? Use set_table_name etc?
62
- #
63
- adapter_class = Class.new ActiveRecord::Base
64
- adapter_class.abstract_class = true
65
- adapter_class
66
- end
67
-
68
- # Configure the backend.
69
- #
70
- # Options:
71
- # Either
72
- # * file => 'some/filename.yml' # With an active record configuration.
73
- # Or
74
- # * The configuration as a hash.
75
- #
76
- def configure options # :nodoc:
77
- @connection_options = if filename = options[:file]
78
- File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
79
- else
80
- options
81
- end
82
- self
83
- end
84
-
85
- # Connect the backend.
86
- #
87
- # Will raise unless connection options have been given.
88
- #
89
- def connect_backend
90
- configure @options
91
- raise "Database backend not configured" unless connection_options
92
- database.establish_connection connection_options
93
- end
94
-
95
- # Take a snapshot of the data.
96
- #
97
- # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
98
- #
99
- def take_snapshot index
100
- timed_exclaim %Q{ "#{index.identifier}": Taking snapshot of database data.}
101
-
102
- origin = snapshot_table_name index.name
103
- on_database = database.connection
104
-
105
- # Drop the table if it exists.
106
- #
107
- on_database.drop_table origin if on_database.table_exists?(origin)
108
-
109
- # The adapters currently do not support this.
110
- #
111
- on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
112
-
113
- # Add a column that Picky uses to traverse the table's entries.
114
- #
115
- on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
116
-
117
- # Execute any special queries this index needs executed.
118
- #
119
- on_database.execute index.after_indexing if index.after_indexing
120
- end
121
-
122
- # Counts all the entries that are used for the index.
123
- #
124
- def count index_name
125
- database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
126
- end
127
-
128
- # The name of the snapshot table created by Picky.
129
- #
130
- def snapshot_table_name index_name
131
- "picky_#{index_name}_index"
132
- end
133
-
134
- # Harvests the data to index in chunks.
135
- #
136
- def harvest category, &block
137
- (0..count(category.index_name)).step(chunksize) do |offset|
138
- get_data category, offset, &block
139
- end
140
- end
141
-
142
- # Gets the data from the backend.
143
- #
144
- def get_data category, offset, &block # :nodoc:
145
- select_statement = harvest_statement_with_offset category, offset
146
-
147
- # THINK Not really nice like this. Rewrite if possible.
148
- #
149
- if database.connection.adapter_name == "PostgreSQL"
150
- id_key = 'id'
151
- text_key = category.from.to_s
152
- database.connection.execute(select_statement).each do |hash|
153
- id, text = hash.values_at id_key, text_key
154
- yield id, text if text
155
- end
156
- else
157
- database.connection.execute(select_statement).each do |id, text|
158
- yield id, text if text
159
- end
160
- end
161
- end
162
-
163
- # Builds a harvest statement for getting data to index.
164
- #
165
- def harvest_statement_with_offset category, offset
166
- statement = harvest_statement category
167
-
168
- statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
169
-
170
- "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
171
- end
172
-
173
- # The harvest statement used to pull data from the snapshot table.
174
- #
175
- def harvest_statement category
176
- "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
177
- end
178
-
179
- # The number of records that are loaded in each chunk.
180
- #
181
- def chunksize
182
- 25_000
183
- end
184
-
185
- end
186
-
187
- end
188
-
189
- end