picky 4.0.0pre1 → 4.0.0pre2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (112) hide show
  1. data/aux/picky/cli.rb +6 -2
  2. data/lib/picky.rb +10 -8
  3. data/lib/picky/backends/backend.rb +37 -0
  4. data/lib/picky/backends/file.rb +0 -20
  5. data/lib/picky/backends/memory.rb +0 -29
  6. data/lib/picky/backends/redis.rb +74 -15
  7. data/lib/picky/backends/redis/list.rb +1 -1
  8. data/lib/picky/backends/sqlite.rb +0 -27
  9. data/lib/picky/bundle.rb +2 -2
  10. data/lib/picky/bundle_indexed.rb +1 -1
  11. data/lib/picky/bundle_indexing.rb +1 -1
  12. data/lib/picky/categories_indexed.rb +1 -11
  13. data/lib/picky/category.rb +4 -4
  14. data/lib/picky/category/location.rb +25 -0
  15. data/lib/picky/category_realtime.rb +4 -3
  16. data/lib/picky/console.rb +1 -1
  17. data/lib/picky/constants.rb +1 -1
  18. data/lib/picky/ext/maybe_compile.rb +2 -2
  19. data/lib/picky/extensions/object.rb +3 -2
  20. data/lib/picky/generators/aliases.rb +7 -2
  21. data/lib/picky/generators/partial/default.rb +1 -0
  22. data/lib/picky/generators/similarity/default.rb +1 -0
  23. data/lib/picky/generators/similarity/phonetic.rb +13 -2
  24. data/lib/picky/generators/strategy.rb +0 -2
  25. data/lib/picky/generators/weights/constant.rb +1 -2
  26. data/lib/picky/generators/weights/default.rb +1 -0
  27. data/lib/picky/generators/weights/dynamic.rb +1 -1
  28. data/lib/picky/generators/weights/logarithmic.rb +1 -1
  29. data/lib/picky/generators/weights/{runtime.rb → stub.rb} +1 -3
  30. data/lib/picky/index.rb +3 -3
  31. data/lib/picky/index_indexing.rb +0 -2
  32. data/lib/picky/index_realtime.rb +1 -1
  33. data/lib/picky/indexers/base.rb +7 -0
  34. data/lib/picky/indexers/parallel.rb +2 -4
  35. data/lib/picky/indexers/serial.rb +2 -0
  36. data/lib/picky/indexes_indexing.rb +1 -1
  37. data/lib/picky/interfaces/live_parameters/master_child.rb +175 -0
  38. data/lib/picky/interfaces/live_parameters/unicorn.rb +37 -0
  39. data/lib/picky/loader.rb +238 -259
  40. data/lib/picky/query/allocation.rb +19 -10
  41. data/lib/picky/query/combination.rb +7 -1
  42. data/lib/picky/query/combinations.rb +1 -6
  43. data/lib/picky/query/token.rb +26 -36
  44. data/lib/picky/results.rb +18 -17
  45. data/lib/picky/scheduler.rb +2 -1
  46. data/lib/picky/search.rb +1 -1
  47. data/lib/picky/sinatra.rb +6 -6
  48. data/lib/picky/statistics.rb +2 -0
  49. data/lib/picky/tokenizer.rb +8 -8
  50. data/lib/picky/wrappers/bundle/calculation.rb +4 -4
  51. data/lib/picky/wrappers/bundle/location.rb +1 -2
  52. data/lib/tasks/framework.rake +1 -1
  53. data/lib/tasks/statistics.rake +1 -1
  54. data/lib/tasks/try.rake +1 -1
  55. data/lib/tasks/try.rb +1 -1
  56. data/spec/aux/picky/cli_spec.rb +12 -12
  57. data/spec/ext/performant_spec.rb +16 -16
  58. data/spec/functional/backends/file_spec.rb +78 -7
  59. data/spec/functional/backends/memory_spec.rb +78 -7
  60. data/spec/functional/backends/redis_spec.rb +73 -13
  61. data/spec/functional/dynamic_weights_spec.rb +3 -4
  62. data/spec/functional/realtime_spec.rb +2 -2
  63. data/spec/functional/speed_spec.rb +2 -2
  64. data/spec/functional/terminate_early_spec.rb +3 -3
  65. data/spec/lib/analytics_spec.rb +1 -1
  66. data/spec/lib/analyzer_spec.rb +5 -3
  67. data/spec/lib/categories_indexed_spec.rb +38 -20
  68. data/spec/lib/category/location_spec.rb +30 -0
  69. data/spec/lib/character_substituters/west_european_spec.rb +1 -0
  70. data/spec/lib/extensions/hash_spec.rb +6 -5
  71. data/spec/lib/extensions/module_spec.rb +6 -6
  72. data/spec/lib/extensions/object_spec.rb +9 -8
  73. data/spec/lib/extensions/string_spec.rb +1 -1
  74. data/spec/lib/generators/similarity/phonetic_spec.rb +11 -0
  75. data/spec/lib/index_realtime_spec.rb +5 -5
  76. data/spec/lib/interfaces/{live_parameters_spec.rb → live_parameters/master_child_spec.rb} +26 -26
  77. data/spec/lib/interfaces/live_parameters/unicorn_spec.rb +160 -0
  78. data/spec/lib/loader_spec.rb +65 -25
  79. data/spec/lib/query/allocation_spec.rb +25 -22
  80. data/spec/lib/query/combinations_spec.rb +13 -36
  81. data/spec/lib/query/token_spec.rb +144 -131
  82. data/spec/lib/query/tokens_spec.rb +14 -0
  83. data/spec/lib/results_spec.rb +14 -8
  84. data/spec/lib/search_spec.rb +1 -1
  85. data/spec/lib/sinatra_spec.rb +8 -8
  86. metadata +28 -91
  87. data/lib/picky/adapters/rack.rb +0 -34
  88. data/lib/picky/adapters/rack/base.rb +0 -27
  89. data/lib/picky/adapters/rack/live_parameters.rb +0 -37
  90. data/lib/picky/adapters/rack/search.rb +0 -67
  91. data/lib/picky/application.rb +0 -268
  92. data/lib/picky/frontend_adapters/rack.rb +0 -161
  93. data/lib/picky/interfaces/live_parameters.rb +0 -187
  94. data/lib/picky/sources/base.rb +0 -92
  95. data/lib/picky/sources/couch.rb +0 -76
  96. data/lib/picky/sources/csv.rb +0 -83
  97. data/lib/picky/sources/db.rb +0 -189
  98. data/lib/picky/sources/delicious.rb +0 -63
  99. data/lib/picky/sources/mongo.rb +0 -80
  100. data/lib/picky/wrappers/category/location.rb +0 -38
  101. data/lib/tasks/routes.rake +0 -8
  102. data/spec/lib/adapters/rack/base_spec.rb +0 -24
  103. data/spec/lib/adapters/rack/live_parameters_spec.rb +0 -26
  104. data/spec/lib/adapters/rack/query_spec.rb +0 -39
  105. data/spec/lib/application_spec.rb +0 -155
  106. data/spec/lib/frontend_adapters/rack_spec.rb +0 -294
  107. data/spec/lib/sources/base_spec.rb +0 -53
  108. data/spec/lib/sources/couch_spec.rb +0 -114
  109. data/spec/lib/sources/csv_spec.rb +0 -89
  110. data/spec/lib/sources/db_spec.rb +0 -125
  111. data/spec/lib/sources/delicious_spec.rb +0 -94
  112. data/spec/lib/sources/mongo_spec.rb +0 -50
@@ -1,187 +0,0 @@
1
- module Picky
2
-
3
- # This is very optional.
4
- # Only load if the user wants it.
5
- #
6
- module Interfaces
7
-
8
- # This is an interface that provides the user of
9
- # Picky with the possibility to change parameters
10
- # while the Application is running.
11
- #
12
- # Important Note: This will only work in Master/Child configurations.
13
- #
14
- class LiveParameters
15
-
16
- def initialize
17
- @child, @parent = IO.pipe
18
- start_master_process_thread
19
- end
20
-
21
- # This runs a thread that listens to child processes.
22
- #
23
- def start_master_process_thread
24
- # This thread is stopped in the children.
25
- #
26
- Thread.new do
27
- loop do
28
- IO.select([@child], nil, nil, 2) or next
29
- result = @child.gets ';;;'
30
- pid, configuration_hash = eval result
31
- next unless Hash === configuration_hash
32
- next if configuration_hash.empty?
33
- exclaim "Trying to update MASTER configuration."
34
- try_updating_configuration_with configuration_hash
35
- kill_each_worker_except pid
36
-
37
- # Fails hard on an error.
38
- #
39
- end
40
- end
41
- end
42
-
43
- # TODO This needs to be webserver agnostic.
44
- #
45
- def worker_pids
46
- Unicorn::HttpServer::WORKERS.keys
47
- end
48
-
49
- # Taken from Unicorn.
50
- #
51
- def kill_each_worker_except pid
52
- worker_pids.each do |wpid|
53
- next if wpid == pid
54
- kill_worker :KILL, wpid
55
- end
56
- end
57
- def kill_worker signal, wpid
58
- Process.kill signal, wpid
59
- exclaim "Killing worker ##{wpid} with signal #{signal}."
60
- rescue Errno::ESRCH
61
- remove_worker wpid
62
- end
63
- # TODO This needs to be Webserver agnostic.
64
- #
65
- def remove_worker wpid
66
- worker = Unicorn::HttpServer::WORKERS.delete(wpid) and worker.tmp.close rescue nil
67
- end
68
-
69
- # Updates any parameters with the ones given and
70
- # returns the updated params.
71
- #
72
- # The params are a strictly defined hash of:
73
- # * querying_removes_characters: Regexp
74
- # * querying_stopwords: Regexp
75
- # * querying_splits_text_on: Regexp
76
- #
77
- # This first tries to update in the child process,
78
- # and if successful, in the parent process
79
- #
80
- def parameters configuration_hash
81
- close_child
82
- exclaim "Trying to update worker child configuration." unless configuration_hash.empty?
83
- try_updating_configuration_with configuration_hash
84
- write_parent configuration_hash
85
- extract_configuration
86
- rescue CouldNotUpdateConfigurationError => e
87
- # I need to die such that my broken config is never used.
88
- #
89
- exclaim "Child process #{Process.pid} performs harakiri because of broken config."
90
- harakiri
91
- { e.config_key => :ERROR }
92
- end
93
- # Kills itself, while still answering the request honorably.
94
- #
95
- def harakiri
96
- Process.kill :QUIT, Process.pid
97
- end
98
- # Write the parent.
99
- #
100
- # Note: The ;;; is the end marker for the message.
101
- #
102
- def write_parent configuration_hash
103
- @parent.write "#{[Process.pid, configuration_hash]};;;"
104
- end
105
- # Close the child if it isn't yet closed.
106
- #
107
- def close_child
108
- @child.close unless @child.closed?
109
- end
110
-
111
- class CouldNotUpdateConfigurationError < StandardError
112
- attr_reader :config_key
113
- def initialize config_key, message
114
- super message
115
- @config_key = config_key
116
- end
117
- end
118
-
119
- # Tries updating the configuration in the child process or parent process.
120
- #
121
- def try_updating_configuration_with configuration_hash
122
- current_key = nil
123
- begin
124
- configuration_hash.each_pair do |key, new_value|
125
- exclaim " Setting #{key} with #{new_value}."
126
- current_key = key
127
- send :"#{key}=", new_value
128
- end
129
- rescue StandardError => e
130
- # Catch any error and reraise as config error.
131
- #
132
- raise CouldNotUpdateConfigurationError.new current_key, e.message
133
- end
134
- end
135
-
136
- def extract_configuration
137
- {
138
- querying_removes_characters: querying_removes_characters,
139
- querying_stopwords: querying_stopwords,
140
- querying_splits_text_on: querying_splits_text_on
141
- }
142
- end
143
-
144
- # TODO Move to Interface object.
145
- #
146
- def querying_removes_characters
147
- regexp = Tokenizer.query_default.instance_variable_get :@removes_characters_regexp
148
- regexp && regexp.source
149
- end
150
- def querying_removes_characters= new_value
151
- Tokenizer.query_default.instance_variable_set(:@removes_characters_regexp, %r{#{new_value}})
152
- end
153
- def querying_stopwords
154
- regexp = Tokenizer.query_default.instance_variable_get :@remove_stopwords_regexp
155
- regexp && regexp.source
156
- end
157
- def querying_stopwords= new_value
158
- Tokenizer.query_default.instance_variable_set(:@remove_stopwords_regexp, %r{#{new_value}})
159
- end
160
- def querying_splits_text_on
161
- splits = Tokenizer.query_default.instance_variable_get :@splits_text_on
162
- splits && splits.respond_to?(:source) ? splits.source : splits
163
- end
164
- def querying_splits_text_on= new_value
165
- splits = Tokenizer.query_default.instance_variable_get :@splits_text_on
166
- if splits.respond_to?(:source)
167
- Tokenizer.query_default.instance_variable_set(:@splits_text_on, %r{#{new_value}})
168
- else
169
- Tokenizer.query_default.instance_variable_set(:@splits_text_on, new_value)
170
- end
171
- end
172
-
173
- #
174
- #
175
- def to_s
176
- "Suckerfish Live Interface (Use the picky-live gem to introspect)"
177
- end
178
-
179
- end
180
-
181
- end
182
-
183
- # Aka.
184
- #
185
- LiveParameters = Interfaces::LiveParameters
186
-
187
- end
@@ -1,92 +0,0 @@
1
- module Picky
2
-
3
- # = Data Sources
4
- #
5
- # Currently, Picky offers the following Sources:
6
- # * CSV (comma – or other – separated file)
7
- # * Couch (CouchDB, key-value store)
8
- # * DB (Databases, foremost MySQL)
9
- # * Delicious (http://del.icio.us, online bookmarking service)
10
- # See also:
11
- # http://github.com/floere/picky/wiki/Sources-Configuration
12
- #
13
- # Don't worry if your source isn't here. Adding your own is easy:
14
- # http://github.com/floere/picky/wiki/Contributing-sources
15
- #
16
- module Sources
17
-
18
- # Sources are where your data comes from.
19
- #
20
- # A source has 1 mandatory and 2 optional methods:
21
- # * connect_backend (_optional_): called once for each type/category pair.
22
- # * harvest: Used by the indexer to gather data. Yields an indexed_id (string or integer) and a string value.
23
- # * take_snapshot (_optional_): called once for each index or category (if indexing a single category).
24
- #
25
- # This base class "implements" all these methods, but they don't do anything.
26
- # Subclass this class <tt>class MySource < Base</tt> and override the methods in your source to do something.
27
- #
28
- class Base
29
-
30
- attr_reader :key_format
31
-
32
- # Connect to the backend.
33
- #
34
- # Called once per index/category combination before harvesting.
35
- #
36
- # Examples:
37
- # * The DB backend connects the DB adapter.
38
- # * We open a connection to a key value store.
39
- # * We open a file with data.
40
- #
41
- def connect_backend
42
-
43
- end
44
-
45
- # Called by the indexer when gathering data.
46
- #
47
- # Yields the data (id, text for id) for the given category.
48
- #
49
- # When implementing or overriding your own,
50
- # be sure to <tt>yield(id, text_for_id)</tt> (or <tt>block.call(id, text_for_id)</tt>)
51
- # for the given type symbol and category symbol.
52
- #
53
- # Note: Since harvest needs to be implemented, it has no default implementation.
54
- #
55
- # def harvest category # :yields: id, text_for_id
56
- #
57
- # end
58
-
59
- # Used to take a snapshot of your data if it is fast changing.
60
- #
61
- # Called once for each index before harvesting.
62
- # If it has been called on a source already by an index,
63
- # it won't be called again for a category inside that index.
64
- #
65
- # Example:
66
- # * In a DB source, a table based on the source's select statement is created.
67
- #
68
- def take_snapshot index
69
-
70
- end
71
-
72
- # Used to check if a snapshot has been done already.
73
- #
74
- # Example:
75
- # * In a DB source, a table based on the source's select statement is created.
76
- #
77
- def with_snapshot index
78
- connect_backend
79
- @snapshot_taken ||= 0
80
- if @snapshot_taken.zero?
81
- take_snapshot index
82
- end
83
- @snapshot_taken += 1
84
- yield
85
- @snapshot_taken -= 1
86
- end
87
-
88
- end
89
-
90
- end
91
-
92
- end
@@ -1,76 +0,0 @@
1
- module Picky
2
-
3
- module Sources
4
-
5
- # A Couch database source.
6
- #
7
- # Options:
8
- # * url
9
- # and all the options of a <tt>RestClient::Resource</tt>.
10
- # See http://github.com/archiloque/rest-client.
11
- #
12
- # Examples:
13
- # Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984')
14
- # Picky::Sources::Couch.new(:title, :author, :isbn, url:'localhost:5984', user:'someuser', password:'somepassword')
15
- #
16
- class Couch < Base
17
-
18
- # Raised when a Couch source is instantiated without a url.
19
- #
20
- # Example:
21
- # Picky::Sources::Couch.new(:column1, :column2) # without url option
22
- #
23
- class NoDBGiven < StandardError; end
24
-
25
- #
26
- #
27
- def initialize *category_names, options
28
- check_gem
29
-
30
- Hash === options && options[:url] || raise_no_db_given(category_names)
31
-
32
- @db = RestClient::Resource.new options.delete(:url), options
33
-
34
- key_format = options.delete :key_format
35
- @key_format = key_format && key_format.intern || :to_s
36
- end
37
-
38
- def to_s
39
- self.class.name
40
- end
41
-
42
- # Tries to require the rest_client gem.
43
- #
44
- def check_gem # :nodoc:
45
- require 'rest_client'
46
- rescue LoadError
47
- warn_gem_missing 'rest-client', 'the CouchDB source'
48
- exit 1
49
- end
50
-
51
- # Harvests the data to index.
52
- #
53
- # See important note, above.
54
- #
55
- @@id_key = '_id'
56
- def harvest category
57
- category_name = category.from.to_s
58
- get_data do |doc|
59
- yield doc[@@id_key], doc[category_name] || next
60
- end
61
- end
62
-
63
- def get_data &block # :nodoc:
64
- resp = @db['_all_docs?include_docs=true'].get
65
- JSON.parse(resp)['rows'].
66
- map{|row| row['doc']}.
67
- each &block
68
- end
69
-
70
- def raise_no_db_given category_names # :nodoc:
71
- raise NoDBGiven.new(category_names.join(', '))
72
- end
73
- end
74
- end
75
-
76
- end
@@ -1,83 +0,0 @@
1
- module Picky
2
-
3
- module Sources
4
-
5
- # Describes a CSV source, a file with comma separated values in it.
6
- #
7
- # The first column is implicitly assumed to be the id column.
8
- #
9
- # It takes the same options as the Ruby 1.9 CSV class.
10
- #
11
- # Examples:
12
- # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv')
13
- # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', col_sep:';')
14
- # Sources::CSV.new(:title, :author, :isbn, file:'data/a_csv_file.csv', row_sep:"\n")
15
- #
16
- class CSV < Base
17
-
18
- # Raised when a CSV source is instantiated without a file.
19
- #
20
- # Example:
21
- # Sources::CSV.new(:column1, :column2) # without file option
22
- #
23
- class NoFileGiven < StandardError; end
24
-
25
- # The CSV file's path, relative to PICKY_ROOT.
26
- #
27
- attr_reader :file_name
28
-
29
- # The options that were passed into #new.
30
- #
31
- attr_reader :csv_options, :key_format
32
-
33
- # The data category names.
34
- #
35
- attr_reader :category_names
36
-
37
- def initialize *category_names, options
38
- require 'csv'
39
- @category_names = category_names
40
-
41
- @csv_options = Hash === options && options || {}
42
- @file_name = @csv_options.delete(:file) || raise_no_file_given(category_names)
43
-
44
- key_format = options.delete :key_format
45
- @key_format = key_format && key_format.intern || :to_i
46
- end
47
-
48
- def to_s
49
- parameters = category_names
50
- parameters << { file: file_name }
51
- parameters << csv_options unless csv_options.empty?
52
- %Q{#{self.class.name}(#{parameters.join(', ')})}
53
- end
54
-
55
- # Raises a NoFileGiven exception.
56
- #
57
- def raise_no_file_given category_names # :nodoc:
58
- raise NoFileGiven.new(category_names.join(', '))
59
- end
60
-
61
- # Harvests the data to index.
62
- #
63
- def harvest category
64
- index = category_names.index category.from
65
- get_data do |indexed_id, *ary|
66
- text = ary[index]
67
- next unless text
68
- text.force_encoding 'utf-8' # TODO Still needed?
69
- yield indexed_id, text
70
- end
71
- end
72
-
73
- #
74
- #
75
- def get_data &block # :nodoc:
76
- ::CSV.foreach file_name, csv_options, &block
77
- end
78
-
79
- end
80
-
81
- end
82
-
83
- end
@@ -1,189 +0,0 @@
1
- module Picky
2
-
3
- module Sources
4
-
5
- # Describes a database source. Needs a SELECT statement
6
- # (with id in it), and a file option or the options from an AR config file.
7
- #
8
- # The select statement can be as complicated as you want,
9
- # as long as it has an id in it and as long as it can be
10
- # used in a CREATE TABLE AS statement.
11
- # (working on that last one)
12
- #
13
- # Examples:
14
- # Sources::DB.new('SELECT id, title, author, year FROM books') # Uses the config from app/db.yml by default.
15
- # Sources::DB.new('SELECT id, title, author, year FROM books', file: 'app/some_db.yml')
16
- # Sources::DB.new('SELECT b.id, b.title, b.author, b.publishing_year as year FROM books b INNER JOIN ON ...', file: 'app/some_db.yml')
17
- # Sources::DB.new('SELECT id, title, author, year FROM books', adapter: 'mysql', host:'localhost', ...)
18
- #
19
- class DB < Base
20
-
21
- # The select statement that was passed in.
22
- #
23
- attr_reader :select_statement
24
-
25
- # The database adapter.
26
- #
27
- attr_reader :database
28
-
29
- # The database connection options that were either passed in or loaded from the given file.
30
- #
31
- attr_reader :connection_options, :options
32
-
33
- @@traversal_id = :__picky_id
34
-
35
- def initialize select_statement, options = { file: 'app/db.yml' }
36
- check_gem
37
-
38
- @select_statement = select_statement
39
- @database = create_database_adapter
40
- @options = options
41
- end
42
-
43
- # Tries to require the active_record gem.
44
- #
45
- def check_gem # :nodoc:
46
- require 'active_record'
47
- rescue LoadError
48
- warn_gem_missing 'active_record', 'the (ActiveRecord) DB source'
49
- exit 1
50
- end
51
-
52
- def to_s
53
- parameters = [select_statement.inspect]
54
- parameters << options unless options.empty?
55
- %Q{#{self.class.name}(#{parameters.join(', ')})}
56
- end
57
-
58
- # Creates a database adapter for use with this source.
59
- #
60
- def create_database_adapter # :nodoc:
61
- # THINK Do not use ActiveRecord directly? Use set_table_name etc?
62
- #
63
- adapter_class = Class.new ActiveRecord::Base
64
- adapter_class.abstract_class = true
65
- adapter_class
66
- end
67
-
68
- # Configure the backend.
69
- #
70
- # Options:
71
- # Either
72
- # * file => 'some/filename.yml' # With an active record configuration.
73
- # Or
74
- # * The configuration as a hash.
75
- #
76
- def configure options # :nodoc:
77
- @connection_options = if filename = options[:file]
78
- File.open(File.join(PICKY_ROOT, filename)) { |file| YAML::load(file) }
79
- else
80
- options
81
- end
82
- self
83
- end
84
-
85
- # Connect the backend.
86
- #
87
- # Will raise unless connection options have been given.
88
- #
89
- def connect_backend
90
- configure @options
91
- raise "Database backend not configured" unless connection_options
92
- database.establish_connection connection_options
93
- end
94
-
95
- # Take a snapshot of the data.
96
- #
97
- # Uses CREATE TABLE AS with the given SELECT statement to create a snapshot of the data.
98
- #
99
- def take_snapshot index
100
- timed_exclaim %Q{ "#{index.identifier}": Taking snapshot of database data.}
101
-
102
- origin = snapshot_table_name index.name
103
- on_database = database.connection
104
-
105
- # Drop the table if it exists.
106
- #
107
- on_database.drop_table origin if on_database.table_exists?(origin)
108
-
109
- # The adapters currently do not support this.
110
- #
111
- on_database.execute "CREATE TABLE #{origin} AS #{select_statement}"
112
-
113
- # Add a column that Picky uses to traverse the table's entries.
114
- #
115
- on_database.add_column origin, @@traversal_id, :primary_key, :null => :false
116
-
117
- # Execute any special queries this index needs executed.
118
- #
119
- on_database.execute index.after_indexing if index.after_indexing
120
- end
121
-
122
- # Counts all the entries that are used for the index.
123
- #
124
- def count index_name
125
- database.connection.select_value("SELECT COUNT(#{@@traversal_id}) FROM #{snapshot_table_name(index_name)}").to_i
126
- end
127
-
128
- # The name of the snapshot table created by Picky.
129
- #
130
- def snapshot_table_name index_name
131
- "picky_#{index_name}_index"
132
- end
133
-
134
- # Harvests the data to index in chunks.
135
- #
136
- def harvest category, &block
137
- (0..count(category.index_name)).step(chunksize) do |offset|
138
- get_data category, offset, &block
139
- end
140
- end
141
-
142
- # Gets the data from the backend.
143
- #
144
- def get_data category, offset, &block # :nodoc:
145
- select_statement = harvest_statement_with_offset category, offset
146
-
147
- # THINK Not really nice like this. Rewrite if possible.
148
- #
149
- if database.connection.adapter_name == "PostgreSQL"
150
- id_key = 'id'
151
- text_key = category.from.to_s
152
- database.connection.execute(select_statement).each do |hash|
153
- id, text = hash.values_at id_key, text_key
154
- yield id, text if text
155
- end
156
- else
157
- database.connection.execute(select_statement).each do |id, text|
158
- yield id, text if text
159
- end
160
- end
161
- end
162
-
163
- # Builds a harvest statement for getting data to index.
164
- #
165
- def harvest_statement_with_offset category, offset
166
- statement = harvest_statement category
167
-
168
- statement += statement.include?('WHERE') ? ' AND' : ' WHERE'
169
-
170
- "#{statement} st.#{@@traversal_id} > #{offset} LIMIT #{chunksize}"
171
- end
172
-
173
- # The harvest statement used to pull data from the snapshot table.
174
- #
175
- def harvest_statement category
176
- "SELECT id, #{category.from} FROM #{snapshot_table_name(category.index_name)} st"
177
- end
178
-
179
- # The number of records that are loaded in each chunk.
180
- #
181
- def chunksize
182
- 25_000
183
- end
184
-
185
- end
186
-
187
- end
188
-
189
- end