elastic-rails 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f9ab5fe73898a276646cbf7c3f9a564d3f7419e4
4
- data.tar.gz: a90c322ea5f8226affff52f5646974ac42aebe5a
3
+ metadata.gz: 7d00b5b5c6f5a1bc7701c5d635198e88094e65ea
4
+ data.tar.gz: 09550318bf8064a884b097576ff2aa15d6702daa
5
5
  SHA512:
6
- metadata.gz: 54b6f86610924e06e58690fa99e45d74dd2c18f5fd902985efe5ff6ab827117a44ceb6be50a8637f3dee085591f677514d706556907af15e2c2b2925ebc45980
7
- data.tar.gz: 346536b41ffcaef70416b0afd92f2c54ee4b4658077a500cb71cda8c24fd572636d619eae0f4456873b8115c84dbb3db6a1ad12110755780c3663ba5709db417
6
+ metadata.gz: 5be452213033d0d2fb6a868d65709ed68260cfffb847566dd3ac2094a571c69aba9b6b75a06455f85091d5a35a12269cab78b68435efd355af40199cb1e8f4c1
7
+ data.tar.gz: 03b952d5219a6c47606db579f1b06c73c8a9c326949ce12da855f995c9053adfdd458b5e1570ae71296446a8412b4a4485c9615d3dc4d8800ca0aedc9a95ced8
data/README.md CHANGED
@@ -60,7 +60,7 @@ BikeIndex.must(brand: 'Trek', size: 'M').should(year: { gte: 2015 }).avg(:price)
60
60
  BikeIndex.must(origin: 'China').segment(:brand).each { |brand, bikes| }
61
61
  ```
62
62
 
63
- migrate: remaps if necessary
63
+ migrate: remaps if necessary
64
64
 
65
65
  reindex: attempts to rotate if index already exist
66
66
 
@@ -70,8 +70,6 @@ TODO: Write usage instructions here
70
70
 
71
71
  These are some features that will be added in the future:
72
72
 
73
- * Support for record deletion
74
-
75
73
 
76
74
  ## Development
77
75
 
@@ -1,23 +1,16 @@
1
1
  module Elastic::Core
2
2
  class Connector
3
- def initialize(_name, _types, _mapping)
3
+ def initialize(_name, _types, _mapping, settling_time: 10.seconds)
4
4
  @name = _name
5
5
  @types = _types
6
6
  @mapping = _mapping
7
+ @settling_time = settling_time
7
8
  end
8
9
 
9
10
  def index_name
10
11
  @index_name ||= "#{Elastic.config.index}_#{@name}"
11
12
  end
12
13
 
13
- def read_index_name
14
- index_name
15
- end
16
-
17
- def write_index_name
18
- Thread.current[write_index_thread_override] || write_index_alias
19
- end
20
-
21
14
  def status
22
15
  actual_name = resolve_actual_index_name
23
16
  return :not_available if actual_name.nil?
@@ -47,8 +40,8 @@ module Elastic::Core
47
40
 
48
41
  def migrate(batch_size: nil)
49
42
  unless remap
50
- rollover do
51
- copy_documents(read_index_name, write_index_name, batch_size || default_batch_size)
43
+ rollover do |new_index|
44
+ copy_to new_index, batch_size: batch_size
52
45
  end
53
46
  end
54
47
 
@@ -57,84 +50,130 @@ module Elastic::Core
57
50
 
58
51
  def index(_document)
59
52
  # TODO: validate document type
53
+ operations = write_indices.map do |write_index|
54
+ { 'index' => _document.merge('_index' => write_index) }
55
+ end
60
56
 
61
- api.index(
62
- index: write_index_name,
63
- id: _document['_id'],
64
- type: _document['_type'],
65
- body: _document['data']
66
- )
57
+ api.bulk(body: operations)
67
58
  end
68
59
 
69
60
  def bulk_index(_documents)
70
61
  # TODO: validate documents type
71
-
72
62
  body = _documents.map { |doc| { 'index' => doc } }
73
63
 
74
- retry_on_temporary_error('bulk indexing') do
75
- api.bulk(index: write_index_name, body: body)
64
+ write_indices.each do |write_index|
65
+ retry_on_temporary_error('bulk indexing') do
66
+ api.bulk(index: write_index, body: body)
67
+ end
76
68
  end
77
69
  end
78
70
 
79
- def refresh
80
- api.indices.refresh index: read_index_name
71
+ def delete(_document)
72
+ raise ArgumentError, 'document must provide an id' unless _document['_id']
73
+ raise ArgumentError, 'document must provide a type' unless _document['_type']
74
+
75
+ write_index, rolling_index = write_indices
76
+
77
+ operations = [{ 'delete' => _document.merge('_index' => write_index) }]
78
+
79
+ if rolling_index
80
+ operations << {
81
+ 'index' => _document.merge(
82
+ '_index' => rolling_index,
83
+ 'data' => { '_mark_for_deletion' => true }
84
+ )
85
+ }
86
+ end
87
+
88
+ api.bulk(body: operations)
81
89
  end
82
90
 
83
- def find(_type, _id)
84
- api.get(index: write_index_name, type: _type, id: _id)
91
+ def refresh
92
+ api.indices.refresh index: index_name
85
93
  end
86
94
 
87
- def delete(_type, _id)
88
- api.delete(index: write_index_name, type: _type, id: _id)
95
+ def find(_type, _id)
96
+ api.get(index: index_name, type: _type, id: _id)
89
97
  end
90
98
 
91
99
  def count(query: nil, type: nil)
92
- api.count(index: read_index_name, type: type, body: query)['count']
100
+ api.count(index: index_name, type: type, body: query)['count']
93
101
  end
94
102
 
95
103
  def query(query: nil, type: nil)
96
- api.search(index: read_index_name, type: type, body: query)
104
+ api.search(index: index_name, type: type, body: query)
97
105
  end
98
106
 
99
- def rollover(&_block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
107
+ def rollover(&_block) # rubocop:disable Metrics/MethodLength
108
+ actual_index, rolling_index = resolve_write_indices
109
+
110
+ unless rolling_index.nil?
111
+ raise Elastic::RolloverError, 'rollover process already in progress'
112
+ end
113
+
100
114
  new_index = create_index_w_mapping
101
- tmp_index = create_index_w_mapping('tmp')
102
- actual_index = resolve_actual_index_name
103
115
 
104
116
  begin
105
- transfer_alias(write_index_alias, from: actual_index, to: tmp_index)
106
-
117
+ transfer_alias(write_index_alias, to: new_index)
118
+ wait_for_index_to_stabilize
107
119
  perform_optimized_write_on(new_index, &_block)
108
-
120
+ delete_marked_for_deletion new_index
109
121
  transfer_alias(index_name, from: actual_index, to: new_index)
110
- transfer_alias(write_index_alias, from: tmp_index, to: new_index)
111
- api.indices.delete index: actual_index if actual_index
122
+ transfer_alias(write_index_alias, from: actual_index)
123
+ wait_for_index_to_stabilize
124
+ api.indices.delete index: actual_index
112
125
  rescue
113
- transfer_alias(write_index_alias, from: tmp_index, to: actual_index)
114
126
  api.indices.delete index: new_index
115
- ensure
116
- # rollback
117
- # TODO: what would happen if the following fails? O.O
118
- copy_documents(tmp_index, write_index_name, small_batch_size)
119
- api.indices.delete index: tmp_index
120
- api.indices.refresh index: index_name
127
+ raise
128
+ end
129
+ end
130
+
131
+ def copy_to(_to, batch_size: nil) # rubocop:disable Metrics/AbcSize
132
+ api.indices.refresh index: index_name
133
+
134
+ r = api.search(
135
+ index: index_name,
136
+ body: { sort: ['_doc'] },
137
+ scroll: '5m',
138
+ size: batch_size || default_batch_size
139
+ )
140
+
141
+ count = 0
142
+ while !r['hits']['hits'].empty?
143
+ count += r['hits']['hits'].count
144
+ Elastic.logger.info "Copied #{count} docs"
145
+
146
+ body = r['hits']['hits'].map { |h| transform_hit_to_create(h) }
147
+ api.bulk(index: _to, body: body)
148
+
149
+ r = api.scroll scroll: '5m', scroll_id: r['_scroll_id']
121
150
  end
122
151
  end
123
152
 
124
153
  private
125
154
 
155
+ def wait_for_index_to_stabilize
156
+ return if @settling_time == 0
157
+ Elastic.logger.info "Waiting #{@settling_time * 1.2}s for write indices to stabilize ..."
158
+ sleep(@settling_time * 1.2)
159
+ end
160
+
126
161
  def api
127
162
  Elastic.config.api_client
128
163
  end
129
164
 
165
+ def write_indices
166
+ Thread.current[write_index_thread_override] || resolve_write_indices
167
+ end
168
+
130
169
  def perform_optimized_write_on(_index)
131
- old_index = Thread.current[write_index_thread_override]
132
- Thread.current[write_index_thread_override] = _index
170
+ old_indices = Thread.current[write_index_thread_override]
171
+ Thread.current[write_index_thread_override] = [_index]
133
172
  configure_index(_index, refresh_interval: -1)
134
- yield
173
+ yield _index
135
174
  ensure
136
175
  configure_index(_index, refresh_interval: '1s')
137
- Thread.current[write_index_thread_override] = old_index
176
+ Thread.current[write_index_thread_override] = old_indices
138
177
  end
139
178
 
140
179
  def write_index_thread_override
@@ -142,7 +181,26 @@ module Elastic::Core
142
181
  end
143
182
 
144
183
  def write_index_alias
145
- @write_index_alias = "#{index_name}.w"
184
+ @write_index_alias ||= "#{index_name}.w"
185
+ end
186
+
187
+ def resolve_write_indices
188
+ @write_indices = nil if write_indices_expired?
189
+ @write_indices ||= begin
190
+ result = api.indices.get_alias(name: write_index_alias)
191
+ @write_indices_expiration = @settling_time.from_now
192
+ result.keys.sort # lower timestamp first (actual)
193
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
194
+ raise Elastic::MissingIndexError, 'index does not exist, call migrate first'
195
+ end
196
+ end
197
+
198
+ def delete_marked_for_deletion(_index)
199
+ api.delete_by_query(index: _index, body: { query: { term: { _mark_for_deletion: true } } })
200
+ end
201
+
202
+ def write_indices_expired?
203
+ @write_indices_expiration && @write_indices_expiration < Time.current
146
204
  end
147
205
 
148
206
  def resolve_actual_index_name
@@ -152,8 +210,8 @@ module Elastic::Core
152
210
  nil
153
211
  end
154
212
 
155
- def create_index_w_mapping(_role = 'main')
156
- new_name = "#{index_name}:#{_role}:#{Time.now.to_i}"
213
+ def create_index_w_mapping
214
+ new_name = "#{index_name}:#{Time.now.to_i}"
157
215
  api.indices.create index: new_name
158
216
  api.cluster.health wait_for_status: 'yellow'
159
217
  setup_index_types new_name
@@ -201,44 +259,24 @@ module Elastic::Core
201
259
  api.indices.update_aliases body: { actions: actions }
202
260
  end
203
261
 
204
- def copy_documents(_from, _to, _batch_size)
205
- api.indices.refresh index: _from
206
-
207
- r = api.search(
208
- index: _from,
209
- body: { sort: ['_doc'] },
210
- scroll: '5m',
211
- size: _batch_size
212
- )
213
-
214
- count = 0
215
- while !r['hits']['hits'].empty?
216
- count += r['hits']['hits'].count
217
- Elastic.logger.info "Copied #{count} docs"
218
-
219
- body = r['hits']['hits'].map { |h| { 'index' => transform_hit_to_doc(h) } }
220
- api.bulk(index: _to, body: body)
221
-
222
- r = api.scroll scroll: '5m', scroll_id: r['_scroll_id']
223
- end
224
- end
225
-
226
262
  def configure_index(_index, _settings)
227
263
  api.indices.put_settings index: _index, body: { index: _settings }
228
264
  end
229
265
 
230
- def transform_hit_to_doc(_hit)
231
- { '_id' => _hit['_id'], '_type' => _hit['_type'], 'data' => _hit['_source'] }
266
+ def transform_hit_to_create(_hit)
267
+ {
268
+ 'create' => {
269
+ '_id' => _hit['_id'],
270
+ '_type' => _hit['_type'],
271
+ 'data' => _hit['_source']
272
+ }
273
+ }
232
274
  end
233
275
 
234
276
  def default_batch_size
235
277
  1_000
236
278
  end
237
279
 
238
- def small_batch_size
239
- 500
240
- end
241
-
242
280
  def retry_on_temporary_error(_action, retries: 3)
243
281
  return yield
244
282
  rescue Elasticsearch::Transport::Transport::Errors::ServiceUnavailable,
@@ -16,34 +16,21 @@ module Elastic::Core
16
16
  @definition.fields
17
17
  end
18
18
 
19
- def read_elastic_type
20
- object.class.to_s
21
- end
22
-
23
- def read_elastic_id
24
- if has_attribute_for_indexing?(:id)
25
- read_attribute_for_indexing(:id)
26
- end
19
+ def as_elastic_document(only_meta: false)
20
+ result = { '_type' => object.class.to_s }
21
+ result['_id'] = read_attribute_for_indexing(:id) if has_attribute_for_indexing?(:id)
22
+ result['data'] = as_elastic_source unless only_meta
23
+ result
27
24
  end
28
25
 
29
- def as_elastic_document(only_data: false)
30
- data = {}.tap do |hash|
26
+ def as_elastic_source
27
+ {}.tap do |hash|
31
28
  fields.each do |field|
32
29
  value = read_attribute_for_indexing(field.name)
33
30
  value = field.prepare_value_for_index(value)
34
31
  hash[field.name] = value
35
32
  end
36
33
  end
37
-
38
- return data if only_data
39
-
40
- result = {
41
- '_type' => read_elastic_type,
42
- 'data' => data
43
- }
44
-
45
- read_elastic_id.tap { |id| result['_id'] = id unless id.nil? }
46
- result
47
34
  end
48
35
 
49
36
  private
@@ -1,4 +1,10 @@
1
1
  module Elastic
2
2
  class Error < StandardError
3
3
  end
4
+
5
+ class MissingIndexError < Error
6
+ end
7
+
8
+ class RolloverError < Error
9
+ end
4
10
  end
@@ -41,7 +41,7 @@ module Elastic::Fields
41
41
  end
42
42
 
43
43
  def prepare_value_for_index(_values)
44
- _values.map { |v| @index.new(v).as_elastic_document(only_data: true) }
44
+ _values.map { |v| @index.new(v).as_elastic_source }
45
45
  end
46
46
 
47
47
  def prepare_value_for_result(_values)
@@ -4,7 +4,8 @@ require "elastic/railties/ar_middleware"
4
4
  require "elastic/railties/configuration_extensions"
5
5
  require "elastic/railties/type_extensions"
6
6
  require "elastic/railties/query_extensions"
7
- # disabled for now: require "elastic/railties/indexing_job"
7
+ require "elastic/railties/jobs/indexing_job"
8
+ require "elastic/railties/jobs/deleting_job"
8
9
  require "elastic/railties/indexable_record"
9
10
 
10
11
  module Elastic
@@ -8,7 +8,7 @@ module Elastic::Railties
8
8
  _collection = _collection.send(scope) if scope
9
9
  _collection.find_each(&_block)
10
10
  elsif _collection.respond_to? :each
11
- ActiveRecord::Associations::Preloader.new.preload(_collection, *includes) if includes
11
+ ActiveRecord::Associations::Preloader.new.preload(_collection, includes) if includes
12
12
  _collection.each(&_block)
13
13
  else
14
14
  raise 'Elastic ActiveRecord importing is only supported for collection types'
@@ -18,32 +18,28 @@ module Elastic::Railties
18
18
  @constantized_index_class ||= index_class.constantize
19
19
  end
20
20
 
21
- def index(on: nil, unindex: true, delayed: false)
22
- raise NotImplementedError, 'delayed indexing not implemented' if delayed
21
+ def index(on: nil, unindex: true, delayed: true)
22
+ index_m, unindex_m = delayed ? [:index_later, :unindex_later] : [:index_now, :unindex_now]
23
23
 
24
24
  if on == :create
25
- index_on_create
25
+ after_create { public_send(index_m) }
26
26
  elsif on == :save
27
- index_on_save
27
+ after_save { public_send(index_m) }
28
28
  else
29
29
  raise ArgumentError, 'must provide an indexing target when calling index \
30
30
  (ie: `index on: :save`)'
31
31
  end
32
32
 
33
- unindex_on_destroy if unindex
34
- end
35
-
36
- def index_on_create(_options = {})
37
- after_create(_options) { index_now }
33
+ before_destroy { public_send(unindex_m) } if unindex
38
34
  end
35
+ end
39
36
 
40
- def index_on_save(_options = {})
41
- after_save(_options) { index_now }
42
- end
37
+ def index_later
38
+ self.class.constantized_index_class.index_later self
39
+ end
43
40
 
44
- def unindex_on_destroy(_options = {})
45
- before_destroy(_options) { unindex_now }
46
- end
41
+ def unindex_later
42
+ self.class.constantized_index_class.delete_later self
47
43
  end
48
44
 
49
45
  def index_now
@@ -0,0 +1,7 @@
1
+ module Elastic::Railties::Jobs
2
+ class DeletingJob < ActiveJob::Base
3
+ def perform(_type, _document)
4
+ _type.constantize.connector.delete _document
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ module Elastic::Railties::Jobs
2
+ class IndexingJob < ActiveJob::Base
3
+ def perform(_type, _document)
4
+ _type.constantize.connector.index _document
5
+ end
6
+ end
7
+ end
@@ -9,6 +9,22 @@ module Elastic::Railties
9
9
  # TODO: check target allows options
10
10
  pre_definition.middleware_options[:ar_collect_includes] = _includes
11
11
  end
12
+
13
+ def index_later(_object)
14
+ wrapped = new(_object)
15
+
16
+ Jobs::IndexingJob
17
+ .set(queue: Elastic.config.active_job_queue)
18
+ .perform_later(to_s, wrapped.as_elastic_document.as_json)
19
+ end
20
+
21
+ def delete_later(_object)
22
+ wrapped = new(_object)
23
+
24
+ Jobs::DeletingJob
25
+ .set(queue: Elastic.config.active_job_queue)
26
+ .perform_later(to_s, wrapped.as_elastic_document(only_meta: true).as_json)
27
+ end
12
28
  end
13
29
  end
14
30
  end
data/lib/elastic/type.rb CHANGED
@@ -78,20 +78,13 @@ module Elastic
78
78
  end
79
79
 
80
80
  def self.index(_object)
81
- new(_object).save
81
+ connector.index new(_object).as_elastic_document
82
+ self
82
83
  end
83
84
 
84
85
  def self.delete(_object)
85
- wrapper = new(_object)
86
- id = wrapper.read_elastic_id
87
- raise ArgumentError, 'index does not provide an id' if id.nil?
88
-
89
- connector.delete(
90
- wrapper.read_elastic_type,
91
- wrapper.read_elastic_id
92
- )
93
-
94
- nil
86
+ connector.delete new(_object).as_elastic_document(only_meta: true)
87
+ self
95
88
  end
96
89
 
97
90
  def self.query
@@ -107,11 +100,5 @@ module Elastic
107
100
  connector.refresh
108
101
  self
109
102
  end
110
-
111
- def save
112
- self.class.tap do |klass|
113
- klass.connector.index as_elastic_document
114
- end
115
- end
116
103
  end
117
104
  end
@@ -1,3 +1,3 @@
1
1
  module Elastic
2
- VERSION = "0.7.0"
2
+ VERSION = "0.8.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastic-rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-08-29 00:00:00.000000000 Z
11
+ date: 2016-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch
@@ -285,7 +285,8 @@ files:
285
285
  - lib/elastic/railties/ar_middleware.rb
286
286
  - lib/elastic/railties/configuration_extensions.rb
287
287
  - lib/elastic/railties/indexable_record.rb
288
- - lib/elastic/railties/indexing_job.rb
288
+ - lib/elastic/railties/jobs/deleting_job.rb
289
+ - lib/elastic/railties/jobs/indexing_job.rb
289
290
  - lib/elastic/railties/query_extensions.rb
290
291
  - lib/elastic/railties/rspec.rb
291
292
  - lib/elastic/railties/tasks/es.rake
@@ -1,8 +0,0 @@
1
- module Elastic::Railties
2
- class IndexingJob < ActiveJob::Base
3
- def perform(*_indexables)
4
- # TODO: use import for many indexables
5
- _indexables.each &:index_now
6
- end
7
- end
8
- end