elastic-rails 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f9ab5fe73898a276646cbf7c3f9a564d3f7419e4
4
- data.tar.gz: a90c322ea5f8226affff52f5646974ac42aebe5a
3
+ metadata.gz: 7d00b5b5c6f5a1bc7701c5d635198e88094e65ea
4
+ data.tar.gz: 09550318bf8064a884b097576ff2aa15d6702daa
5
5
  SHA512:
6
- metadata.gz: 54b6f86610924e06e58690fa99e45d74dd2c18f5fd902985efe5ff6ab827117a44ceb6be50a8637f3dee085591f677514d706556907af15e2c2b2925ebc45980
7
- data.tar.gz: 346536b41ffcaef70416b0afd92f2c54ee4b4658077a500cb71cda8c24fd572636d619eae0f4456873b8115c84dbb3db6a1ad12110755780c3663ba5709db417
6
+ metadata.gz: 5be452213033d0d2fb6a868d65709ed68260cfffb847566dd3ac2094a571c69aba9b6b75a06455f85091d5a35a12269cab78b68435efd355af40199cb1e8f4c1
7
+ data.tar.gz: 03b952d5219a6c47606db579f1b06c73c8a9c326949ce12da855f995c9053adfdd458b5e1570ae71296446a8412b4a4485c9615d3dc4d8800ca0aedc9a95ced8
data/README.md CHANGED
@@ -60,7 +60,7 @@ BikeIndex.must(brand: 'Trek', size: 'M').should(year: { gte: 2015 }).avg(:price)
60
60
  BikeIndex.must(origin: 'China').segment(:brand).each { |brand, bikes| }
61
61
  ```
62
62
 
63
- migrate: remaps if necessary
63
+ migrate: remaps if necessary
64
64
 
65
65
  reindex: attempts to rotate if index already exist
66
66
 
@@ -70,8 +70,6 @@ TODO: Write usage instructions here
70
70
 
71
71
  These are some features that will be added in the future:
72
72
 
73
- * Support for record deletion
74
-
75
73
 
76
74
  ## Development
77
75
 
@@ -1,23 +1,16 @@
1
1
  module Elastic::Core
2
2
  class Connector
3
- def initialize(_name, _types, _mapping)
3
+ def initialize(_name, _types, _mapping, settling_time: 10.seconds)
4
4
  @name = _name
5
5
  @types = _types
6
6
  @mapping = _mapping
7
+ @settling_time = settling_time
7
8
  end
8
9
 
9
10
  def index_name
10
11
  @index_name ||= "#{Elastic.config.index}_#{@name}"
11
12
  end
12
13
 
13
- def read_index_name
14
- index_name
15
- end
16
-
17
- def write_index_name
18
- Thread.current[write_index_thread_override] || write_index_alias
19
- end
20
-
21
14
  def status
22
15
  actual_name = resolve_actual_index_name
23
16
  return :not_available if actual_name.nil?
@@ -47,8 +40,8 @@ module Elastic::Core
47
40
 
48
41
  def migrate(batch_size: nil)
49
42
  unless remap
50
- rollover do
51
- copy_documents(read_index_name, write_index_name, batch_size || default_batch_size)
43
+ rollover do |new_index|
44
+ copy_to new_index, batch_size: batch_size
52
45
  end
53
46
  end
54
47
 
@@ -57,84 +50,130 @@ module Elastic::Core
57
50
 
58
51
  def index(_document)
59
52
  # TODO: validate document type
53
+ operations = write_indices.map do |write_index|
54
+ { 'index' => _document.merge('_index' => write_index) }
55
+ end
60
56
 
61
- api.index(
62
- index: write_index_name,
63
- id: _document['_id'],
64
- type: _document['_type'],
65
- body: _document['data']
66
- )
57
+ api.bulk(body: operations)
67
58
  end
68
59
 
69
60
  def bulk_index(_documents)
70
61
  # TODO: validate documents type
71
-
72
62
  body = _documents.map { |doc| { 'index' => doc } }
73
63
 
74
- retry_on_temporary_error('bulk indexing') do
75
- api.bulk(index: write_index_name, body: body)
64
+ write_indices.each do |write_index|
65
+ retry_on_temporary_error('bulk indexing') do
66
+ api.bulk(index: write_index, body: body)
67
+ end
76
68
  end
77
69
  end
78
70
 
79
- def refresh
80
- api.indices.refresh index: read_index_name
71
+ def delete(_document)
72
+ raise ArgumentError, 'document must provide an id' unless _document['_id']
73
+ raise ArgumentError, 'document must provide a type' unless _document['_type']
74
+
75
+ write_index, rolling_index = write_indices
76
+
77
+ operations = [{ 'delete' => _document.merge('_index' => write_index) }]
78
+
79
+ if rolling_index
80
+ operations << {
81
+ 'index' => _document.merge(
82
+ '_index' => rolling_index,
83
+ 'data' => { '_mark_for_deletion' => true }
84
+ )
85
+ }
86
+ end
87
+
88
+ api.bulk(body: operations)
81
89
  end
82
90
 
83
- def find(_type, _id)
84
- api.get(index: write_index_name, type: _type, id: _id)
91
+ def refresh
92
+ api.indices.refresh index: index_name
85
93
  end
86
94
 
87
- def delete(_type, _id)
88
- api.delete(index: write_index_name, type: _type, id: _id)
95
+ def find(_type, _id)
96
+ api.get(index: index_name, type: _type, id: _id)
89
97
  end
90
98
 
91
99
  def count(query: nil, type: nil)
92
- api.count(index: read_index_name, type: type, body: query)['count']
100
+ api.count(index: index_name, type: type, body: query)['count']
93
101
  end
94
102
 
95
103
  def query(query: nil, type: nil)
96
- api.search(index: read_index_name, type: type, body: query)
104
+ api.search(index: index_name, type: type, body: query)
97
105
  end
98
106
 
99
- def rollover(&_block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
107
+ def rollover(&_block) # rubocop:disable Metrics/MethodLength
108
+ actual_index, rolling_index = resolve_write_indices
109
+
110
+ unless rolling_index.nil?
111
+ raise Elastic::RolloverError, 'rollover process already in progress'
112
+ end
113
+
100
114
  new_index = create_index_w_mapping
101
- tmp_index = create_index_w_mapping('tmp')
102
- actual_index = resolve_actual_index_name
103
115
 
104
116
  begin
105
- transfer_alias(write_index_alias, from: actual_index, to: tmp_index)
106
-
117
+ transfer_alias(write_index_alias, to: new_index)
118
+ wait_for_index_to_stabilize
107
119
  perform_optimized_write_on(new_index, &_block)
108
-
120
+ delete_marked_for_deletion new_index
109
121
  transfer_alias(index_name, from: actual_index, to: new_index)
110
- transfer_alias(write_index_alias, from: tmp_index, to: new_index)
111
- api.indices.delete index: actual_index if actual_index
122
+ transfer_alias(write_index_alias, from: actual_index)
123
+ wait_for_index_to_stabilize
124
+ api.indices.delete index: actual_index
112
125
  rescue
113
- transfer_alias(write_index_alias, from: tmp_index, to: actual_index)
114
126
  api.indices.delete index: new_index
115
- ensure
116
- # rollback
117
- # TODO: what would happen if the following fails? O.O
118
- copy_documents(tmp_index, write_index_name, small_batch_size)
119
- api.indices.delete index: tmp_index
120
- api.indices.refresh index: index_name
127
+ raise
128
+ end
129
+ end
130
+
131
+ def copy_to(_to, batch_size: nil) # rubocop:disable Metrics/AbcSize
132
+ api.indices.refresh index: index_name
133
+
134
+ r = api.search(
135
+ index: index_name,
136
+ body: { sort: ['_doc'] },
137
+ scroll: '5m',
138
+ size: batch_size || default_batch_size
139
+ )
140
+
141
+ count = 0
142
+ while !r['hits']['hits'].empty?
143
+ count += r['hits']['hits'].count
144
+ Elastic.logger.info "Copied #{count} docs"
145
+
146
+ body = r['hits']['hits'].map { |h| transform_hit_to_create(h) }
147
+ api.bulk(index: _to, body: body)
148
+
149
+ r = api.scroll scroll: '5m', scroll_id: r['_scroll_id']
121
150
  end
122
151
  end
123
152
 
124
153
  private
125
154
 
155
+ def wait_for_index_to_stabilize
156
+ return if @settling_time == 0
157
+ Elastic.logger.info "Waiting #{@settling_time * 1.2}s for write indices to stabilize ..."
158
+ sleep(@settling_time * 1.2)
159
+ end
160
+
126
161
  def api
127
162
  Elastic.config.api_client
128
163
  end
129
164
 
165
+ def write_indices
166
+ Thread.current[write_index_thread_override] || resolve_write_indices
167
+ end
168
+
130
169
  def perform_optimized_write_on(_index)
131
- old_index = Thread.current[write_index_thread_override]
132
- Thread.current[write_index_thread_override] = _index
170
+ old_indices = Thread.current[write_index_thread_override]
171
+ Thread.current[write_index_thread_override] = [_index]
133
172
  configure_index(_index, refresh_interval: -1)
134
- yield
173
+ yield _index
135
174
  ensure
136
175
  configure_index(_index, refresh_interval: '1s')
137
- Thread.current[write_index_thread_override] = old_index
176
+ Thread.current[write_index_thread_override] = old_indices
138
177
  end
139
178
 
140
179
  def write_index_thread_override
@@ -142,7 +181,26 @@ module Elastic::Core
142
181
  end
143
182
 
144
183
  def write_index_alias
145
- @write_index_alias = "#{index_name}.w"
184
+ @write_index_alias ||= "#{index_name}.w"
185
+ end
186
+
187
+ def resolve_write_indices
188
+ @write_indices = nil if write_indices_expired?
189
+ @write_indices ||= begin
190
+ result = api.indices.get_alias(name: write_index_alias)
191
+ @write_indices_expiration = @settling_time.from_now
192
+ result.keys.sort # lower timestamp first (actual)
193
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
194
+ raise Elastic::MissingIndexError, 'index does not exist, call migrate first'
195
+ end
196
+ end
197
+
198
+ def delete_marked_for_deletion(_index)
199
+ api.delete_by_query(index: _index, body: { query: { term: { _mark_for_deletion: true } } })
200
+ end
201
+
202
+ def write_indices_expired?
203
+ @write_indices_expiration && @write_indices_expiration < Time.current
146
204
  end
147
205
 
148
206
  def resolve_actual_index_name
@@ -152,8 +210,8 @@ module Elastic::Core
152
210
  nil
153
211
  end
154
212
 
155
- def create_index_w_mapping(_role = 'main')
156
- new_name = "#{index_name}:#{_role}:#{Time.now.to_i}"
213
+ def create_index_w_mapping
214
+ new_name = "#{index_name}:#{Time.now.to_i}"
157
215
  api.indices.create index: new_name
158
216
  api.cluster.health wait_for_status: 'yellow'
159
217
  setup_index_types new_name
@@ -201,44 +259,24 @@ module Elastic::Core
201
259
  api.indices.update_aliases body: { actions: actions }
202
260
  end
203
261
 
204
- def copy_documents(_from, _to, _batch_size)
205
- api.indices.refresh index: _from
206
-
207
- r = api.search(
208
- index: _from,
209
- body: { sort: ['_doc'] },
210
- scroll: '5m',
211
- size: _batch_size
212
- )
213
-
214
- count = 0
215
- while !r['hits']['hits'].empty?
216
- count += r['hits']['hits'].count
217
- Elastic.logger.info "Copied #{count} docs"
218
-
219
- body = r['hits']['hits'].map { |h| { 'index' => transform_hit_to_doc(h) } }
220
- api.bulk(index: _to, body: body)
221
-
222
- r = api.scroll scroll: '5m', scroll_id: r['_scroll_id']
223
- end
224
- end
225
-
226
262
  def configure_index(_index, _settings)
227
263
  api.indices.put_settings index: _index, body: { index: _settings }
228
264
  end
229
265
 
230
- def transform_hit_to_doc(_hit)
231
- { '_id' => _hit['_id'], '_type' => _hit['_type'], 'data' => _hit['_source'] }
266
+ def transform_hit_to_create(_hit)
267
+ {
268
+ 'create' => {
269
+ '_id' => _hit['_id'],
270
+ '_type' => _hit['_type'],
271
+ 'data' => _hit['_source']
272
+ }
273
+ }
232
274
  end
233
275
 
234
276
  def default_batch_size
235
277
  1_000
236
278
  end
237
279
 
238
- def small_batch_size
239
- 500
240
- end
241
-
242
280
  def retry_on_temporary_error(_action, retries: 3)
243
281
  return yield
244
282
  rescue Elasticsearch::Transport::Transport::Errors::ServiceUnavailable,
@@ -16,34 +16,21 @@ module Elastic::Core
16
16
  @definition.fields
17
17
  end
18
18
 
19
- def read_elastic_type
20
- object.class.to_s
21
- end
22
-
23
- def read_elastic_id
24
- if has_attribute_for_indexing?(:id)
25
- read_attribute_for_indexing(:id)
26
- end
19
+ def as_elastic_document(only_meta: false)
20
+ result = { '_type' => object.class.to_s }
21
+ result['_id'] = read_attribute_for_indexing(:id) if has_attribute_for_indexing?(:id)
22
+ result['data'] = as_elastic_source unless only_meta
23
+ result
27
24
  end
28
25
 
29
- def as_elastic_document(only_data: false)
30
- data = {}.tap do |hash|
26
+ def as_elastic_source
27
+ {}.tap do |hash|
31
28
  fields.each do |field|
32
29
  value = read_attribute_for_indexing(field.name)
33
30
  value = field.prepare_value_for_index(value)
34
31
  hash[field.name] = value
35
32
  end
36
33
  end
37
-
38
- return data if only_data
39
-
40
- result = {
41
- '_type' => read_elastic_type,
42
- 'data' => data
43
- }
44
-
45
- read_elastic_id.tap { |id| result['_id'] = id unless id.nil? }
46
- result
47
34
  end
48
35
 
49
36
  private
@@ -1,4 +1,10 @@
1
1
  module Elastic
2
2
  class Error < StandardError
3
3
  end
4
+
5
+ class MissingIndexError < Error
6
+ end
7
+
8
+ class RolloverError < Error
9
+ end
4
10
  end
@@ -41,7 +41,7 @@ module Elastic::Fields
41
41
  end
42
42
 
43
43
  def prepare_value_for_index(_values)
44
- _values.map { |v| @index.new(v).as_elastic_document(only_data: true) }
44
+ _values.map { |v| @index.new(v).as_elastic_source }
45
45
  end
46
46
 
47
47
  def prepare_value_for_result(_values)
@@ -4,7 +4,8 @@ require "elastic/railties/ar_middleware"
4
4
  require "elastic/railties/configuration_extensions"
5
5
  require "elastic/railties/type_extensions"
6
6
  require "elastic/railties/query_extensions"
7
- # disabled for now: require "elastic/railties/indexing_job"
7
+ require "elastic/railties/jobs/indexing_job"
8
+ require "elastic/railties/jobs/deleting_job"
8
9
  require "elastic/railties/indexable_record"
9
10
 
10
11
  module Elastic
@@ -8,7 +8,7 @@ module Elastic::Railties
8
8
  _collection = _collection.send(scope) if scope
9
9
  _collection.find_each(&_block)
10
10
  elsif _collection.respond_to? :each
11
- ActiveRecord::Associations::Preloader.new.preload(_collection, *includes) if includes
11
+ ActiveRecord::Associations::Preloader.new.preload(_collection, includes) if includes
12
12
  _collection.each(&_block)
13
13
  else
14
14
  raise 'Elastic ActiveRecord importing is only supported for collection types'
@@ -18,32 +18,28 @@ module Elastic::Railties
18
18
  @constantized_index_class ||= index_class.constantize
19
19
  end
20
20
 
21
- def index(on: nil, unindex: true, delayed: false)
22
- raise NotImplementedError, 'delayed indexing not implemented' if delayed
21
+ def index(on: nil, unindex: true, delayed: true)
22
+ index_m, unindex_m = delayed ? [:index_later, :unindex_later] : [:index_now, :unindex_now]
23
23
 
24
24
  if on == :create
25
- index_on_create
25
+ after_create { public_send(index_m) }
26
26
  elsif on == :save
27
- index_on_save
27
+ after_save { public_send(index_m) }
28
28
  else
29
29
  raise ArgumentError, 'must provide an indexing target when calling index \
30
30
  (ie: `index on: :save`)'
31
31
  end
32
32
 
33
- unindex_on_destroy if unindex
34
- end
35
-
36
- def index_on_create(_options = {})
37
- after_create(_options) { index_now }
33
+ before_destroy { public_send(unindex_m) } if unindex
38
34
  end
35
+ end
39
36
 
40
- def index_on_save(_options = {})
41
- after_save(_options) { index_now }
42
- end
37
+ def index_later
38
+ self.class.constantized_index_class.index_later self
39
+ end
43
40
 
44
- def unindex_on_destroy(_options = {})
45
- before_destroy(_options) { unindex_now }
46
- end
41
+ def unindex_later
42
+ self.class.constantized_index_class.delete_later self
47
43
  end
48
44
 
49
45
  def index_now
@@ -0,0 +1,7 @@
1
+ module Elastic::Railties::Jobs
2
+ class DeletingJob < ActiveJob::Base
3
+ def perform(_type, _document)
4
+ _type.constantize.connector.delete _document
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,7 @@
1
+ module Elastic::Railties::Jobs
2
+ class IndexingJob < ActiveJob::Base
3
+ def perform(_type, _document)
4
+ _type.constantize.connector.index _document
5
+ end
6
+ end
7
+ end
@@ -9,6 +9,22 @@ module Elastic::Railties
9
9
  # TODO: check target allows options
10
10
  pre_definition.middleware_options[:ar_collect_includes] = _includes
11
11
  end
12
+
13
+ def index_later(_object)
14
+ wrapped = new(_object)
15
+
16
+ Jobs::IndexingJob
17
+ .set(queue: Elastic.config.active_job_queue)
18
+ .perform_later(to_s, wrapped.as_elastic_document.as_json)
19
+ end
20
+
21
+ def delete_later(_object)
22
+ wrapped = new(_object)
23
+
24
+ Jobs::DeletingJob
25
+ .set(queue: Elastic.config.active_job_queue)
26
+ .perform_later(to_s, wrapped.as_elastic_document(only_meta: true).as_json)
27
+ end
12
28
  end
13
29
  end
14
30
  end
data/lib/elastic/type.rb CHANGED
@@ -78,20 +78,13 @@ module Elastic
78
78
  end
79
79
 
80
80
  def self.index(_object)
81
- new(_object).save
81
+ connector.index new(_object).as_elastic_document
82
+ self
82
83
  end
83
84
 
84
85
  def self.delete(_object)
85
- wrapper = new(_object)
86
- id = wrapper.read_elastic_id
87
- raise ArgumentError, 'index does not provide an id' if id.nil?
88
-
89
- connector.delete(
90
- wrapper.read_elastic_type,
91
- wrapper.read_elastic_id
92
- )
93
-
94
- nil
86
+ connector.delete new(_object).as_elastic_document(only_meta: true)
87
+ self
95
88
  end
96
89
 
97
90
  def self.query
@@ -107,11 +100,5 @@ module Elastic
107
100
  connector.refresh
108
101
  self
109
102
  end
110
-
111
- def save
112
- self.class.tap do |klass|
113
- klass.connector.index as_elastic_document
114
- end
115
- end
116
103
  end
117
104
  end
@@ -1,3 +1,3 @@
1
1
  module Elastic
2
- VERSION = "0.7.0"
2
+ VERSION = "0.8.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: elastic-rails
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.7.0
4
+ version: 0.8.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ignacio Baixas
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2016-08-29 00:00:00.000000000 Z
11
+ date: 2016-08-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: elasticsearch
@@ -285,7 +285,8 @@ files:
285
285
  - lib/elastic/railties/ar_middleware.rb
286
286
  - lib/elastic/railties/configuration_extensions.rb
287
287
  - lib/elastic/railties/indexable_record.rb
288
- - lib/elastic/railties/indexing_job.rb
288
+ - lib/elastic/railties/jobs/deleting_job.rb
289
+ - lib/elastic/railties/jobs/indexing_job.rb
289
290
  - lib/elastic/railties/query_extensions.rb
290
291
  - lib/elastic/railties/rspec.rb
291
292
  - lib/elastic/railties/tasks/es.rake
@@ -1,8 +0,0 @@
1
- module Elastic::Railties
2
- class IndexingJob < ActiveJob::Base
3
- def perform(*_indexables)
4
- # TODO: use import for many indexables
5
- _indexables.each &:index_now
6
- end
7
- end
8
- end