elastic-rails 0.6.4 → 0.7.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 66b9bacfe2ed32b87930002d60d7f84d8c996c05
4
- data.tar.gz: e2d4f108fb27db7ec85afdb861f35a009db2db0f
3
+ metadata.gz: f9ab5fe73898a276646cbf7c3f9a564d3f7419e4
4
+ data.tar.gz: a90c322ea5f8226affff52f5646974ac42aebe5a
5
5
  SHA512:
6
- metadata.gz: 425e22896f8f062c17dd12ead7ec8f052cf3e6a0151c71c788cc55636f3a3db2351e1a4af8bf6af87b3a7441f13c65ebecef0da7925baac0ddc709aeceffa632
7
- data.tar.gz: 9e95d2d6e6ba9d85ae68ad88e241d61ba2558d45e20e1c939006eae783c8c7041f095eb18e2f9dc561f29a6271e7a5430188c252eaa9d81166f770338799212e
6
+ metadata.gz: 54b6f86610924e06e58690fa99e45d74dd2c18f5fd902985efe5ff6ab827117a44ceb6be50a8637f3dee085591f677514d706556907af15e2c2b2925ebc45980
7
+ data.tar.gz: 346536b41ffcaef70416b0afd92f2c54ee4b4658077a500cb71cda8c24fd572636d619eae0f4456873b8115c84dbb3db6a1ad12110755780c3663ba5709db417
data/README.md CHANGED
@@ -60,9 +60,19 @@ BikeIndex.must(brand: 'Trek', size: 'M').should(year: { gte: 2015 }).avg(:price)
60
60
  BikeIndex.must(origin: 'China').segment(:brand).each { |brand, bikes| }
61
61
  ```
62
62
 
63
+ migrate: remaps if necessary
64
+
65
+ reindex: attempts to rotate if the index already exists
63
66
 
64
67
  TODO: Write usage instructions here
65
68
 
69
+ ## Missing Features
70
+
71
+ These are some features that will be added in the future:
72
+
73
+ * Support for record deletion
74
+
75
+
66
76
  ## Development
67
77
 
68
78
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -66,11 +66,20 @@ module Elastic::Commands
66
66
  end
67
67
 
68
68
  def build_date_histogram(_options)
69
- Elastic::Nodes::Agg::DateHistogram.build(agg_name, @field, interval: _options[:interval])
69
+ Elastic::Nodes::Agg::DateHistogram.build(
70
+ agg_name,
71
+ @field,
72
+ interval: _options[:interval],
73
+ time_zone: _options[:time_zone]
74
+ )
70
75
  end
71
76
 
72
77
  def build_terms(_options)
73
- Elastic::Nodes::Agg::Terms.build(agg_name, @field, size: _options[:size])
78
+ Elastic::Nodes::Agg::Terms.build(
79
+ agg_name,
80
+ @field,
81
+ size: _options[:size]
82
+ )
74
83
  end
75
84
 
76
85
  def agg_name
@@ -0,0 +1,44 @@
1
+ module Elastic::Commands
2
+ class CompareMappings < Elastic::Support::Command.new(:current, :user)
3
# Returns the names of the user-declared fields whose properties do not
# match the properties currently stored for that field.
def perform
  mismatched = user_properties.reject do |field, property|
    compare_field_properties(current_properties[field], property)
  end

  mismatched.keys
end
8
+
9
+ private
10
+
11
# Flattened view of the `current` mapping as a Hash keyed by field path.
# Computed on first use and cached for later calls.
def current_properties
  return @current_properties if @current_properties

  @current_properties = Hash[flatten(current)]
end
14
+
15
# Flattened view of the `user` mapping as a Hash keyed by field path.
# Computed on first use and cached for later calls.
def user_properties
  return @user_properties if @user_properties

  @user_properties = Hash[flatten(user)]
end
18
+
19
# Turns a mapping hash into a list of [field_path, field_properties] pairs.
#
# Fields of type 'nested' are expanded recursively: their children are
# listed under "<path>." and the nested field itself is listed with every
# key except 'properties'.
#
# _raw    - Hash holding a 'properties' key (a missing key is treated as
#           having no fields).
# _prefix - path prepended to every field name produced by this call.
#
# Returns an Array of two-element Arrays.
def flatten(_raw, _prefix = '')
  (_raw['properties'] || {}).flat_map do |name, raw_field|
    path = _prefix + name

    if raw_field['type'] == 'nested'
      # The full path is carried down so that fields nested more than one
      # level deep keep their ancestors in the name.
      children = flatten(raw_field, path + '.')
      children << [path, raw_field.reject { |key, _| key == 'properties' }]
    else
      [[path, raw_field.dup]]
    end
  end
end
32
+
33
# Tells whether the stored properties of a field match the user-declared
# ones. A field that is not stored yet (nil) never matches.
#
# For stored fields of type 'date' the user properties are compared after
# filling in 'format' => 'dateOptionalTime' when the user gave no format.
def compare_field_properties(_current, _user)
  return false if _current.nil?

  expected =
    if _current['type'] == 'date'
      { 'format' => 'dateOptionalTime' }.merge(_user)
    else
      _user
    end

  _current == expected
end
43
+ end
44
+ end
@@ -3,14 +3,12 @@ module Elastic::Commands
3
3
  :index, collection: nil, batch_size: 10000, verbose: false
4
4
  )
5
5
  def perform
6
- index.adaptor.with_settings(refresh_interval: -1) do
7
- if collection.present?
8
- import_collection
9
- else
10
- targets.each { |target| import_target(target) }
11
- end
12
- flush
6
+ if collection.present?
7
+ import_collection
8
+ else
9
+ targets.each { |target| import_target(target) }
13
10
  end
11
+ flush
14
12
  end
15
13
 
16
14
  private
@@ -34,7 +32,7 @@ module Elastic::Commands
34
32
 
35
33
  def flush
36
34
  unless cache.empty?
37
- index.adaptor.bulk_index(cache)
35
+ index.connector.bulk_index(cache)
38
36
  log_flush(cache.size) if verbose
39
37
  cache.clear
40
38
  end
@@ -43,11 +41,11 @@ module Elastic::Commands
43
41
  def log_flush(_size)
44
42
  @total ||= 0
45
43
  @total += _size
46
- Elastic::Configuration.logger.info "Imported #{@total} documents"
44
+ Elastic.logger.info "Imported #{@total} documents"
47
45
  end
48
46
 
49
47
  def render_for_es(_object)
50
- index.new(_object).as_es_document
48
+ index.new(_object).as_elastic_document
51
49
  end
52
50
 
53
51
  def main_target
@@ -1,69 +1,62 @@
1
1
  module Elastic
2
- module Configuration
3
- DEFAULT = {
2
+ class Configuration
3
+ DEFAULTS = {
4
4
  host: '127.0.0.1',
5
5
  port: 9200,
6
6
  page_size: 20,
7
7
  coord_similarity: true,
8
- import_batch_size: 10_000
8
+ import_batch_size: 10_000,
9
+ whiny_indices: false,
10
+ api_client: nil, # set by method
11
+ logger: nil, # set by method
12
+ time_zone: nil # set by method
9
13
  }
10
14
 
11
- extend self
15
+ attr_accessor :host, :port, :api_client, :index, :page_size, :coord_similarity, :logger,
16
+ :import_batch_size, :whiny_indices, :time_zone
17
+
18
+ def initialize
19
+ assign_attributes DEFAULTS
20
+ end
12
21
 
13
22
  def reset
14
- @config = nil
15
- self
23
+ assign_attributes DEFAULTS
16
24
  end
17
25
 
18
- def configure(_options = nil, &_block)
19
- if _options.nil?
20
- _block.call self
21
- else
22
- @config = config.merge _options.symbolize_keys
23
- end
26
+ def assign_attributes(_options)
27
+ _options.each { |k, v| public_send("#{k}=", v) }
24
28
  self
25
29
  end
26
30
 
27
31
  def api_client
28
- config[:client] ||= load_api_client
32
+ @api_client || default_api_client
29
33
  end
30
34
 
31
- def index_name
32
- config[:index]
33
- end
34
-
35
- def indices_path
36
- 'app/indices'
37
- end
38
-
39
- def page_size
40
- @config[:page_size]
41
- end
42
-
43
- def coord_similarity
44
- @config[:coord_similarity]
35
+ def logger
36
+ @logger || default_logger
45
37
  end
46
38
 
47
- def logger
48
- @config[:logger] || default_logger
39
+ def time_zone
40
+ @time_zone || default_time_zone
49
41
  end
50
42
 
51
- def import_batch_size
52
- @config[:import_batch_size]
43
+ def time_zone=(_value)
44
+ _value = ActiveSupport::TimeZone.new(_value) if _value.is_a? String
45
+ @time_zone = _value
53
46
  end
54
47
 
55
48
  private
56
49
 
57
- def config
58
- @config ||= DEFAULT
50
+ def default_api_client
51
+ @default_api_client ||= Elasticsearch::Client.new host: @host, port: @port
59
52
  end
60
53
 
61
54
  def default_logger
62
55
  @default_logger ||= Logger.new(STDOUT)
63
56
  end
64
57
 
65
- def load_api_client
66
- Elasticsearch::Client.new host: config[:host], port: config[:port]
58
+ def default_time_zone
59
+ @default_time_zone ||= ActiveSupport::TimeZone.new('UTC')
67
60
  end
68
61
  end
69
62
  end
@@ -0,0 +1,253 @@
1
+ module Elastic::Core
2
# Gateway between one index definition and the Elasticsearch client
# returned by Elastic.config.api_client.
#
# Names used by this class:
#
# * "#{Elastic.config.index}_#{name}" is the alias used for reads.
# * the read alias followed by ".w" is the alias used for writes.
# * indices created here are named "#{read_alias}:#{role}:#{unix_time}".
class Connector
  # _name    - suffix appended to the configured index prefix.
  # _types   - type names; each one receives _mapping through put_mapping.
  # _mapping - mapping body sent to Elasticsearch and compared by #status.
  def initialize(_name, _types, _mapping)
    @name = _name
    @types = _types
    @mapping = _mapping
  end

  # Name of the read alias (memoized).
  def index_name
    @index_name ||= "#{Elastic.config.index}_#{@name}"
  end

  # Index name used by read operations (count, query, refresh).
  def read_index_name
    index_name
  end

  # Index name used by write operations. A per-thread override installed by
  # perform_optimized_write_on takes precedence over the write alias.
  def write_index_name
    Thread.current[write_index_thread_override] || write_index_alias
  end

  # Returns :not_available when the read alias does not resolve to an index,
  # :not_synchronized when the stored mapping differs from the configured
  # one, and :ready otherwise.
  def status
    actual_name = resolve_actual_index_name
    return :not_available if actual_name.nil?
    return :not_synchronized unless mapping_synchronized? actual_name
    :ready
  end

  # Deletes every index matching "#{index_name}:*". Always returns nil.
  def drop
    api.indices.delete index: "#{index_name}:*"
    nil
  end

  # Brings the index mapping up to date without moving documents.
  #
  # Creates the index and its aliases when it is not available; otherwise,
  # when the mapping is out of sync, puts the mapping on the existing index.
  #
  # Returns false when Elasticsearch rejects the mapping update with a
  # BadRequest, true in every other case.
  def remap
    case status
    when :not_available
      create_from_scratch
    when :not_synchronized
      begin
        setup_index_types resolve_actual_index_name
      rescue Elasticsearch::Transport::Transport::Errors::BadRequest
        return false
      end
    end

    true
  end

  # Tries #remap first; when that fails, rolls over to a new index and copies
  # the documents from the read alias into it.
  #
  # batch_size - documents per scroll page (defaults to default_batch_size).
  #
  # Always returns nil.
  def migrate(batch_size: nil)
    unless remap
      rollover do
        copy_documents(read_index_name, write_index_name, batch_size || default_batch_size)
      end
    end

    nil
  end

  # Indexes a single document given as a Hash with '_id', '_type' and 'data'.
  def index(_document)
    # TODO: validate document type

    api.index(
      index: write_index_name,
      id: _document['_id'],
      type: _document['_type'],
      body: _document['data']
    )
  end

  # Sends the documents in one bulk request, each wrapped in an 'index'
  # action. The request is retried on temporary errors.
  def bulk_index(_documents)
    # TODO: validate documents type

    body = _documents.map { |doc| { 'index' => doc } }

    retry_on_temporary_error('bulk indexing') do
      api.bulk(index: write_index_name, body: body)
    end
  end

  # Refreshes the read index.
  def refresh
    api.indices.refresh index: read_index_name
  end

  # Fetches a document by type and id using the write index name.
  def find(_type, _id)
    api.get(index: write_index_name, type: _type, id: _id)
  end

  # Deletes a document by type and id using the write index name.
  def delete(_type, _id)
    api.delete(index: write_index_name, type: _type, id: _id)
  end

  # Returns the 'count' entry of the count response for the read index.
  def count(query: nil, type: nil)
    api.count(index: read_index_name, type: type, body: query)['count']
  end

  # Runs a search against the read index and returns the raw response.
  def query(query: nil, type: nil)
    api.search(index: read_index_name, type: type, body: query)
  end

  # Replaces the index behind the aliases with a freshly created one.
  #
  # While the given block runs, the write alias points to a temporary index
  # and the current thread writes directly to the new index. On success both
  # aliases are moved to the new index and the previous index is deleted.
  # On failure the write alias is restored, the new index is deleted and the
  # original error is re-raised so the caller knows the rollover failed.
  # In both cases the documents collected by the temporary index are copied
  # to the current write index before the temporary index is deleted.
  def rollover(&_block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
    new_index = create_index_w_mapping
    tmp_index = create_index_w_mapping('tmp')
    actual_index = resolve_actual_index_name

    begin
      transfer_alias(write_index_alias, from: actual_index, to: tmp_index)

      perform_optimized_write_on(new_index, &_block)

      transfer_alias(index_name, from: actual_index, to: new_index)
      transfer_alias(write_index_alias, from: tmp_index, to: new_index)
      api.indices.delete index: actual_index if actual_index
    rescue
      # rollback, then let the failure reach the caller instead of hiding it
      transfer_alias(write_index_alias, from: tmp_index, to: actual_index)
      api.indices.delete index: new_index
      raise
    ensure
      # TODO: what would happen if the following fails? O.O
      copy_documents(tmp_index, write_index_name, small_batch_size)
      api.indices.delete index: tmp_index
      api.indices.refresh index: index_name
    end
  end

  private

  # Configured Elasticsearch client.
  def api
    Elastic.config.api_client
  end

  # Runs the block with the current thread writing to _index and with the
  # index refresh_interval set to -1. Afterwards the interval is set to '1s'
  # and the previous thread override is restored, even if the block raises.
  def perform_optimized_write_on(_index)
    old_index = Thread.current[write_index_thread_override]
    Thread.current[write_index_thread_override] = _index
    configure_index(_index, refresh_interval: -1)
    yield
  ensure
    configure_index(_index, refresh_interval: '1s')
    Thread.current[write_index_thread_override] = old_index
  end

  # Thread-local key that holds the write index override for this index.
  def write_index_thread_override
    "_elastic_#{index_name}_write_index"
  end

  # Name of the write alias (memoized).
  def write_index_alias
    @write_index_alias ||= "#{index_name}.w"
  end

  # First index name the read alias resolves to, or nil when the alias
  # lookup answers NotFound.
  def resolve_actual_index_name
    result = api.indices.get_alias(name: index_name)
    result.keys.first
  rescue Elasticsearch::Transport::Transport::Errors::NotFound
    nil
  end

  # Creates a new index named after the role and the current unix time,
  # waits for cluster status 'yellow', applies the mapping and returns the
  # new index name.
  def create_index_w_mapping(_role = 'main')
    new_name = "#{index_name}:#{_role}:#{Time.now.to_i}"
    api.indices.create index: new_name
    api.cluster.health wait_for_status: 'yellow'
    setup_index_types new_name
    new_name
  end

  # Creates a new index and points both the read and write aliases at it.
  def create_from_scratch
    new_index = create_index_w_mapping
    api.indices.update_aliases(
      body: {
        actions: [
          { add: { index: new_index, alias: index_name } },
          { add: { index: new_index, alias: write_index_alias } }
        ]
      }
    )
  end

  # True when every configured type has a stored mapping in _index and
  # CompareMappings reports no differences against the configured mapping.
  def mapping_synchronized?(_index)
    type_mappings = api.indices.get_mapping(index: _index)
    return false if type_mappings[_index].nil?
    type_mappings = type_mappings[_index]['mappings']

    @types.all? do |type|
      next false if type_mappings[type].nil?

      diff = Elastic::Commands::CompareMappings.for(
        current: type_mappings[type],
        user: @mapping
      )
      diff.empty?
    end
  end

  # Puts the configured mapping on _index once per configured type.
  def setup_index_types(_index)
    @types.each do |type|
      api.indices.put_mapping(index: _index, type: type, body: @mapping)
    end
  end

  # Moves _alias in a single update_aliases request. Either side may be nil,
  # in which case only the other action is sent.
  def transfer_alias(_alias, from: nil, to: nil)
    actions = []
    actions << { remove: { index: from, alias: _alias } } if from
    actions << { add: { index: to, alias: _alias } } if to
    api.indices.update_aliases body: { actions: actions }
  end

  # Copies every document of _from into _to, reading pages of _batch_size
  # documents through the scroll API and writing each page in bulk.
  def copy_documents(_from, _to, _batch_size)
    api.indices.refresh index: _from

    r = api.search(
      index: _from,
      body: { sort: ['_doc'] },
      scroll: '5m',
      size: _batch_size
    )

    count = 0
    until r['hits']['hits'].empty?
      count += r['hits']['hits'].count
      Elastic.logger.info "Copied #{count} docs"

      body = r['hits']['hits'].map { |h| { 'index' => transform_hit_to_doc(h) } }
      api.bulk(index: _to, body: body)

      r = api.scroll scroll: '5m', scroll_id: r['_scroll_id']
    end
  end

  # Applies index-level settings to _index.
  def configure_index(_index, _settings)
    api.indices.put_settings index: _index, body: { index: _settings }
  end

  # Converts a search hit into the document shape used for bulk indexing.
  def transform_hit_to_doc(_hit)
    { '_id' => _hit['_id'], '_type' => _hit['_type'], 'data' => _hit['_source'] }
  end

  # Page size used by #migrate when none is given.
  def default_batch_size
    1_000
  end

  # Page size used when draining the temporary index during #rollover.
  def small_batch_size
    500
  end

  # Runs the block and returns its value. On ServiceUnavailable or
  # GatewayTimeout the block is run again, up to `retries` extra times;
  # once the retries are exhausted the last error is re-raised.
  def retry_on_temporary_error(_action, retries: 3)
    return yield
  rescue Elasticsearch::Transport::Transport::Errors::ServiceUnavailable,
         Elasticsearch::Transport::Transport::Errors::GatewayTimeout => exc
    raise if retries <= 0

    Elastic.logger.warn("#{exc.class} error during '#{_action}', retrying!")
    retries -= 1
    retry
  end
end
253
+ end
@@ -38,10 +38,6 @@ module Elastic::Core
38
38
  @field_map.each_value
39
39
  end
40
40
 
41
- def expanded_field_names
42
- @field_map.map { |_, field| field.expanded_names }.flatten
43
- end
44
-
45
41
  def freeze
46
42
  return if frozen?
47
43
  cache_targets
@@ -55,7 +55,7 @@ module Elastic::Core
55
55
  query = build_base_query
56
56
 
57
57
  if !grouped?
58
- query.size = (@config.limit || Elastic::Configuration.page_size)
58
+ query.size = (@config.limit || Elastic.config.page_size)
59
59
  query.offset = @config.offset
60
60
  query = sort_node(query)
61
61
  else
@@ -16,7 +16,17 @@ module Elastic::Core
16
16
  @definition.fields
17
17
  end
18
18
 
19
- def as_es_document(only_data: false)
19
+ def read_elastic_type
20
+ object.class.to_s
21
+ end
22
+
23
+ def read_elastic_id
24
+ if has_attribute_for_indexing?(:id)
25
+ read_attribute_for_indexing(:id)
26
+ end
27
+ end
28
+
29
+ def as_elastic_document(only_data: false)
20
30
  data = {}.tap do |hash|
21
31
  fields.each do |field|
22
32
  value = read_attribute_for_indexing(field.name)
@@ -27,8 +37,12 @@ module Elastic::Core
27
37
 
28
38
  return data if only_data
29
39
 
30
- result = { '_type' => object.class.to_s, 'data' => data }
31
- result['_id'] = read_attribute_for_indexing(:id) if has_attribute_for_indexing?(:id)
40
+ result = {
41
+ '_type' => read_elastic_type,
42
+ 'data' => data
43
+ }
44
+
45
+ read_elastic_id.tap { |id| result['_id'] = id unless id.nil? }
32
46
  result
33
47
  end
34
48
 
@@ -13,9 +13,9 @@ module Elastic::Datatypes
13
13
  def prepare_value_for_result(_value)
14
14
  case _value
15
15
  when ::String
16
- ::Time.parse(_value).utc.to_date
16
+ time_zone.parse(_value).to_date
17
17
  when ::Integer
18
- ::Time.at(_value / 1000).utc.to_date
18
+ time_zone.at(_value / 1000).to_date
19
19
  else
20
20
  _value
21
21
  end
@@ -26,7 +26,13 @@ module Elastic::Datatypes
26
26
  end
27
27
 
28
28
  def date_histogram_aggregation_defaults
29
- { interval: '1w' }
29
+ { interval: '1w', time_zone: time_zone }
30
+ end
31
+
32
+ private
33
+
34
+ def time_zone
35
+ @time_zone ||= ActiveSupport::TimeZone.new('UTC') # dates are always UTC
30
36
  end
31
37
  end
32
38
  end
@@ -10,9 +10,9 @@ module Elastic::Datatypes
10
10
  # TODO: set timezone
11
11
  case _value
12
12
  when ::String
13
- ::Time.parse(_value)
13
+ time_zone.parse(_value)
14
14
  when ::Integer
15
- ::Time.at(_value / 1000)
15
+ time_zone.at(_value / 1000)
16
16
  else
17
17
  _value
18
18
  end
@@ -23,7 +23,14 @@ module Elastic::Datatypes
23
23
  end
24
24
 
25
25
  def date_histogram_aggregation_defaults
26
- { interval: '1h' }
26
+ { interval: '1h', time_zone: time_zone }
27
+ end
28
+
29
+ private
30
+
31
+ def time_zone
32
+ # TODO: user_options[:timezone]
33
+ Elastic.config.time_zone
27
34
  end
28
35
  end
29
36
  end
@@ -0,0 +1,4 @@
1
module Elastic
  # Error type declared by this library; a plain StandardError subclass.
  class Error < StandardError; end
end
@@ -11,10 +11,6 @@ module Elastic::Fields
11
11
  # does nothing
12
12
  end
13
13
 
14
- def expanded_names
15
- [@name] + @index.definition.expanded_field_names.map { |n| @name + '.' + n }
16
- end
17
-
18
14
  def validate
19
15
  nil
20
16
  end
@@ -45,7 +41,7 @@ module Elastic::Fields
45
41
  end
46
42
 
47
43
  def prepare_value_for_index(_values)
48
- _values.map { |v| @index.new(v).as_es_document(only_data: true) }
44
+ _values.map { |v| @index.new(v).as_elastic_document(only_data: true) }
49
45
  end
50
46
 
51
47
  def prepare_value_for_result(_values)
@@ -35,10 +35,6 @@ module Elastic::Fields
35
35
  nil
36
36
  end
37
37
 
38
- def expanded_names
39
- [@name]
40
- end
41
-
42
38
  def needs_inference?
43
39
  mapping_inference_enabled? && !@options.key?(:type)
44
40
  end
@@ -3,21 +3,27 @@ module Elastic::Nodes::Agg
3
3
  include Elastic::Nodes::Concerns::Aggregable
4
4
  include Elastic::Nodes::Concerns::Bucketed
5
5
 
6
- def self.build(_name, _field, interval: nil)
6
+ def self.build(_name, _field, interval: nil, time_zone: nil)
7
7
  super(_name).tap do |node|
8
8
  node.field = _field
9
9
  node.interval = interval
10
+ node.time_zone = time_zone
10
11
  end
11
12
  end
12
13
 
13
14
  attr_accessor :field
14
- attr_reader :interval
15
+ attr_reader :interval, :time_zone
15
16
 
16
17
  def interval=(_value)
17
18
  raise ArgumentError, 'invalid interval' if _value && !valid_interval?(_value)
18
19
  @interval = _value
19
20
  end
20
21
 
22
+ def time_zone=(_value)
23
+ raise ArgumentError, 'invalid time_zone' if _value && !_value.is_a?(ActiveSupport::TimeZone)
24
+ @time_zone = _value
25
+ end
26
+
21
27
  def clone
22
28
  prepare_clone(super)
23
29
  end
@@ -29,6 +35,7 @@ module Elastic::Nodes::Agg
29
35
  def render(_options = {})
30
36
  hash = { 'field' => @field.to_s }
31
37
  hash['interval'] = @interval if @interval
38
+ hash['time_zone'] = @time_zone.formatted_offset if @time_zone
32
39
 
33
40
  render_aggs({ 'date_histogram' => hash }, _options)
34
41
  end
@@ -38,6 +45,7 @@ module Elastic::Nodes::Agg
38
45
  def prepare_clone(_clone)
39
46
  _clone.field = @field
40
47
  _clone.interval = @interval
48
+ _clone.time_zone = @time_zone
41
49
  _clone
42
50
  end
43
51