elastic-rails 0.6.4 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 66b9bacfe2ed32b87930002d60d7f84d8c996c05
4
- data.tar.gz: e2d4f108fb27db7ec85afdb861f35a009db2db0f
3
+ metadata.gz: f9ab5fe73898a276646cbf7c3f9a564d3f7419e4
4
+ data.tar.gz: a90c322ea5f8226affff52f5646974ac42aebe5a
5
5
  SHA512:
6
- metadata.gz: 425e22896f8f062c17dd12ead7ec8f052cf3e6a0151c71c788cc55636f3a3db2351e1a4af8bf6af87b3a7441f13c65ebecef0da7925baac0ddc709aeceffa632
7
- data.tar.gz: 9e95d2d6e6ba9d85ae68ad88e241d61ba2558d45e20e1c939006eae783c8c7041f095eb18e2f9dc561f29a6271e7a5430188c252eaa9d81166f770338799212e
6
+ metadata.gz: 54b6f86610924e06e58690fa99e45d74dd2c18f5fd902985efe5ff6ab827117a44ceb6be50a8637f3dee085591f677514d706556907af15e2c2b2925ebc45980
7
+ data.tar.gz: 346536b41ffcaef70416b0afd92f2c54ee4b4658077a500cb71cda8c24fd572636d619eae0f4456873b8115c84dbb3db6a1ad12110755780c3663ba5709db417
data/README.md CHANGED
@@ -60,9 +60,19 @@ BikeIndex.must(brand: 'Trek', size: 'M').should(year: { gte: 2015 }).avg(:price)
60
60
  BikeIndex.must(origin: 'China').segment(:brand).each { |brand, bikes| }
61
61
  ```
62
62
 
63
+ migrate: remaps the index if necessary
64
+
65
+ reindex: attempts to rotate the index if it already exists
63
66
 
64
67
  TODO: Write usage instructions here
65
68
 
69
+ ## Missing Features
70
+
71
+ These are some features that will be added in the future:
72
+
73
+ * Support for record deletion
74
+
75
+
66
76
  ## Development
67
77
 
68
78
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
@@ -66,11 +66,20 @@ module Elastic::Commands
66
66
  end
67
67
 
68
68
  def build_date_histogram(_options)
69
- Elastic::Nodes::Agg::DateHistogram.build(agg_name, @field, interval: _options[:interval])
69
+ Elastic::Nodes::Agg::DateHistogram.build(
70
+ agg_name,
71
+ @field,
72
+ interval: _options[:interval],
73
+ time_zone: _options[:time_zone]
74
+ )
70
75
  end
71
76
 
72
77
  def build_terms(_options)
73
- Elastic::Nodes::Agg::Terms.build(agg_name, @field, size: _options[:size])
78
+ Elastic::Nodes::Agg::Terms.build(
79
+ agg_name,
80
+ @field,
81
+ size: _options[:size]
82
+ )
74
83
  end
75
84
 
76
85
  def agg_name
@@ -0,0 +1,44 @@
1
module Elastic::Commands
  # Compares the mapping currently stored for a type (`current`) against the
  # mapping declared by the user (`user`) and reports the fields that differ.
  #
  # Both mappings are read through string keys ('properties', 'type'), so they
  # are presumably the string-keyed hashes returned/accepted by the
  # Elasticsearch mapping API -- confirm against callers.
  class CompareMappings < Elastic::Support::Command.new(:current, :user)
    # @return [Array<String>] dotted names of the user-declared fields that are
    #   missing from, or defined differently in, the current mapping. Empty
    #   when every user field matches.
    def perform
      user_properties.reject do |field, property|
        compare_field_properties(current_properties[field], property)
      end.keys
    end

    private

    # Flattened view of the current mapping: { 'dotted.name' => options }.
    def current_properties
      @current_properties ||= Hash[flatten(current)]
    end

    # Flattened view of the user mapping: { 'dotted.name' => options }.
    def user_properties
      @user_properties ||= Hash[flatten(user)]
    end

    # Turns a (possibly nested) mapping into a list of [dotted_name, options]
    # pairs. Nested fields contribute one pair per child plus one pair for the
    # nested field itself (without its 'properties' entry).
    #
    # A mapping without a 'properties' key is treated as having no fields.
    def flatten(_raw, _prefix = '')
      (_raw['properties'] || {}).flat_map do |name, raw_field|
        full_name = _prefix + name

        if raw_field['type'] == 'nested'
          # Carry the accumulated prefix down so fields nested more than one
          # level deep keep their full dotted path.
          children = flatten(raw_field, full_name + '.')
          children << [full_name, raw_field.reject { |key, _| key == 'properties' }]
        else
          [[full_name, raw_field.dup]]
        end
      end
    end

    # @return [Boolean] true when the stored field definition matches the user
    #   definition. A missing stored field never matches.
    def compare_field_properties(_current, _user)
      return false if _current.nil?

      # For date fields a 'format' of 'dateOptionalTime' is assumed when the
      # user definition does not provide one.
      expected = _current['type'] == 'date' ? { 'format' => 'dateOptionalTime' }.merge(_user) : _user
      _current == expected
    end
  end
end
@@ -3,14 +3,12 @@ module Elastic::Commands
3
3
  :index, collection: nil, batch_size: 10000, verbose: false
4
4
  )
5
5
  def perform
6
- index.adaptor.with_settings(refresh_interval: -1) do
7
- if collection.present?
8
- import_collection
9
- else
10
- targets.each { |target| import_target(target) }
11
- end
12
- flush
6
+ if collection.present?
7
+ import_collection
8
+ else
9
+ targets.each { |target| import_target(target) }
13
10
  end
11
+ flush
14
12
  end
15
13
 
16
14
  private
@@ -34,7 +32,7 @@ module Elastic::Commands
34
32
 
35
33
  def flush
36
34
  unless cache.empty?
37
- index.adaptor.bulk_index(cache)
35
+ index.connector.bulk_index(cache)
38
36
  log_flush(cache.size) if verbose
39
37
  cache.clear
40
38
  end
@@ -43,11 +41,11 @@ module Elastic::Commands
43
41
  def log_flush(_size)
44
42
  @total ||= 0
45
43
  @total += _size
46
- Elastic::Configuration.logger.info "Imported #{@total} documents"
44
+ Elastic.logger.info "Imported #{@total} documents"
47
45
  end
48
46
 
49
47
  def render_for_es(_object)
50
- index.new(_object).as_es_document
48
+ index.new(_object).as_elastic_document
51
49
  end
52
50
 
53
51
  def main_target
@@ -1,69 +1,62 @@
1
1
  module Elastic
2
- module Configuration
3
- DEFAULT = {
2
+ class Configuration
3
+ DEFAULTS = {
4
4
  host: '127.0.0.1',
5
5
  port: 9200,
6
6
  page_size: 20,
7
7
  coord_similarity: true,
8
- import_batch_size: 10_000
8
+ import_batch_size: 10_000,
9
+ whiny_indices: false,
10
+ api_client: nil, # set by method
11
+ logger: nil, # set by method
12
+ time_zone: nil # set by method
9
13
  }
10
14
 
11
- extend self
15
+ attr_accessor :host, :port, :api_client, :index, :page_size, :coord_similarity, :logger,
16
+ :import_batch_size, :whiny_indices, :time_zone
17
+
18
+ def initialize
19
+ assign_attributes DEFAULTS
20
+ end
12
21
 
13
22
  def reset
14
- @config = nil
15
- self
23
+ assign_attributes DEFAULTS
16
24
  end
17
25
 
18
- def configure(_options = nil, &_block)
19
- if _options.nil?
20
- _block.call self
21
- else
22
- @config = config.merge _options.symbolize_keys
23
- end
26
+ def assign_attributes(_options)
27
+ _options.each { |k, v| public_send("#{k}=", v) }
24
28
  self
25
29
  end
26
30
 
27
31
  def api_client
28
- config[:client] ||= load_api_client
32
+ @api_client || default_api_client
29
33
  end
30
34
 
31
- def index_name
32
- config[:index]
33
- end
34
-
35
- def indices_path
36
- 'app/indices'
37
- end
38
-
39
- def page_size
40
- @config[:page_size]
41
- end
42
-
43
- def coord_similarity
44
- @config[:coord_similarity]
35
+ def logger
36
+ @logger || default_logger
45
37
  end
46
38
 
47
- def logger
48
- @config[:logger] || default_logger
39
+ def time_zone
40
+ @time_zone || default_time_zone
49
41
  end
50
42
 
51
- def import_batch_size
52
- @config[:import_batch_size]
43
+ def time_zone=(_value)
44
+ _value = ActiveSupport::TimeZone.new(_value) if _value.is_a? String
45
+ @time_zone = _value
53
46
  end
54
47
 
55
48
  private
56
49
 
57
- def config
58
- @config ||= DEFAULT
50
+ def default_api_client
51
+ @default_api_client ||= Elasticsearch::Client.new host: @host, port: @port
59
52
  end
60
53
 
61
54
  def default_logger
62
55
  @default_logger ||= Logger.new(STDOUT)
63
56
  end
64
57
 
65
- def load_api_client
66
- Elasticsearch::Client.new host: config[:host], port: config[:port]
58
+ def default_time_zone
59
+ @default_time_zone ||= ActiveSupport::TimeZone.new('UTC')
67
60
  end
68
61
  end
69
62
  end
@@ -0,0 +1,253 @@
1
module Elastic::Core
  # Wraps the Elasticsearch client calls used to manage and access one logical
  # index.
  #
  # The logical index is reached through two aliases: `index_name` (reads) and
  # "#{index_name}.w" (writes). Physical indices are created with names of the
  # form "#{index_name}:<role>:<unix timestamp>".
  class Connector
    # @param _name suffix appended to the configured index name
    # @param _types collection of type names; every type receives `_mapping`
    # @param _mapping mapping body sent to the put_mapping API
    def initialize(_name, _types, _mapping)
      @name = _name
      @types = _types
      @mapping = _mapping
    end

    # @return [String] "<configured index>_<name>", also used as the read alias
    def index_name
      @index_name ||= "#{Elastic.config.index}_#{@name}"
    end

    # @return [String] name used for read operations
    def read_index_name
      index_name
    end

    # @return [String] name used for write operations: the per-thread override
    #   installed during a rollover, or the write alias otherwise
    def write_index_name
      Thread.current[write_index_thread_override] || write_index_alias
    end

    # @return [Symbol] :not_available when the read alias resolves to no index,
    #   :not_synchronized when the stored mapping differs from the declared
    #   one, :ready otherwise
    def status
      actual_name = resolve_actual_index_name
      return :not_available if actual_name.nil?
      return :not_synchronized unless mapping_synchronized? actual_name
      :ready
    end

    # Deletes every index matching "#{index_name}:*".
    #
    # @return [nil]
    def drop
      api.indices.delete index: "#{index_name}:*"
      nil
    end

    # Creates the index when it does not exist, or tries to update the mapping
    # of the existing index when it is out of sync.
    #
    # @return [Boolean] false when Elasticsearch rejects the mapping update
    #   with a BadRequest error, true otherwise
    def remap
      case status
      when :not_available
        create_from_scratch
      when :not_synchronized
        begin
          setup_index_types resolve_actual_index_name
        rescue Elasticsearch::Transport::Transport::Errors::BadRequest
          return false
        end
      end

      true
    end

    # Remaps the index and, when remapping is rejected, rolls over to a new
    # index copying every document from the read index.
    #
    # @param batch_size [Integer, nil] documents per copy batch, defaults to
    #   `default_batch_size`
    # @return [nil]
    def migrate(batch_size: nil)
      unless remap
        rollover do
          copy_documents(read_index_name, write_index_name, batch_size || default_batch_size)
        end
      end

      nil
    end

    # Indexes a single document given as a hash with '_id', '_type' and 'data'
    # keys.
    def index(_document)
      # TODO: validate document type

      api.index(
        index: write_index_name,
        id: _document['_id'],
        type: _document['_type'],
        body: _document['data']
      )
    end

    # Indexes several documents with one bulk request, retrying on temporary
    # errors (see #retry_on_temporary_error).
    def bulk_index(_documents)
      # TODO: validate documents type

      body = _documents.map { |doc| { 'index' => doc } }

      retry_on_temporary_error('bulk indexing') do
        api.bulk(index: write_index_name, body: body)
      end
    end

    # Refreshes the read index.
    def refresh
      api.indices.refresh index: read_index_name
    end

    # Fetches a document by type and id through the write index name.
    def find(_type, _id)
      api.get(index: write_index_name, type: _type, id: _id)
    end

    # Deletes a document by type and id through the write index name.
    def delete(_type, _id)
      api.delete(index: write_index_name, type: _type, id: _id)
    end

    # @return the 'count' entry of the count API response
    def count(query: nil, type: nil)
      api.count(index: read_index_name, type: type, body: query)['count']
    end

    # @return the raw search API response
    def query(query: nil, type: nil)
      api.search(index: read_index_name, type: type, body: query)
    end

    # Replaces the current index with a freshly created one.
    #
    # While the given block runs, writes issued by the current thread go to the
    # new index and writes going through the write alias land in a temporary
    # index. Afterwards the aliases are moved to the new index, the previous
    # index is deleted and the documents held by the temporary index are copied
    # to the write index.
    #
    # If anything fails, the write alias is pointed back to the previous index,
    # the new index is deleted and the error is re-raised so the caller knows
    # the rollover did not happen.
    def rollover(&_block) # rubocop:disable Metrics/MethodLength, Metrics/AbcSize
      new_index = create_index_w_mapping
      tmp_index = create_index_w_mapping('tmp')
      actual_index = resolve_actual_index_name

      begin
        transfer_alias(write_index_alias, from: actual_index, to: tmp_index)

        perform_optimized_write_on(new_index, &_block)

        transfer_alias(index_name, from: actual_index, to: new_index)
        transfer_alias(write_index_alias, from: tmp_index, to: new_index)
        api.indices.delete index: actual_index if actual_index
      rescue
        # rollback, then let the failure reach the caller instead of hiding it
        transfer_alias(write_index_alias, from: tmp_index, to: actual_index)
        api.indices.delete index: new_index
        raise
      ensure
        # Runs on success and on failure: move whatever was written to the
        # temporary index into the index currently behind the write name.
        # TODO: what would happen if the following fails? O.O
        copy_documents(tmp_index, write_index_name, small_batch_size)
        api.indices.delete index: tmp_index
        api.indices.refresh index: index_name
      end
    end

    private

    # Elasticsearch client taken from the global configuration.
    def api
      Elastic.config.api_client
    end

    # Runs the block with the current thread's writes redirected to `_index`
    # and the index refresh interval set to -1; afterwards sets the refresh
    # interval to '1s' and restores the previous thread override.
    def perform_optimized_write_on(_index)
      old_index = Thread.current[write_index_thread_override]
      Thread.current[write_index_thread_override] = _index
      configure_index(_index, refresh_interval: -1)
      yield
    ensure
      configure_index(_index, refresh_interval: '1s')
      Thread.current[write_index_thread_override] = old_index
    end

    # Key of the thread-local variable that overrides the write index.
    def write_index_thread_override
      "_elastic_#{index_name}_write_index"
    end

    # Alias used for write operations.
    def write_index_alias
      @write_index_alias ||= "#{index_name}.w"
    end

    # @return [String, nil] first index returned for the read alias, nil when
    #   the alias lookup answers NotFound
    def resolve_actual_index_name
      result = api.indices.get_alias(name: index_name)
      result.keys.first
    rescue Elasticsearch::Transport::Transport::Errors::NotFound
      nil
    end

    # Creates a new physical index, waits for the cluster to report at least
    # 'yellow' health and installs the mapping for every type.
    #
    # @return [String] name of the created index
    def create_index_w_mapping(_role = 'main')
      new_name = "#{index_name}:#{_role}:#{Time.now.to_i}"
      api.indices.create index: new_name
      api.cluster.health wait_for_status: 'yellow'
      setup_index_types new_name
      new_name
    end

    # Creates a new index and points both the read and write aliases to it.
    def create_from_scratch
      new_index = create_index_w_mapping
      api.indices.update_aliases(
        body: {
          actions: [
            { add: { index: new_index, alias: index_name } },
            { add: { index: new_index, alias: write_index_alias } }
          ]
        }
      )
    end

    # @return [Boolean] true when every type has a stored mapping in `_index`
    #   for which CompareMappings reports no differences
    def mapping_synchronized?(_index)
      type_mappings = api.indices.get_mapping(index: _index)
      return false if type_mappings[_index].nil?
      type_mappings = type_mappings[_index]['mappings']

      @types.all? do |type|
        next false if type_mappings[type].nil?

        diff = Elastic::Commands::CompareMappings.for(
          current: type_mappings[type],
          user: @mapping
        )
        diff.empty?
      end
    end

    # Sends the declared mapping to `_index` once per type.
    def setup_index_types(_index)
      @types.each do |type|
        api.indices.put_mapping(index: _index, type: type, body: @mapping)
      end
    end

    # Moves `_alias` between indices in a single update_aliases request; the
    # remove/add action is skipped when `from`/`to` is nil.
    def transfer_alias(_alias, from: nil, to: nil)
      actions = []
      actions << { remove: { index: from, alias: _alias } } if from
      actions << { add: { index: to, alias: _alias } } if to
      api.indices.update_aliases body: { actions: actions }
    end

    # Copies every document from `_from` to `_to` using a scrolled search and
    # one bulk request per page of `_batch_size` hits.
    def copy_documents(_from, _to, _batch_size)
      api.indices.refresh index: _from

      r = api.search(
        index: _from,
        body: { sort: ['_doc'] },
        scroll: '5m',
        size: _batch_size
      )

      count = 0
      until r['hits']['hits'].empty?
        count += r['hits']['hits'].count
        Elastic.logger.info "Copied #{count} docs"

        body = r['hits']['hits'].map { |h| { 'index' => transform_hit_to_doc(h) } }
        api.bulk(index: _to, body: body)

        r = api.scroll scroll: '5m', scroll_id: r['_scroll_id']
      end
    end

    # Updates the settings of `_index`.
    def configure_index(_index, _settings)
      api.indices.put_settings index: _index, body: { index: _settings }
    end

    # Converts a search hit into the document shape used by the bulk requests.
    def transform_hit_to_doc(_hit)
      { '_id' => _hit['_id'], '_type' => _hit['_type'], 'data' => _hit['_source'] }
    end

    def default_batch_size
      1_000
    end

    def small_batch_size
      500
    end

    # Runs the block, retrying up to `retries` times when Elasticsearch answers
    # ServiceUnavailable or GatewayTimeout; the error is re-raised once the
    # retries are exhausted.
    def retry_on_temporary_error(_action, retries: 3)
      return yield
    rescue Elasticsearch::Transport::Transport::Errors::ServiceUnavailable,
           Elasticsearch::Transport::Transport::Errors::GatewayTimeout => exc
      raise if retries <= 0

      Elastic.logger.warn("#{exc.class} error during '#{_action}', retrying!")
      retries -= 1
      retry
    end
  end
end
@@ -38,10 +38,6 @@ module Elastic::Core
38
38
  @field_map.each_value
39
39
  end
40
40
 
41
- def expanded_field_names
42
- @field_map.map { |_, field| field.expanded_names }.flatten
43
- end
44
-
45
41
  def freeze
46
42
  return if frozen?
47
43
  cache_targets
@@ -55,7 +55,7 @@ module Elastic::Core
55
55
  query = build_base_query
56
56
 
57
57
  if !grouped?
58
- query.size = (@config.limit || Elastic::Configuration.page_size)
58
+ query.size = (@config.limit || Elastic.config.page_size)
59
59
  query.offset = @config.offset
60
60
  query = sort_node(query)
61
61
  else
@@ -16,7 +16,17 @@ module Elastic::Core
16
16
  @definition.fields
17
17
  end
18
18
 
19
- def as_es_document(only_data: false)
19
+ def read_elastic_type
20
+ object.class.to_s
21
+ end
22
+
23
+ def read_elastic_id
24
+ if has_attribute_for_indexing?(:id)
25
+ read_attribute_for_indexing(:id)
26
+ end
27
+ end
28
+
29
+ def as_elastic_document(only_data: false)
20
30
  data = {}.tap do |hash|
21
31
  fields.each do |field|
22
32
  value = read_attribute_for_indexing(field.name)
@@ -27,8 +37,12 @@ module Elastic::Core
27
37
 
28
38
  return data if only_data
29
39
 
30
- result = { '_type' => object.class.to_s, 'data' => data }
31
- result['_id'] = read_attribute_for_indexing(:id) if has_attribute_for_indexing?(:id)
40
+ result = {
41
+ '_type' => read_elastic_type,
42
+ 'data' => data
43
+ }
44
+
45
+ read_elastic_id.tap { |id| result['_id'] = id unless id.nil? }
32
46
  result
33
47
  end
34
48
 
@@ -13,9 +13,9 @@ module Elastic::Datatypes
13
13
  def prepare_value_for_result(_value)
14
14
  case _value
15
15
  when ::String
16
- ::Time.parse(_value).utc.to_date
16
+ time_zone.parse(_value).to_date
17
17
  when ::Integer
18
- ::Time.at(_value / 1000).utc.to_date
18
+ time_zone.at(_value / 1000).to_date
19
19
  else
20
20
  _value
21
21
  end
@@ -26,7 +26,13 @@ module Elastic::Datatypes
26
26
  end
27
27
 
28
28
  def date_histogram_aggregation_defaults
29
- { interval: '1w' }
29
+ { interval: '1w', time_zone: time_zone }
30
+ end
31
+
32
+ private
33
+
34
+ def time_zone
35
+ @time_zone ||= ActiveSupport::TimeZone.new('UTC') # dates are always UTC
30
36
  end
31
37
  end
32
38
  end
@@ -10,9 +10,9 @@ module Elastic::Datatypes
10
10
  # TODO: set timezone
11
11
  case _value
12
12
  when ::String
13
- ::Time.parse(_value)
13
+ time_zone.parse(_value)
14
14
  when ::Integer
15
- ::Time.at(_value / 1000)
15
+ time_zone.at(_value / 1000)
16
16
  else
17
17
  _value
18
18
  end
@@ -23,7 +23,14 @@ module Elastic::Datatypes
23
23
  end
24
24
 
25
25
  def date_histogram_aggregation_defaults
26
- { interval: '1h' }
26
+ { interval: '1h', time_zone: time_zone }
27
+ end
28
+
29
+ private
30
+
31
+ def time_zone
32
+ # TODO: user_options[:timezone]
33
+ Elastic.config.time_zone
27
34
  end
28
35
  end
29
36
  end
@@ -0,0 +1,4 @@
1
module Elastic
  # Base class for the errors raised by this library.
  Error = Class.new(StandardError)
end
@@ -11,10 +11,6 @@ module Elastic::Fields
11
11
  # does nothing
12
12
  end
13
13
 
14
- def expanded_names
15
- [@name] + @index.definition.expanded_field_names.map { |n| @name + '.' + n }
16
- end
17
-
18
14
  def validate
19
15
  nil
20
16
  end
@@ -45,7 +41,7 @@ module Elastic::Fields
45
41
  end
46
42
 
47
43
  def prepare_value_for_index(_values)
48
- _values.map { |v| @index.new(v).as_es_document(only_data: true) }
44
+ _values.map { |v| @index.new(v).as_elastic_document(only_data: true) }
49
45
  end
50
46
 
51
47
  def prepare_value_for_result(_values)
@@ -35,10 +35,6 @@ module Elastic::Fields
35
35
  nil
36
36
  end
37
37
 
38
- def expanded_names
39
- [@name]
40
- end
41
-
42
38
  def needs_inference?
43
39
  mapping_inference_enabled? && !@options.key?(:type)
44
40
  end
@@ -3,21 +3,27 @@ module Elastic::Nodes::Agg
3
3
  include Elastic::Nodes::Concerns::Aggregable
4
4
  include Elastic::Nodes::Concerns::Bucketed
5
5
 
6
- def self.build(_name, _field, interval: nil)
6
+ def self.build(_name, _field, interval: nil, time_zone: nil)
7
7
  super(_name).tap do |node|
8
8
  node.field = _field
9
9
  node.interval = interval
10
+ node.time_zone = time_zone
10
11
  end
11
12
  end
12
13
 
13
14
  attr_accessor :field
14
- attr_reader :interval
15
+ attr_reader :interval, :time_zone
15
16
 
16
17
  def interval=(_value)
17
18
  raise ArgumentError, 'invalid interval' if _value && !valid_interval?(_value)
18
19
  @interval = _value
19
20
  end
20
21
 
22
+ def time_zone=(_value)
23
+ raise ArgumentError, 'invalid time_zone' if _value && !_value.is_a?(ActiveSupport::TimeZone)
24
+ @time_zone = _value
25
+ end
26
+
21
27
  def clone
22
28
  prepare_clone(super)
23
29
  end
@@ -29,6 +35,7 @@ module Elastic::Nodes::Agg
29
35
  def render(_options = {})
30
36
  hash = { 'field' => @field.to_s }
31
37
  hash['interval'] = @interval if @interval
38
+ hash['time_zone'] = @time_zone.formatted_offset if @time_zone
32
39
 
33
40
  render_aggs({ 'date_histogram' => hash }, _options)
34
41
  end
@@ -38,6 +45,7 @@ module Elastic::Nodes::Agg
38
45
  def prepare_clone(_clone)
39
46
  _clone.field = @field
40
47
  _clone.interval = @interval
48
+ _clone.time_zone = @time_zone
41
49
  _clone
42
50
  end
43
51