es-elasticity 0.2.11 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,23 +1,9 @@
1
+ require "elasticity/log_subscriber"
2
+
1
3
  module Elasticity
2
4
  class Railtie < Rails::Railtie
3
5
  initializer 'elasticity.initialize_logging' do
4
- ActiveSupport::Notifications.subscribe(/\.elasticity$/) do |name, start, finish, id, payload|
5
- time = (finish - start)*1000
6
-
7
- if logger = Elasticity.config.logger
8
- logger.debug "#{name} #{"%.2f" % time}ms #{MultiJson.dump(payload[:args], pretty: Elasticity.config.pretty_json)}"
9
-
10
- if payload[:backtrace].present?
11
- bt = Rails.backtrace_cleaner.clean(payload[:backtrace])
12
- logger.debug bt[0,4].join("\n")
13
- end
14
-
15
- exception, message = payload[:exception]
16
- if exception
17
- logger.error "#{name} #{exception}: #{message}"
18
- end
19
- end
20
- end
6
+ LogSubscriber.attach_to(:elasticity)
21
7
  end
22
8
  end
23
9
  end
@@ -1,76 +1,82 @@
1
1
  module Elasticity
2
- # Search provides a simple interface for defining a search against an Elasticsearch
3
- # index and fetching the results in different ways and mappings.
4
- #
5
- # Example:
6
- # search = Elasticity::Search.new("people", "person", {...})
7
- # search.documents(Person)
8
- class Search
9
- attr_reader :index, :document_type, :body
10
-
11
- # Creates a new Search definitions for the given index, document_type and criteria. The
12
- # search is not performend until methods are called, each method represents a different
13
- # way of fetching and mapping the data.
14
- #
15
- # The body parameter is a hash following the exact same syntax as Elasticsearch's JSON
16
- # query language.
17
- def initialize(index, document_type, body)
18
- @index = index
19
- @document_type = document_type.freeze
20
- @body = body.freeze
21
- end
2
+ module Search
3
+ # Elasticity::Search::Definition is a struct that encapsulates all the data specific to one
4
+ # ElasticSearch search.
5
+ class Definition
6
+ attr_accessor :index_name, :document_type, :body
7
+
8
+ def initialize(index_name, document_type, body)
9
+ @index_name = index_name
10
+ @document_type = document_type
11
+ @body = body
12
+ end
13
+
14
+ def update(body_changes)
15
+ self.class.new(@index_name, @document_type, @body.deep_merge(body_changes))
16
+ end
17
+
18
+ def to_search_args
19
+ { index: @index_name, type: @document_type, body: @body }
20
+ end
22
21
 
23
- # Execute the search, fetching only ids from Elasticsearch and then mapping the results
24
- # into ActiveRecord models using the provided relation.
25
- def active_records(relation)
26
- return @active_record if defined?(@active_record)
27
- response = @index.search(@document_type, @body.merge(_source: false))
28
- @active_record = Result.new(response, ActiveRecordMapper.new(relation))
22
+ def to_msearch_args
23
+ { index: @index_name, type: @document_type, search: @body }
24
+ end
29
25
  end
30
26
 
31
- # Execute the search, fetching all documents from the index and mapping the stored attributes
32
- # into instances of the provided class. It will call document_klass.new(attrs), where attrs
33
- # are the stored attributes.
34
- def documents(document_klass)
35
- return @documents if defined?(@documents)
36
- response = @index.search(@document_type, @body)
37
- @documents = Result.new(response, DocumentMapper.new(document_klass))
27
+ # Elasticity::Search::Facade provides a simple interface for defining a search and provides
28
+ # different ways of executing it against Elasticsearch. This is usually the main entry point
29
+ # for search.
30
+ class Facade
31
+ attr_accessor :search_definition
32
+
33
+ # Creates a new facade for the given search definition, providing a set of helper methods
34
+ # to trigger different types of searches and result interpretations.
35
+ def initialize(client, search_definition)
36
+ @client = client
37
+ @search_definition = search_definition
38
+ end
39
+
40
+ # Performs the search using the default search type and returns an iterator that will yield
41
+ # hash representations of the documents.
42
+ def document_hashes
43
+ LazySearch.new(@client, @search_definition)
44
+ end
45
+
46
+ # Performs the search using the default search type and returns an iterator that will yield
47
+ # each document, converted to the provided document_klass.
48
+ def documents(document_klass)
49
+ LazySearch.new(@client, @search_definition) do |hit|
50
+ document_klass.from_hit(hit)
51
+ end
52
+ end
53
+
54
+ # Performs the search using the scan search type and the scroll API to iterate over all the documents
55
+ # as fast as possible. The sort option will be discarded.
56
+ #
57
+ # More info: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/scan-scroll.html
58
+ def scan_documents(document_klass, **options)
59
+ ScanCursor.new(@client, @search_definition, document_klass, **options)
60
+ end
61
+
62
+ # Performs the search fetching only document ids and uses them to load ActiveRecord objects from the provided
63
+ # relation. It returns the relation matching the objects found on ElasticSearch.
64
+ def active_records(relation)
65
+ ActiveRecordProxy.new(@client, @search_definition, relation)
66
+ end
38
67
  end
39
68
 
40
- # Result is a collection representing the response from a search against an index. It's what gets
41
- # returned by any of the Elasticity::Search methods and it provides a lazily-evaluated and
42
- # lazily-mapped – using the provided mapper class.
43
- #
44
- # Example:
45
- #
46
- # response = {"took"=>0, "timed_out"=>false, "_shards"=>{"total"=>5, "successful"=>5, "failed"=>0}, "hits"=>{"total"=>2, "max_score"=>1.0, "hits"=>[
47
- # {"_index"=>"my_index", "_type"=>"my_type", "_id"=>"1", "_score"=>1.0, "_source"=> { "id" => 1, "name" => "Foo" },
48
- # {"_index"=>"my_index", "_type"=>"my_type", "_id"=>"2", "_score"=>1.0, "_source"=> { "id" => 2, "name" => "Bar" },
49
- # ]}}
50
- #
51
- # class AttributesMapper
52
- # def map(hits)
53
- # hits.map { |h| h["_source"] }
54
- # end
55
- # end
56
- #
57
- # r = Result.new(response, AttributesMapper.new)
58
- # r.total # => 2
59
- # r[0] # => { "id" => 1, "name" => "Foo" }
60
- #
61
- class Result
69
+ class LazySearch
62
70
  include Enumerable
63
71
 
64
- def initialize(response, mapper)
65
- @response = response
66
- @mapper = mapper
67
- end
72
+ delegate :each, :size, :length, :[], :+, :-, :&, :|, to: :search_results
68
73
 
69
- delegate :[], :each, :to_ary, :size, :+, :-, to: :mapping
74
+ attr_accessor :search_definition
70
75
 
71
- # The total number of entries as returned by ES
72
- def total
73
- @response["hits"]["total"]
76
+ def initialize(client, search_definition, &mapper)
77
+ @client = client
78
+ @search_definition = search_definition
79
+ @mapper = mapper
74
80
  end
75
81
 
76
82
  def empty?
@@ -81,46 +87,105 @@ module Elasticity
81
87
  empty?
82
88
  end
83
89
 
90
+ def total
91
+ response["hits"]["total"]
92
+ end
93
+
84
94
  def suggestions
85
- @response["suggest"] || {}
95
+ response["hits"]["suggest"] ||= {}
86
96
  end
87
97
 
88
- def mapping
89
- return @mapping if defined?(@mapping)
90
- hits = Array(@response["hits"]["hits"])
91
- @mapping = @mapper.map(hits)
98
+ def search_results
99
+ return @search_results if defined?(@search_results)
100
+
101
+ hits = response["hits"]["hits"]
102
+
103
+ @search_results = if @mapper.nil?
104
+ hits
105
+ else
106
+ hits.map { |hit| @mapper.(hit) }
107
+ end
108
+ end
109
+
110
+ private
111
+
112
+ def response
113
+ return @response if defined?(@response)
114
+ @response = @client.search(@search_definition.to_search_args)
92
115
  end
93
116
  end
94
117
 
95
- class DocumentMapper
96
- def initialize(document_klass)
97
- @document_klass = document_klass
118
+ class ScanCursor
119
+ include Enumerable
120
+
121
+ def initialize(client, search_definition, document_klass, size: 100, scroll: "1m")
122
+ @client = client
123
+ @search_definition = search_definition
124
+ @document_klass = document_klass
125
+ @size = size
126
+ @scroll = scroll
98
127
  end
99
128
 
100
- def map(hits)
101
- hits.map do |hit|
102
- attrs = hit["_source"].merge(_id: hit['_id'])
129
+ def empty?
130
+ total == 0
131
+ end
103
132
 
104
- if hit["highlight"]
105
- highlighted_attrs = attrs.dup
106
- attrs_set = Set.new
133
+ def blank?
134
+ empty?
135
+ end
107
136
 
108
- hit["highlight"].each do |name, v|
109
- name = name.gsub(/\..*\z/, '')
110
- next if attrs_set.include?(name)
111
- highlighted_attrs[name] = v
112
- attrs_set << name
113
- end
137
+ def total
138
+ search["hits"]["total"]
139
+ end
114
140
 
115
- highlighted = @document_klass.new(highlighted_attrs)
116
- end
141
+ def each_batch
142
+ enumerator.each do |group|
143
+ yield(group)
144
+ end
145
+ end
117
146
 
118
- @document_klass.new(attrs.merge(highlighted: highlighted))
147
+ def each
148
+ enumerator.each do |group|
149
+ group.each { |doc| yield(doc) }
119
150
  end
120
151
  end
152
+
153
+ private
154
+
155
+ def enumerator
156
+ Enumerator.new do |y|
157
+ response = search
158
+
159
+ loop do
160
+ response = @client.scroll(scroll_id: response["_scroll_id"], scroll: @scroll)
161
+ hits = response["hits"]["hits"]
162
+ break if hits.empty?
163
+
164
+ y << hits.map { |hit| @document_klass.from_hit(hit) }
165
+ end
166
+ end
167
+ end
168
+
169
+ def search
170
+ return @search if defined?(@search)
171
+ args = @search_definition.to_search_args
172
+ args = args.merge(search_type: 'scan', size: @size, scroll: @scroll)
173
+ @search = @client.search(args)
174
+ end
121
175
  end
122
176
 
123
- class ActiveRecordMapper
177
+ class ActiveRecordProxy
178
+ def self.from_hits(relation, hits)
179
+ ids = hits.map { |hit| hit["_id"] }
180
+
181
+ if ids.any?
182
+ id_col = "#{relation.connection.quote_column_name(relation.table_name)}.#{relation.connection.quote_column_name(relation.klass.primary_key)}"
183
+ relation.where("#{id_col} IN (?)", ids).order("FIELD(#{id_col},#{ids.join(',')})")
184
+ else
185
+ relation.none
186
+ end
187
+ end
188
+
124
189
  class Relation < ActiveSupport::ProxyObject
125
190
  def initialize(relation)
126
191
  @relation = relation
@@ -141,57 +206,59 @@ module Elasticity
141
206
  end
142
207
  end
143
208
 
144
- def initialize(relation)
145
- @relation = Relation.new(relation)
209
+ def initialize(client, search_definition, relation)
210
+ @client = client
211
+ @search_definition = search_definition.update(_source: false)
212
+ @relation = Relation.new(relation)
146
213
  end
147
214
 
148
- def map(hits)
149
- ids = hits.map { |h| h["_id"] }
215
+ def metadata
216
+ @metadata ||= { total: response["hits"]["total"], suggestions: response["hits"]["suggest"] || {} }
217
+ end
150
218
 
151
- if ids.any?
152
- id_col = "#{quote(@relation.table_name)}.#{quote(@relation.klass.primary_key)}"
153
- @relation.where(id: ids).order("FIELD(#{id_col},#{ids.join(',')})")
154
- else
155
- @relation.none
156
- end
219
+ def total
220
+ metadata[:total]
157
221
  end
158
222
 
159
- private
223
+ def suggestions
224
+ metadata[:suggestions]
225
+ end
160
226
 
161
- def quote(identifier)
162
- @relation.connection.quote_column_name(identifier)
227
+ def method_missing(name, *args, **options, &block)
228
+ filtered_relation.public_send(name, *args, **options, &block)
163
229
  end
164
- end
165
- end
166
230
 
167
- class DocumentSearchProxy < BasicObject
168
- def initialize(search, document_klass)
169
- @search = search
170
- @document_klass = document_klass
171
- end
231
+ private
172
232
 
173
- def index
174
- @search.index
175
- end
233
+ def response
234
+ @response ||= @client.search(@search_definition.to_search_args)
235
+ end
176
236
 
177
- def document_type
178
- @search.document_type
237
+ def filtered_relation
238
+ return @filtered_relation if defined?(@filtered_relation)
239
+ @filtered_relation = ActiveRecordProxy.from_hits(@relation, response["hits"]["hits"])
240
+ end
179
241
  end
180
242
 
181
- def body
182
- @search.body
183
- end
243
+ class DocumentProxy < BasicObject
244
+ def initialize(search, document_klass)
245
+ @search = search
246
+ @document_klass = document_klass
247
+ end
184
248
 
185
- def active_records(relation)
186
- @search.active_records(relation)
187
- end
249
+ delegate :search_definition, :active_records, to: :@search
188
250
 
189
- def documents
190
- @search.documents(@document_klass)
191
- end
251
+ def documents
252
+ @search.documents(@document_klass)
253
+ end
192
254
 
193
- def method_missing(method_name, *args, &block)
194
- documents.public_send(method_name, *args, &block)
255
+ def scan_documents(**options)
256
+ @search.scan_documents(@document_klass, **options)
257
+ end
258
+
259
+ def method_missing(method_name, *args, &block)
260
+ documents.public_send(method_name, *args, &block)
261
+ end
195
262
  end
196
263
  end
197
264
  end
@@ -0,0 +1,15 @@
1
+ module Elasticity
2
+ module Strategies
3
+ class IndexError < StandardError
4
+ attr_reader :index_base_name
5
+
6
+ def initialize(index_base_name, message)
7
+ @index_name = index_name
8
+ super("#{index_name}: #{message}")
9
+ end
10
+ end
11
+
12
+ autoload :SingleIndex, "elasticity/strategies/single_index"
13
+ autoload :AliasIndex, "elasticity/strategies/alias_index"
14
+ end
15
+ end
@@ -0,0 +1,255 @@
1
+ module Elasticity
2
+ module Strategies
3
+ # This strategy keeps two aliases that might be mapped to the same index or to different indexes, allowing
4
+ # runtime changes by simply atomically updating the aliases. For example, look at the remap method
5
+ # implementation.
6
+ class AliasIndex
7
+ STATUSES = [:missing, :ok]
8
+
9
+ def initialize(client, index_base_name)
10
+ @client = client
11
+ @main_alias = index_base_name
12
+ @update_alias = "#{index_base_name}_update"
13
+ end
14
+
15
+ # Remap allows zero-downtime/zero-dataloss remap of elasticsearch indexes. Here is the overview
16
+ # of how it works:
17
+ #
18
+ # 1. Creates a new index with the new mapping
19
+ # 2. Updates the aliases so that any write goes to the new index and reads go to both indexes.
20
+ # 3. Uses scan and scroll to iterate over all the documents in the old index, moving them to the
21
+ # new index.
22
+ # 4. Updates the aliases so that all operations go to the new index.
23
+ # 5. Deletes the old index.
24
+ #
25
+ # It does a little bit more to ensure consistency and to handle race-conditions. For more details
26
+ # look at the implementation.
27
+ def remap(index_def)
28
+ main_indexes = self.main_indexes
29
+ update_indexes = self.update_indexes
30
+
31
+ if main_indexes.size != 1 || update_indexes.size != 1 || main_indexes != update_indexes
32
+ raise "Index can't be remapped right now, check if another remapping is already happening"
33
+ end
34
+
35
+ new_index = create_index(index_def)
36
+ original_index = main_indexes[0]
37
+
38
+ begin
39
+ # Configure aliases so that search includes the old index and the new index, and writes are made to
40
+ # the new index.
41
+ @client.index_update_aliases(body: {
42
+ actions: [
43
+ { remove: { index: original_index, alias: @update_alias } },
44
+ { add: { index: new_index, alias: @update_alias } },
45
+ { add: { index: new_index, alias: @main_alias }},
46
+ ]
47
+ })
48
+
49
+ @client.index_flush(index: original_index)
50
+ cursor = @client.search index: original_index, search_type: 'scan', scroll: '1m', _source: false, size: 100
51
+ loop do
52
+ cursor = @client.scroll(scroll_id: cursor['_scroll_id'], scroll: '1m')
53
+ hits = cursor['hits']['hits']
54
+ break if hits.empty?
55
+
56
+ # Fetch documents based on the ids that existed when the migration started, to make sure we only migrate
57
+ # documents that haven't been deleted.
58
+ id_docs = hits.map do |hit|
59
+ { _index: original_index, _type: hit["_type"], _id: hit["_id"] }
60
+ end
61
+
62
+ docs = @client.mget(body: { docs: id_docs }, refresh: true)["docs"]
63
+ break if docs.empty?
64
+
65
+ # Move only documents that still exist on the old index into the new index.
66
+ ops = []
67
+ docs.each do |doc|
68
+ ops << { index: { _index: new_index, _type: doc["_type"], _id: doc["_id"], data: doc["_source"] } } if doc["found"]
69
+ end
70
+
71
+ @client.bulk(body: ops)
72
+
73
+ # Deal with race conditions by removing from the new index any document that doesn't exist in the old index anymore.
74
+ ops = []
75
+ @client.mget(body: { docs: id_docs }, refresh: true)["docs"].each_with_index do |new_doc, idx|
76
+ if docs[idx]["found"] && !new_doc["found"]
77
+ ops << { delete: { _index: new_index, _type: new_doc["_type"], _id: new_doc["_id"] } }
78
+ end
79
+ end
80
+
81
+ @client.bulk(body: ops) unless ops.empty?
82
+ end
83
+
84
+ # Update aliases to only point to the new index.
85
+ @client.index_update_aliases(body: {
86
+ actions: [
87
+ { remove: { index: original_index, alias: @main_alias } },
88
+ ]
89
+ })
90
+ @client.index_delete(index: original_index)
91
+
92
+ rescue
93
+ @client.index_update_aliases(body: {
94
+ actions: [
95
+ { add: { index: original_index, alias: @update_alias } },
96
+ { remove: { index: new_index, alias: @update_alias } },
97
+ ]
98
+ })
99
+
100
+ @client.index_flush(index: new_index)
101
+ cursor = @client.search index: new_index, search_type: 'scan', scroll: '1m', size: 100
102
+ loop do
103
+ cursor = @client.scroll(scroll_id: cursor['_scroll_id'], scroll: '1m')
104
+ hits = cursor['hits']['hits']
105
+ break if hits.empty?
106
+
107
+ # Move all the documents that exist on the new index back to the old index
108
+ ops = []
109
+ hits.each do |doc|
110
+ ops << { index: { _index: original_index, _type: doc["_type"], _id: doc["_id"], data: doc["_source"] } }
111
+ end
112
+
113
+ @client.bulk(body: ops)
114
+ end
115
+
116
+ @client.index_flush(index: original_index)
117
+ @client.index_update_aliases(body: {
118
+ actions: [
119
+ { remove: { index: new_index, alias: @main_alias } },
120
+ ]
121
+ })
122
+ @client.index_delete(index: new_index)
123
+
124
+ raise
125
+ end
126
+ end
127
+
128
+ def status
129
+ search_exists = @client.index_exists_alias(name: @main_alias)
130
+ update_exists = @client.index_exists_alias(name: @update_alias)
131
+
132
+ case
133
+ when search_exists && update_exists
134
+ :ok
135
+ when !search_exists && !update_exists
136
+ :missing
137
+ else
138
+ :inconsistent
139
+ end
140
+ end
141
+
142
+ def missing?
143
+ status == :missing
144
+ end
145
+
146
+ def main_indexes
147
+ @client.index_get_aliases(index: "#{@main_alias}-*", name: @main_alias).keys
148
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
149
+ []
150
+ end
151
+
152
+ def update_indexes
153
+ @client.index_get_aliases(index: "#{@main_alias}-*", name: @update_alias).keys
154
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
155
+ []
156
+ end
157
+
158
+ def create(index_def)
159
+ if missing?
160
+ index_name = create_index(index_def)
161
+ @client.index_update_aliases(body: {
162
+ actions: [
163
+ { add: { index: index_name, alias: @main_alias } },
164
+ { add: { index: index_name, alias: @update_alias } },
165
+ ]
166
+ })
167
+ else
168
+ raise IndexError.new(@main_alias, "index already exists")
169
+ end
170
+ end
171
+
172
+ def create_if_undefined(index_def)
173
+ create(index_def) if missing?
174
+ end
175
+
176
+ def delete
177
+ @client.index_delete(index: "#{@main_alias}-*")
178
+ end
179
+
180
+ def delete_if_defined
181
+ delete unless missing?
182
+ end
183
+
184
+ def recreate(index_def)
185
+ delete_if_defined
186
+ create(index_def)
187
+ end
188
+
189
+ def index_document(type, id, attributes)
190
+ res = @client.index(index: @update_alias, type: type, id: id, body: attributes)
191
+
192
+ if id = res["_id"]
193
+ [id, res["created"]]
194
+ else
195
+ raise IndexError.new(@update_alias, "failed to index document")
196
+ end
197
+ end
198
+
199
+ def delete_document(type, id)
200
+ ops = (main_indexes | update_indexes).map do |index|
201
+ { delete: { _index: index, _type: type, _id: id } }
202
+ end
203
+
204
+ @client.bulk(body: ops)
205
+ end
206
+
207
+ def get_document(type, id)
208
+ @client.get(index: @main_alias, type: type, id: id)
209
+ end
210
+
211
+ def search(type, body)
212
+ Search::Facade.new(@client, Search::Definition.new(@main_alias, type, body))
213
+ end
214
+
215
+ def delete_by_query(type, body)
216
+ @client.delete_by_query(index: @main_alias, type: type, body: body)
217
+ end
218
+
219
+ def bulk
220
+ b = Bulk::Alias.new(@client, @update_alias, main_indexes)
221
+ yield b
222
+ b.execute
223
+ end
224
+
225
+ def flush
226
+ @client.index_flush(index: @update_alias)
227
+ end
228
+
229
+ def settings
230
+ args = { index: @main_alias }
231
+ settings = @client.index_get_settings(index: @main_alias)
232
+ settings[@main_alias]["settings"]
233
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
234
+ nil
235
+ end
236
+
237
+ def mappings
238
+ args = { index: @main_alias }
239
+ mapping = @client.index_get_mapping(index: @main_alias)
240
+ mapping[@main_alias]["mappings"]
241
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
242
+ nil
243
+ end
244
+
245
+ private
246
+
247
+ def create_index(index_def)
248
+ ts = Time.now.utc.strftime("%Y-%m-%d_%H:%M:%S.%6N")
249
+ index_name = "#{@main_alias}-#{ts}"
250
+ @client.index_create(index: index_name, body: index_def)
251
+ index_name
252
+ end
253
+ end
254
+ end
255
+ end