es-elasticity 0.2.11 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,23 +1,9 @@
1
+ require "elasticity/log_subscriber"
2
+
1
3
  module Elasticity
2
4
  class Railtie < Rails::Railtie
3
5
  initializer 'elasticity.initialize_logging' do
4
- ActiveSupport::Notifications.subscribe(/\.elasticity$/) do |name, start, finish, id, payload|
5
- time = (finish - start)*1000
6
-
7
- if logger = Elasticity.config.logger
8
- logger.debug "#{name} #{"%.2f" % time}ms #{MultiJson.dump(payload[:args], pretty: Elasticity.config.pretty_json)}"
9
-
10
- if payload[:backtrace].present?
11
- bt = Rails.backtrace_cleaner.clean(payload[:backtrace])
12
- logger.debug bt[0,4].join("\n")
13
- end
14
-
15
- exception, message = payload[:exception]
16
- if exception
17
- logger.error "#{name} #{exception}: #{message}"
18
- end
19
- end
20
- end
6
+ LogSubscriber.attach_to(:elasticity)
21
7
  end
22
8
  end
23
9
  end
@@ -1,76 +1,82 @@
1
1
  module Elasticity
2
- # Search provides a simple interface for defining a search against an Elasticsearch
3
- # index and fetching the results in different ways and mappings.
4
- #
5
- # Example:
6
- # search = Elasticity::Search.new("people", "person", {...})
7
- # search.documents(Person)
8
- class Search
9
- attr_reader :index, :document_type, :body
10
-
11
- # Creates a new Search definitions for the given index, document_type and criteria. The
12
- # search is not performend until methods are called, each method represents a different
13
- # way of fetching and mapping the data.
14
- #
15
- # The body parameter is a hash following the exact same syntax as Elasticsearch's JSON
16
- # query language.
17
- def initialize(index, document_type, body)
18
- @index = index
19
- @document_type = document_type.freeze
20
- @body = body.freeze
21
- end
2
+ module Search
3
+ # Elasticity::Search::Definition is a struct that encapsulates all the data specific to one
4
+ # ElasticSearch search.
5
+ class Definition
6
+ attr_accessor :index_name, :document_type, :body
7
+
8
+ def initialize(index_name, document_type, body)
9
+ @index_name = index_name
10
+ @document_type = document_type
11
+ @body = body
12
+ end
13
+
14
+ def update(body_changes)
15
+ self.class.new(@index_name, @document_type, @body.deep_merge(body_changes))
16
+ end
17
+
18
+ def to_search_args
19
+ { index: @index_name, type: @document_type, body: @body }
20
+ end
22
21
 
23
- # Execute the search, fetching only ids from Elasticsearch and then mapping the results
24
- # into ActiveRecord models using the provided relation.
25
- def active_records(relation)
26
- return @active_record if defined?(@active_record)
27
- response = @index.search(@document_type, @body.merge(_source: false))
28
- @active_record = Result.new(response, ActiveRecordMapper.new(relation))
22
+ def to_msearch_args
23
+ { index: @index_name, type: @document_type, search: @body }
24
+ end
29
25
  end
30
26
 
31
- # Execute the search, fetching all documents from the index and mapping the stored attributes
32
- # into instances of the provided class. It will call document_klass.new(attrs), where attrs
33
- # are the stored attributes.
34
- def documents(document_klass)
35
- return @documents if defined?(@documents)
36
- response = @index.search(@document_type, @body)
37
- @documents = Result.new(response, DocumentMapper.new(document_klass))
27
+ # Elasticity::Search::Facade provides a simple interface for defining a search and provides
28
+ # different ways of executing it against Elasticsearch. This is usually the main entry point
29
+ # for search.
30
+ class Facade
31
+ attr_accessor :search_definition
32
+
33
+ # Creates a new facade for the given search definition, providing a set of helper methods
34
+ # to trigger different types of searches and result interpretations.
35
+ def initialize(client, search_definition)
36
+ @client = client
37
+ @search_definition = search_definition
38
+ end
39
+
40
+ # Performs the search using the default search type and returns an iterator that will yield
41
+ # hash representations of the documents.
42
+ def document_hashes
43
+ LazySearch.new(@client, @search_definition)
44
+ end
45
+
46
+ # Performs the search using the default search type and returns an iterator that will yield
47
+ # each document, converted to the provided document_klass.
48
+ def documents(document_klass)
49
+ LazySearch.new(@client, @search_definition) do |hit|
50
+ document_klass.from_hit(hit)
51
+ end
52
+ end
53
+
54
+ # Performs the search using the scan search type and the scroll API to iterate over all the documents
55
+ # as fast as possible. The sort option will be discarded.
56
+ #
57
+ # More info: http://www.elasticsearch.org/guide/en/elasticsearch/guide/current/scan-scroll.html
58
+ def scan_documents(document_klass, **options)
59
+ ScanCursor.new(@client, @search_definition, document_klass, **options)
60
+ end
61
+
62
+ # Performs the search fetching only document ids, then uses those ids to load ActiveRecord objects from the
63
+ # provided relation. It returns the relation matching the objects found on ElasticSearch.
64
+ def active_records(relation)
65
+ ActiveRecordProxy.new(@client, @search_definition, relation)
66
+ end
38
67
  end
39
68
 
40
- # Result is a collection representing the response from a search against an index. It's what gets
41
- # returned by any of the Elasticity::Search methods and it provides a lazily-evaluated and
42
- # lazily-mapped – using the provided mapper class.
43
- #
44
- # Example:
45
- #
46
- # response = {"took"=>0, "timed_out"=>false, "_shards"=>{"total"=>5, "successful"=>5, "failed"=>0}, "hits"=>{"total"=>2, "max_score"=>1.0, "hits"=>[
47
- # {"_index"=>"my_index", "_type"=>"my_type", "_id"=>"1", "_score"=>1.0, "_source"=> { "id" => 1, "name" => "Foo" },
48
- # {"_index"=>"my_index", "_type"=>"my_type", "_id"=>"2", "_score"=>1.0, "_source"=> { "id" => 2, "name" => "Bar" },
49
- # ]}}
50
- #
51
- # class AttributesMapper
52
- # def map(hits)
53
- # hits.map { |h| h["_source"] }
54
- # end
55
- # end
56
- #
57
- # r = Result.new(response, AttributesMapper.new)
58
- # r.total # => 2
59
- # r[0] # => { "id" => 1, "name" => "Foo" }
60
- #
61
- class Result
69
+ class LazySearch
62
70
  include Enumerable
63
71
 
64
- def initialize(response, mapper)
65
- @response = response
66
- @mapper = mapper
67
- end
72
+ delegate :each, :size, :length, :[], :+, :-, :&, :|, to: :search_results
68
73
 
69
- delegate :[], :each, :to_ary, :size, :+, :-, to: :mapping
74
+ attr_accessor :search_definition
70
75
 
71
- # The total number of entries as returned by ES
72
- def total
73
- @response["hits"]["total"]
76
+ def initialize(client, search_definition, &mapper)
77
+ @client = client
78
+ @search_definition = search_definition
79
+ @mapper = mapper
74
80
  end
75
81
 
76
82
  def empty?
@@ -81,46 +87,105 @@ module Elasticity
81
87
  empty?
82
88
  end
83
89
 
90
+ def total
91
+ response["hits"]["total"]
92
+ end
93
+
84
94
  def suggestions
85
- @response["suggest"] || {}
95
+ response["hits"]["suggest"] ||= {}
86
96
  end
87
97
 
88
- def mapping
89
- return @mapping if defined?(@mapping)
90
- hits = Array(@response["hits"]["hits"])
91
- @mapping = @mapper.map(hits)
98
+ def search_results
99
+ return @search_results if defined?(@search_results)
100
+
101
+ hits = response["hits"]["hits"]
102
+
103
+ @search_results = if @mapper.nil?
104
+ hits
105
+ else
106
+ hits.map { |hit| @mapper.(hit) }
107
+ end
108
+ end
109
+
110
+ private
111
+
112
+ def response
113
+ return @response if defined?(@response)
114
+ @response = @client.search(@search_definition.to_search_args)
92
115
  end
93
116
  end
94
117
 
95
- class DocumentMapper
96
- def initialize(document_klass)
97
- @document_klass = document_klass
118
+ class ScanCursor
119
+ include Enumerable
120
+
121
+ def initialize(client, search_definition, document_klass, size: 100, scroll: "1m")
122
+ @client = client
123
+ @search_definition = search_definition
124
+ @document_klass = document_klass
125
+ @size = size
126
+ @scroll = scroll
98
127
  end
99
128
 
100
- def map(hits)
101
- hits.map do |hit|
102
- attrs = hit["_source"].merge(_id: hit['_id'])
129
+ def empty?
130
+ total == 0
131
+ end
103
132
 
104
- if hit["highlight"]
105
- highlighted_attrs = attrs.dup
106
- attrs_set = Set.new
133
+ def blank?
134
+ empty?
135
+ end
107
136
 
108
- hit["highlight"].each do |name, v|
109
- name = name.gsub(/\..*\z/, '')
110
- next if attrs_set.include?(name)
111
- highlighted_attrs[name] = v
112
- attrs_set << name
113
- end
137
+ def total
138
+ search["hits"]["total"]
139
+ end
114
140
 
115
- highlighted = @document_klass.new(highlighted_attrs)
116
- end
141
+ def each_batch
142
+ enumerator.each do |group|
143
+ yield(group)
144
+ end
145
+ end
117
146
 
118
- @document_klass.new(attrs.merge(highlighted: highlighted))
147
+ def each
148
+ enumerator.each do |group|
149
+ group.each { |doc| yield(doc) }
119
150
  end
120
151
  end
152
+
153
+ private
154
+
155
+ def enumerator
156
+ Enumerator.new do |y|
157
+ response = search
158
+
159
+ loop do
160
+ response = @client.scroll(scroll_id: response["_scroll_id"], scroll: @scroll)
161
+ hits = response["hits"]["hits"]
162
+ break if hits.empty?
163
+
164
+ y << hits.map { |hit| @document_klass.from_hit(hit) }
165
+ end
166
+ end
167
+ end
168
+
169
+ def search
170
+ return @search if defined?(@search)
171
+ args = @search_definition.to_search_args
172
+ args = args.merge(search_type: 'scan', size: @size, scroll: @scroll)
173
+ @search = @client.search(args)
174
+ end
121
175
  end
122
176
 
123
- class ActiveRecordMapper
177
+ class ActiveRecordProxy
178
+ def self.from_hits(relation, hits)
179
+ ids = hits.map { |hit| hit["_id"] }
180
+
181
+ if ids.any?
182
+ id_col = "#{relation.connection.quote_column_name(relation.table_name)}.#{relation.connection.quote_column_name(relation.klass.primary_key)}"
183
+ relation.where("#{id_col} IN (?)", ids).order("FIELD(#{id_col},#{ids.join(',')})")
184
+ else
185
+ relation.none
186
+ end
187
+ end
188
+
124
189
  class Relation < ActiveSupport::ProxyObject
125
190
  def initialize(relation)
126
191
  @relation = relation
@@ -141,57 +206,59 @@ module Elasticity
141
206
  end
142
207
  end
143
208
 
144
- def initialize(relation)
145
- @relation = Relation.new(relation)
209
+ def initialize(client, search_definition, relation)
210
+ @client = client
211
+ @search_definition = search_definition.update(_source: false)
212
+ @relation = Relation.new(relation)
146
213
  end
147
214
 
148
- def map(hits)
149
- ids = hits.map { |h| h["_id"] }
215
+ def metadata
216
+ @metadata ||= { total: response["hits"]["total"], suggestions: response["hits"]["suggest"] || {} }
217
+ end
150
218
 
151
- if ids.any?
152
- id_col = "#{quote(@relation.table_name)}.#{quote(@relation.klass.primary_key)}"
153
- @relation.where(id: ids).order("FIELD(#{id_col},#{ids.join(',')})")
154
- else
155
- @relation.none
156
- end
219
+ def total
220
+ metadata[:total]
157
221
  end
158
222
 
159
- private
223
+ def suggestions
224
+ metadata[:suggestions]
225
+ end
160
226
 
161
- def quote(identifier)
162
- @relation.connection.quote_column_name(identifier)
227
+ def method_missing(name, *args, **options, &block)
228
+ filtered_relation.public_send(name, *args, **options, &block)
163
229
  end
164
- end
165
- end
166
230
 
167
- class DocumentSearchProxy < BasicObject
168
- def initialize(search, document_klass)
169
- @search = search
170
- @document_klass = document_klass
171
- end
231
+ private
172
232
 
173
- def index
174
- @search.index
175
- end
233
+ def response
234
+ @response ||= @client.search(@search_definition.to_search_args)
235
+ end
176
236
 
177
- def document_type
178
- @search.document_type
237
+ def filtered_relation
238
+ return @filtered_relation if defined?(@filtered_relation)
239
+ @filtered_relation = ActiveRecordProxy.from_hits(@relation, response["hits"]["hits"])
240
+ end
179
241
  end
180
242
 
181
- def body
182
- @search.body
183
- end
243
+ class DocumentProxy < BasicObject
244
+ def initialize(search, document_klass)
245
+ @search = search
246
+ @document_klass = document_klass
247
+ end
184
248
 
185
- def active_records(relation)
186
- @search.active_records(relation)
187
- end
249
+ delegate :search_definition, :active_records, to: :@search
188
250
 
189
- def documents
190
- @search.documents(@document_klass)
191
- end
251
+ def documents
252
+ @search.documents(@document_klass)
253
+ end
192
254
 
193
- def method_missing(method_name, *args, &block)
194
- documents.public_send(method_name, *args, &block)
255
+ def scan_documents(**options)
256
+ @search.scan_documents(@document_klass, **options)
257
+ end
258
+
259
+ def method_missing(method_name, *args, &block)
260
+ documents.public_send(method_name, *args, &block)
261
+ end
195
262
  end
196
263
  end
197
264
  end
@@ -0,0 +1,15 @@
1
+ module Elasticity
2
+ module Strategies
3
+ class IndexError < StandardError
4
+ attr_reader :index_base_name
5
+
6
+ def initialize(index_base_name, message)
7
+ @index_name = index_name
8
+ super("#{index_name}: #{message}")
9
+ end
10
+ end
11
+
12
+ autoload :SingleIndex, "elasticity/strategies/single_index"
13
+ autoload :AliasIndex, "elasticity/strategies/alias_index"
14
+ end
15
+ end
@@ -0,0 +1,255 @@
1
+ module Elasticity
2
+ module Strategies
3
+ # This strategy keeps two aliases that might be mapped to the same index or to different indexes, allowing
4
+ # runtime changes by simply atomically updating the aliases. For example, look at the remap method
5
+ # implementation.
6
+ class AliasIndex
7
+ STATUSES = [:missing, :ok]
8
+
9
+ def initialize(client, index_base_name)
10
+ @client = client
11
+ @main_alias = index_base_name
12
+ @update_alias = "#{index_base_name}_update"
13
+ end
14
+
15
+ # Remap allows zero-downtime/zero-dataloss remap of elasticsearch indexes. Here is the overview
16
+ # of how it works:
17
+ #
18
+ # 1. Creates a new index with the new mapping
19
+ # 2. Update the aliases so that writes go to the new index and reads go to both indexes.
20
+ # 3. Use scan and scroll to iterate over all the documents in the old index, moving them to the
21
+ # new index.
22
+ # 4. Update the aliases so that all operations go to the new index.
23
+ # 5. Deletes the old index.
24
+ #
25
+ # It does a little bit more to ensure consistency and to handle race-conditions. For more details
26
+ # look at the implementation.
27
+ def remap(index_def)
28
+ main_indexes = self.main_indexes
29
+ update_indexes = self.update_indexes
30
+
31
+ if main_indexes.size != 1 || update_indexes.size != 1 || main_indexes != update_indexes
32
+ raise "Index can't be remapped right now, check if another remapping is already happening"
33
+ end
34
+
35
+ new_index = create_index(index_def)
36
+ original_index = main_indexes[0]
37
+
38
+ begin
39
+ # Configure aliases so that search includes the old index and the new index, and writes are made to
40
+ # the new index.
41
+ @client.index_update_aliases(body: {
42
+ actions: [
43
+ { remove: { index: original_index, alias: @update_alias } },
44
+ { add: { index: new_index, alias: @update_alias } },
45
+ { add: { index: new_index, alias: @main_alias }},
46
+ ]
47
+ })
48
+
49
+ @client.index_flush(index: original_index)
50
+ cursor = @client.search index: original_index, search_type: 'scan', scroll: '1m', _source: false, size: 100
51
+ loop do
52
+ cursor = @client.scroll(scroll_id: cursor['_scroll_id'], scroll: '1m')
53
+ hits = cursor['hits']['hits']
54
+ break if hits.empty?
55
+
56
+ # Fetch documents based on the ids that existed when the migration started, to make sure we only migrate
57
+ # documents that haven't been deleted.
58
+ id_docs = hits.map do |hit|
59
+ { _index: original_index, _type: hit["_type"], _id: hit["_id"] }
60
+ end
61
+
62
+ docs = @client.mget(body: { docs: id_docs }, refresh: true)["docs"]
63
+ break if docs.empty?
64
+
65
+ # Move only documents that still exist on the old index into the new index.
66
+ ops = []
67
+ docs.each do |doc|
68
+ ops << { index: { _index: new_index, _type: doc["_type"], _id: doc["_id"], data: doc["_source"] } } if doc["found"]
69
+ end
70
+
71
+ @client.bulk(body: ops)
72
+
73
+ # Deal with race conditions by removing from the new index any document that doesn't exist in the old index anymore.
74
+ ops = []
75
+ @client.mget(body: { docs: id_docs }, refresh: true)["docs"].each_with_index do |new_doc, idx|
76
+ if docs[idx]["found"] && !new_doc["found"]
77
+ ops << { delete: { _index: new_index, _type: new_doc["_type"], _id: new_doc["_id"] } }
78
+ end
79
+ end
80
+
81
+ @client.bulk(body: ops) unless ops.empty?
82
+ end
83
+
84
+ # Update aliases to only point to the new index.
85
+ @client.index_update_aliases(body: {
86
+ actions: [
87
+ { remove: { index: original_index, alias: @main_alias } },
88
+ ]
89
+ })
90
+ @client.index_delete(index: original_index)
91
+
92
+ rescue
93
+ @client.index_update_aliases(body: {
94
+ actions: [
95
+ { add: { index: original_index, alias: @update_alias } },
96
+ { remove: { index: new_index, alias: @update_alias } },
97
+ ]
98
+ })
99
+
100
+ @client.index_flush(index: new_index)
101
+ cursor = @client.search index: new_index, search_type: 'scan', scroll: '1m', size: 100
102
+ loop do
103
+ cursor = @client.scroll(scroll_id: cursor['_scroll_id'], scroll: '1m')
104
+ hits = cursor['hits']['hits']
105
+ break if hits.empty?
106
+
107
+ # Move all the documents that exist on the new index back to the old index
108
+ ops = []
109
+ hits.each do |doc|
110
+ ops << { index: { _index: original_index, _type: doc["_type"], _id: doc["_id"], data: doc["_source"] } }
111
+ end
112
+
113
+ @client.bulk(body: ops)
114
+ end
115
+
116
+ @client.index_flush(index: original_index)
117
+ @client.index_update_aliases(body: {
118
+ actions: [
119
+ { remove: { index: new_index, alias: @main_alias } },
120
+ ]
121
+ })
122
+ @client.index_delete(index: new_index)
123
+
124
+ raise
125
+ end
126
+ end
127
+
128
+ def status
129
+ search_exists = @client.index_exists_alias(name: @main_alias)
130
+ update_exists = @client.index_exists_alias(name: @update_alias)
131
+
132
+ case
133
+ when search_exists && update_exists
134
+ :ok
135
+ when !search_exists && !update_exists
136
+ :missing
137
+ else
138
+ :inconsistent
139
+ end
140
+ end
141
+
142
+ def missing?
143
+ status == :missing
144
+ end
145
+
146
+ def main_indexes
147
+ @client.index_get_aliases(index: "#{@main_alias}-*", name: @main_alias).keys
148
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
149
+ []
150
+ end
151
+
152
+ def update_indexes
153
+ @client.index_get_aliases(index: "#{@main_alias}-*", name: @update_alias).keys
154
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
155
+ []
156
+ end
157
+
158
+ def create(index_def)
159
+ if missing?
160
+ index_name = create_index(index_def)
161
+ @client.index_update_aliases(body: {
162
+ actions: [
163
+ { add: { index: index_name, alias: @main_alias } },
164
+ { add: { index: index_name, alias: @update_alias } },
165
+ ]
166
+ })
167
+ else
168
+ raise IndexError.new(@main_alias, "index already exists")
169
+ end
170
+ end
171
+
172
+ def create_if_undefined(index_def)
173
+ create(index_def) if missing?
174
+ end
175
+
176
+ def delete
177
+ @client.index_delete(index: "#{@main_alias}-*")
178
+ end
179
+
180
+ def delete_if_defined
181
+ delete unless missing?
182
+ end
183
+
184
+ def recreate(index_def)
185
+ delete_if_defined
186
+ create(index_def)
187
+ end
188
+
189
+ def index_document(type, id, attributes)
190
+ res = @client.index(index: @update_alias, type: type, id: id, body: attributes)
191
+
192
+ if id = res["_id"]
193
+ [id, res["created"]]
194
+ else
195
+ raise IndexError.new(@update_alias, "failed to index document")
196
+ end
197
+ end
198
+
199
+ def delete_document(type, id)
200
+ ops = (main_indexes | update_indexes).map do |index|
201
+ { delete: { _index: index, _type: type, _id: id } }
202
+ end
203
+
204
+ @client.bulk(body: ops)
205
+ end
206
+
207
+ def get_document(type, id)
208
+ @client.get(index: @main_alias, type: type, id: id)
209
+ end
210
+
211
+ def search(type, body)
212
+ Search::Facade.new(@client, Search::Definition.new(@main_alias, type, body))
213
+ end
214
+
215
+ def delete_by_query(type, body)
216
+ @client.delete_by_query(index: @main_alias, type: type, body: body)
217
+ end
218
+
219
+ def bulk
220
+ b = Bulk::Alias.new(@client, @update_alias, main_indexes)
221
+ yield b
222
+ b.execute
223
+ end
224
+
225
+ def flush
226
+ @client.index_flush(index: @update_alias)
227
+ end
228
+
229
+ def settings
230
+ args = { index: @main_alias }
231
+ settings = @client.index_get_settings(index: @main_alias)
232
+ settings[@main_alias]["settings"]
233
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
234
+ nil
235
+ end
236
+
237
+ def mappings
238
+ args = { index: @main_alias }
239
+ mapping = @client.index_get_mapping(index: @main_alias)
240
+ mapping[@main_alias]["mappings"]
241
+ rescue Elasticsearch::Transport::Transport::Errors::NotFound
242
+ nil
243
+ end
244
+
245
+ private
246
+
247
+ def create_index(index_def)
248
+ ts = Time.now.utc.strftime("%Y-%m-%d_%H:%M:%S.%6N")
249
+ index_name = "#{@main_alias}-#{ts}"
250
+ @client.index_create(index: index_name, body: index_def)
251
+ index_name
252
+ end
253
+ end
254
+ end
255
+ end