es_importer 0.3.0 → 0.4.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5139f9596f419bb2e9be23229becb9ff26c180c38e8f2f4a1b3bcaf8f2fb977
4
- data.tar.gz: a3ee1f507b0ffeb95369300baad2cde7af60e10aff67fe1deae74984690c8349
3
+ metadata.gz: c535d2850816bfd5e36c616057c6db755cbe27e3e99474b6149cbc74de188bc4
4
+ data.tar.gz: 984636ea2d8aaa849dfab4cdf4d156632eefd468ff4ea67a6a96228a73b9a1b3
5
5
  SHA512:
6
- metadata.gz: 1ffe0e7c95b2c3f79eb1a97218c478863a32bd4bcd97d1fcff7db61ecec987cbb747dd53ed00d72458be77f40e03b2003fc8d44cabb2b76fde323d55841f75e3
7
- data.tar.gz: '084ebc7fbac313071e5ebd9f054d1c133407260d56518bfd20aeb633369f4f944fd81543240662a1e95d7c1d634a5d64d148733cd2f4f1cce67a03b598a6fe56'
6
+ metadata.gz: 0ad7fb8717ef68d7f61d40d09634fe1bd9df9b998e648602c9b6937efec17166dd5fea8ebdf52654ce7fc18a28e058b6668115d2813ad3fabd43e18ddd530897
7
+ data.tar.gz: 3a1194226e3063a58b66de11f4e803141fb669efc3e589815b9c282350d076c232e84b17e231a4520a56f2352eac6d19f91b22258dd2d341ba4fdf40bc6fe0b6
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- es_importer (0.3.0)
4
+ es_importer (0.4.0)
5
5
  aws-sdk
6
6
  elasticsearch
7
7
  faraday
@@ -12,13 +12,13 @@ PATH
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- aws-sdk (2.11.28)
16
- aws-sdk-resources (= 2.11.28)
17
- aws-sdk-core (2.11.28)
15
+ aws-sdk (2.11.33)
16
+ aws-sdk-resources (= 2.11.33)
17
+ aws-sdk-core (2.11.33)
18
18
  aws-sigv4 (~> 1.0)
19
19
  jmespath (~> 1.0)
20
- aws-sdk-resources (2.11.28)
21
- aws-sdk-core (= 2.11.28)
20
+ aws-sdk-resources (2.11.33)
21
+ aws-sdk-core (= 2.11.33)
22
22
  aws-sigv4 (1.0.2)
23
23
  diff-lcs (1.3)
24
24
  elasticsearch (6.0.2)
@@ -39,7 +39,7 @@ GEM
39
39
  aws-sdk-resources (>= 2, < 3)
40
40
  faraday (~> 0.9)
41
41
  ffi (1.9.23)
42
- jmespath (1.3.1)
42
+ jmespath (1.4.0)
43
43
  multi_json (1.13.1)
44
44
  multipart-post (2.0.0)
45
45
  rake (10.5.0)
data/README.md CHANGED
@@ -25,7 +25,7 @@ See inline comments:
25
25
  require 'es_importer'
26
26
 
27
27
  # configure client
28
- EsImporter.configure('http://localhost:9200')
28
+ EsImporter.configure('http://localhost:9200', logger: Logger.new($stdout))
29
29
 
30
30
  # generate some users
31
31
  users = (1..100).to_a.map do |i|
@@ -97,9 +97,18 @@ EsImporter.add_importer(importer)
97
97
  # create index
98
98
  EsImporter.create_index!(:users)
99
99
 
100
- # import users
100
+ # import a single user
101
+ EsImporter.import(:users, users[0])
102
+
103
+ # import users sequentially
101
104
  EsImporter.import(:users, users)
102
105
 
106
+ # or import in bulk
107
+ EsImporter.import_in_bulk(:users, users)
108
+
109
+ # or just transform a document without importing it
110
+ EsImporter.transform_document(:users, users[0])
111
+
103
112
  # delete index
104
113
  EsImporter.delete_index!(:users)
105
114
 
@@ -107,7 +116,6 @@ EsImporter.delete_index!(:users)
107
116
 
108
117
  AWS elastic instance is also supported; the region is extracted from the URL and credentials are taken from the Ruby SDK.
109
118
 
110
-
111
119
  ## Development
112
120
 
113
121
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/lib/es_importer.rb CHANGED
@@ -46,7 +46,7 @@ module EsImporter
46
46
 
47
47
  # create index
48
48
  def create_index!(index)
49
- puts "Creating #{index} index at #{@es_uri} ..."
49
+ @logger&.debug("Creating #{index} index at #{@es_uri} ...")
50
50
 
51
51
  mapping = @importers.dig(index, :mapping)
52
52
  keywords = @importers.dig(index, :keywords)
@@ -64,7 +64,7 @@ module EsImporter
64
64
  if v.kind_of?(Symbol)
65
65
  field_def = {type: v}
66
66
  # field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
67
- elsif v.kind_of?(Hash)
67
+ elsif v.kind_of?(Hash)
68
68
  field_def = v
69
69
  end
70
70
 
@@ -86,80 +86,126 @@ module EsImporter
86
86
  @client.indices.create index: index, body: body
87
87
 
88
88
  rescue => error
89
- puts "Error creating #{index} index. #{error.class}: #{error.message}"
89
+ @logger&.debug("Error creating #{index} index. #{error.class}: #{error.message}")
90
90
  raise
91
91
  end
92
92
 
93
93
  # delete index
94
94
  def delete_index!(index)
95
- puts "Deleting #{index} index at #{@es_uri} ..."
95
+ @logger&.debug("Deleting #{index} index at #{@es_uri} ...")
96
96
  @client.indices.delete index: index
97
97
  rescue => error
98
- puts "Error deleting #{index} index. #{error.class}: #{error.message}"
98
+ @logger&.debug("Error deleting #{index} index. #{error.class}: #{error.message}")
99
99
  raise
100
100
  end
101
101
 
102
+ # transform document using converters
103
+ def transform_document(index, document)
104
+
105
+ # convert all keys to strings
106
+ document = _deep_transform_keys_in_object(document, &:to_s)
107
+
108
+ # convert keys or add new ones
109
+ @importers.dig(index, :converters)&.each do |keys, converter|
110
+ keys = keys.split('.')
111
+
112
+ # transform existing key
113
+ if value = document.dig(*keys)
114
+ document[keys.first] = converter.call(value, document) if keys.size == 1 # lvl 1
115
+ keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
116
+
117
+ # add new key
118
+ else
119
+ missing_key_index = nil
120
+ keys.each_with_index do |key, i|
121
+ missing_key_index = i and break unless document.dig(*keys.first(i + 1))
122
+ end
123
+
124
+ tail_keys = keys[missing_key_index..-1]
125
+ tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
126
+
127
+ tail_keys.each_with_index do |key, i|
128
+ tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
129
+ tail_hash = tail_hash[tail_keys[i]]
130
+ end
131
+ end
132
+ end
133
+
134
+ # add elastic id
135
+ id_key = @importers.dig(index, :id_key)
136
+ generated_id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
137
+ generated_id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
138
+ document['es_id'] = generated_id
139
+
140
+ # transformed document
141
+ document
142
+ end
143
+
102
144
  # import documents
103
145
  def import(index, documents)
104
146
 
147
+ # accept single document
148
+ documents = [documents] if documents.is_a?(Hash)
149
+
105
150
  # import stats init
106
- start_time = Time.now
107
- failed = 0; imported = 0
151
+ start_time = Time.now
152
+ imported = {count: 0}
153
+ failed = {count: 0, items: []}
108
154
 
109
155
  # insert into elastic
110
156
  documents.each_with_index do |document, i|
111
157
 
112
- # convert all keys to strings
113
- document = _deep_transform_keys_in_object(document, &:to_s)
158
+ # transform document
159
+ transformed_document = transform_document(index, document)
114
160
 
115
- # generate id
116
- id_key = @importers.dig(index, :id_key)
117
- id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
118
- id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
161
+ # save it into index
162
+ begin
163
+ @client.index index: index, type: index.to_s.chomp('s'), id: transformed_document['es_id'], body: transformed_document
164
+ imported[:count] +=1
165
+ rescue => e
166
+ if @logger&.debug?
167
+ @logger.debug(e.class)
168
+ @logger.debug(e.message)
169
+ end
170
+ failed[:count] +=1
171
+ failed[:items] << {id: transformed_document['es_id'], error: e.message}
172
+ end
173
+ end
119
174
 
120
- # convert keys or add new ones
121
- @importers.dig(index, :converters)&.each do |keys, converter|
122
- keys = keys.split('.')
175
+ # print import statistics
176
+ if @logger&.debug?
177
+ @logger.debug(">>> #{index} import statistics")
178
+ @logger.debug("Imported: #{imported}")
179
+ @logger.debug("Failed: #{failed}")
180
+ @logger.debug("Time spent: #{Time.now - start_time} sec")
181
+ end
123
182
 
124
- # transform existing key
125
- if value = document.dig(*keys)
126
- document[keys.first] = converter.call(value, document) if keys.size == 1 # lvl 1
127
- keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
183
+ # return stats
184
+ {imported: imported, failed: failed}
185
+ end
128
186
 
129
- # add new key
130
- else
131
- missing_key_index = nil
132
- keys.each_with_index do |key, i|
133
- missing_key_index = i and break unless document.dig(*keys.first(i + 1))
134
- end
187
+ def import_in_bulk(index, documents)
188
+ # import stats init
189
+ start_time = Time.now
135
190
 
136
- tail_keys = keys[missing_key_index..-1]
137
- tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
191
+ # transform documents and build bulk payload
192
+ transformed_documents_for_bulk = documents.map do |document|
193
+ transformed_document = transform_document(index, document)
194
+ es_id = transformed_document.delete('es_id')
195
+ {index: { _index: index, _type: index.to_s.chomp('s'), _id: es_id, data: transformed_document}}
196
+ end
138
197
 
139
- tail_keys.each_with_index do |key, i|
140
- tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
141
- tail_hash = tail_hash[tail_keys[i]]
142
- end
143
- end
144
- end
198
+ # import
199
+ resp = @client.bulk body: transformed_documents_for_bulk
145
200
 
146
- begin
147
- @client.index index: index, type: index.to_s.chomp('s'), id: id, body: document
148
- puts "##{i + 1} imported #{id}" if @logger
149
- imported = imported + 1
150
- rescue => e
151
- puts "##{i + 1} failed #{id}" if @logger
152
- puts e.class; puts e.message
153
- failed = failed + 1
154
- end
201
+ # print import statistics
202
+ if @logger&.debug?
203
+ @logger.debug(">>> #{index} import statistics")
204
+ @logger.debug("Time spent: #{Time.now - start_time} sec")
155
205
  end
156
206
 
157
- # print import statistics
158
- puts; puts "#{index} import statistics"; puts '-' * 100
159
- puts "Failed: #{failed}"
160
- puts "Imported: #{imported}"
161
- puts "Time spent: #{Time.now - start_time} sec"
162
- puts
207
+ # return deserialized es response
208
+ resp
163
209
  end
164
210
 
165
211
  # taken from https://github.com/rails/rails/blob/f213e926892020f9ab6c8974612c59e2ba959253/activesupport/lib/active_support/core_ext/hash/keys.rb#L145
@@ -1,3 +1,3 @@
1
1
  module EsImporter
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: es_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damir Roso
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-13 00:00:00.000000000 Z
11
+ date: 2018-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler