es_importer 0.3.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f5139f9596f419bb2e9be23229becb9ff26c180c38e8f2f4a1b3bcaf8f2fb977
4
- data.tar.gz: a3ee1f507b0ffeb95369300baad2cde7af60e10aff67fe1deae74984690c8349
3
+ metadata.gz: c535d2850816bfd5e36c616057c6db755cbe27e3e99474b6149cbc74de188bc4
4
+ data.tar.gz: 984636ea2d8aaa849dfab4cdf4d156632eefd468ff4ea67a6a96228a73b9a1b3
5
5
  SHA512:
6
- metadata.gz: 1ffe0e7c95b2c3f79eb1a97218c478863a32bd4bcd97d1fcff7db61ecec987cbb747dd53ed00d72458be77f40e03b2003fc8d44cabb2b76fde323d55841f75e3
7
- data.tar.gz: '084ebc7fbac313071e5ebd9f054d1c133407260d56518bfd20aeb633369f4f944fd81543240662a1e95d7c1d634a5d64d148733cd2f4f1cce67a03b598a6fe56'
6
+ metadata.gz: 0ad7fb8717ef68d7f61d40d09634fe1bd9df9b998e648602c9b6937efec17166dd5fea8ebdf52654ce7fc18a28e058b6668115d2813ad3fabd43e18ddd530897
7
+ data.tar.gz: 3a1194226e3063a58b66de11f4e803141fb669efc3e589815b9c282350d076c232e84b17e231a4520a56f2352eac6d19f91b22258dd2d341ba4fdf40bc6fe0b6
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- es_importer (0.3.0)
4
+ es_importer (0.4.0)
5
5
  aws-sdk
6
6
  elasticsearch
7
7
  faraday
@@ -12,13 +12,13 @@ PATH
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- aws-sdk (2.11.28)
16
- aws-sdk-resources (= 2.11.28)
17
- aws-sdk-core (2.11.28)
15
+ aws-sdk (2.11.33)
16
+ aws-sdk-resources (= 2.11.33)
17
+ aws-sdk-core (2.11.33)
18
18
  aws-sigv4 (~> 1.0)
19
19
  jmespath (~> 1.0)
20
- aws-sdk-resources (2.11.28)
21
- aws-sdk-core (= 2.11.28)
20
+ aws-sdk-resources (2.11.33)
21
+ aws-sdk-core (= 2.11.33)
22
22
  aws-sigv4 (1.0.2)
23
23
  diff-lcs (1.3)
24
24
  elasticsearch (6.0.2)
@@ -39,7 +39,7 @@ GEM
39
39
  aws-sdk-resources (>= 2, < 3)
40
40
  faraday (~> 0.9)
41
41
  ffi (1.9.23)
42
- jmespath (1.3.1)
42
+ jmespath (1.4.0)
43
43
  multi_json (1.13.1)
44
44
  multipart-post (2.0.0)
45
45
  rake (10.5.0)
data/README.md CHANGED
@@ -25,7 +25,7 @@ See inline comments:
25
25
  require 'es_importer'
26
26
 
27
27
  # configure client
28
- EsImporter.configure('http://localhost:9200')
28
+ EsImporter.configure('http://localhost:9200', logger: Logger.new($stdout))
29
29
 
30
30
  # generate some users
31
31
  users = (1..100).to_a.map do |i|
@@ -97,9 +97,18 @@ EsImporter.add_importer(importer)
97
97
  # create index
98
98
  EsImporter.create_index!(:users)
99
99
 
100
- # import users
100
+ # import a single user
101
+ EsImporter.import(:users, users[0])
102
+
103
+ # import users sequentially
101
104
  EsImporter.import(:users, users)
102
105
 
106
+ # or import in bulk
107
+ EsImporter.import_in_bulk(:users, users)
108
+
109
+ # or just transform a document without importing it
110
+ EsImporter.transform_document(:users, users[0])
111
+
103
112
  # delete index
104
113
  EsImporter.delete_index!(:users)
105
114
 
@@ -107,7 +116,6 @@ EsImporter.delete_index!(:users)
107
116
 
108
117
  AWS elastic instance is also supported; the region is extracted from the URL and credentials are set from the Ruby SDK.
109
118
 
110
-
111
119
  ## Development
112
120
 
113
121
  After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
data/lib/es_importer.rb CHANGED
@@ -46,7 +46,7 @@ module EsImporter
46
46
 
47
47
  # create index
48
48
  def create_index!(index)
49
- puts "Creating #{index} index at #{@es_uri} ..."
49
+ @logger&.debug("Creating #{index} index at #{@es_uri} ...")
50
50
 
51
51
  mapping = @importers.dig(index, :mapping)
52
52
  keywords = @importers.dig(index, :keywords)
@@ -64,7 +64,7 @@ module EsImporter
64
64
  if v.kind_of?(Symbol)
65
65
  field_def = {type: v}
66
66
  # field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
67
- elsif v.kind_of?(Hash)
67
+ elsif v.kind_of?(Hash)
68
68
  field_def = v
69
69
  end
70
70
 
@@ -86,80 +86,126 @@ module EsImporter
86
86
  @client.indices.create index: index, body: body
87
87
 
88
88
  rescue => error
89
- puts "Error creating #{index} index. #{error.class}: #{error.message}"
89
+ @logger&.debug("Error creating #{index} index. #{error.class}: #{error.message}")
90
90
  raise
91
91
  end
92
92
 
93
93
  # delete index
94
94
  def delete_index!(index)
95
- puts "Deleting #{index} index at #{@es_uri} ..."
95
+ @logger&.debug("Deleting #{index} index at #{@es_uri} ...")
96
96
  @client.indices.delete index: index
97
97
  rescue => error
98
- puts "Error deleting #{index} index. #{error.class}: #{error.message}"
98
+ @logger&.debug("Error deleting #{index} index. #{error.class}: #{error.message}")
99
99
  raise
100
100
  end
101
101
 
102
+ # transform document using converters
103
+ def transform_document(index, document)
104
+
105
+ # convert all keys to strings
106
+ document = _deep_transform_keys_in_object(document, &:to_s)
107
+
108
+ # convert keys or add new ones
109
+ @importers.dig(index, :converters)&.each do |keys, converter|
110
+ keys = keys.split('.')
111
+
112
+ # transform existing key
113
+ if value = document.dig(*keys)
114
+ document[keys.first] = converter.call(value, document) if keys.size == 1 # lvl 1
115
+ keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
116
+
117
+ # add new key
118
+ else
119
+ missing_key_index = nil
120
+ keys.each_with_index do |key, i|
121
+ missing_key_index = i and break unless document.dig(*keys.first(i + 1))
122
+ end
123
+
124
+ tail_keys = keys[missing_key_index..-1]
125
+ tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
126
+
127
+ tail_keys.each_with_index do |key, i|
128
+ tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
129
+ tail_hash = tail_hash[tail_keys[i]]
130
+ end
131
+ end
132
+ end
133
+
134
+ # add elastic id
135
+ id_key = @importers.dig(index, :id_key)
136
+ generated_id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
137
+ generated_id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
138
+ document['es_id'] = generated_id
139
+
140
+ # transformed document
141
+ document
142
+ end
143
+
102
144
  # import documents
103
145
  def import(index, documents)
104
146
 
147
+ # accept single document
148
+ documents = [documents] if documents.is_a?(Hash)
149
+
105
150
  # import stats init
106
- start_time = Time.now
107
- failed = 0; imported = 0
151
+ start_time = Time.now
152
+ imported = {count: 0}
153
+ failed = {count: 0, items: []}
108
154
 
109
155
  # insert into elastic
110
156
  documents.each_with_index do |document, i|
111
157
 
112
- # convert all keys to strings
113
- document = _deep_transform_keys_in_object(document, &:to_s)
158
+ # transform document
159
+ transformed_document = transform_document(index, document)
114
160
 
115
- # generate id
116
- id_key = @importers.dig(index, :id_key)
117
- id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
118
- id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
161
+ # save it into index
162
+ begin
163
+ @client.index index: index, type: index.to_s.chomp('s'), id: transformed_document['es_id'], body: transformed_document
164
+ imported[:count] +=1
165
+ rescue => e
166
+ if @logger&.debug?
167
+ @logger.debug(e.class)
168
+ @logger.debug(e.message)
169
+ end
170
+ failed[:count] +=1
171
+ failed[:items] << {id: transformed_document['es_id'], error: e.message}
172
+ end
173
+ end
119
174
 
120
- # convert keys or add new ones
121
- @importers.dig(index, :converters)&.each do |keys, converter|
122
- keys = keys.split('.')
175
+ # print import statistics
176
+ if @logger&.debug?
177
+ @logger.debug(">>> #{index} import statistics")
178
+ @logger.debug("Imported: #{imported}")
179
+ @logger.debug("Failed: #{failed}")
180
+ @logger.debug("Time spent: #{Time.now - start_time} sec")
181
+ end
123
182
 
124
- # transform existing key
125
- if value = document.dig(*keys)
126
- document[keys.first] = converter.call(value, document) if keys.size == 1 # lvl 1
127
- keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
183
+ # return stats
184
+ {imported: imported, failed: failed}
185
+ end
128
186
 
129
- # add new key
130
- else
131
- missing_key_index = nil
132
- keys.each_with_index do |key, i|
133
- missing_key_index = i and break unless document.dig(*keys.first(i + 1))
134
- end
187
+ def import_in_bulk(index, documents)
188
+ # import stats init
189
+ start_time = Time.now
135
190
 
136
- tail_keys = keys[missing_key_index..-1]
137
- tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
191
+ # transform documents and build bulk payload
192
+ transformed_documents_for_bulk = documents.map do |document|
193
+ transformed_document = transform_document(index, document)
194
+ es_id = transformed_document.delete('es_id')
195
+ {index: { _index: index, _type: index.to_s.chomp('s'), _id: es_id, data: transformed_document}}
196
+ end
138
197
 
139
- tail_keys.each_with_index do |key, i|
140
- tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
141
- tail_hash = tail_hash[tail_keys[i]]
142
- end
143
- end
144
- end
198
+ # import
199
+ resp = @client.bulk body: transformed_documents_for_bulk
145
200
 
146
- begin
147
- @client.index index: index, type: index.to_s.chomp('s'), id: id, body: document
148
- puts "##{i + 1} imported #{id}" if @logger
149
- imported = imported + 1
150
- rescue => e
151
- puts "##{i + 1} failed #{id}" if @logger
152
- puts e.class; puts e.message
153
- failed = failed + 1
154
- end
201
+ # print import statistics
202
+ if @logger&.debug?
203
+ @logger.debug(">>> #{index} import statistics")
204
+ @logger.debug("Time spent: #{Time.now - start_time} sec")
155
205
  end
156
206
 
157
- # print import statistics
158
- puts; puts "#{index} import statistics"; puts '-' * 100
159
- puts "Failed: #{failed}"
160
- puts "Imported: #{imported}"
161
- puts "Time spent: #{Time.now - start_time} sec"
162
- puts
207
+ # return deserialized es response
208
+ resp
163
209
  end
164
210
 
165
211
  # taken from https://github.com/rails/rails/blob/f213e926892020f9ab6c8974612c59e2ba959253/activesupport/lib/active_support/core_ext/hash/keys.rb#L145
@@ -1,3 +1,3 @@
1
1
  module EsImporter
2
- VERSION = "0.3.0"
2
+ VERSION = "0.4.0"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: es_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damir Roso
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-04-13 00:00:00.000000000 Z
11
+ date: 2018-04-27 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler