es_importer 0.3.0 → 0.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -7
- data/README.md +11 -3
- data/lib/es_importer.rb +95 -49
- data/lib/es_importer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c535d2850816bfd5e36c616057c6db755cbe27e3e99474b6149cbc74de188bc4
|
4
|
+
data.tar.gz: 984636ea2d8aaa849dfab4cdf4d156632eefd468ff4ea67a6a96228a73b9a1b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ad7fb8717ef68d7f61d40d09634fe1bd9df9b998e648602c9b6937efec17166dd5fea8ebdf52654ce7fc18a28e058b6668115d2813ad3fabd43e18ddd530897
|
7
|
+
data.tar.gz: 3a1194226e3063a58b66de11f4e803141fb669efc3e589815b9c282350d076c232e84b17e231a4520a56f2352eac6d19f91b22258dd2d341ba4fdf40bc6fe0b6
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
es_importer (0.3.0)
|
4
|
+
es_importer (0.4.0)
|
5
5
|
aws-sdk
|
6
6
|
elasticsearch
|
7
7
|
faraday
|
@@ -12,13 +12,13 @@ PATH
|
|
12
12
|
GEM
|
13
13
|
remote: https://rubygems.org/
|
14
14
|
specs:
|
15
|
-
aws-sdk (2.11.
|
16
|
-
aws-sdk-resources (= 2.11.
|
17
|
-
aws-sdk-core (2.11.
|
15
|
+
aws-sdk (2.11.33)
|
16
|
+
aws-sdk-resources (= 2.11.33)
|
17
|
+
aws-sdk-core (2.11.33)
|
18
18
|
aws-sigv4 (~> 1.0)
|
19
19
|
jmespath (~> 1.0)
|
20
|
-
aws-sdk-resources (2.11.
|
21
|
-
aws-sdk-core (= 2.11.
|
20
|
+
aws-sdk-resources (2.11.33)
|
21
|
+
aws-sdk-core (= 2.11.33)
|
22
22
|
aws-sigv4 (1.0.2)
|
23
23
|
diff-lcs (1.3)
|
24
24
|
elasticsearch (6.0.2)
|
@@ -39,7 +39,7 @@ GEM
|
|
39
39
|
aws-sdk-resources (>= 2, < 3)
|
40
40
|
faraday (~> 0.9)
|
41
41
|
ffi (1.9.23)
|
42
|
-
jmespath (1.
|
42
|
+
jmespath (1.4.0)
|
43
43
|
multi_json (1.13.1)
|
44
44
|
multipart-post (2.0.0)
|
45
45
|
rake (10.5.0)
|
data/README.md
CHANGED
@@ -25,7 +25,7 @@ See inline comments:
|
|
25
25
|
require 'es_importer'
|
26
26
|
|
27
27
|
# configure client
|
28
|
-
EsImporter.configure('http://localhost:9200')
|
28
|
+
EsImporter.configure('http://localhost:9200', logger: Logger.new($stdout))
|
29
29
|
|
30
30
|
# generate some users
|
31
31
|
users = (1..100).to_a.map do |i|
|
@@ -97,9 +97,18 @@ EsImporter.add_importer(importer)
|
|
97
97
|
# create index
|
98
98
|
EsImporter.create_index!(:users)
|
99
99
|
|
100
|
-
# import users
|
100
|
+
# import a single user
|
101
|
+
EsImporter.import(:users, users[0])
|
102
|
+
|
103
|
+
# import users sequentially
|
101
104
|
EsImporter.import(:users, users)
|
102
105
|
|
106
|
+
# or import in bulk
|
107
|
+
EsImporter.import_in_bulk(:users, users)
|
108
|
+
|
109
|
+
# or just transform a document without importing it
|
110
|
+
EsImporter.transform_document(:users, users[0])
|
111
|
+
|
103
112
|
# delete index
|
104
113
|
EsImporter.delete_index!(:users)
|
105
114
|
|
@@ -107,7 +116,6 @@ EsImporter.delete_index!(:users)
|
|
107
116
|
|
108
117
|
AWS elastic instance is also supported; the region is extracted from the URL and credentials are set from the Ruby SDK.
|
109
118
|
|
110
|
-
|
111
119
|
## Development
|
112
120
|
|
113
121
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/es_importer.rb
CHANGED
@@ -46,7 +46,7 @@ module EsImporter
|
|
46
46
|
|
47
47
|
# create index
|
48
48
|
def create_index!(index)
|
49
|
-
|
49
|
+
@logger&.debug("Creating #{index} index at #{@es_uri} ...")
|
50
50
|
|
51
51
|
mapping = @importers.dig(index, :mapping)
|
52
52
|
keywords = @importers.dig(index, :keywords)
|
@@ -64,7 +64,7 @@ module EsImporter
|
|
64
64
|
if v.kind_of?(Symbol)
|
65
65
|
field_def = {type: v}
|
66
66
|
# field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
|
67
|
-
|
67
|
+
elsif v.kind_of?(Hash)
|
68
68
|
field_def = v
|
69
69
|
end
|
70
70
|
|
@@ -86,80 +86,126 @@ module EsImporter
|
|
86
86
|
@client.indices.create index: index, body: body
|
87
87
|
|
88
88
|
rescue => error
|
89
|
-
|
89
|
+
@logger&.debug("Error creating #{index} index. #{error.class}: #{error.message}")
|
90
90
|
raise
|
91
91
|
end
|
92
92
|
|
93
93
|
# delete index
|
94
94
|
def delete_index!(index)
|
95
|
-
|
95
|
+
@logger&.debug("Deleting #{index} index at #{@es_uri} ...")
|
96
96
|
@client.indices.delete index: index
|
97
97
|
rescue => error
|
98
|
-
|
98
|
+
@logger&.debug("Error deleting #{index} index. #{error.class}: #{error.message}")
|
99
99
|
raise
|
100
100
|
end
|
101
101
|
|
102
|
+
# transform document using converters
|
103
|
+
def transform_document(index, document)
|
104
|
+
|
105
|
+
# convert all keys to strings
|
106
|
+
document = _deep_transform_keys_in_object(document, &:to_s)
|
107
|
+
|
108
|
+
# convert keys or add new ones
|
109
|
+
@importers.dig(index, :converters)&.each do |keys, converter|
|
110
|
+
keys = keys.split('.')
|
111
|
+
|
112
|
+
# transform existing key
|
113
|
+
if value = document.dig(*keys)
|
114
|
+
document[keys.first] = converter.call(value, document) if keys.size == 1 # lvl 1
|
115
|
+
keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
|
116
|
+
|
117
|
+
# add new key
|
118
|
+
else
|
119
|
+
missing_key_index = nil
|
120
|
+
keys.each_with_index do |key, i|
|
121
|
+
missing_key_index = i and break unless document.dig(*keys.first(i + 1))
|
122
|
+
end
|
123
|
+
|
124
|
+
tail_keys = keys[missing_key_index..-1]
|
125
|
+
tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
|
126
|
+
|
127
|
+
tail_keys.each_with_index do |key, i|
|
128
|
+
tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
|
129
|
+
tail_hash = tail_hash[tail_keys[i]]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# add elastic id
|
135
|
+
id_key = @importers.dig(index, :id_key)
|
136
|
+
generated_id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
|
137
|
+
generated_id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
|
138
|
+
document['es_id'] = generated_id
|
139
|
+
|
140
|
+
# transformed document
|
141
|
+
document
|
142
|
+
end
|
143
|
+
|
102
144
|
# import documents
|
103
145
|
def import(index, documents)
|
104
146
|
|
147
|
+
# accept single document
|
148
|
+
documents = [documents] if documents.is_a?(Hash)
|
149
|
+
|
105
150
|
# import stats init
|
106
|
-
start_time
|
107
|
-
|
151
|
+
start_time = Time.now
|
152
|
+
imported = {count: 0}
|
153
|
+
failed = {count: 0, items: []}
|
108
154
|
|
109
155
|
# insert into elastic
|
110
156
|
documents.each_with_index do |document, i|
|
111
157
|
|
112
|
-
#
|
113
|
-
|
158
|
+
# transform document
|
159
|
+
transformed_document = transform_document(index, document)
|
114
160
|
|
115
|
-
#
|
116
|
-
|
117
|
-
|
118
|
-
|
161
|
+
# save it into index
|
162
|
+
begin
|
163
|
+
@client.index index: index, type: index.to_s.chomp('s'), id: transformed_document['es_id'], body: transformed_document
|
164
|
+
imported[:count] +=1
|
165
|
+
rescue => e
|
166
|
+
if @logger&.debug?
|
167
|
+
@logger.debug(e.class)
|
168
|
+
@logger.debug(e.message)
|
169
|
+
end
|
170
|
+
failed[:count] +=1
|
171
|
+
failed[:items] << {id: transformed_document['es_id'], error: e.message}
|
172
|
+
end
|
173
|
+
end
|
119
174
|
|
120
|
-
|
121
|
-
|
122
|
-
|
175
|
+
# print import statistics
|
176
|
+
if @logger&.debug?
|
177
|
+
@logger.debug(">>> #{index} import statistics")
|
178
|
+
@logger.debug("Imported: #{imported}")
|
179
|
+
@logger.debug("Failed: #{failed}")
|
180
|
+
@logger.debug("Time spent: #{Time.now - start_time} sec")
|
181
|
+
end
|
123
182
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
|
183
|
+
# return stats
|
184
|
+
{imported: imported, failed: failed}
|
185
|
+
end
|
128
186
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
keys.each_with_index do |key, i|
|
133
|
-
missing_key_index = i and break unless document.dig(*keys.first(i + 1))
|
134
|
-
end
|
187
|
+
def import_in_bulk(index, documents)
|
188
|
+
# import stats init
|
189
|
+
start_time = Time.now
|
135
190
|
|
136
|
-
|
137
|
-
|
191
|
+
# transform documents and build bulk payload
|
192
|
+
transformed_documents_for_bulk = documents.map do |document|
|
193
|
+
transformed_document = transform_document(index, document)
|
194
|
+
es_id = transformed_document.delete('es_id')
|
195
|
+
{index: { _index: index, _type: index.to_s.chomp('s'), _id: es_id, data: transformed_document}}
|
196
|
+
end
|
138
197
|
|
139
|
-
|
140
|
-
|
141
|
-
tail_hash = tail_hash[tail_keys[i]]
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
198
|
+
# import
|
199
|
+
resp = @client.bulk body: transformed_documents_for_bulk
|
145
200
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
rescue => e
|
151
|
-
puts "##{i + 1} failed #{id}" if @logger
|
152
|
-
puts e.class; puts e.message
|
153
|
-
failed = failed + 1
|
154
|
-
end
|
201
|
+
# print import statistics
|
202
|
+
if @logger&.debug?
|
203
|
+
@logger.debug(">>> #{index} import statistics")
|
204
|
+
@logger.debug("Time spent: #{Time.now - start_time} sec")
|
155
205
|
end
|
156
206
|
|
157
|
-
#
|
158
|
-
|
159
|
-
puts "Failed: #{failed}"
|
160
|
-
puts "Imported: #{imported}"
|
161
|
-
puts "Time spent: #{Time.now - start_time} sec"
|
162
|
-
puts
|
207
|
+
# return deserialized es response
|
208
|
+
resp
|
163
209
|
end
|
164
210
|
|
165
211
|
# taken from https://github.com/rails/rails/blob/f213e926892020f9ab6c8974612c59e2ba959253/activesupport/lib/active_support/core_ext/hash/keys.rb#L145
|
data/lib/es_importer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: es_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.3.0
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damir Roso
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|