es_importer 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +7 -7
- data/README.md +11 -3
- data/lib/es_importer.rb +95 -49
- data/lib/es_importer/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c535d2850816bfd5e36c616057c6db755cbe27e3e99474b6149cbc74de188bc4
|
4
|
+
data.tar.gz: 984636ea2d8aaa849dfab4cdf4d156632eefd468ff4ea67a6a96228a73b9a1b3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0ad7fb8717ef68d7f61d40d09634fe1bd9df9b998e648602c9b6937efec17166dd5fea8ebdf52654ce7fc18a28e058b6668115d2813ad3fabd43e18ddd530897
|
7
|
+
data.tar.gz: 3a1194226e3063a58b66de11f4e803141fb669efc3e589815b9c282350d076c232e84b17e231a4520a56f2352eac6d19f91b22258dd2d341ba4fdf40bc6fe0b6
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
es_importer (0.
|
4
|
+
es_importer (0.4.0)
|
5
5
|
aws-sdk
|
6
6
|
elasticsearch
|
7
7
|
faraday
|
@@ -12,13 +12,13 @@ PATH
|
|
12
12
|
GEM
|
13
13
|
remote: https://rubygems.org/
|
14
14
|
specs:
|
15
|
-
aws-sdk (2.11.
|
16
|
-
aws-sdk-resources (= 2.11.
|
17
|
-
aws-sdk-core (2.11.
|
15
|
+
aws-sdk (2.11.33)
|
16
|
+
aws-sdk-resources (= 2.11.33)
|
17
|
+
aws-sdk-core (2.11.33)
|
18
18
|
aws-sigv4 (~> 1.0)
|
19
19
|
jmespath (~> 1.0)
|
20
|
-
aws-sdk-resources (2.11.
|
21
|
-
aws-sdk-core (= 2.11.
|
20
|
+
aws-sdk-resources (2.11.33)
|
21
|
+
aws-sdk-core (= 2.11.33)
|
22
22
|
aws-sigv4 (1.0.2)
|
23
23
|
diff-lcs (1.3)
|
24
24
|
elasticsearch (6.0.2)
|
@@ -39,7 +39,7 @@ GEM
|
|
39
39
|
aws-sdk-resources (>= 2, < 3)
|
40
40
|
faraday (~> 0.9)
|
41
41
|
ffi (1.9.23)
|
42
|
-
jmespath (1.
|
42
|
+
jmespath (1.4.0)
|
43
43
|
multi_json (1.13.1)
|
44
44
|
multipart-post (2.0.0)
|
45
45
|
rake (10.5.0)
|
data/README.md
CHANGED
@@ -25,7 +25,7 @@ See inline comments:
|
|
25
25
|
require 'es_importer'
|
26
26
|
|
27
27
|
# configure client
|
28
|
-
EsImporter.configure('http://localhost:9200')
|
28
|
+
EsImporter.configure('http://localhost:9200', logger: Logger.new($stdout))
|
29
29
|
|
30
30
|
# generate some users
|
31
31
|
users = (1..100).to_a.map do |i|
|
@@ -97,9 +97,18 @@ EsImporter.add_importer(importer)
|
|
97
97
|
# create index
|
98
98
|
EsImporter.create_index!(:users)
|
99
99
|
|
100
|
-
# import users
|
100
|
+
# import a single user
|
101
|
+
EsImporter.import(:users, users[0])
|
102
|
+
|
103
|
+
# import users sequentially
|
101
104
|
EsImporter.import(:users, users)
|
102
105
|
|
106
|
+
# or import in bulk
|
107
|
+
EsImporter.import_in_bulk(:users, users)
|
108
|
+
|
109
|
+
# or just transform a document without importing it
|
110
|
+
EsImporter.transform_document(:users, users[0])
|
111
|
+
|
103
112
|
# delete index
|
104
113
|
EsImporter.delete_index!(:users)
|
105
114
|
|
@@ -107,7 +116,6 @@ EsImporter.delete_index!(:users)
|
|
107
116
|
|
108
117
|
An AWS elastic instance is also supported: the region is extracted from the URL and credentials are set from the Ruby SDK.
|
109
118
|
|
110
|
-
|
111
119
|
## Development
|
112
120
|
|
113
121
|
After checking out the repo, run `bin/setup` to install dependencies. Then, run `rake spec` to run the tests. You can also run `bin/console` for an interactive prompt that will allow you to experiment.
|
data/lib/es_importer.rb
CHANGED
@@ -46,7 +46,7 @@ module EsImporter
|
|
46
46
|
|
47
47
|
# create index
|
48
48
|
def create_index!(index)
|
49
|
-
|
49
|
+
@logger&.debug("Creating #{index} index at #{@es_uri} ...")
|
50
50
|
|
51
51
|
mapping = @importers.dig(index, :mapping)
|
52
52
|
keywords = @importers.dig(index, :keywords)
|
@@ -64,7 +64,7 @@ module EsImporter
|
|
64
64
|
if v.kind_of?(Symbol)
|
65
65
|
field_def = {type: v}
|
66
66
|
# field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
|
67
|
-
|
67
|
+
elsif v.kind_of?(Hash)
|
68
68
|
field_def = v
|
69
69
|
end
|
70
70
|
|
@@ -86,80 +86,126 @@ module EsImporter
|
|
86
86
|
@client.indices.create index: index, body: body
|
87
87
|
|
88
88
|
rescue => error
|
89
|
-
|
89
|
+
@logger&.debug("Error creating #{index} index. #{error.class}: #{error.message}")
|
90
90
|
raise
|
91
91
|
end
|
92
92
|
|
93
93
|
# delete index
|
94
94
|
def delete_index!(index)
|
95
|
-
|
95
|
+
@logger&.debug("Deleting #{index} index at #{@es_uri} ...")
|
96
96
|
@client.indices.delete index: index
|
97
97
|
rescue => error
|
98
|
-
|
98
|
+
@logger&.debug("Error deleting #{index} index. #{error.class}: #{error.message}")
|
99
99
|
raise
|
100
100
|
end
|
101
101
|
|
102
|
+
# transform document using converters
|
103
|
+
def transform_document(index, document)
|
104
|
+
|
105
|
+
# convert all keys to strings
|
106
|
+
document = _deep_transform_keys_in_object(document, &:to_s)
|
107
|
+
|
108
|
+
# convert keys or add new ones
|
109
|
+
@importers.dig(index, :converters)&.each do |keys, converter|
|
110
|
+
keys = keys.split('.')
|
111
|
+
|
112
|
+
# transform existing key
|
113
|
+
if value = document.dig(*keys)
|
114
|
+
document[keys.first] = converter.call(value, document) if keys.size == 1 # lvl 1
|
115
|
+
keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
|
116
|
+
|
117
|
+
# add new key
|
118
|
+
else
|
119
|
+
missing_key_index = nil
|
120
|
+
keys.each_with_index do |key, i|
|
121
|
+
missing_key_index = i and break unless document.dig(*keys.first(i + 1))
|
122
|
+
end
|
123
|
+
|
124
|
+
tail_keys = keys[missing_key_index..-1]
|
125
|
+
tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
|
126
|
+
|
127
|
+
tail_keys.each_with_index do |key, i|
|
128
|
+
tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
|
129
|
+
tail_hash = tail_hash[tail_keys[i]]
|
130
|
+
end
|
131
|
+
end
|
132
|
+
end
|
133
|
+
|
134
|
+
# add elastic id
|
135
|
+
id_key = @importers.dig(index, :id_key)
|
136
|
+
generated_id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
|
137
|
+
generated_id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
|
138
|
+
document['es_id'] = generated_id
|
139
|
+
|
140
|
+
# transformed document
|
141
|
+
document
|
142
|
+
end
|
143
|
+
|
102
144
|
# import documents
|
103
145
|
def import(index, documents)
|
104
146
|
|
147
|
+
# accept single document
|
148
|
+
documents = [documents] if documents.is_a?(Hash)
|
149
|
+
|
105
150
|
# import stats init
|
106
|
-
start_time
|
107
|
-
|
151
|
+
start_time = Time.now
|
152
|
+
imported = {count: 0}
|
153
|
+
failed = {count: 0, items: []}
|
108
154
|
|
109
155
|
# insert into elastic
|
110
156
|
documents.each_with_index do |document, i|
|
111
157
|
|
112
|
-
#
|
113
|
-
|
158
|
+
# transform document
|
159
|
+
transformed_document = transform_document(index, document)
|
114
160
|
|
115
|
-
#
|
116
|
-
|
117
|
-
|
118
|
-
|
161
|
+
# save it into index
|
162
|
+
begin
|
163
|
+
@client.index index: index, type: index.to_s.chomp('s'), id: transformed_document['es_id'], body: transformed_document
|
164
|
+
imported[:count] +=1
|
165
|
+
rescue => e
|
166
|
+
if @logger&.debug?
|
167
|
+
@logger.debug(e.class)
|
168
|
+
@logger.debug(e.message)
|
169
|
+
end
|
170
|
+
failed[:count] +=1
|
171
|
+
failed[:items] << {id: transformed_document['es_id'], error: e.message}
|
172
|
+
end
|
173
|
+
end
|
119
174
|
|
120
|
-
|
121
|
-
|
122
|
-
|
175
|
+
# print import statistics
|
176
|
+
if @logger&.debug?
|
177
|
+
@logger.debug(">>> #{index} import statistics")
|
178
|
+
@logger.debug("Imported: #{imported}")
|
179
|
+
@logger.debug("Failed: #{failed}")
|
180
|
+
@logger.debug("Time spent: #{Time.now - start_time} sec")
|
181
|
+
end
|
123
182
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
keys.first(keys.size-1).reduce(document, :fetch)[keys.last] = converter.call(value, document) if keys.size > 1 # lvl > 1
|
183
|
+
# return stats
|
184
|
+
{imported: imported, failed: failed}
|
185
|
+
end
|
128
186
|
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
keys.each_with_index do |key, i|
|
133
|
-
missing_key_index = i and break unless document.dig(*keys.first(i + 1))
|
134
|
-
end
|
187
|
+
def import_in_bulk(index, documents)
|
188
|
+
# import stats init
|
189
|
+
start_time = Time.now
|
135
190
|
|
136
|
-
|
137
|
-
|
191
|
+
# transform documents and build bulk payload
|
192
|
+
transformed_documents_for_bulk = documents.map do |document|
|
193
|
+
transformed_document = transform_document(index, document)
|
194
|
+
es_id = transformed_document.delete('es_id')
|
195
|
+
{index: { _index: index, _type: index.to_s.chomp('s'), _id: es_id, data: transformed_document}}
|
196
|
+
end
|
138
197
|
|
139
|
-
|
140
|
-
|
141
|
-
tail_hash = tail_hash[tail_keys[i]]
|
142
|
-
end
|
143
|
-
end
|
144
|
-
end
|
198
|
+
# import
|
199
|
+
resp = @client.bulk body: transformed_documents_for_bulk
|
145
200
|
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
rescue => e
|
151
|
-
puts "##{i + 1} failed #{id}" if @logger
|
152
|
-
puts e.class; puts e.message
|
153
|
-
failed = failed + 1
|
154
|
-
end
|
201
|
+
# print import statistics
|
202
|
+
if @logger&.debug?
|
203
|
+
@logger.debug(">>> #{index} import statistics")
|
204
|
+
@logger.debug("Time spent: #{Time.now - start_time} sec")
|
155
205
|
end
|
156
206
|
|
157
|
-
#
|
158
|
-
|
159
|
-
puts "Failed: #{failed}"
|
160
|
-
puts "Imported: #{imported}"
|
161
|
-
puts "Time spent: #{Time.now - start_time} sec"
|
162
|
-
puts
|
207
|
+
# return deserialized es response
|
208
|
+
resp
|
163
209
|
end
|
164
210
|
|
165
211
|
# taken from https://github.com/rails/rails/blob/f213e926892020f9ab6c8974612c59e2ba959253/activesupport/lib/active_support/core_ext/hash/keys.rb#L145
|
data/lib/es_importer/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: es_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damir Roso
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-04-
|
11
|
+
date: 2018-04-27 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|