es_importer 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +5 -5
- data/Gemfile.lock +6 -6
- data/README.md +17 -5
- data/lib/es_importer/version.rb +1 -1
- data/lib/es_importer.rb +56 -26
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f5139f9596f419bb2e9be23229becb9ff26c180c38e8f2f4a1b3bcaf8f2fb977
|
4
|
+
data.tar.gz: a3ee1f507b0ffeb95369300baad2cde7af60e10aff67fe1deae74984690c8349
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ffe0e7c95b2c3f79eb1a97218c478863a32bd4bcd97d1fcff7db61ecec987cbb747dd53ed00d72458be77f40e03b2003fc8d44cabb2b76fde323d55841f75e3
|
7
|
+
data.tar.gz: '084ebc7fbac313071e5ebd9f054d1c133407260d56518bfd20aeb633369f4f944fd81543240662a1e95d7c1d634a5d64d148733cd2f4f1cce67a03b598a6fe56'
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
es_importer (0.2.0)
|
4
|
+
es_importer (0.3.0)
|
5
5
|
aws-sdk
|
6
6
|
elasticsearch
|
7
7
|
faraday
|
@@ -12,13 +12,13 @@ PATH
|
|
12
12
|
GEM
|
13
13
|
remote: https://rubygems.org/
|
14
14
|
specs:
|
15
|
-
aws-sdk (2.11.
|
16
|
-
aws-sdk-resources (= 2.11.
|
17
|
-
aws-sdk-core (2.11.
|
15
|
+
aws-sdk (2.11.28)
|
16
|
+
aws-sdk-resources (= 2.11.28)
|
17
|
+
aws-sdk-core (2.11.28)
|
18
18
|
aws-sigv4 (~> 1.0)
|
19
19
|
jmespath (~> 1.0)
|
20
|
-
aws-sdk-resources (2.11.
|
21
|
-
aws-sdk-core (= 2.11.
|
20
|
+
aws-sdk-resources (2.11.28)
|
21
|
+
aws-sdk-core (= 2.11.28)
|
22
22
|
aws-sigv4 (1.0.2)
|
23
23
|
diff-lcs (1.3)
|
24
24
|
elasticsearch (6.0.2)
|
data/README.md
CHANGED
@@ -42,27 +42,39 @@ users = (1..100).to_a.map do |i|
|
|
42
42
|
}
|
43
43
|
end
|
44
44
|
|
45
|
-
# define
|
45
|
+
# define mapping and settings for index
|
46
|
+
# provide conversion procs to transform the document
|
46
47
|
importer = {
|
47
48
|
|
48
49
|
# name of the index
|
50
|
+
# NOTE: type will be set to singular form of index name, ie. users => user
|
49
51
|
users: {
|
50
52
|
|
51
53
|
# build id from single or multiple keys
|
52
54
|
id_key: [:user_id, :created_at],
|
53
55
|
|
54
56
|
# define index mapping
|
57
|
+
# supply a type for a field or a hash with field definition
|
55
58
|
mapping: {
|
56
|
-
user_id: :
|
59
|
+
user_id: :integer,
|
57
60
|
active: :boolean,
|
58
|
-
email: :text,
|
61
|
+
email: {type: :text, analyzer: :my_analyzer},
|
59
62
|
created_at: :date,
|
60
63
|
country_code: :text
|
61
64
|
},
|
62
65
|
|
66
|
+
# shortcut to set keywords to fields
|
63
67
|
# keyword generated is 'country_code.keyword'
|
64
68
|
keywords: [:country_code],
|
65
69
|
|
70
|
+
# index settings - add a custom analyzer with a ngram filter
|
71
|
+
settings: {
|
72
|
+
analysis: {
|
73
|
+
filter: {my_filter: {type: :ngram, min_gram: 3, max_gram: 4}},
|
74
|
+
analyzer: {my_analyzer: {type: :custom, tokenizer: :standard, filter: [:lowercase, :my_filter]}}
|
75
|
+
}
|
76
|
+
},
|
77
|
+
|
66
78
|
converters: {
|
67
79
|
# downcase existing field value, second argument is document being processed
|
68
80
|
'email' => Proc.new{|attr, _| attr.downcase},
|
@@ -104,7 +116,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
104
116
|
|
105
117
|
## Contributing
|
106
118
|
|
107
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
119
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/damir/es_importer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
108
120
|
|
109
121
|
## License
|
110
122
|
|
@@ -112,4 +124,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
112
124
|
|
113
125
|
## Code of Conduct
|
114
126
|
|
115
|
-
Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
127
|
+
Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/damir/es_importer/blob/master/CODE_OF_CONDUCT.md).
|
data/lib/es_importer/version.rb
CHANGED
data/lib/es_importer.rb
CHANGED
@@ -13,7 +13,7 @@ module EsImporter
|
|
13
13
|
@client = Elasticsearch::Client.new transport: transport
|
14
14
|
end
|
15
15
|
|
16
|
-
# init
|
16
|
+
# init transport
|
17
17
|
def transport
|
18
18
|
# parse uri for host configuration
|
19
19
|
es_uri = URI.parse(@es_uri)
|
@@ -44,51 +44,81 @@ module EsImporter
|
|
44
44
|
@importers.update(importer)
|
45
45
|
end
|
46
46
|
|
47
|
-
# create
|
48
|
-
def create_index!(
|
49
|
-
puts "Creating #{
|
50
|
-
|
47
|
+
# create index
|
48
|
+
def create_index!(index)
|
49
|
+
puts "Creating #{index} index at #{@es_uri} ..."
|
50
|
+
|
51
|
+
mapping = @importers.dig(index, :mapping)
|
52
|
+
keywords = @importers.dig(index, :keywords)
|
53
|
+
type_name = index.to_s.chomp('s')
|
54
|
+
|
55
|
+
body = {
|
56
|
+
|
57
|
+
# add mapping
|
51
58
|
mappings: {
|
52
|
-
|
59
|
+
type_name => {
|
53
60
|
dynamic: false,
|
54
|
-
properties:
|
61
|
+
properties: mapping.reduce({}) do |a, (k, v)|
|
62
|
+
|
63
|
+
# field with only type def, ie. mapping: {user_id: :text}
|
64
|
+
if v.kind_of?(Symbol)
|
65
|
+
field_def = {type: v}
|
66
|
+
# field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
|
67
|
+
elsif v.kind_of?(Hash)
|
68
|
+
field_def = v
|
69
|
+
end
|
70
|
+
|
71
|
+
# optional keywords
|
72
|
+
field_def.update(keywords&.include?(k) ? {fields: {keyword: {type: :keyword}}} : {}) if keywords
|
73
|
+
|
74
|
+
# set field definition
|
75
|
+
a.update(k => field_def)
|
76
|
+
end
|
55
77
|
}
|
56
78
|
}
|
57
79
|
}
|
80
|
+
|
81
|
+
# merge settings if they are set
|
82
|
+
settings = @importers.dig(index, :settings)
|
83
|
+
body.update(settings: settings) if settings
|
84
|
+
|
85
|
+
# create index
|
86
|
+
@client.indices.create index: index, body: body
|
87
|
+
|
58
88
|
rescue => error
|
59
|
-
puts "Error creating #{
|
89
|
+
puts "Error creating #{index} index. #{error.class}: #{error.message}"
|
60
90
|
raise
|
61
91
|
end
|
62
92
|
|
63
|
-
#
|
64
|
-
def delete_index!(
|
65
|
-
puts "Deleting #{
|
66
|
-
@client.indices.delete index:
|
93
|
+
# delete index
|
94
|
+
def delete_index!(index)
|
95
|
+
puts "Deleting #{index} index at #{@es_uri} ..."
|
96
|
+
@client.indices.delete index: index
|
67
97
|
rescue => error
|
68
|
-
puts "Error deleting #{
|
98
|
+
puts "Error deleting #{index} index. #{error.class}: #{error.message}"
|
69
99
|
raise
|
70
100
|
end
|
71
101
|
|
72
102
|
# import documents
|
73
|
-
def import(
|
103
|
+
def import(index, documents)
|
74
104
|
|
75
105
|
# import stats init
|
76
106
|
start_time = Time.now
|
77
107
|
failed = 0; imported = 0
|
78
108
|
|
79
109
|
# insert into elastic
|
80
|
-
documents.each_with_index do |document,
|
110
|
+
documents.each_with_index do |document, i|
|
81
111
|
|
82
112
|
# convert all keys to strings
|
83
113
|
document = _deep_transform_keys_in_object(document, &:to_s)
|
84
114
|
|
85
115
|
# generate id
|
86
|
-
id_key = @importers.dig(
|
116
|
+
id_key = @importers.dig(index, :id_key)
|
87
117
|
id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
|
88
118
|
id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
|
89
119
|
|
90
120
|
# convert keys or add new ones
|
91
|
-
@importers.dig(
|
121
|
+
@importers.dig(index, :converters)&.each do |keys, converter|
|
92
122
|
keys = keys.split('.')
|
93
123
|
|
94
124
|
# transform existing key
|
@@ -99,33 +129,33 @@ module EsImporter
|
|
99
129
|
# add new key
|
100
130
|
else
|
101
131
|
missing_key_index = nil
|
102
|
-
keys.each_with_index do |key,
|
103
|
-
missing_key_index =
|
132
|
+
keys.each_with_index do |key, i|
|
133
|
+
missing_key_index = i and break unless document.dig(*keys.first(i + 1))
|
104
134
|
end
|
105
135
|
|
106
136
|
tail_keys = keys[missing_key_index..-1]
|
107
137
|
tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
|
108
138
|
|
109
|
-
tail_keys.each_with_index do |key,
|
110
|
-
tail_hash[tail_keys[
|
111
|
-
tail_hash = tail_hash[tail_keys[
|
139
|
+
tail_keys.each_with_index do |key, i|
|
140
|
+
tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
|
141
|
+
tail_hash = tail_hash[tail_keys[i]]
|
112
142
|
end
|
113
143
|
end
|
114
144
|
end
|
115
145
|
|
116
146
|
begin
|
117
|
-
@client.index index:
|
118
|
-
puts "##{
|
147
|
+
@client.index index: index, type: index.to_s.chomp('s'), id: id, body: document
|
148
|
+
puts "##{i + 1} imported #{id}" if @logger
|
119
149
|
imported = imported + 1
|
120
150
|
rescue => e
|
121
|
-
puts "##{
|
151
|
+
puts "##{i + 1} failed #{id}" if @logger
|
122
152
|
puts e.class; puts e.message
|
123
153
|
failed = failed + 1
|
124
154
|
end
|
125
155
|
end
|
126
156
|
|
127
157
|
# print import statistics
|
128
|
-
puts; puts "#{
|
158
|
+
puts; puts "#{index} import statistics"; puts '-' * 100
|
129
159
|
puts "Failed: #{failed}"
|
130
160
|
puts "Imported: #{imported}"
|
131
161
|
puts "Time spent: #{Time.now - start_time} sec"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: es_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.0
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damir Roso
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -177,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
177
|
version: '0'
|
178
178
|
requirements: []
|
179
179
|
rubyforge_project:
|
180
|
-
rubygems_version: 2.
|
180
|
+
rubygems_version: 2.7.6
|
181
181
|
signing_key:
|
182
182
|
specification_version: 4
|
183
183
|
summary: Transform and import JSON documents into elastic search.
|