es_importer 0.2.0 → 0.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/Gemfile.lock +6 -6
- data/README.md +17 -5
- data/lib/es_importer/version.rb +1 -1
- data/lib/es_importer.rb +56 -26
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
|
-
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: f5139f9596f419bb2e9be23229becb9ff26c180c38e8f2f4a1b3bcaf8f2fb977
|
4
|
+
data.tar.gz: a3ee1f507b0ffeb95369300baad2cde7af60e10aff67fe1deae74984690c8349
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 1ffe0e7c95b2c3f79eb1a97218c478863a32bd4bcd97d1fcff7db61ecec987cbb747dd53ed00d72458be77f40e03b2003fc8d44cabb2b76fde323d55841f75e3
|
7
|
+
data.tar.gz: '084ebc7fbac313071e5ebd9f054d1c133407260d56518bfd20aeb633369f4f944fd81543240662a1e95d7c1d634a5d64d148733cd2f4f1cce67a03b598a6fe56'
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
es_importer (0.
|
4
|
+
es_importer (0.3.0)
|
5
5
|
aws-sdk
|
6
6
|
elasticsearch
|
7
7
|
faraday
|
@@ -12,13 +12,13 @@ PATH
|
|
12
12
|
GEM
|
13
13
|
remote: https://rubygems.org/
|
14
14
|
specs:
|
15
|
-
aws-sdk (2.11.
|
16
|
-
aws-sdk-resources (= 2.11.
|
17
|
-
aws-sdk-core (2.11.
|
15
|
+
aws-sdk (2.11.28)
|
16
|
+
aws-sdk-resources (= 2.11.28)
|
17
|
+
aws-sdk-core (2.11.28)
|
18
18
|
aws-sigv4 (~> 1.0)
|
19
19
|
jmespath (~> 1.0)
|
20
|
-
aws-sdk-resources (2.11.
|
21
|
-
aws-sdk-core (= 2.11.
|
20
|
+
aws-sdk-resources (2.11.28)
|
21
|
+
aws-sdk-core (= 2.11.28)
|
22
22
|
aws-sigv4 (1.0.2)
|
23
23
|
diff-lcs (1.3)
|
24
24
|
elasticsearch (6.0.2)
|
data/README.md
CHANGED
@@ -42,27 +42,39 @@ users = (1..100).to_a.map do |i|
|
|
42
42
|
}
|
43
43
|
end
|
44
44
|
|
45
|
-
# define
|
45
|
+
# define mapping and settings for index
|
46
|
+
# provide conversion procs to transform the document
|
46
47
|
importer = {
|
47
48
|
|
48
49
|
# name of the index
|
50
|
+
# NOTE: type will be set to the index name with a trailing 's' removed, e.g. users => user
|
49
51
|
users: {
|
50
52
|
|
51
53
|
# build id from single or multiple keys
|
52
54
|
id_key: [:user_id, :created_at],
|
53
55
|
|
54
56
|
# define index mapping
|
57
|
+
# supply a type for a field or a hash with field definition
|
55
58
|
mapping: {
|
56
|
-
user_id: :
|
59
|
+
user_id: :integer,
|
57
60
|
active: :boolean,
|
58
|
-
email: :text,
|
61
|
+
email: {type: :text, analyzer: :my_analyzer},
|
59
62
|
created_at: :date,
|
60
63
|
country_code: :text
|
61
64
|
},
|
62
65
|
|
66
|
+
# shortcut to set keywords to fields
|
63
67
|
# keyword generated is 'country_code.keyword'
|
64
68
|
keywords: [:country_code],
|
65
69
|
|
70
|
+
# index settings - add a custom analyzer with a ngram filter
|
71
|
+
settings: {
|
72
|
+
analysis: {
|
73
|
+
filter: {my_filter: {type: :ngram, min_gram: 3, max_gram: 4}},
|
74
|
+
analyzer: {my_analyzer: {type: :custom, tokenizer: :standard, filter: [:lowercase, :my_filter]}}
|
75
|
+
}
|
76
|
+
},
|
77
|
+
|
66
78
|
converters: {
|
67
79
|
# downcase existing field value, second argument is document being processed
|
68
80
|
'email' => Proc.new{|attr, _| attr.downcase},
|
@@ -104,7 +116,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
|
|
104
116
|
|
105
117
|
## Contributing
|
106
118
|
|
107
|
-
Bug reports and pull requests are welcome on GitHub at https://github.com/
|
119
|
+
Bug reports and pull requests are welcome on GitHub at https://github.com/damir/es_importer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
|
108
120
|
|
109
121
|
## License
|
110
122
|
|
@@ -112,4 +124,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
|
|
112
124
|
|
113
125
|
## Code of Conduct
|
114
126
|
|
115
|
-
Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/
|
127
|
+
Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/damir/es_importer/blob/master/CODE_OF_CONDUCT.md).
|
data/lib/es_importer/version.rb
CHANGED
data/lib/es_importer.rb
CHANGED
@@ -13,7 +13,7 @@ module EsImporter
|
|
13
13
|
@client = Elasticsearch::Client.new transport: transport
|
14
14
|
end
|
15
15
|
|
16
|
-
# init
|
16
|
+
# init transport
|
17
17
|
def transport
|
18
18
|
# parse uri for host configuration
|
19
19
|
es_uri = URI.parse(@es_uri)
|
@@ -44,51 +44,81 @@ module EsImporter
|
|
44
44
|
@importers.update(importer)
|
45
45
|
end
|
46
46
|
|
47
|
-
# create
|
48
|
-
def create_index!(
|
49
|
-
puts "Creating #{
|
50
|
-
|
47
|
+
# create index
|
48
|
+
def create_index!(index)
|
49
|
+
puts "Creating #{index} index at #{@es_uri} ..."
|
50
|
+
|
51
|
+
mapping = @importers.dig(index, :mapping)
|
52
|
+
keywords = @importers.dig(index, :keywords)
|
53
|
+
type_name = index.to_s.chomp('s')
|
54
|
+
|
55
|
+
body = {
|
56
|
+
|
57
|
+
# add mapping
|
51
58
|
mappings: {
|
52
|
-
|
59
|
+
type_name => {
|
53
60
|
dynamic: false,
|
54
|
-
properties:
|
61
|
+
properties: mapping.reduce({}) do |a, (k, v)|
|
62
|
+
|
63
|
+
# field with only type def, ie. mapping: {user_id: :text}
|
64
|
+
if v.kind_of?(Symbol)
|
65
|
+
field_def = {type: v}
|
66
|
+
# field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
|
67
|
+
elsif v.kind_of?(Hash)
|
68
|
+
field_def = v
|
69
|
+
end
|
70
|
+
|
71
|
+
# optional keywords
|
72
|
+
field_def.update(keywords&.include?(k) ? {fields: {keyword: {type: :keyword}}} : {}) if keywords
|
73
|
+
|
74
|
+
# set field definition
|
75
|
+
a.update(k => field_def)
|
76
|
+
end
|
55
77
|
}
|
56
78
|
}
|
57
79
|
}
|
80
|
+
|
81
|
+
# merge settings if they are set
|
82
|
+
settings = @importers.dig(index, :settings)
|
83
|
+
body.update(settings: settings) if settings
|
84
|
+
|
85
|
+
# create index
|
86
|
+
@client.indices.create index: index, body: body
|
87
|
+
|
58
88
|
rescue => error
|
59
|
-
puts "Error creating #{
|
89
|
+
puts "Error creating #{index} index. #{error.class}: #{error.message}"
|
60
90
|
raise
|
61
91
|
end
|
62
92
|
|
63
|
-
#
|
64
|
-
def delete_index!(
|
65
|
-
puts "Deleting #{
|
66
|
-
@client.indices.delete index:
|
93
|
+
# delete index
|
94
|
+
def delete_index!(index)
|
95
|
+
puts "Deleting #{index} index at #{@es_uri} ..."
|
96
|
+
@client.indices.delete index: index
|
67
97
|
rescue => error
|
68
|
-
puts "Error deleting #{
|
98
|
+
puts "Error deleting #{index} index. #{error.class}: #{error.message}"
|
69
99
|
raise
|
70
100
|
end
|
71
101
|
|
72
102
|
# import documents
|
73
|
-
def import(
|
103
|
+
def import(index, documents)
|
74
104
|
|
75
105
|
# import stats init
|
76
106
|
start_time = Time.now
|
77
107
|
failed = 0; imported = 0
|
78
108
|
|
79
109
|
# insert into elastic
|
80
|
-
documents.each_with_index do |document,
|
110
|
+
documents.each_with_index do |document, i|
|
81
111
|
|
82
112
|
# convert all keys to strings
|
83
113
|
document = _deep_transform_keys_in_object(document, &:to_s)
|
84
114
|
|
85
115
|
# generate id
|
86
|
-
id_key = @importers.dig(
|
116
|
+
id_key = @importers.dig(index, :id_key)
|
87
117
|
id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
|
88
118
|
id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
|
89
119
|
|
90
120
|
# convert keys or add new ones
|
91
|
-
@importers.dig(
|
121
|
+
@importers.dig(index, :converters)&.each do |keys, converter|
|
92
122
|
keys = keys.split('.')
|
93
123
|
|
94
124
|
# transform existing key
|
@@ -99,33 +129,33 @@ module EsImporter
|
|
99
129
|
# add new key
|
100
130
|
else
|
101
131
|
missing_key_index = nil
|
102
|
-
keys.each_with_index do |key,
|
103
|
-
missing_key_index =
|
132
|
+
keys.each_with_index do |key, i|
|
133
|
+
missing_key_index = i and break unless document.dig(*keys.first(i + 1))
|
104
134
|
end
|
105
135
|
|
106
136
|
tail_keys = keys[missing_key_index..-1]
|
107
137
|
tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
|
108
138
|
|
109
|
-
tail_keys.each_with_index do |key,
|
110
|
-
tail_hash[tail_keys[
|
111
|
-
tail_hash = tail_hash[tail_keys[
|
139
|
+
tail_keys.each_with_index do |key, i|
|
140
|
+
tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
|
141
|
+
tail_hash = tail_hash[tail_keys[i]]
|
112
142
|
end
|
113
143
|
end
|
114
144
|
end
|
115
145
|
|
116
146
|
begin
|
117
|
-
@client.index index:
|
118
|
-
puts "##{
|
147
|
+
@client.index index: index, type: index.to_s.chomp('s'), id: id, body: document
|
148
|
+
puts "##{i + 1} imported #{id}" if @logger
|
119
149
|
imported = imported + 1
|
120
150
|
rescue => e
|
121
|
-
puts "##{
|
151
|
+
puts "##{i + 1} failed #{id}" if @logger
|
122
152
|
puts e.class; puts e.message
|
123
153
|
failed = failed + 1
|
124
154
|
end
|
125
155
|
end
|
126
156
|
|
127
157
|
# print import statistics
|
128
|
-
puts; puts "#{
|
158
|
+
puts; puts "#{index} import statistics"; puts '-' * 100
|
129
159
|
puts "Failed: #{failed}"
|
130
160
|
puts "Imported: #{imported}"
|
131
161
|
puts "Time spent: #{Time.now - start_time} sec"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: es_importer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Damir Roso
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-04-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -177,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
177
177
|
version: '0'
|
178
178
|
requirements: []
|
179
179
|
rubyforge_project:
|
180
|
-
rubygems_version: 2.
|
180
|
+
rubygems_version: 2.7.6
|
181
181
|
signing_key:
|
182
182
|
specification_version: 4
|
183
183
|
summary: Transform and import JSON documents into elastic search.
|