es_importer 0.2.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: f7c04aa828e0ae96ecf1435708b67cc8b16e23b7
4
- data.tar.gz: 64c25c201532396329b75ed2e0d456ba0aa8c45d
2
+ SHA256:
3
+ metadata.gz: f5139f9596f419bb2e9be23229becb9ff26c180c38e8f2f4a1b3bcaf8f2fb977
4
+ data.tar.gz: a3ee1f507b0ffeb95369300baad2cde7af60e10aff67fe1deae74984690c8349
5
5
  SHA512:
6
- metadata.gz: 7454c639c0b9242d5709bd8ec660f54fbfcceffe0a401be6b9a9a166ac4c5c0e6917b083e5b8b51d969122286f0e6798d8e3ec644fadccb9593be5fc3337d4fe
7
- data.tar.gz: 7998ff89e18e6783a86baf81aca787ebb4d4cfcbd6f6642dd24bc9a08fc34847700acc95e98fc822b39fb8d241ce65f7451d0212f1e9ff1115b7ca060f733218
6
+ metadata.gz: 1ffe0e7c95b2c3f79eb1a97218c478863a32bd4bcd97d1fcff7db61ecec987cbb747dd53ed00d72458be77f40e03b2003fc8d44cabb2b76fde323d55841f75e3
7
+ data.tar.gz: '084ebc7fbac313071e5ebd9f054d1c133407260d56518bfd20aeb633369f4f944fd81543240662a1e95d7c1d634a5d64d148733cd2f4f1cce67a03b598a6fe56'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- es_importer (0.2.0)
4
+ es_importer (0.3.0)
5
5
  aws-sdk
6
6
  elasticsearch
7
7
  faraday
@@ -12,13 +12,13 @@ PATH
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- aws-sdk (2.11.24)
16
- aws-sdk-resources (= 2.11.24)
17
- aws-sdk-core (2.11.24)
15
+ aws-sdk (2.11.28)
16
+ aws-sdk-resources (= 2.11.28)
17
+ aws-sdk-core (2.11.28)
18
18
  aws-sigv4 (~> 1.0)
19
19
  jmespath (~> 1.0)
20
- aws-sdk-resources (2.11.24)
21
- aws-sdk-core (= 2.11.24)
20
+ aws-sdk-resources (2.11.28)
21
+ aws-sdk-core (= 2.11.28)
22
22
  aws-sigv4 (1.0.2)
23
23
  diff-lcs (1.3)
24
24
  elasticsearch (6.0.2)
data/README.md CHANGED
@@ -42,27 +42,39 @@ users = (1..100).to_a.map do |i|
42
42
  }
43
43
  end
44
44
 
45
- # define elastic id, mapping and keywords for index; provide conversion procs
45
+ # define mapping and settings for index
46
+ # provide conversion procs to transform the document
46
47
  importer = {
47
48
 
48
49
  # name of the index
50
+ # NOTE: type will be set to singular form of index name, ie. users => user
49
51
  users: {
50
52
 
51
53
  # build id from single or multiple keys
52
54
  id_key: [:user_id, :created_at],
53
55
 
54
56
  # define index mapping
57
+ # supply a type for a field or a hash with field definition
55
58
  mapping: {
56
- user_id: :text,
59
+ user_id: :integer,
57
60
  active: :boolean,
58
- email: :text,
61
+ email: {type: :text, analyzer: :my_analyzer},
59
62
  created_at: :date,
60
63
  country_code: :text
61
64
  },
62
65
 
66
+ # shortcut to set keywords to fields
63
67
  # keyword generated is 'country_code.keyword'
64
68
  keywords: [:country_code],
65
69
 
70
+ # index settings - add a custom analyzer with a ngram filter
71
+ settings: {
72
+ analysis: {
73
+ filter: {my_filter: {type: :ngram, min_gram: 3, max_gram: 4}},
74
+ analyzer: {my_analyzer: {type: :custom, tokenizer: :standard, filter: [:lowercase, :my_filter]}}
75
+ }
76
+ },
77
+
66
78
  converters: {
67
79
  # downcase existing field value, second argument is document being processed
68
80
  'email' => Proc.new{|attr, _| attr.downcase},
@@ -104,7 +116,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
104
116
 
105
117
  ## Contributing
106
118
 
107
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/es_importer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
119
+ Bug reports and pull requests are welcome on GitHub at https://github.com/damir/es_importer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
108
120
 
109
121
  ## License
110
122
 
@@ -112,4 +124,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
112
124
 
113
125
  ## Code of Conduct
114
126
 
115
- Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/es_importer/blob/master/CODE_OF_CONDUCT.md).
127
+ Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/damir/es_importer/blob/master/CODE_OF_CONDUCT.md).
data/lib/es_importer/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module EsImporter
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/es_importer.rb CHANGED
@@ -13,7 +13,7 @@ module EsImporter
13
13
  @client = Elasticsearch::Client.new transport: transport
14
14
  end
15
15
 
16
- # init es transport
16
+ # init transport
17
17
  def transport
18
18
  # parse uri for host configuration
19
19
  es_uri = URI.parse(@es_uri)
@@ -44,51 +44,81 @@ module EsImporter
44
44
  @importers.update(importer)
45
45
  end
46
46
 
47
- # create es index
48
- def create_index!(es_index)
49
- puts "Creating #{es_index} index at #{@es_uri} ..."
50
- @client.indices.create index: es_index, body: {
47
+ # create index
48
+ def create_index!(index)
49
+ puts "Creating #{index} index at #{@es_uri} ..."
50
+
51
+ mapping = @importers.dig(index, :mapping)
52
+ keywords = @importers.dig(index, :keywords)
53
+ type_name = index.to_s.chomp('s')
54
+
55
+ body = {
56
+
57
+ # add mapping
51
58
  mappings: {
52
- es_index.to_s.chomp('s') => {
59
+ type_name => {
53
60
  dynamic: false,
54
- properties: @importers.dig(es_index, :mapping).reduce({}){|a, (k,v)| a.update({k => {type: v}.update(@importers.dig(es_index, :keywords)&.include?(k) ? {fields: {keyword: {type: :keyword}}} : {})})}
61
+ properties: mapping.reduce({}) do |a, (k, v)|
62
+
63
+ # field with only type def, ie. mapping: {user_id: :text}
64
+ if v.kind_of?(Symbol)
65
+ field_def = {type: v}
66
+ # field with full def, i.e. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
67
+ elsif v.kind_of?(Hash)
68
+ field_def = v
69
+ end
70
+
71
+ # optional keywords
72
+ field_def.update(keywords&.include?(k) ? {fields: {keyword: {type: :keyword}}} : {}) if keywords
73
+
74
+ # set field definition
75
+ a.update(k => field_def)
76
+ end
55
77
  }
56
78
  }
57
79
  }
80
+
81
+ # merge settings if they are set
82
+ settings = @importers.dig(index, :settings)
83
+ body.update(settings: settings) if settings
84
+
85
+ # create index
86
+ @client.indices.create index: index, body: body
87
+
58
88
  rescue => error
59
- puts "Error creating #{es_index} index. #{error.class}: #{error.message}"
89
+ puts "Error creating #{index} index. #{error.class}: #{error.message}"
60
90
  raise
61
91
  end
62
92
 
63
- # create es index
64
- def delete_index!(es_index)
65
- puts "Deleting #{es_index} index at #{@es_uri} ..."
66
- @client.indices.delete index: es_index
93
+ # delete index
94
+ def delete_index!(index)
95
+ puts "Deleting #{index} index at #{@es_uri} ..."
96
+ @client.indices.delete index: index
67
97
  rescue => error
68
- puts "Error deleting #{es_index} index. #{error.class}: #{error.message}"
98
+ puts "Error deleting #{index} index. #{error.class}: #{error.message}"
69
99
  raise
70
100
  end
71
101
 
72
102
  # import documents
73
- def import(es_index, documents)
103
+ def import(index, documents)
74
104
 
75
105
  # import stats init
76
106
  start_time = Time.now
77
107
  failed = 0; imported = 0
78
108
 
79
109
  # insert into elastic
80
- documents.each_with_index do |document, index|
110
+ documents.each_with_index do |document, i|
81
111
 
82
112
  # convert all keys to strings
83
113
  document = _deep_transform_keys_in_object(document, &:to_s)
84
114
 
85
115
  # generate id
86
- id_key = @importers.dig(es_index, :id_key)
116
+ id_key = @importers.dig(index, :id_key)
87
117
  id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
88
118
  id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
89
119
 
90
120
  # convert keys or add new ones
91
- @importers.dig(es_index, :converters)&.each do |keys, converter|
121
+ @importers.dig(index, :converters)&.each do |keys, converter|
92
122
  keys = keys.split('.')
93
123
 
94
124
  # transform existing key
@@ -99,33 +129,33 @@ module EsImporter
99
129
  # add new key
100
130
  else
101
131
  missing_key_index = nil
102
- keys.each_with_index do |key, index|
103
- missing_key_index = index and break unless document.dig(*keys.first(index + 1))
132
+ keys.each_with_index do |key, i|
133
+ missing_key_index = i and break unless document.dig(*keys.first(i + 1))
104
134
  end
105
135
 
106
136
  tail_keys = keys[missing_key_index..-1]
107
137
  tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
108
138
 
109
- tail_keys.each_with_index do |key, index|
110
- tail_hash[tail_keys[index]] = tail_keys.size == index + 1 ? converter.call(document) : {}
111
- tail_hash = tail_hash[tail_keys[index]]
139
+ tail_keys.each_with_index do |key, i|
140
+ tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
141
+ tail_hash = tail_hash[tail_keys[i]]
112
142
  end
113
143
  end
114
144
  end
115
145
 
116
146
  begin
117
- @client.index index: es_index, type: es_index.to_s.chomp('s'), id: id, body: document
118
- puts "##{index + 1} imported #{id}" if @logger
147
+ @client.index index: index, type: index.to_s.chomp('s'), id: id, body: document
148
+ puts "##{i + 1} imported #{id}" if @logger
119
149
  imported = imported + 1
120
150
  rescue => e
121
- puts "##{index + 1} failed #{id}" if @logger
151
+ puts "##{i + 1} failed #{id}" if @logger
122
152
  puts e.class; puts e.message
123
153
  failed = failed + 1
124
154
  end
125
155
  end
126
156
 
127
157
  # print import statistics
128
- puts; puts "#{es_index} import statistics"; puts '-' * 100
158
+ puts; puts "#{index} import statistics"; puts '-' * 100
129
159
  puts "Failed: #{failed}"
130
160
  puts "Imported: #{imported}"
131
161
  puts "Time spent: #{Time.now - start_time} sec"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: es_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damir Roso
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-03-29 00:00:00.000000000 Z
11
+ date: 2018-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -177,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
177
177
  version: '0'
178
178
  requirements: []
179
179
  rubyforge_project:
180
- rubygems_version: 2.5.2
180
+ rubygems_version: 2.7.6
181
181
  signing_key:
182
182
  specification_version: 4
183
183
  summary: Transform and import JSON documents into elastic search.