es_importer 0.2.0 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
- SHA1:
3
- metadata.gz: f7c04aa828e0ae96ecf1435708b67cc8b16e23b7
4
- data.tar.gz: 64c25c201532396329b75ed2e0d456ba0aa8c45d
2
+ SHA256:
3
+ metadata.gz: f5139f9596f419bb2e9be23229becb9ff26c180c38e8f2f4a1b3bcaf8f2fb977
4
+ data.tar.gz: a3ee1f507b0ffeb95369300baad2cde7af60e10aff67fe1deae74984690c8349
5
5
  SHA512:
6
- metadata.gz: 7454c639c0b9242d5709bd8ec660f54fbfcceffe0a401be6b9a9a166ac4c5c0e6917b083e5b8b51d969122286f0e6798d8e3ec644fadccb9593be5fc3337d4fe
7
- data.tar.gz: 7998ff89e18e6783a86baf81aca787ebb4d4cfcbd6f6642dd24bc9a08fc34847700acc95e98fc822b39fb8d241ce65f7451d0212f1e9ff1115b7ca060f733218
6
+ metadata.gz: 1ffe0e7c95b2c3f79eb1a97218c478863a32bd4bcd97d1fcff7db61ecec987cbb747dd53ed00d72458be77f40e03b2003fc8d44cabb2b76fde323d55841f75e3
7
+ data.tar.gz: '084ebc7fbac313071e5ebd9f054d1c133407260d56518bfd20aeb633369f4f944fd81543240662a1e95d7c1d634a5d64d148733cd2f4f1cce67a03b598a6fe56'
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- es_importer (0.2.0)
4
+ es_importer (0.3.0)
5
5
  aws-sdk
6
6
  elasticsearch
7
7
  faraday
@@ -12,13 +12,13 @@ PATH
12
12
  GEM
13
13
  remote: https://rubygems.org/
14
14
  specs:
15
- aws-sdk (2.11.24)
16
- aws-sdk-resources (= 2.11.24)
17
- aws-sdk-core (2.11.24)
15
+ aws-sdk (2.11.28)
16
+ aws-sdk-resources (= 2.11.28)
17
+ aws-sdk-core (2.11.28)
18
18
  aws-sigv4 (~> 1.0)
19
19
  jmespath (~> 1.0)
20
- aws-sdk-resources (2.11.24)
21
- aws-sdk-core (= 2.11.24)
20
+ aws-sdk-resources (2.11.28)
21
+ aws-sdk-core (= 2.11.28)
22
22
  aws-sigv4 (1.0.2)
23
23
  diff-lcs (1.3)
24
24
  elasticsearch (6.0.2)
data/README.md CHANGED
@@ -42,27 +42,39 @@ users = (1..100).to_a.map do |i|
42
42
  }
43
43
  end
44
44
 
45
- # define elastic id, mapping and keywords for index; provide conversion procs
45
+ # define mapping and settings for index
46
+ # provide conversion procs to transform the document
46
47
  importer = {
47
48
 
48
49
  # name of the index
50
+ # NOTE: type will be set to singular form of index name, ie. users => user
49
51
  users: {
50
52
 
51
53
  # build id from single or multiple keys
52
54
  id_key: [:user_id, :created_at],
53
55
 
54
56
  # define index mapping
57
+ # supply a type for a field or a hash with field definition
55
58
  mapping: {
56
- user_id: :text,
59
+ user_id: :integer,
57
60
  active: :boolean,
58
- email: :text,
61
+ email: {type: :text, analyzer: :my_analyzer},
59
62
  created_at: :date,
60
63
  country_code: :text
61
64
  },
62
65
 
66
+ # shortcut to set keywords to fields
63
67
  # keyword generated is 'country_code.keyword'
64
68
  keywords: [:country_code],
65
69
 
70
+ # index settings - add a custom analyzer with a ngram filter
71
+ settings: {
72
+ analysis: {
73
+ filter: {my_filter: {type: :ngram, min_gram: 3, max_gram: 4}},
74
+ analyzer: {my_analyzer: {type: :custom, tokenizer: :standard, filter: [:lowercase, :my_filter]}}
75
+ }
76
+ },
77
+
66
78
  converters: {
67
79
  # downcase existing field value, second argument is document being processed
68
80
  'email' => Proc.new{|attr, _| attr.downcase},
@@ -104,7 +116,7 @@ To install this gem onto your local machine, run `bundle exec rake install`. To
104
116
 
105
117
  ## Contributing
106
118
 
107
- Bug reports and pull requests are welcome on GitHub at https://github.com/[USERNAME]/es_importer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
119
+ Bug reports and pull requests are welcome on GitHub at https://github.com/damir/es_importer. This project is intended to be a safe, welcoming space for collaboration, and contributors are expected to adhere to the [Contributor Covenant](http://contributor-covenant.org) code of conduct.
108
120
 
109
121
  ## License
110
122
 
@@ -112,4 +124,4 @@ The gem is available as open source under the terms of the [MIT License](https:/
112
124
 
113
125
  ## Code of Conduct
114
126
 
115
- Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/[USERNAME]/es_importer/blob/master/CODE_OF_CONDUCT.md).
127
+ Everyone interacting in the EsImporter project’s codebases, issue trackers, chat rooms and mailing lists is expected to follow the [code of conduct](https://github.com/damir/es_importer/blob/master/CODE_OF_CONDUCT.md).
@@ -1,3 +1,3 @@
1
1
  module EsImporter
2
- VERSION = "0.2.0"
2
+ VERSION = "0.3.0"
3
3
  end
data/lib/es_importer.rb CHANGED
@@ -13,7 +13,7 @@ module EsImporter
13
13
  @client = Elasticsearch::Client.new transport: transport
14
14
  end
15
15
 
16
- # init es transport
16
+ # init transport
17
17
  def transport
18
18
  # parse uri for host configuration
19
19
  es_uri = URI.parse(@es_uri)
@@ -44,51 +44,81 @@ module EsImporter
44
44
  @importers.update(importer)
45
45
  end
46
46
 
47
- # create es index
48
- def create_index!(es_index)
49
- puts "Creating #{es_index} index at #{@es_uri} ..."
50
- @client.indices.create index: es_index, body: {
47
+ # create index
48
+ def create_index!(index)
49
+ puts "Creating #{index} index at #{@es_uri} ..."
50
+
51
+ mapping = @importers.dig(index, :mapping)
52
+ keywords = @importers.dig(index, :keywords)
53
+ type_name = index.to_s.chomp('s')
54
+
55
+ body = {
56
+
57
+ # add mapping
51
58
  mappings: {
52
- es_index.to_s.chomp('s') => {
59
+ type_name => {
53
60
  dynamic: false,
54
- properties: @importers.dig(es_index, :mapping).reduce({}){|a, (k,v)| a.update({k => {type: v}.update(@importers.dig(es_index, :keywords)&.include?(k) ? {fields: {keyword: {type: :keyword}}} : {})})}
61
+ properties: mapping.reduce({}) do |a, (k, v)|
62
+
63
+ # field with only type def, ie. mapping: {user_id: :text}
64
+ if v.kind_of?(Symbol)
65
+ field_def = {type: v}
66
+ # field with full def, ie. mapping: {user_id: {type: :text, analyzer: :my_analyzer}}
67
+ elsif v.kind_of?(Hash)
68
+ field_def = v
69
+ end
70
+
71
+ # optional keywords
72
+ field_def.update(keywords&.include?(k) ? {fields: {keyword: {type: :keyword}}} : {}) if keywords
73
+
74
+ # set field definition
75
+ a.update(k => field_def)
76
+ end
55
77
  }
56
78
  }
57
79
  }
80
+
81
+ # merge settings if they are set
82
+ settings = @importers.dig(index, :settings)
83
+ body.update(settings: settings) if settings
84
+
85
+ # create index
86
+ @client.indices.create index: index, body: body
87
+
58
88
  rescue => error
59
- puts "Error creating #{es_index} index. #{error.class}: #{error.message}"
89
+ puts "Error creating #{index} index. #{error.class}: #{error.message}"
60
90
  raise
61
91
  end
62
92
 
63
- # create es index
64
- def delete_index!(es_index)
65
- puts "Deleting #{es_index} index at #{@es_uri} ..."
66
- @client.indices.delete index: es_index
93
+ # delete index
94
+ def delete_index!(index)
95
+ puts "Deleting #{index} index at #{@es_uri} ..."
96
+ @client.indices.delete index: index
67
97
  rescue => error
68
- puts "Error deleting #{es_index} index. #{error.class}: #{error.message}"
98
+ puts "Error deleting #{index} index. #{error.class}: #{error.message}"
69
99
  raise
70
100
  end
71
101
 
72
102
  # import documents
73
- def import(es_index, documents)
103
+ def import(index, documents)
74
104
 
75
105
  # import stats init
76
106
  start_time = Time.now
77
107
  failed = 0; imported = 0
78
108
 
79
109
  # insert into elastic
80
- documents.each_with_index do |document, index|
110
+ documents.each_with_index do |document, i|
81
111
 
82
112
  # convert all keys to strings
83
113
  document = _deep_transform_keys_in_object(document, &:to_s)
84
114
 
85
115
  # generate id
86
- id_key = @importers.dig(es_index, :id_key)
116
+ id_key = @importers.dig(index, :id_key)
87
117
  id = document[id_key.to_s] if id_key.is_a?(Symbol) # single key
88
118
  id = id_key.reduce([]){|acc, key| acc << document[key.to_s]}.join('-') if id_key.is_a?(Array) # composite key
89
119
 
90
120
  # convert keys or add new ones
91
- @importers.dig(es_index, :converters)&.each do |keys, converter|
121
+ @importers.dig(index, :converters)&.each do |keys, converter|
92
122
  keys = keys.split('.')
93
123
 
94
124
  # transform existing key
@@ -99,33 +129,33 @@ module EsImporter
99
129
  # add new key
100
130
  else
101
131
  missing_key_index = nil
102
- keys.each_with_index do |key, index|
103
- missing_key_index = index and break unless document.dig(*keys.first(index + 1))
132
+ keys.each_with_index do |key, i|
133
+ missing_key_index = i and break unless document.dig(*keys.first(i + 1))
104
134
  end
105
135
 
106
136
  tail_keys = keys[missing_key_index..-1]
107
137
  tail_hash = keys[0...missing_key_index].reduce(document, :fetch)
108
138
 
109
- tail_keys.each_with_index do |key, index|
110
- tail_hash[tail_keys[index]] = tail_keys.size == index + 1 ? converter.call(document) : {}
111
- tail_hash = tail_hash[tail_keys[index]]
139
+ tail_keys.each_with_index do |key, i|
140
+ tail_hash[tail_keys[i]] = tail_keys.size == i + 1 ? converter.call(document) : {}
141
+ tail_hash = tail_hash[tail_keys[i]]
112
142
  end
113
143
  end
114
144
  end
115
145
 
116
146
  begin
117
- @client.index index: es_index, type: es_index.to_s.chomp('s'), id: id, body: document
118
- puts "##{index + 1} imported #{id}" if @logger
147
+ @client.index index: index, type: index.to_s.chomp('s'), id: id, body: document
148
+ puts "##{i + 1} imported #{id}" if @logger
119
149
  imported = imported + 1
120
150
  rescue => e
121
- puts "##{index + 1} failed #{id}" if @logger
151
+ puts "##{i + 1} failed #{id}" if @logger
122
152
  puts e.class; puts e.message
123
153
  failed = failed + 1
124
154
  end
125
155
  end
126
156
 
127
157
  # print import statistics
128
- puts; puts "#{es_index} import statistics"; puts '-' * 100
158
+ puts; puts "#{index} import statistics"; puts '-' * 100
129
159
  puts "Failed: #{failed}"
130
160
  puts "Imported: #{imported}"
131
161
  puts "Time spent: #{Time.now - start_time} sec"
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: es_importer
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Damir Roso
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2018-03-29 00:00:00.000000000 Z
11
+ date: 2018-04-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: bundler
@@ -177,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
177
177
  version: '0'
178
178
  requirements: []
179
179
  rubyforge_project:
180
- rubygems_version: 2.5.2
180
+ rubygems_version: 2.7.6
181
181
  signing_key:
182
182
  specification_version: 4
183
183
  summary: Transform and import JSON documents into elastic search.