rotating_es_loader 0.0.0 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 929a07d2d4a9527442a16944c5970152f8d3d0e6daf6fd1693577339ae4c8ec0
4
- data.tar.gz: 836e310957e7ceb637b6896ed96435797d2955eeac9fd4e8739b99fc5a84f394
3
+ metadata.gz: a895e503532b76e0a807d233339a1d5bbb6d273e9f12972da776a50857eee8e2
4
+ data.tar.gz: a92614bd378f9d298aa51b1ae68bcf25638205092f7a6e45ffe36194246278fb
5
5
  SHA512:
6
- metadata.gz: 253ed26543206be48a15ada0d9336c770fcd6a36eb824651c211ddc38c83184265caa952c5ae166d811236655dd94c5e7a5e108b192a808d650c7968169081e6
7
- data.tar.gz: e8afa85a8eb6b52b79eb36e17b66e036bf1a160c4e5c478e02ac76a68d0c2e1213909e23bb020c415209286ca0b3d49398290194d0041fd336ba5f8fd17e8a5e
6
+ metadata.gz: 9be744ca07756790a7ed69954fa9318344e56fde8c23c91f04c2b06b8456187e6f1dca68918c686c94763ed33a92c84c11159c3ef37e6c85621b0ad0f665c5ef
7
+ data.tar.gz: 832b9495e7cddd079e084ea0f0249f0a47e731d44e46cf670f20f1dbaef028e5f569f9c2feba677872c6477d473fba4eabee28ba7c7ff5576e06f2df4078cc5a
@@ -1,5 +1,204 @@
1
- class RotatingEsLoader
2
- def self.hi
3
- puts "Hello world"
1
+ # frozen_string_literal: true
2
+
3
+ require 'rotating_es_loader/es_client'
4
+
5
+ # :nodoc
6
# Loads documents into freshly created, date-stamped Elasticsearch indices,
# points an alias named after each index key at the new index, and then
# prunes older date-stamped indices.
#
# +opts[:index_definitions]+ is a Hash keyed by index key; each value is read
# for +:datasource+, +:mappings+, +:settings+ and (ES <= 5 only) +:type+.
class RotatingEsLoader < EsClient
  extend Memoist

  # For index ages (in days) up to this value, the newest index of each day
  # is kept by #delete_old_indices; all other date-stamped indices are deleted.
  MAX_INDEX_AGE = 3
  DEFAULT_SLICE_SIZE = 50

  attr_accessor :slice_size, :es_major_version

  # @param opts [Hash] requires :credentials, :url and :index_definitions
  #   (a Hash); optional :slice_size and :datasources.
  # @raise [RuntimeError] when a required option is missing or a definition
  #   has no datasource responding to #each.
  def initialize(opts)
    raise('no credentials provided') unless opts[:credentials]
    raise('no url provided') unless opts[:url]
    raise('no definitions provided') unless opts[:index_definitions].is_a?(Hash)

    # Parsed only so that a malformed URL fails here; the result is not used.
    URI.parse(opts[:url])

    super(
      url: opts[:url],
      credentials: opts[:credentials]
    )

    @index_definitions = opts[:index_definitions]
    @slice_size = opts[:slice_size] || DEFAULT_SLICE_SIZE

    @logger.debug("index keys: #{index_keys}")
    @datasources = opts[:datasources]

    index_keys.each do |key|
      raise("No datasource for #{key}") unless datasource_for(key).respond_to?(:each)
    end

    # Major version of the server, e.g. "6.8.1" => 6.
    @es_major_version = client.info['version']['number'].split('.').first.to_i
  end

  # Returns the configured :type for +key+.
  # @raise [RuntimeError] when the server major version is above 5.
  def document_type_for(key)
    raise "document type not supported for ES #{es_major_version}" unless multitype_support?

    @index_definitions[key][:type]
  end

  def index_keys
    @index_definitions.keys
  end

  def mappings_for(key)
    @index_definitions[key][:mappings]
  end

  def settings_for(key)
    @index_definitions[key][:settings]
  end

  def datasource_for(key)
    @index_definitions[key][:datasource]
  end

  # Runs the full rotation: create indices, load documents, swap aliases,
  # delete old indices.
  def execute
    create_indices
    create_documents
    swap_aliases
    delete_old_indices
  end

  # True when the server major version is 5 or lower.
  def multitype_support?
    es_major_version <= 5
  end

  # Bulk-loads every datasource into its new index.
  def create_documents
    index_keys.each do |k|
      create_documents_for_type(
        name: get_index_name(k),
        data: datasource_for(k),
        # document_type_for raises on ES >= 6, so only ask for a type when
        # the server supports one.
        type: multitype_support? ? document_type_for(k) : nil
      )
    end
  end

  # Sends +data+ to index +name+ through the bulk API in batches of
  # +slice_size+ records. Each record is read for +rec[:id]+.
  # Bulk responses flagged with 'errors' are logged as warnings, not raised.
  def create_documents_for_type(name:, data:, type: nil)
    @logger.info("Creating documents of in index #{name} in batches of #{@slice_size}")
    # each_slice without a block hands out one batch at a time.
    data.each_slice(@slice_size).each_with_index do |slice, slice_num|
      @logger.debug("batch #{slice_num}: #{slice.size} docs")

      body = slice.flat_map do |rec|
        action = { _index: name, _id: rec[:id] }
        action[:_type] = type if multitype_support? && type
        [{ index: action }, rec]
      end

      result = client.bulk(body: body)
      @logger.warn("ERRORS: #{JSON.pretty_generate(result)}") if result['errors']
    end
  end

  def create_indices
    index_keys.each do |k|
      create_index(name: get_index_name(k), key: k)
    end
  end

  # Age in whole days of the date stamp embedded in index name +key+.
  # The stamp is the first dash-separated segment after the leading one that
  # consists of exactly eight digits. Returns 0 when no such segment exists
  # or it is not a valid date.
  def key_age(key)
    date_str = key.split('-').drop(1).find { |segment| segment.match?(/\A\d{8}\z/) }
    return 0 unless date_str

    (Date.today - Date.strptime(date_str, '%Y%m%d')).to_i
  rescue ArgumentError
    0
  end

  # Builds "<key>-<YYYYMMDD>-<epoch seconds>-<pid>" for a configured key.
  # @raise [RuntimeError] when +key+ is not one of the index keys.
  def get_index_name(key)
    # TODO: make it more sequential, so that it sorts correctly
    raise("provided key #{key} is not a valid index") unless @index_definitions.key?(key)

    stamp = [Date.today.strftime('%Y%m%d'), Time.now.to_i, Process.pid].join('-')
    "#{key}-#{stamp}"
  end
  memoize :get_index_name # otherwise time might change

  # Deletes date-stamped indices belonging to each index key, keeping the
  # last (by name sort) index of every day whose age is <= MAX_INDEX_AGE.
  # The index created by this run is never deleted.
  def delete_old_indices
    existing_names = client.indices.get(index: '_all').keys

    @logger.debug("Existing indexes: #{existing_names}")

    index_keys.each do |index|
      # Only names of the form "<key>-<8 digits>..." are considered, so a key
      # that is merely a substring or prefix of another index is not touched.
      own_pattern = /\A#{Regexp.escape(index.to_s)}-\d{8}(?:-|\z)/
      candidates = existing_names.select { |name| name.match?(own_pattern) }.sort

      keys_to_delete = candidates.group_by { |name| key_age(name) }.flat_map do |age, names|
        age <= MAX_INDEX_AGE ? names[0...-1] : names
      end
      keys_to_delete.delete(get_index_name(index))

      next if keys_to_delete.empty?

      @logger.debug("Deleting indexes #{keys_to_delete.join(', ')}")
      client.indices.delete index: keys_to_delete
    end
  end

  # Points each alias (named after the index key) at the new index and
  # detaches it from whatever indices it was attached to, in one
  # update_aliases call per alias.
  def swap_aliases
    index_keys.each do |alias_name|
      index_name = get_index_name(alias_name)

      actions = [
        { add: { index: index_name, alias: alias_name } }
      ]

      @logger.debug("fetching any indices attached to alias #{alias_name}")
      begin
        client.indices.get_alias(name: alias_name).keys.each do |index_to_remove|
          actions.unshift(
            remove: { index: index_to_remove, alias: alias_name }
          )
        end
      rescue StandardError => e
        # Best effort: a failed lookup (presumably a missing alias on the
        # first run) is logged and the alias is simply added.
        @logger.warn(e)
      end

      @logger.debug('update_aliases actions: ' + actions.to_json)

      client.indices.update_aliases body: {
        actions: actions
      }
    end
  end

  # Wraps the configured mappings in the shape used for the server version:
  # nested under +key+ for ES < 6, top-level :properties otherwise.
  # A missing mapping is logged and treated as an empty one.
  # NOTE(review): for ES < 6 the mapping is nested under the index key while
  # documents are indexed with the definition's :type; confirm these agree.
  def mappings_adjusted_for_es_version(key)
    properties = mappings_for(key)
    unless properties
      @logger.warn("mappings does not contain a mapping for #{key}")
      properties = {}
    end

    if es_major_version < 6
      { key => { properties: properties } }
    else
      { properties: properties }
    end
  end

  # Creates index +name+ using the settings and mappings configured for +key+.
  def create_index(name:, key:)
    @logger.debug("creating index #{name}")

    mappings = mappings_adjusted_for_es_version(key)
    @logger.debug("mappings: #{mappings.to_json}")

    request = {
      index: name,
      body: {
        settings: settings_for(key),
        mappings: mappings
      }
    }
    @logger.debug(JSON.pretty_generate(request))

    client.indices.create(request)
  end
end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # :nodoc
4
# Enumerable wrapper around an in-memory collection, so it can be used
# wherever a datasource responding to #each is expected.
class ArrayDatasource
  include Enumerable

  # @param data [#each, #size] the collection to expose
  # @raise [ArgumentError] when +data+ cannot be iterated
  def initialize(data)
    raise ArgumentError, 'data must respond to #each' unless data.respond_to?(:each)

    @data = data
  end

  # Yields every element of the wrapped collection.
  # @return [self] when a block is given, otherwise an Enumerator
  def each(&block)
    return to_enum(:each) { size } unless block

    @data.each(&block)
    self
  end

  # @return the wrapped collection's own #size
  def size
    @data.size
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday_middleware/aws_signers_v4'
4
+ require 'faraday_middleware/gzip'
5
+ require 'elasticsearch'
6
+ require 'memoist'
7
+ require 'logger'
8
+ require 'aws-sdk'
9
+
10
+ # :nodoc
11
# Holds connection details for an AWS-signed Elasticsearch endpoint and
# forwards unknown method calls to the underlying Elasticsearch client.
class EsClient
  extend Memoist

  # Region used for request signing when none is supplied.
  DEFAULT_REGION = 'us-west-1'

  # @param url [String] endpoint URL
  # @param credentials [Aws::SharedCredentials] used to sign requests
  # @param logger [Logger, nil] defaults to a Logger writing to STDOUT
  # @param region [String] AWS region used for signing
  # @raise [RuntimeError] when +credentials+ is not an Aws::SharedCredentials
  def initialize(
    url:,
    credentials:,
    logger: nil,
    region: DEFAULT_REGION
  )
    raise('credentials must be an Aws::SharedCredentials') unless \
      credentials.is_a?(Aws::SharedCredentials)

    @logger = logger || Logger.new(STDOUT)
    @url = url
    @credentials = credentials
    @region = region
    @logger.info('URL is ' + url)
  end

  # Builds the Elasticsearch client with gzip and AWS v4 signing middleware.
  # Memoized below, so the same client object is reused.
  def client
    Elasticsearch::Client.new(url: @url) do |f|
      f.use FaradayMiddleware::Gzip
      f.request :aws_signers_v4,
                credentials: @credentials,
                service_name: 'es',
                region: @region
    end
  end
  memoize :client

  # Forwards calls the client publicly responds to; anything else falls
  # through to the normal NoMethodError.
  def method_missing(m, *args, &block)
    return super unless client.respond_to?(m)

    @logger.debug("Delegating #{m}")
    client.public_send(m, *args, &block)
  end

  # Keeps #respond_to? in agreement with #method_missing.
  def respond_to_missing?(m, include_private = false)
    client.respond_to?(m) || super
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'xmlsimple'
4
+
5
+ # :nodoc
6
# Enumerable datasource that runs one or more SQL statements and yields each
# result row as a Hash keyed by column name (as a Symbol).
class SqlDatasource
  include Enumerable

  # @param sql [String, Array<String>] one statement or a list of statements
  # @param ar_connection [#execute] connection whose #execute result responds
  #   to #fields and #map (rows as arrays)
  # @raise [RuntimeError] when +sql+ is nil or false
  def initialize(sql:, ar_connection:)
    raise 'no sql provided' unless sql

    @sql = sql
    @ar_connection = ar_connection
  end

  # Hook for subclasses to reshape a row; returns the row unchanged here.
  def normalize(o)
    o
  end

  # Executes every statement in order and returns all rows as one Array of
  # Hashes.
  def data
    Array(@sql).flat_map do |query|
      # Run the statement for this iteration, not the whole @sql value.
      records_array = @ar_connection.execute(query)
      fields = records_array.fields.map(&:to_sym)

      records_array.map do |row_array|
        normalize(fields.zip(row_array).to_h)
      end
    end
  end

  # Yields each row produced by #data.
  # @return [self] when a block is given, otherwise an Enumerator
  def each(&block)
    return to_enum(:each) unless block

    data.each(&block)

    self
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'xmlsimple'
4
+
5
+ # :nodoc
6
# Enumerable datasource that parses every file matching a glob with
# XmlSimple and yields the resulting record(s).
class XmlFilesDatasource
  include Enumerable

  # @param glob [String] pattern handed to Dir.glob; the matching paths are
  #   captured once, at construction time
  def initialize(glob)
    @files = Dir.glob(glob)
  end

  # Hook for subclasses to reshape a parsed document; returns it unchanged
  # here. May return a single record or a list of records.
  def normalize(o)
    o
  end

  # Parses each file and yields the normalized record(s).
  # @return [self] when a block is given, otherwise an Enumerator
  def each(&block)
    return to_enum(:each) unless block

    @files.each do |xml_file|
      hash = XmlSimple.xml_in(
        xml_file,
        ForceArray: false,
        SuppressEmpty: ''
      )
      records_from(normalize(hash)).each(&block)
    end

    self
  end

  # @return [Integer] number of matched files (not number of records)
  def size
    @files.size
  end

  private

  # Wraps a single Hash record in a list. Array(hash) would instead explode
  # it into [key, value] pairs. Other values go through Array().
  def records_from(normalized)
    normalized.is_a?(Hash) ? [normalized] : Array(normalized)
  end
end
metadata CHANGED
@@ -1,15 +1,197 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rotating_es_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kowdley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-21 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2019-09-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 2.11.358
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 2.11.358
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk-resources
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.11.258
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 2.11.258
41
+ - !ruby/object:Gem::Dependency
42
+ name: elasticsearch
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 5.0.5
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 5.0.5
55
+ - !ruby/object:Gem::Dependency
56
+ name: elasticsearch-extensions
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.0.31
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.0.31
69
+ - !ruby/object:Gem::Dependency
70
+ name: faraday_middleware
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.13.1
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.13.1
83
+ - !ruby/object:Gem::Dependency
84
+ name: faraday_middleware-aws-signers-v4
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.1.9
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.1.9
97
+ - !ruby/object:Gem::Dependency
98
+ name: logger
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.4.1
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.4.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: memoist
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.16.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.16.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: xml-simple
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.1.5
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.1.5
139
+ - !ruby/object:Gem::Dependency
140
+ name: rake
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 12.3.3
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 12.3.3
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 3.8.0
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 3.8.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: rubocop
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 0.74.0
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 0.74.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: rubocop-rspec
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 1.35.0
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 1.35.0
13
195
  description: A base class for code that loads data into Elasticsearch
14
196
  email: mike@valuationmetricsinc.com
15
197
  executables: []
@@ -17,7 +199,11 @@ extensions: []
17
199
  extra_rdoc_files: []
18
200
  files:
19
201
  - lib/rotating_es_loader.rb
20
- homepage: https://rubygems.org/gems/rotating_es_loader
202
+ - lib/rotating_es_loader/array_datasource.rb
203
+ - lib/rotating_es_loader/es_client.rb
204
+ - lib/rotating_es_loader/sql_datasource.rb
205
+ - lib/rotating_es_loader/xml_files_datasource.rb
206
+ homepage: https://github.com/mikevm/rotating_es_loader
21
207
  licenses:
22
208
  - MIT
23
209
  metadata: {}