rotating_es_loader 0.0.0 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 929a07d2d4a9527442a16944c5970152f8d3d0e6daf6fd1693577339ae4c8ec0
4
- data.tar.gz: 836e310957e7ceb637b6896ed96435797d2955eeac9fd4e8739b99fc5a84f394
3
+ metadata.gz: a895e503532b76e0a807d233339a1d5bbb6d273e9f12972da776a50857eee8e2
4
+ data.tar.gz: a92614bd378f9d298aa51b1ae68bcf25638205092f7a6e45ffe36194246278fb
5
5
  SHA512:
6
- metadata.gz: 253ed26543206be48a15ada0d9336c770fcd6a36eb824651c211ddc38c83184265caa952c5ae166d811236655dd94c5e7a5e108b192a808d650c7968169081e6
7
- data.tar.gz: e8afa85a8eb6b52b79eb36e17b66e036bf1a160c4e5c478e02ac76a68d0c2e1213909e23bb020c415209286ca0b3d49398290194d0041fd336ba5f8fd17e8a5e
6
+ metadata.gz: 9be744ca07756790a7ed69954fa9318344e56fde8c23c91f04c2b06b8456187e6f1dca68918c686c94763ed33a92c84c11159c3ef37e6c85621b0ad0f665c5ef
7
+ data.tar.gz: 832b9495e7cddd079e084ea0f0249f0a47e731d44e46cf670f20f1dbaef028e5f569f9c2feba677872c6477d473fba4eabee28ba7c7ff5576e06f2df4078cc5a
@@ -1,5 +1,204 @@
1
- class RotatingEsLoader
2
- def self.hi
3
- puts "Hello world"
1
+ # frozen_string_literal: true
2
+
3
+ require 'rotating_es_loader/es_client'
4
+
5
+ # :nodoc
6
+ class RotatingEsLoader < EsClient
7
+ extend Memoist
8
+
9
+ # indexes with a datestamp newer than this age (in days) will not be wiped
10
+ MAX_INDEX_AGE = 3
11
+ DEFAULT_SLICE_SIZE = 50
12
+
13
+ attr_accessor :slice_size, :es_major_version
14
+
15
# Builds a loader for the given index definitions and records the major
# version reported by the Elasticsearch cluster.
#
# opts is a Hash with these keys:
#   :credentials       - handed to EsClient#initialize (required)
#   :url               - Elasticsearch endpoint URL (required)
#   :index_definitions - Hash keyed by index key; every value is a Hash whose
#                        :datasource entry must respond to #each (required)
#   :slice_size        - documents per bulk request (DEFAULT_SLICE_SIZE if unset)
#   :datasources       - stored in @datasources; not read by the code shown here
#
# Raises RuntimeError when a required option is missing or a definition has
# no enumerable datasource, plus whatever URI.parse raises for a bad URL.
def initialize(opts)
  raise('no credentials provided') unless opts[:credentials]
  raise('no url provided') unless opts[:url]
  raise('no definitions provided') unless opts[:index_definitions].is_a?(Hash)

  # Parsed only to fail fast on a malformed URL; the result itself is unused.
  URI.parse(opts[:url])

  super(url: opts[:url], credentials: opts[:credentials])

  @index_definitions = opts[:index_definitions]
  @slice_size = opts[:slice_size] || DEFAULT_SLICE_SIZE
  @datasources = opts[:datasources]

  @logger.debug("index keys: #{index_keys}")

  index_keys.each do |key|
    raise("No datasource for #{key}") unless datasource_for(key).respond_to?(:each)
  end

  # "7.10.2" -> 7
  @es_major_version = client.info['version']['number'].split('.').first.to_i
end
39
+
40
# Returns the :type entry of the definition registered under +key+.
# The lookup is refused with a RuntimeError when es_major_version is
# greater than 5.
def document_type_for(key)
  if es_major_version > 5
    raise "document type not supported for ES #{es_major_version}"
  end

  definition = @index_definitions[key]
  definition[:type]
end
45
+
46
# Lists the keys of the configured index definitions as an Array.
def index_keys
  @index_definitions.each_key.to_a
end
49
+
50
# Returns the :mappings entry of the definition registered under +key+
# (nil when the definition has none).
def mappings_for(key)
  definition = @index_definitions[key]
  definition[:mappings]
end
53
+
54
# Returns the :settings entry of the definition registered under +key+
# (nil when the definition has none).
def settings_for(key)
  definition = @index_definitions[key]
  definition[:settings]
end
57
+
58
# Returns the :datasource entry of the definition registered under +key+
# (nil when the definition has none).
def datasource_for(key)
  definition = @index_definitions[key]
  definition[:datasource]
end
61
+
62
# Runs a complete load: create the new indices, fill them with documents,
# repoint the aliases and finally prune old indices, in that order.
# Returns whatever delete_old_indices returns.
def execute
  steps = %i[create_indices create_documents swap_aliases delete_old_indices]
  steps.map { |step| send(step) }.last
end
68
+
69
# True when es_major_version is 5 or lower.
def multitype_support?
  es_major_version <= 5
end
72
+
73
# Bulk-loads every configured datasource into the index named for its key.
#
# The document type is looked up only when multitype_support? is true:
# document_type_for raises on newer clusters, which would otherwise abort
# the load before a single document is sent.
def create_documents
  index_keys.each do |key|
    create_documents_for_type(
      name: get_index_name(key),
      data: datasource_for(key),
      type: multitype_support? ? document_type_for(key) : nil
    )
  end
end
82
+
83
# Sends +data+ to the bulk API in batches of @slice_size documents.
#
# name - target index name
# data - enumerable of records; each record is sent as the document source
#        and its :id entry becomes the document _id
# type - document type; attached to every action when multitype_support?
#        is true, ignored otherwise
#
# A bulk response whose 'errors' entry is truthy is logged as a warning; it
# is not raised.
def create_documents_for_type(name:, data:, type: nil)
  @logger.info("Creating documents of in index #{name} in batches of #{@slice_size}")

  # A plain enumerator is used on purpose. On Ruby >= 2.7
  # Enumerator::Lazy#each_with_index is itself lazy, so a block handed to it
  # never runs unless the result is forced. each_slice without a block still
  # hands over one batch at a time.
  data.each_slice(@slice_size).each_with_index do |slice, slice_num|
    @logger.debug("batch #{slice_num}: #{slice.size} docs")

    actions = slice.flat_map do |rec|
      meta = { _index: name, _id: rec[:id] }
      # Same version rule as document_type_for / multitype_support?.
      meta[:_type] = type if multitype_support?
      [{ index: meta }, rec]
    end

    result = client.bulk(body: actions)
    @logger.warn("ERRORS: #{JSON.pretty_generate(result)}") if result['errors']
  end
end
102
+
103
# Creates one index per configured key, using the name get_index_name
# yields for that key.
def create_indices
  index_keys.each { |key| create_index(name: get_index_name(key), key: key) }
end
108
+
109
# Age, in whole days, of the datestamp embedded in an index name.
#
# Names built by get_index_name end in "-YYYYMMDD-<epoch>-<pid>"; that
# suffix is tried first so keys that themselves contain "-" still work.
# Failing that, the second "-"-separated segment is used.
#
# Returns 0 when the chosen segment is not exactly eight digits, or is
# eight digits that do not form a calendar date.
def key_age(key)
  date_str = key[/-(\d{8})-\d+-\d+\z/, 1] || key.split('-')[1]
  return 0 unless date_str && date_str =~ /\A\d{8}\z/

  (Date.today - Date.strptime(date_str, '%Y%m%d')).to_i
rescue ArgumentError
  # Eight digits that are not a real date, e.g. "99999999".
  0
end
117
+
118
# Builds the index name for +key+ in the form
# "<key>-<YYYYMMDD>-<epoch seconds>-<pid>".
# Raises RuntimeError when +key+ is not one of index_keys.
# TODO: make it more sequential, so that it sorts correctly
def get_index_name(key)
  raise("provided key #{key} is not a valid index") unless index_keys.include?(key)

  today_digits = Date.today.to_s.gsub(/\D/, '')
  [key.to_s, today_digits, Time.now.to_i.to_s, Process.pid.to_s].join('-')
end
124
+ memoize :get_index_name # otherwise time might change
125
+
126
# Prunes indices left behind by earlier runs.
#
# For each configured key, existing indices named "<key>-<YYYYMMDD>..." are
# sorted and grouped by key_age. In every group whose age is at most
# MAX_INDEX_AGE the last name in sort order is kept; all other names in the
# group, and every name in older groups, are deleted.
#
# Names are matched with an anchored pattern rather than a substring test,
# so a key such as "user" can never select (and delete) the indices of
# "user_profiles" or "superuser".
def delete_old_indices
  existing = client.indices.get(index: '_all').keys

  @logger.debug("Existing indexes: #{existing}")

  index_keys.each do |index|
    owned = /\A#{Regexp.escape(index.to_s)}-\d{8}(?:-|\z)/
    candidates = existing.grep(owned).sort

    # delete all indexes, keeping one from each day for the last few days
    doomed = candidates.group_by { |name| key_age(name) }.flat_map do |age, names|
      age <= MAX_INDEX_AGE ? names[0...-1] : names
    end

    next if doomed.empty?

    @logger.debug("Deleting indexes #{doomed.join(', ')}")
    client.indices.delete index: doomed
  end
end
148
+
149
# Points every alias (named after its index key) at the index named by
# get_index_name, detaching the indices that currently carry the alias in
# the same update_aliases request.
#
# If looking up the current alias members fails, the error is logged as a
# warning and nothing is detached.
def swap_aliases
  index_keys.each do |alias_name|
    target = get_index_name(alias_name)

    @logger.debug("fetching any indices attached to alias #{alias_name}")
    attached =
      begin
        client.indices.get_alias(name: alias_name).keys
      rescue StandardError => e
        @logger.warn(e)
        []
      end

    # Removals go first, in reverse lookup order, followed by the single add.
    removals = attached.reverse.map do |stale|
      { remove: { index: stale, alias: alias_name } }
    end
    actions = removals + [{ add: { index: target, alias: alias_name } }]

    @logger.debug('update_aliases actions: ' + actions.to_json)

    client.indices.update_aliases body: { actions: actions }
  end
end
175
+
176
# Shapes the mapping configured for +key+ for the cluster's major version:
# nested under +key+ when es_major_version is below 6, otherwise as a
# top-level :properties entry.
#
# When no mapping is configured, a warning is logged and an empty Hash is
# returned. (The earlier `mapping || @logger.warn(...)` form put the
# logger's return value into the mapping body.)
#
# NOTE(review): the pre-6 form nests under +key+, while bulk actions use the
# definition's :type — confirm the two are meant to coincide.
def mappings_adjusted_for_es_version(key)
  mapping_for_key = mappings_for(key)
  unless mapping_for_key
    @logger.warn("mappings does not contain a mapping for #{key}")
    return {}
  end

  if es_major_version < 6
    { key => { properties: mapping_for_key } }
  else
    { properties: mapping_for_key }
  end
end
187
+
188
# Creates index +name+ with the settings and version-adjusted mappings
# configured for +key+. Returns the response of client.indices.create.
def create_index(name:, key:)
  @logger.debug("creating index #{name}")

  mappings = mappings_adjusted_for_es_version(key)
  @logger.debug("mappings: #{mappings.to_json}")

  request = {
    index: name,
    body: {
      settings: settings_for(key),
      mappings: mappings
    }
  }

  # The request dump goes through the logger rather than a bare puts, so
  # callers decide where (and whether) it shows up.
  @logger.debug("create index request: #{JSON.pretty_generate(request)}")

  client.indices.create(request)
end
5
204
  end
@@ -0,0 +1,22 @@
1
+ # frozen_string_literal: true
2
+
3
+ # :nodoc
4
# Datasource backed by an in-memory collection (anything that responds to
# #each and #size, typically an Array).
class ArrayDatasource
  include Enumerable

  # data - the collection to expose; must respond to #each.
  #
  # Raises ArgumentError otherwise, so a bad datasource is reported at
  # construction time instead of in the middle of a load.
  def initialize(data)
    raise ArgumentError, 'data must respond to #each' unless data.respond_to?(:each)

    @data = data
  end

  # Yields every element of the wrapped collection and returns self.
  # Without a block, returns an Enumerator over the same elements.
  def each(&block)
    return to_enum(:each) unless block

    @data.each(&block)
    self
  end

  # Element count, delegated to the wrapped collection.
  def size
    @data.size
  end
end
@@ -0,0 +1,44 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'faraday_middleware/aws_signers_v4'
4
+ require 'faraday_middleware/gzip'
5
+ require 'elasticsearch'
6
+ require 'memoist'
7
+ require 'logger'
8
+ require 'aws-sdk'
9
+
10
+ # :nodoc
11
# Thin wrapper around an AWS-signed Elasticsearch client. Methods this
# class does not define itself are forwarded to the underlying client.
class EsClient
  extend Memoist

  # Region used to sign requests when none is supplied.
  DEFAULT_REGION = 'us-west-1'

  # url         - Elasticsearch endpoint URL
  # credentials - must be an Aws::SharedCredentials (RuntimeError otherwise)
  # logger      - any Logger-compatible object; defaults to a STDOUT logger
  # region      - AWS region used for request signing
  def initialize(
    url:,
    credentials:,
    logger: nil,
    region: DEFAULT_REGION
  )

    raise('credentials must be an Aws::SharedCredentials') unless \
      credentials.is_a?(Aws::SharedCredentials)

    @logger = logger || Logger.new(STDOUT)
    @url = url
    @credentials = credentials
    @region = region
    @logger.info("URL is #{url}")
  end

  # Builds the Elasticsearch client with gzip handling and AWS SigV4
  # request signing for the 'es' service. Memoized below, so one client is
  # built per instance.
  def client
    Elasticsearch::Client.new(url: @url) do |f|
      f.use FaradayMiddleware::Gzip
      f.request :aws_signers_v4,
                credentials: @credentials,
                service_name: 'es',
                region: @region
    end
  end
  memoize :client

  # Forwards calls the underlying client responds to; anything else falls
  # through to the normal NoMethodError.
  def method_missing(name, *args, &block)
    return super unless client.respond_to?(name)

    @logger.debug("Delegating #{name}")
    client.public_send(name, *args, &block)
  end

  # Keeps respond_to? truthful about the calls method_missing forwards.
  def respond_to_missing?(name, include_private = false)
    client.respond_to?(name) || super
  end
end
@@ -0,0 +1,39 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'xmlsimple'
4
+
5
+ # :nodoc
6
# Datasource that yields one Hash (symbol keys) per row returned by one or
# more SQL queries.
class SqlDatasource
  include Enumerable

  # sql           - a single query String, or a collection of query Strings
  # ar_connection - object whose #execute(query) returns a result that
  #                 responds to #fields and #map (presumably an ActiveRecord
  #                 connection, going by the name — confirm against callers)
  #
  # Raises RuntimeError when +sql+ is nil or false.
  def initialize(sql:, ar_connection:)
    raise 'no sql provided' unless sql

    @sql = sql
    @ar_connection = ar_connection
  end

  # Hook for subclasses to reshape a record before it is yielded.
  # Returns the record unchanged here.
  def normalize(o)
    o
  end

  # Runs every configured query (each one individually) and returns all
  # resulting records as a single Array.
  def data
    queries = @sql.is_a?(String) ? [@sql] : @sql

    queries.flat_map do |query|
      result = @ar_connection.execute(query)
      fields = result.fields.map(&:to_sym)

      result.map { |row| normalize(row_to_record(fields, row)) }
    end
  end

  # Yields every record and returns self, or returns an Enumerator when
  # called without a block. The queries are executed on every call.
  def each(&block)
    return to_enum(:each) unless block

    data.each(&block)

    self
  end

  private

  # Pairs column names with an Array row. Rows that already arrive as
  # Hashes only get their keys symbolized.
  def row_to_record(fields, row)
    return row.map { |name, value| [name.to_sym, value] }.to_h if row.is_a?(Hash)

    fields.zip(row).to_h
  end
end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'xmlsimple'
4
+
5
+ # :nodoc
6
# Datasource that parses every XML file matching a glob with XmlSimple and
# yields the resulting records.
class XmlFilesDatasource
  include Enumerable

  # glob - pattern handed to Dir.glob. The matches are sorted so the
  #        iteration order does not depend on the Ruby version or filesystem.
  def initialize(glob)
    @files = Dir.glob(glob).sort
  end

  # Hook for subclasses to turn a parsed document into one record or an
  # Array of records. Returns the parsed document unchanged here.
  def normalize(o)
    o
  end

  # Parses each file in turn and yields its record(s); returns self, or an
  # Enumerator when called without a block.
  def each(&block)
    return to_enum(:each) unless block

    @files.each do |xml_file|
      parsed = XmlSimple.xml_in(
        xml_file,
        ForceArray: false,
        SuppressEmpty: ''
      )
      records_from(normalize(parsed)).each(&block)
    end

    self
  end

  # Number of matched files (not the number of records #each yields).
  def size
    @files.size
  end

  private

  # A Hash is a single record. Kernel#Array would explode it into
  # [key, value] pairs, so it is wrapped explicitly; everything else keeps
  # the Array() coercion (nil becomes no records).
  def records_from(normalized)
    normalized.is_a?(Hash) ? [normalized] : Array(normalized)
  end
end
metadata CHANGED
@@ -1,15 +1,197 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rotating_es_loader
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.0
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mike Kowdley
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-09-21 00:00:00.000000000 Z
12
- dependencies: []
11
+ date: 2019-09-25 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: aws-sdk
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 2.11.358
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 2.11.358
27
+ - !ruby/object:Gem::Dependency
28
+ name: aws-sdk-resources
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 2.11.258
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 2.11.258
41
+ - !ruby/object:Gem::Dependency
42
+ name: elasticsearch
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 5.0.5
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 5.0.5
55
+ - !ruby/object:Gem::Dependency
56
+ name: elasticsearch-extensions
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: 0.0.31
62
+ type: :runtime
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: 0.0.31
69
+ - !ruby/object:Gem::Dependency
70
+ name: faraday_middleware
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: 0.13.1
76
+ type: :runtime
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: 0.13.1
83
+ - !ruby/object:Gem::Dependency
84
+ name: faraday_middleware-aws-signers-v4
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.1.9
90
+ type: :runtime
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.1.9
97
+ - !ruby/object:Gem::Dependency
98
+ name: logger
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 1.4.1
104
+ type: :runtime
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 1.4.1
111
+ - !ruby/object:Gem::Dependency
112
+ name: memoist
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.16.0
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.16.0
125
+ - !ruby/object:Gem::Dependency
126
+ name: xml-simple
127
+ requirement: !ruby/object:Gem::Requirement
128
+ requirements:
129
+ - - "~>"
130
+ - !ruby/object:Gem::Version
131
+ version: 1.1.5
132
+ type: :runtime
133
+ prerelease: false
134
+ version_requirements: !ruby/object:Gem::Requirement
135
+ requirements:
136
+ - - "~>"
137
+ - !ruby/object:Gem::Version
138
+ version: 1.1.5
139
+ - !ruby/object:Gem::Dependency
140
+ name: rake
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 12.3.3
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 12.3.3
153
+ - !ruby/object:Gem::Dependency
154
+ name: rspec
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 3.8.0
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 3.8.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: rubocop
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: 0.74.0
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: 0.74.0
181
+ - !ruby/object:Gem::Dependency
182
+ name: rubocop-rspec
183
+ requirement: !ruby/object:Gem::Requirement
184
+ requirements:
185
+ - - "~>"
186
+ - !ruby/object:Gem::Version
187
+ version: 1.35.0
188
+ type: :development
189
+ prerelease: false
190
+ version_requirements: !ruby/object:Gem::Requirement
191
+ requirements:
192
+ - - "~>"
193
+ - !ruby/object:Gem::Version
194
+ version: 1.35.0
13
195
  description: A base class for code that loads data into Elasticsearch
14
196
  email: mike@valuationmetricsinc.com
15
197
  executables: []
@@ -17,7 +199,11 @@ extensions: []
17
199
  extra_rdoc_files: []
18
200
  files:
19
201
  - lib/rotating_es_loader.rb
20
- homepage: https://rubygems.org/gems/rotating_es_loader
202
+ - lib/rotating_es_loader/array_datasource.rb
203
+ - lib/rotating_es_loader/es_client.rb
204
+ - lib/rotating_es_loader/sql_datasource.rb
205
+ - lib/rotating_es_loader/xml_files_datasource.rb
206
+ homepage: https://github.com/mikevm/rotating_es_loader
21
207
  licenses:
22
208
  - MIT
23
209
  metadata: {}