fluent-plugin-azurestorage-gen2 0.2.6 → 0.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: eb267ae2150f12ef390003715ba3bad30295ae0c
4
- data.tar.gz: f325ec26935b593e6449b91f31170da9526ef340
3
+ metadata.gz: f2ae08b739ba4ac4992427d2f87e8c5dc923d39b
4
+ data.tar.gz: 658ff0d35909232a1741d43bfa95bfd00cef3957
5
5
  SHA512:
6
- metadata.gz: 2e373e18b4351201fbffb83aec2086c8f2173e4f93b1c4e0042334d9f2697da74c0f8893aa0194bac4c1dc4a5eb1726a90123fcc9f7a2e8b136d4cd2a89fa05a
7
- data.tar.gz: 5386fa13b28894925065c9270bbed6fb1fc57b2b9486e6363c9587fa04a72ca0cdf996e9639690bd6e976301be73dda334d1a1cf72e7c8180055e46cd89f93b2
6
+ metadata.gz: 5a122fb0024c716cfbe9a6aa45f733ec938a28a0059bbb66f2d49ae885533ddb529247b757927abcc4784bfc9b78ab5f3593146cad5fbd562ac972cff15503bc
7
+ data.tar.gz: 99103e68b7c4995a7c495069eb302cfd006378c5477642bb03d76d5b9ea2462e3820a11ebc21a38c5383dc26ba5d5f3a4151f24ebf1788ddccf5f93eb3ef52e4
data/README.md CHANGED
@@ -140,6 +140,10 @@ If that setting is disabled, the worker won't fail on initialization (getting fi
140
140
 
141
141
  The default `url_domain_suffix` is `.dfs.core.windows.net`; you can override this in case of private endpoints.
142
142
 
143
+ ### url_storage_resource
144
+
145
+ The resource URL that is requested when acquiring an access token for the storage account. Default value: `https://storage.azure.com/`
146
+
143
147
  ### azure_object_key_format
144
148
 
145
149
  The format of Azure Storage object keys. You can use several built-in variables:
@@ -148,6 +152,7 @@ The format of Azure Storage object keys. You can use several built-in variables:
148
152
  - %{time_slice}
149
153
  - %{index}
150
154
  - %{file_extension}
155
+ - %{upload_timestamp}
151
156
 
152
157
  to decide keys dynamically.
153
158
 
@@ -155,6 +160,7 @@ to decide keys dynamically.
155
160
  %{time_slice} is the time slice in text, formatted with *time_slice_format*.
156
161
  %{index} is a sequential number that starts from 0 and increments when multiple files are uploaded to Azure Storage in the same time slice.
157
162
  %{file_extension} is always "gz" for now.
163
+ %{upload_timestamp} is the upload timestamp in text, formatted with *upload_timestamp_format*. The difference between time_slice and upload_timestamp is that upload_timestamp is the actual system time at upload, while time_slice comes from the chunk metadata.
158
164
 
159
165
  The default format is "%{path}%{time_slice}_%{index}.%{file_extension}".
160
166
 
@@ -279,10 +285,30 @@ Format of the time used as the file name. Default is '%Y%m%d'. Use '%Y%m%d%H' to
279
285
 
280
286
  The time to wait for old logs. Default is 10 minutes.
281
287
 
288
+ ### upload_timestamp_format
289
+
290
+ Format of the upload timestamp used in the file name. Can be used instead of the index when the `write_only` option is enabled. Default value is '%H%M%S%L'.
291
+
282
292
  ### utc
283
293
 
284
294
  Use UTC instead of local time.
285
295
 
296
+ ### write_only
297
+
298
+ If this option is enabled, HEAD calls are skipped during blob operations. Make sure to set the chunk limit to 4MB so that no HEAD operation is needed; otherwise the append operation needs the last position of the already uploaded blob.
299
+
300
+ ### proxy_url
301
+
302
+ Proxy URL for Azure endpoint.
303
+
304
+ ### proxy_username
305
+
306
+ Proxy username for Azure proxy endpoint (used only if `proxy_url` is filled)
307
+
308
+ ### proxy_password
309
+
310
+ Proxy password for Azure `proxy_username` (used only if `proxy_url` is filled)
311
+
286
312
  ## TODOs
287
313
 
288
314
  - add storage key support
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.2.6
1
+ 0.3.0
@@ -21,6 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_runtime_dependency 'typhoeus', '~> 1.0', '>= 1.0.1'
22
22
  gem.add_runtime_dependency 'json', '~> 2.1', '>= 2.1.0'
23
23
  gem.add_runtime_dependency "yajl-ruby", '~> 1.4'
24
+ gem.add_runtime_dependency 'concurrent-ruby', '~> 1.1', '>= 1.1.5'
24
25
  gem.add_development_dependency 'rake', '~> 12.3', '>= 12.3.1'
25
26
  gem.add_development_dependency 'test-unit', '~> 3.3', '>= 3.3.3'
26
27
  gem.add_development_dependency 'test-unit-rr', '~> 1.0', '>= 1.0.5'
@@ -6,6 +6,7 @@ require 'tempfile'
6
6
  require 'time'
7
7
  require 'typhoeus'
8
8
  require 'fluent/plugin/output'
9
+ require 'concurrent'
9
10
  require 'zlib'
10
11
 
11
12
  module Fluent::Plugin
@@ -38,9 +39,16 @@ module Fluent::Plugin
38
39
  config_param :enable_retry, :bool, :default => false
39
40
  config_param :startup_fail_on_error, :bool, :default => true
40
41
  config_param :url_domain_suffix, :string, :default => '.dfs.core.windows.net'
42
+ config_param :url_storage_resource, :string, :default => 'https://storage.azure.com/'
41
43
  config_param :format, :string, :default => "out_file"
42
44
  config_param :time_slice_format, :string, :default => '%Y%m%d'
45
+ config_param :hex_random_length, :integer, default: 4
43
46
  config_param :command_parameter, :string, :default => nil
47
+ config_param :proxy_url, :string, :default => nil
48
+ config_param :proxy_username, :string, :default => nil
49
+ config_param :proxy_password, :string, :default => nil, :secret => true
50
+ config_param :write_only, :bool, :default => false
51
+ config_param :upload_timestamp_format, :string, :default => '%H%M%S%L'
44
52
 
45
53
  DEFAULT_FORMAT_TYPE = "out_file"
46
54
  ACCESS_TOKEN_API_VERSION = "2018-02-01"
@@ -74,16 +82,6 @@ module Fluent::Plugin
74
82
 
75
83
  @formatter = formatter_create
76
84
 
77
- if @localtime
78
- @path_slicer = Proc.new {|path|
79
- Time.now.strftime(path)
80
- }
81
- else
82
- @path_slicer = Proc.new {|path|
83
- Time.now.utc.strftime(path)
84
- }
85
- end
86
-
87
85
  if @azure_container.nil?
88
86
  raise Fluent::ConfigError, "azure_container is needed"
89
87
  end
@@ -97,7 +95,7 @@ module Fluent::Plugin
97
95
  else
98
96
  @final_file_extension = @compressor.ext
99
97
  end
100
-
98
+ @values_for_object_chunk = {}
101
99
  end
102
100
 
103
101
  def multi_workers_ready?
@@ -126,17 +124,16 @@ module Fluent::Plugin
126
124
  end
127
125
 
128
126
  def write(chunk)
129
- metadata = chunk.metadata
130
127
  if @store_as.nil? || @store_as == "none"
131
- generate_log_name(metadata, @current_index)
128
+ generate_log_name(chunk, @current_index)
132
129
  if @last_azure_storage_path != @azure_storage_path
133
130
  @current_index = 0
134
- generate_log_name(metadata, @current_index)
131
+ generate_log_name(chunk, @current_index)
135
132
  end
136
133
  raw_data = chunk.read
137
134
  unless raw_data.empty?
138
135
  log.debug "azurestorage_gen2: processing raw data", chunk_id: dump_unique_id_hex(chunk.unique_id)
139
- upload_blob(raw_data, metadata)
136
+ upload_blob(raw_data, chunk)
140
137
  end
141
138
  chunk.close rescue nil
142
139
  @last_azure_storage_path = @azure_storage_path
@@ -146,51 +143,76 @@ module Fluent::Plugin
146
143
  begin
147
144
  @compressor.compress(chunk, tmp)
148
145
  tmp.rewind
149
- generate_log_name(metadata, @current_index)
146
+ generate_log_name(chunk, @current_index)
150
147
  if @last_azure_storage_path != @azure_storage_path
151
148
  @current_index = 0
152
- generate_log_name(metadata, @current_index)
149
+ generate_log_name(chunk, @current_index)
153
150
  end
154
151
  log.debug "azurestorage_gen2: Start uploading temp file: #{tmp.path}"
155
152
  content = File.open(tmp.path, 'rb') { |file| file.read }
156
- upload_blob(content, metadata)
153
+ upload_blob(content, chunk)
157
154
  @last_azure_storage_path = @azure_storage_path
158
155
  ensure
159
156
  tmp.close(true) rescue nil
160
157
  end
158
+ @values_for_object_chunk.delete(chunk.unique_id)
161
159
  end
162
160
 
163
161
  end
164
162
 
165
163
  private
166
- def upload_blob(content, metadata)
164
+ def upload_blob(content, chunk)
167
165
  log.debug "azurestorage_gen2: Uploading blob: #{@azure_storage_path}"
168
- existing_content_length = get_blob_properties(@azure_storage_path)
169
- if existing_content_length == 0
166
+ if @write_only
170
167
  create_blob(@azure_storage_path)
168
+ append_blob(content, chunk, 0)
169
+ else
170
+ existing_content_length = get_blob_properties(@azure_storage_path)
171
+ if existing_content_length == 0
172
+ create_blob(@azure_storage_path)
173
+ end
174
+ append_blob(content, chunk, existing_content_length)
171
175
  end
172
- append_blob(content, metadata, existing_content_length)
173
176
  end
174
177
 
175
178
  private
176
- def generate_log_name(metadata, index)
179
+ def generate_log_name(chunk, index)
180
+ metadata = chunk.metadata
177
181
  time_slice = if metadata.timekey.nil?
178
182
  ''.freeze
179
183
  else
180
184
  Time.at(metadata.timekey).utc.strftime(@time_slice_format)
181
185
  end
182
- path = @path_slicer.call(@path)
183
- values_for_object_key = {
184
- "%{path}" => path,
185
- "%{time_slice}" => time_slice,
186
+ if @localtime
187
+ hms_slicer = Time.now.strftime("%H%M%S")
188
+ upload_timestamp = Time.now.strftime(@upload_timestamp_format)
189
+ else
190
+ hms_slicer = Time.now.utc.strftime("%H%M%S")
191
+ upload_timestamp = Time.now.utc.strftime(@upload_timestamp_format)
192
+ end
193
+
194
+ @values_for_object_chunk[chunk.unique_id] ||= {
195
+ "%{hex_random}" => hex_random(chunk),
196
+ }
197
+ values_for_object_key_pre = {
198
+ "%{path}" => @path,
186
199
  "%{index}" => index,
187
200
  "%{uuid_flush}" => uuid_random,
188
- "%{file_extension}" => @final_file_extension
201
+ "%{file_extension}" => @final_file_extension,
202
+ "%{upload_timestamp}" => upload_timestamp,
189
203
  }
190
- storage_path = @azure_object_key_format.gsub(%r(%{[^}]+}), values_for_object_key)
191
- extracted_path = extract_placeholders(storage_path, metadata)
192
- extracted_path = "/" + extracted_path unless extracted_path.start_with?("/")
193
- @azure_storage_path = extracted_path
204
+ values_for_object_key_post = {
205
+ "%{date_slice}" => time_slice,
206
+ "%{time_slice}" => time_slice,
207
+ "%{hms_slice}" => hms_slicer,
208
+ }.merge!(@values_for_object_chunk[chunk.unique_id])
209
+ storage_path = @azure_object_key_format.gsub(%r(%{[^}]+})) do |matched_key|
210
+ values_for_object_key_pre.fetch(matched_key, matched_key)
211
+ end
212
+ storage_path = extract_placeholders(storage_path, chunk)
213
+ storage_path = storage_path.gsub(%r(%{[^}]+}), values_for_object_key_post)
214
+ storage_path = "/" + storage_path unless storage_path.start_with?("/")
215
+ @azure_storage_path = storage_path
194
216
  end
195
217
 
196
218
  def setup_access_token
@@ -242,11 +264,16 @@ module Fluent::Plugin
242
264
  # https://docs.microsoft.com/en-us/azure/active-directory/managed-identities-azure-resources/tutorial-linux-vm-access-storage#get-an-access-token-and-use-it-to-call-azure-storage
243
265
  private
244
266
  def acquire_access_token_msi
245
- params = { :"api-version" => ACCESS_TOKEN_API_VERSION, :resource => "https://storage.azures.com/" }
267
+ params = { :"api-version" => ACCESS_TOKEN_API_VERSION, :resource => "#{@url_storage_resource}" }
246
268
  unless @azure_instance_msi.nil?
247
269
  params[:msi_res_id] = @azure_instance_msi
248
270
  end
249
- request = Typhoeus::Request.new("http://169.254.169.254/metadata/identity/oauth2/token", params: params, headers: { Metadata: "true"})
271
+ req_opts = {
272
+ :params => params,
273
+ :headers => { Metadata: "true" }
274
+ }
275
+ add_proxy_options(req_opts)
276
+ request = Typhoeus::Request.new("http://169.254.169.254/metadata/identity/oauth2/token", req_opts)
250
277
  request.on_complete do |response|
251
278
  if response.success?
252
279
  data = JSON.parse(response.body)
@@ -261,10 +288,16 @@ module Fluent::Plugin
261
288
 
262
289
  private
263
290
  def acquire_access_token_oauth_app
264
- params = { :"api-version" => ACCESS_TOKEN_API_VERSION, :resource => "https://storage.azure.com/"}
291
+ params = { :"api-version" => ACCESS_TOKEN_API_VERSION, :resource => "#{@url_storage_resource}"}
265
292
  headers = {:"Content-Type" => "application/x-www-form-urlencoded"}
266
- content = "grant_type=client_credentials&client_id=#{@azure_oauth_app_id}&client_secret=#{@azure_oauth_secret}&resource=https://storage.azure.com/"
267
- request = Typhoeus::Request.new("https://login.microsoftonline.com/#{@azure_oauth_tenant_id}/oauth2/token", :body => content, :headers => headers)
293
+ content = "grant_type=client_credentials&client_id=#{@azure_oauth_app_id}&client_secret=#{@azure_oauth_secret}&resource=#{@url_storage_resource}"
294
+ req_opts = {
295
+ :params => params,
296
+ :body => content,
297
+ :headers => headers
298
+ }
299
+ add_proxy_options(req_opts)
300
+ request = Typhoeus::Request.new("https://login.microsoftonline.com/#{@azure_oauth_tenant_id}/oauth2/token", req_opts)
268
301
  request.on_complete do |response|
269
302
  if response.success?
270
303
  data = JSON.parse(response.body)
@@ -279,7 +312,7 @@ module Fluent::Plugin
279
312
 
280
313
  private
281
314
  def acquire_access_token_by_az
282
- access_token=`az account get-access-token --resource https://storage.azure.com/ --query accessToken -o tsv`
315
+ access_token=`az account get-access-token --resource #{@url_storage_resource} --query accessToken -o tsv`
283
316
  log.debug "azurestorage_gen2: Token response: #{access_token}"
284
317
  @azure_access_token = access_token.chomp
285
318
  end
@@ -291,7 +324,13 @@ module Fluent::Plugin
291
324
  params = {:resource => "filesystem" }
292
325
  auth_header = create_auth_header("head", datestamp, "#{@azure_container}", headers, params)
293
326
  headers[:Authorization] = auth_header
294
- request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}", :method => :head, :params => params, :headers=> headers)
327
+ req_opts = {
328
+ :method => :head,
329
+ :params => params,
330
+ :headers => headers
331
+ }
332
+ add_proxy_options(req_opts)
333
+ request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}", req_opts)
295
334
  request.on_complete do |response|
296
335
  if response.success?
297
336
  log.info "azurestorage_gen2: Container '#{@azure_container}' exists."
@@ -318,7 +357,13 @@ module Fluent::Plugin
318
357
  params = {:resource => "filesystem" }
319
358
  auth_header = create_auth_header("put", datestamp, "#{@azure_container}", headers, params)
320
359
  headers[:Authorization] = auth_header
321
- request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}", :method => :put, :params => params, :headers=> headers)
360
+ req_opts = {
361
+ :method => :put,
362
+ :params => params,
363
+ :headers => headers
364
+ }
365
+ add_proxy_options(req_opts)
366
+ request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}", req_opts)
322
367
  request.on_complete do |response|
323
368
  if response.success?
324
369
  log.debug "azurestorage_gen2: Container '#{@azure_container}' created, response code: #{response.code}"
@@ -338,7 +383,13 @@ module Fluent::Plugin
338
383
  params = {:resource => "file", :recursive => "false"}
339
384
  auth_header = create_auth_header("put", datestamp, "#{@azure_container}#{blob_path}", headers, params)
340
385
  headers[:Authorization] = auth_header
341
- request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", :method => :put, :params => params, :headers=> headers)
386
+ req_opts = {
387
+ :method => :put,
388
+ :params => params,
389
+ :headers => headers
390
+ }
391
+ add_proxy_options(req_opts)
392
+ request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", req_opts)
342
393
  request.on_complete do |response|
343
394
  if response.success?
344
395
  log.debug "azurestorage_gen2: Blob '#{blob_path}' has been created, response code: #{response.code}"
@@ -361,7 +412,14 @@ module Fluent::Plugin
361
412
  params = {:action => "append", :position => "#{position}"}
362
413
  auth_header = create_auth_header("patch", datestamp, "#{@azure_container}#{blob_path}", headers, params)
363
414
  headers[:Authorization] = auth_header
364
- request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", :method => :patch, :headers=> headers, :params => params, :body => content)
415
+ req_opts = {
416
+ :method => :patch,
417
+ :params => params,
418
+ :headers => headers,
419
+ :body => content
420
+ }
421
+ add_proxy_options(req_opts)
422
+ request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", req_opts)
365
423
  request.on_complete do |response|
366
424
  if response.success?
367
425
  log.debug "azurestorage_gen2: Blob '#{blob_path}' has been appended, response code: #{response.code}"
@@ -386,7 +444,13 @@ module Fluent::Plugin
386
444
  params = {:action => "flush", :position => "#{position}"}
387
445
  auth_header = create_auth_header("patch", datestamp, "#{@azure_container}#{blob_path}",headers, params)
388
446
  headers[:Authorization] = auth_header
389
- request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", :method => :patch, :params => params, :headers=> headers)
447
+ req_opts = {
448
+ :method => :patch,
449
+ :params => params,
450
+ :headers => headers
451
+ }
452
+ add_proxy_options(req_opts)
453
+ request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", req_opts)
390
454
  request.on_complete do |response|
391
455
  if response.success?
392
456
  log.debug "azurestorage_gen2: Blob '#{blob_path}' flush was successful, response code: #{response.code}"
@@ -407,7 +471,13 @@ module Fluent::Plugin
407
471
  content_length = -1
408
472
  auth_header = create_auth_header("head", datestamp, "#{@azure_container}#{blob_path}", headers, params)
409
473
  headers[:Authorization] = auth_header
410
- request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", :method => :head, :params => params, :headers=> headers)
474
+ req_opts = {
475
+ :method => :head,
476
+ :params => params,
477
+ :headers => headers
478
+ }
479
+ add_proxy_options(req_opts)
480
+ request = Typhoeus::Request.new("https://#{azure_storage_account}#{@url_domain_suffix}/#{@azure_container}#{blob_path}", req_opts)
411
481
  request.on_complete do |response|
412
482
  if response.success?
413
483
  log.debug "azurestorage_gen2: Get blob properties for '#{blob_path}', response headers: #{response.headers}"
@@ -426,7 +496,7 @@ module Fluent::Plugin
426
496
  end
427
497
 
428
498
  private
429
- def append_blob(content, metadata, existing_content_length)
499
+ def append_blob(content, chunk, existing_content_length)
430
500
  position = 0
431
501
  log.debug "azurestorage_gen2: append_blob.start: Content size: #{content.length}"
432
502
  loop do
@@ -475,6 +545,16 @@ module Fluent::Plugin
475
545
  "SharedKey #{@azure_storage_account}:#{signed(method, datestamp, resource, headers, params)}"
476
546
  end
477
547
  end
548
+
549
+ private
550
+ def add_proxy_options(req_opts = {})
551
+ unless @proxy_url.nil?
552
+ req_opts[:proxy] = @proxy_url
553
+ unless @proxy_username.nil? || @proxy_password.nil?
554
+ req_opts[:proxyuserpwd] = "#{@proxy_username}:#{@proxy_password}"
555
+ end
556
+ end
557
+ end
478
558
 
479
559
  private
480
560
  def signed(method, datestamp, resource, headers, params)
@@ -544,6 +624,12 @@ module Fluent::Plugin
544
624
  require 'uuidtools'
545
625
  ::UUIDTools::UUID.random_create.to_s
546
626
  end
627
+
628
+ def hex_random(chunk)
629
+ unique_hex = Fluent::UniqueId.hex(chunk.unique_id)
630
+ unique_hex.reverse!
631
+ unique_hex[0...@hex_random_length]
632
+ end
547
633
 
548
634
  def timekey_to_timeformat(timekey)
549
635
  case timekey
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-azurestorage-gen2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.6
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Oliver Szabo
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-02-19 00:00:00.000000000 Z
11
+ date: 2020-06-17 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: fluentd
@@ -104,6 +104,26 @@ dependencies:
104
104
  - - "~>"
105
105
  - !ruby/object:Gem::Version
106
106
  version: '1.4'
107
+ - !ruby/object:Gem::Dependency
108
+ name: concurrent-ruby
109
+ requirement: !ruby/object:Gem::Requirement
110
+ requirements:
111
+ - - "~>"
112
+ - !ruby/object:Gem::Version
113
+ version: '1.1'
114
+ - - ">="
115
+ - !ruby/object:Gem::Version
116
+ version: 1.1.5
117
+ type: :runtime
118
+ prerelease: false
119
+ version_requirements: !ruby/object:Gem::Requirement
120
+ requirements:
121
+ - - "~>"
122
+ - !ruby/object:Gem::Version
123
+ version: '1.1'
124
+ - - ">="
125
+ - !ruby/object:Gem::Version
126
+ version: 1.1.5
107
127
  - !ruby/object:Gem::Dependency
108
128
  name: rake
109
129
  requirement: !ruby/object:Gem::Requirement