fluent-plugin-s3-file-inclusion 1.4.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +13 -0
- data/.travis.yml +24 -0
- data/AUTHORS +2 -0
- data/ChangeLog +375 -0
- data/Gemfile +3 -0
- data/Gemfile.v0.12 +6 -0
- data/README.md +845 -0
- data/Rakefile +14 -0
- data/VERSION +1 -0
- data/appveyor.yml +25 -0
- data/fluent-plugin-s3.gemspec +26 -0
- data/lib/fluent/log-ext.rb +12 -0
- data/lib/fluent/plugin/in_s3.rb +419 -0
- data/lib/fluent/plugin/out_s3.rb +642 -0
- data/lib/fluent/plugin/s3_compressor_gzip_command.rb +52 -0
- data/lib/fluent/plugin/s3_compressor_lzma2.rb +35 -0
- data/lib/fluent/plugin/s3_compressor_lzo.rb +35 -0
- data/lib/fluent/plugin/s3_extractor_gzip_command.rb +46 -0
- data/lib/fluent/plugin/s3_extractor_lzma2.rb +40 -0
- data/lib/fluent/plugin/s3_extractor_lzo.rb +40 -0
- data/test/test_in_s3.rb +513 -0
- data/test/test_out_s3.rb +713 -0
- metadata +169 -0
data/lib/fluent/plugin/out_s3.rb
@@ -0,0 +1,642 @@
require 'fluent/plugin/output'
require 'fluent/log-ext'
require 'fluent/timezone'
require 'aws-sdk-s3'
require 'zlib'
require 'time'
require 'tempfile'
require 'securerandom'

module Fluent::Plugin
  class S3Output < Output
    Fluent::Plugin.register_output('s3', self)

    helpers :compat_parameters, :formatter, :inject

    def initialize
      super
      @compressor = nil
      @uuid_flush_enabled = false
    end

    desc "Path prefix of the files on S3"
    config_param :path, :string, default: ""
    desc "The Server-side encryption algorithm used when storing this object in S3 (AES256, aws:kms)"
    config_param :use_server_side_encryption, :string, default: nil
    desc "Use aws-sdk-ruby bundled cert"
    config_param :use_bundled_cert, :bool, default: false
    desc "AWS access key id"
    config_param :aws_key_id, :string, default: nil, secret: true
    desc "AWS secret key."
    config_param :aws_sec_key, :string, default: nil, secret: true
    config_section :assume_role_credentials, multi: false do
      desc "The Amazon Resource Name (ARN) of the role to assume"
      config_param :role_arn, :string, secret: true
      desc "An identifier for the assumed role session"
      config_param :role_session_name, :string
      desc "An IAM policy in JSON format"
      config_param :policy, :string, default: nil
      desc "The duration, in seconds, of the role session (900-3600)"
      config_param :duration_seconds, :integer, default: nil
      desc "A unique identifier that is used by third parties when assuming roles in their customers' accounts."
      config_param :external_id, :string, default: nil, secret: true
      desc "The region of the STS endpoint to use."
      config_param :sts_region, :string, default: nil
      desc "A http proxy url for requests to aws sts service"
      config_param :sts_http_proxy, :string, default: nil, secret: true
      desc "A url for a regional sts api endpoint, the default is global"
      config_param :sts_endpoint_url, :string, default: nil
    end
    # See the following link for additional params that could be added:
    # https://docs.aws.amazon.com/sdk-for-ruby/v3/api/Aws/STS/Client.html#assume_role_with_web_identity-instance_method
    config_section :web_identity_credentials, multi: false do
      desc "The Amazon Resource Name (ARN) of the role to assume"
      config_param :role_arn, :string # required
      desc "An identifier for the assumed role session"
      config_param :role_session_name, :string #required
      desc "The absolute path to the file on disk containing the OIDC token"
      config_param :web_identity_token_file, :string #required
      desc "An IAM policy in JSON format"
      config_param :policy, :string, default: nil
      desc "The duration, in seconds, of the role session (900-43200)"
      config_param :duration_seconds, :integer, default: nil
      desc "The region of the STS endpoint to use."
      config_param :sts_region, :string, default: nil
    end
    config_section :instance_profile_credentials, multi: false do
      desc "Number of times to retry when retrieving credentials"
      config_param :retries, :integer, default: nil
      desc "IP address (default:169.254.169.254)"
      config_param :ip_address, :string, default: nil
      desc "Port number (default:80)"
      config_param :port, :integer, default: nil
      desc "Number of seconds to wait for the connection to open"
      config_param :http_open_timeout, :float, default: nil
      desc "Number of seconds to wait for one block to be read"
      config_param :http_read_timeout, :float, default: nil
      # config_param :delay, :integer or :proc, :default => nil
      # config_param :http_degub_output, :io, :default => nil
    end
    config_section :shared_credentials, multi: false do
      desc "Path to the shared file. (default: $HOME/.aws/credentials)"
      config_param :path, :string, default: nil
      desc "Profile name. Default to 'default' or ENV['AWS_PROFILE']"
      config_param :profile_name, :string, default: nil
    end
    desc "The number of attempts to load instance profile credentials from the EC2 metadata service using IAM role"
    config_param :aws_iam_retries, :integer, default: nil, deprecated: "Use 'instance_profile_credentials' instead"
    desc "S3 bucket name"
    config_param :s3_bucket, :string
    desc "S3 region name"
    config_param :s3_region, :string, default: ENV["AWS_REGION"] || "us-east-1"
    desc "Use 's3_region' instead"
    config_param :s3_endpoint, :string, default: nil
    desc "If true, S3 Transfer Acceleration will be enabled for uploads. IMPORTANT: You must first enable this feature on your destination S3 bucket"
    config_param :enable_transfer_acceleration, :bool, default: false
    desc "If true, use Amazon S3 Dual-Stack Endpoints. Will make it possible to use either IPv4 or IPv6 when connecting to S3."
    config_param :enable_dual_stack, :bool, default: false
    desc "If false, the certificate of endpoint will not be verified"
    config_param :ssl_verify_peer, :bool, :default => true
    desc "The format of S3 object keys"
    config_param :s3_object_key_format, :string, default: "%{path}%{time_slice}_%{index}.%{file_extension}"
    desc "If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain"
    config_param :force_path_style, :bool, default: false, deprecated: "S3 will drop path style API in 2020: See https://aws.amazon.com/blogs/aws/amazon-s3-path-deprecation-plan-the-rest-of-the-story/"
    desc "Archive format on S3"
    config_param :store_as, :string, default: "gzip"
    desc "Create S3 bucket if it does not exists"
    config_param :auto_create_bucket, :bool, default: true
    desc "Check AWS key on start"
    config_param :check_apikey_on_start, :bool, default: true
    desc "URI of proxy environment"
    config_param :proxy_uri, :string, default: nil
    desc "Use S3 reduced redundancy storage for 33% cheaper pricing. Deprecated. Use storage_class instead"
    config_param :reduced_redundancy, :bool, default: false, deprecated: "Use storage_class parameter instead."
    desc "The type of storage to use for the object(STANDARD,REDUCED_REDUNDANCY,STANDARD_IA)"
    config_param :storage_class, :string, default: "STANDARD"
    desc "Permission for the object in S3"
    config_param :acl, :string, default: nil
    desc "Allows grantee READ, READ_ACP, and WRITE_ACP permissions on the object"
    config_param :grant_full_control, :string, default: nil
    desc "Allows grantee to read the object data and its metadata"
    config_param :grant_read, :string, default: nil
    desc "Allows grantee to read the object ACL"
    config_param :grant_read_acp, :string, default: nil
    desc "Allows grantee to write the ACL for the applicable object"
    config_param :grant_write_acp, :string, default: nil
    desc "The length of `%{hex_random}` placeholder(4-16)"
    config_param :hex_random_length, :integer, default: 4
    desc "`sprintf` format for `%{index}`"
    config_param :index_format, :string, default: "%d"
    desc "Overwrite already existing path"
    config_param :overwrite, :bool, default: false
    desc "Check bucket if exists or not"
    config_param :check_bucket, :bool, default: true
    desc "Check object before creation"
    config_param :check_object, :bool, default: true
    desc "Specifies the AWS KMS key ID to use for object encryption"
    config_param :ssekms_key_id, :string, default: nil, secret: true
    desc "Specifies the algorithm to use to when encrypting the object"
    config_param :sse_customer_algorithm, :string, default: nil
    desc "Specifies the customer-provided encryption key for Amazon S3 to use in encrypting data"
    config_param :sse_customer_key, :string, default: nil, secret: true
    desc "Specifies the 128-bit MD5 digest of the encryption key according to RFC 1321"
    config_param :sse_customer_key_md5, :string, default: nil, secret: true
    desc "AWS SDK uses MD5 for API request/response by default"
    config_param :compute_checksums, :bool, default: nil # use nil to follow SDK default configuration
    desc "Signature version for API Request (s3,v4)"
    config_param :signature_version, :string, default: nil # use nil to follow SDK default configuration
    desc "Given a threshold to treat events as delay, output warning logs if delayed events were put into s3"
    config_param :warn_for_delay, :time, default: nil
    desc "Arbitrary S3 metadata headers to set for the object"
    config_param :s3_metadata, :hash, default: nil
    config_section :bucket_lifecycle_rule, param_name: :bucket_lifecycle_rules, multi: true do
      desc "A unique ID for this rule"
      config_param :id, :string
      desc "Objects whose keys begin with this prefix will be affected by the rule. If not specified all objects of the bucket will be affected"
      config_param :prefix, :string, default: ''
      desc "The number of days before the object will expire"
      config_param :expiration_days, :integer
    end

    DEFAULT_FORMAT_TYPE = "out_file"

    config_section :format do
      config_set_default :@type, DEFAULT_FORMAT_TYPE
    end

    config_section :buffer do
      config_set_default :chunk_keys, ['time']
      config_set_default :timekey, (60 * 60 * 24)
    end
    attr_reader :bucket

    MAX_HEX_RANDOM_LENGTH = 16

    def configure(conf)
      compat_parameters_convert(conf, :buffer, :formatter, :inject)

      super

      Aws.use_bundled_cert! if @use_bundled_cert

      if @s3_endpoint && (@s3_endpoint.end_with?('amazonaws.com') && !['fips', 'gov'].any? { |e| @s3_endpoint.include?(e) })
        raise Fluent::ConfigError, "s3_endpoint parameter is not supported for S3, use s3_region instead. This parameter is for S3 compatible services"
      end

      begin
        buffer_type = @buffer_config[:@type]
        @compressor = COMPRESSOR_REGISTRY.lookup(@store_as).new(buffer_type: buffer_type, log: log)
      rescue => e
        log.warn "'#{@store_as}' not supported. Use 'text' instead: error = #{e.message}"
        @compressor = TextCompressor.new
      end
      @compressor.configure(conf)

      @formatter = formatter_create

      if @hex_random_length > MAX_HEX_RANDOM_LENGTH
        raise Fluent::ConfigError, "hex_random_length parameter must be less than or equal to #{MAX_HEX_RANDOM_LENGTH}"
      end

      unless @index_format =~ /^%(0\d*)?[dxX]$/
        raise Fluent::ConfigError, "index_format parameter should follow `%[flags][width]type`. `0` is the only supported flag, and is mandatory if width is specified. `d`, `x` and `X` are supported types"
      end

      if @reduced_redundancy
        log.warn "reduced_redundancy parameter is deprecated. Use storage_class parameter instead"
        @storage_class = "REDUCED_REDUNDANCY"
      end

      @s3_object_key_format = process_s3_object_key_format
      if !@check_object
        if conf.has_key?('s3_object_key_format')
          log.warn "Set 'check_object false' and s3_object_key_format is specified. Check s3_object_key_format is unique in each write. If not, existing file will be overwritten."
        else
          log.warn "Set 'check_object false' and s3_object_key_format is not specified. Use '%{path}/%{time_slice}_%{hms_slice}.%{file_extension}' for s3_object_key_format"
          @s3_object_key_format = "%{path}/%{time_slice}_%{hms_slice}.%{file_extension}"
        end
      end

      check_s3_path_safety(conf)

      # For backward compatibility
      # TODO: Remove time_slice_format when end of support compat_parameters
      @configured_time_slice_format = conf['time_slice_format']
      @values_for_s3_object_chunk = {}
      @time_slice_with_tz = Fluent::Timezone.formatter(@timekey_zone, @configured_time_slice_format || timekey_to_timeformat(@buffer_config['timekey']))
    end
    def multi_workers_ready?
      true
    end

    def start
      options = setup_credentials
      options[:region] = @s3_region if @s3_region
      options[:endpoint] = @s3_endpoint if @s3_endpoint
      options[:use_accelerate_endpoint] = @enable_transfer_acceleration
      options[:use_dualstack_endpoint] = @enable_dual_stack
      options[:http_proxy] = @proxy_uri if @proxy_uri
      options[:force_path_style] = @force_path_style
      options[:compute_checksums] = @compute_checksums unless @compute_checksums.nil?
      options[:signature_version] = @signature_version unless @signature_version.nil?
      options[:ssl_verify_peer] = @ssl_verify_peer
      log.on_trace do
        options[:http_wire_trace] = true
        options[:logger] = log
      end

      s3_client = Aws::S3::Client.new(options)
      @s3 = Aws::S3::Resource.new(client: s3_client)
      @bucket = @s3.bucket(@s3_bucket)

      check_apikeys if @check_apikey_on_start
      ensure_bucket if @check_bucket
      ensure_bucket_lifecycle

      super
    end

    def format(tag, time, record)
      r = inject_values_to_record(tag, time, record)
      @formatter.format(tag, time, r)
    end

    def write(chunk)
      i = 0
      metadata = chunk.metadata
      previous_path = nil
      time_slice = if metadata.timekey.nil?
                     ''.freeze
                   else
                     @time_slice_with_tz.call(metadata.timekey)
                   end

      if @check_object
        begin
          @values_for_s3_object_chunk[chunk.unique_id] ||= {
            "%{hex_random}" => hex_random(chunk),
          }
          values_for_s3_object_key_pre = {
            "%{path}" => @path,
            "%{file_extension}" => @compressor.ext,
          }
          values_for_s3_object_key_post = {
            "%{time_slice}" => time_slice,
            "%{index}" => sprintf(@index_format,i),
          }.merge!(@values_for_s3_object_chunk[chunk.unique_id])
          values_for_s3_object_key_post["%{uuid_flush}".freeze] = uuid_random if @uuid_flush_enabled

          s3path = @s3_object_key_format.gsub(%r(%{[^}]+})) do |matched_key|
            values_for_s3_object_key_pre.fetch(matched_key, matched_key)
          end
          s3path = extract_placeholders(s3path, chunk)
          s3path = s3path.gsub(%r(%{[^}]+}), values_for_s3_object_key_post)
          if (i > 0) && (s3path == previous_path)
            if @overwrite
              log.warn "#{s3path} already exists, but will overwrite"
              break
            else
              raise "duplicated path is generated. use %{index} in s3_object_key_format: path = #{s3path}"
            end
          end

          i += 1
          previous_path = s3path
        end while @bucket.object(s3path).exists?
      else
        if @localtime
          hms_slicer = Time.now.strftime("%H%M%S")
        else
          hms_slicer = Time.now.utc.strftime("%H%M%S")
        end

        @values_for_s3_object_chunk[chunk.unique_id] ||= {
          "%{hex_random}" => hex_random(chunk),
        }
        values_for_s3_object_key_pre = {
          "%{path}" => @path,
          "%{file_extension}" => @compressor.ext,
        }
        values_for_s3_object_key_post = {
          "%{date_slice}" => time_slice, # For backward compatibility
          "%{time_slice}" => time_slice,
          "%{hms_slice}" => hms_slicer,
        }.merge!(@values_for_s3_object_chunk[chunk.unique_id])
        values_for_s3_object_key_post["%{uuid_flush}".freeze] = uuid_random if @uuid_flush_enabled

        s3path = @s3_object_key_format.gsub(%r(%{[^}]+})) do |matched_key|
          values_for_s3_object_key_pre.fetch(matched_key, matched_key)
        end
        s3path = extract_placeholders(s3path, chunk)
        s3path = s3path.gsub(%r(%{[^}]+}), values_for_s3_object_key_post)
      end

      tmp = Tempfile.new("s3-")
      tmp.binmode
      begin
        @compressor.compress(chunk, tmp)
        tmp.rewind
        log.debug "out_s3: write chunk #{dump_unique_id_hex(chunk.unique_id)} with metadata #{chunk.metadata} to s3://#{@s3_bucket}/#{s3path}"

        put_options = {
          body: tmp,
          content_type: @compressor.content_type,
          storage_class: @storage_class,
        }
        put_options[:server_side_encryption] = @use_server_side_encryption if @use_server_side_encryption
        put_options[:ssekms_key_id] = @ssekms_key_id if @ssekms_key_id
        put_options[:sse_customer_algorithm] = @sse_customer_algorithm if @sse_customer_algorithm
        put_options[:sse_customer_key] = @sse_customer_key if @sse_customer_key
        put_options[:sse_customer_key_md5] = @sse_customer_key_md5 if @sse_customer_key_md5
        put_options[:acl] = @acl if @acl
        put_options[:grant_full_control] = @grant_full_control if @grant_full_control
        put_options[:grant_read] = @grant_read if @grant_read
        put_options[:grant_read_acp] = @grant_read_acp if @grant_read_acp
        put_options[:grant_write_acp] = @grant_write_acp if @grant_write_acp

        if @s3_metadata
          put_options[:metadata] = {}
          @s3_metadata.each do |k, v|
            put_options[:metadata][k] = extract_placeholders(v, chunk).gsub(%r(%{[^}]+}), {"%{index}" => sprintf(@index_format, i - 1)})
          end
        end
        @bucket.object(s3path).put(put_options)

        @values_for_s3_object_chunk.delete(chunk.unique_id)

        if @warn_for_delay
          if Time.at(chunk.metadata.timekey) < Time.now - @warn_for_delay
            log.warn "out_s3: delayed events were put to s3://#{@s3_bucket}/#{s3path}"
          end
        end
      ensure
        tmp.close(true) rescue nil
      end
    end
    private

    def hex_random(chunk)
      unique_hex = Fluent::UniqueId.hex(chunk.unique_id)
      unique_hex.reverse! # unique_hex is like (time_sec, time_usec, rand) => reversing gives more randomness
      unique_hex[0...@hex_random_length]
    end

    def uuid_random
      SecureRandom.uuid
    end

    # This is stolen from Fluentd
    def timekey_to_timeformat(timekey)
      case timekey
      when nil then ''
      when 0...60 then '%Y%m%d%H%M%S' # 60 exclusive
      when 60...3600 then '%Y%m%d%H%M'
      when 3600...86400 then '%Y%m%d%H'
      else '%Y%m%d'
      end
    end

    def ensure_bucket
      if !@bucket.exists?
        if @auto_create_bucket
          log.info "Creating bucket #{@s3_bucket} on #{@s3_endpoint}"
          @s3.create_bucket(bucket: @s3_bucket)
        else
          raise "The specified bucket does not exist: bucket = #{@s3_bucket}"
        end
      end
    end

    def ensure_bucket_lifecycle
      unless @bucket_lifecycle_rules.empty?
        old_rules = get_bucket_lifecycle_rules
        new_rules = @bucket_lifecycle_rules.sort_by { |rule| rule.id }.map do |rule|
          { id: rule.id, expiration: { days: rule.expiration_days }, prefix: rule.prefix, status: "Enabled" }
        end

        unless old_rules == new_rules
          log.info "Configuring bucket lifecycle rules for #{@s3_bucket} on #{@s3_endpoint}"
          @bucket.lifecycle_configuration.put({ lifecycle_configuration: { rules: new_rules } })
        end
      end
    end

    def get_bucket_lifecycle_rules
      begin
        @bucket.lifecycle_configuration.rules.sort_by { |rule| rule[:id] }.map do |rule|
          { id: rule[:id], expiration: { days: rule[:expiration][:days] }, prefix: rule[:prefix], status: rule[:status] }
        end
      rescue Aws::S3::Errors::NoSuchLifecycleConfiguration
        []
      end
    end

    def process_s3_object_key_format
      %W(%{uuid} %{uuid:random} %{uuid:hostname} %{uuid:timestamp}).each { |ph|
        if @s3_object_key_format.include?(ph)
          raise Fluent::ConfigError, %!#{ph} placeholder in s3_object_key_format is removed!
        end
      }

      if @s3_object_key_format.include?('%{uuid_flush}')
        @uuid_flush_enabled = true
      end

      @s3_object_key_format.gsub('%{hostname}') { |expr|
        log.warn "%{hostname} will be removed in the future. Use \"\#{Socket.gethostname}\" instead"
        Socket.gethostname
      }
    end

    def check_s3_path_safety(conf)
      unless conf.has_key?('s3_object_key_format')
        log.warn "The default value of s3_object_key_format will use ${chunk_id} instead of %{index} to avoid object conflict in v2"
      end

      if (@buffer_config.flush_thread_count > 1) && ['${chunk_id}', '%{uuid_flush}'].none? { |key| @s3_object_key_format.include?(key) }
        log.warn "No ${chunk_id} or %{uuid_flush} in s3_object_key_format with multiple flush threads. Recommend to set ${chunk_id} or %{uuid_flush} to avoid data lost by object conflict"
      end
    end

    def check_apikeys
      @bucket.objects(prefix: @path, :max_keys => 1).first
    rescue Aws::S3::Errors::NoSuchBucket
      # ignore NoSuchBucket Error because ensure_bucket checks it.
    rescue => e
      raise "can't call S3 API. Please check your credentials or s3_region configuration. error = #{e.inspect}"
    end

    def setup_credentials
      options = {}
      credentials_options = {}
      case
      when @aws_key_id && @aws_sec_key
        options[:access_key_id] = @aws_key_id
        options[:secret_access_key] = @aws_sec_key
      when @assume_role_credentials
        c = @assume_role_credentials
        credentials_options[:role_arn] = c.role_arn
        credentials_options[:role_session_name] = c.role_session_name
        credentials_options[:policy] = c.policy if c.policy
        credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds
        credentials_options[:external_id] = c.external_id if c.external_id
        credentials_options[:sts_endpoint_url] = c.sts_endpoint_url if c.sts_endpoint_url
        credentials_options[:sts_http_proxy] = c.sts_http_proxy if c.sts_http_proxy
        if c.sts_http_proxy && c.sts_endpoint_url
          credentials_options[:client] = Aws::STS::Client.new(http_proxy: c.sts_http_proxy, endpoint: c.sts_endpoint_url)
        elsif @region && c.sts_http_proxy
          credentials_options[:client] = Aws::STS::Client.new(region: @region, http_proxy: c.sts_http_proxy)
        elsif @region && c.sts_endpoint_url
          credentials_options[:client] = Aws::STS::Client.new(region: @region, endpoint: c.sts_endpoint_url)
        elsif c.sts_http_proxy
          credentials_options[:client] = Aws::STS::Client.new(http_proxy: c.sts_http_proxy)
        elsif c.sts_endpoint_url
          credentials_options[:client] = Aws::STS::Client.new(endpoint: c.sts_endpoint_url)
        elsif c.sts_region
          credentials_options[:client] = Aws::STS::Client.new(region: c.sts_region)
        elsif @s3_region
          credentials_options[:client] = Aws::STS::Client.new(region: @s3_region)
        end
        options[:credentials] = Aws::AssumeRoleCredentials.new(credentials_options)
      when @web_identity_credentials
        c = @web_identity_credentials
        credentials_options[:role_arn] = c.role_arn
        credentials_options[:role_session_name] = c.role_session_name
        credentials_options[:web_identity_token_file] = c.web_identity_token_file
        credentials_options[:policy] = c.policy if c.policy
        credentials_options[:duration_seconds] = c.duration_seconds if c.duration_seconds
        if c.sts_region
          credentials_options[:client] = Aws::STS::Client.new(:region => c.sts_region)
        elsif @s3_region
          credentials_options[:client] = Aws::STS::Client.new(:region => @s3_region)
        end
        options[:credentials] = Aws::AssumeRoleWebIdentityCredentials.new(credentials_options)
      when @instance_profile_credentials
        c = @instance_profile_credentials
        credentials_options[:retries] = c.retries if c.retries
        credentials_options[:ip_address] = c.ip_address if c.ip_address
        credentials_options[:port] = c.port if c.port
        credentials_options[:http_open_timeout] = c.http_open_timeout if c.http_open_timeout
        credentials_options[:http_read_timeout] = c.http_read_timeout if c.http_read_timeout
        if ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"]
          options[:credentials] = Aws::ECSCredentials.new(credentials_options)
        else
          options[:credentials] = Aws::InstanceProfileCredentials.new(credentials_options)
        end
      when @shared_credentials
        c = @shared_credentials
        credentials_options[:path] = c.path if c.path
        credentials_options[:profile_name] = c.profile_name if c.profile_name
        options[:credentials] = Aws::SharedCredentials.new(credentials_options)
      when @aws_iam_retries
        log.warn("'aws_iam_retries' parameter is deprecated. Use 'instance_profile_credentials' instead")
        credentials_options[:retries] = @aws_iam_retries
        if ENV["AWS_CONTAINER_CREDENTIALS_RELATIVE_URI"]
          options[:credentials] = Aws::ECSCredentials.new(credentials_options)
        else
          options[:credentials] = Aws::InstanceProfileCredentials.new(credentials_options)
        end
      else
        # Use default credentials
        # See http://docs.aws.amazon.com/sdkforruby/api/Aws/S3/Client.html
      end
      options
    end
    class Compressor
      include Fluent::Configurable

      def initialize(opts = {})
        super()
        @buffer_type = opts[:buffer_type]
        @log = opts[:log]
      end

      attr_reader :buffer_type, :log

      def configure(conf)
        super
      end

      def ext
      end

      def content_type
      end

      def compress(chunk, tmp)
      end

      private

      def check_command(command, algo = nil)
        require 'open3'

        algo = command if algo.nil?
        begin
          Open3.capture3("#{command} -V")
        rescue Errno::ENOENT
          raise Fluent::ConfigError, "'#{command}' utility must be in PATH for #{algo} compression"
        end
      end
    end

    class GzipCompressor < Compressor
      def ext
        'gz'.freeze
      end

      def content_type
        'application/x-gzip'.freeze
      end

      def compress(chunk, tmp)
        w = Zlib::GzipWriter.new(tmp)
        chunk.write_to(w)
        w.finish
      ensure
        w.finish rescue nil
      end
    end

    class TextCompressor < Compressor
      def ext
        'txt'.freeze
      end

      def content_type
        'text/plain'.freeze
      end

      def compress(chunk, tmp)
        chunk.write_to(tmp)
      end
    end

    class JsonCompressor < TextCompressor
      def ext
        'json'.freeze
      end

      def content_type
        'application/json'.freeze
      end
    end

    COMPRESSOR_REGISTRY = Fluent::Registry.new(:s3_compressor_type, 'fluent/plugin/s3_compressor_')
    {
      'gzip' => GzipCompressor,
      'json' => JsonCompressor,
      'text' => TextCompressor
    }.each { |name, compressor|
      COMPRESSOR_REGISTRY.register(name, compressor)
    }

    def self.register_compressor(name, compressor)
      COMPRESSOR_REGISTRY.register(name, compressor)
    end
  end
end
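`configure` derives the strftime pattern behind `%{time_slice}` from the buffer `timekey` via `timekey_to_timeformat`, unless a legacy `time_slice_format` is set, and wraps it in `Fluent::Timezone.formatter`. A simplified standalone sketch of that mapping, ignoring the timezone handling and using arbitrary example times:

# Sketch only: same case table as timekey_to_timeformat above,
# applied with plain strftime instead of Fluent::Timezone.formatter.
def timekey_to_timeformat(timekey)
  case timekey
  when nil          then ''
  when 0...60       then '%Y%m%d%H%M%S'
  when 60...3600    then '%Y%m%d%H%M'
  when 3600...86400 then '%Y%m%d%H'
  else                   '%Y%m%d'
  end
end

t = Time.utc(2024, 1, 1, 12, 34, 56)
puts t.strftime(timekey_to_timeformat(300))     # => "202401011234"
puts t.strftime(timekey_to_timeformat(3600))    # => "2024010112"
puts t.strftime(timekey_to_timeformat(86_400))  # => "20240101" (the plugin's default timekey)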
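The `write` method builds the object key in passes over `s3_object_key_format`: plugin-level values (`%{path}`, `%{file_extension}`) first, then Fluentd's `extract_placeholders`, then per-chunk values such as `%{time_slice}`, `%{index}`, and `%{hex_random}`. A minimal standalone sketch of the two `gsub` passes, with made-up sample values standing in for real chunk data:

# Illustrative sketch only -- the sample values below are hypothetical
# stand-ins for what the plugin derives from the buffer chunk.
key_format = "%{path}%{time_slice}_%{index}.%{file_extension}"

pre  = { "%{path}" => "logs/", "%{file_extension}" => "gz" }
post = { "%{time_slice}" => "20240101", "%{index}" => sprintf("%d", 0) }

# Pass 1: fill path and extension, leaving unknown placeholders untouched.
key = key_format.gsub(%r(%{[^}]+})) { |matched| pre.fetch(matched, matched) }
# (In the plugin, extract_placeholders(key, chunk) runs between the two passes.)
# Pass 2: fill the per-chunk values; gsub with a Hash replaces matched keys.
key = key.gsub(%r(%{[^}]+}), post)

puts key # => "logs/20240101_0.gz"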
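The registry at the end of the class is also the extension point for additional `store_as` values: `Fluent::Plugin::S3Output.register_compressor` registers a `Compressor` subclass under a name, which is how the bundled `s3_compressor_lzma2.rb` and `s3_compressor_lzo.rb` files in this package plug in. A hypothetical sketch of such an extension; the `csv` type and its class are invented here for illustration, while the hooks are the ones defined above:

# Hypothetical "store_as csv" extension, reusing only the hooks shown in out_s3.rb.
require 'fluent/plugin/out_s3'

module Fluent::Plugin
  class S3Output
    class CsvCompressor < Compressor
      def ext
        'csv'.freeze
      end

      def content_type
        'text/csv'.freeze
      end

      def compress(chunk, tmp)
        # No real compression: stream the already-formatted chunk to the
        # tempfile, exactly like TextCompressor does.
        chunk.write_to(tmp)
      end
    end

    register_compressor('csv', CsvCompressor)
  end
end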