fluent-plugin-s3 1.7.0 → 1.7.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db01fa7706627c40baf9e272644c4449c47e4adac7c67ae336099c1c65021e04
4
- data.tar.gz: d74f9c06bcce4606b62d06ba416f934824213e42b508cf49dc093392c34692fe
3
+ metadata.gz: 1450176cd690d79412b7522ce8e67163c2f3c9c1f87941e57d6b3f3410bdf330
4
+ data.tar.gz: ee66229b8cd65ef2507feb5229d54322ed2f076e2daefb3f32e3b0e2ba47c634
5
5
  SHA512:
6
- metadata.gz: 920c6ddc28b300bf9b00c2a21005d4adfeefc2e6c59ddb9afffedb2fcd45145baf70e2f7ec01997c12380d7b31def2289ece915a3099d0045b086314c914a555
7
- data.tar.gz: eb84c4fc47ad9840fb375d65f532f7f5df701ce733b8959c6e037ffd45010548555ae5695b9e0839d4af60967ec791b5cb4cd049e103653b7b441a4f2bf54209
6
+ metadata.gz: f0428c1da0ef734a65d86aec878374636cf4bfd10ed2ab06578d3d8a9cc8ee607e62b4a85c05144599dec2167995dbd836b70cb7cfb66f38fc38d5e82dc1073a
7
+ data.tar.gz: bb4d7820ff8fa7fa3e58092b69fc78bb4804d92854206fb61c4efa21b415519eff682cf64763a9ec237f30f8997ab20b0b395bb8f1f17a8c88a9c63b3381b02b
data/ChangeLog CHANGED
@@ -1,3 +1,13 @@
1
+ Release 1.7.2 - 2022/10/19
2
+
3
+ * in_s3: Add `event_bridge_mode` parameter
4
+ * out_s3: Fix `s3_object_key_format` check to allow `%{hex_random}` as well as `%{uuid_flush}` or `${chunk_id}`
5
+
6
+ Release 1.7.1 - 2022/07/15
7
+
8
+ * in_s3: Add `match_regexp` parameter to selectively download S3 files based on the object key
9
+ * out_s3: Support `ssl_ca_bundle` and `ssl_ca_directory` parameter
10
+
1
11
  Release 1.7.0 - 2022/06/14
2
12
 
3
13
  * in_s3: Allow multi workers
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.7.0
1
+ 1.7.2
data/docs/howto.md CHANGED
@@ -7,7 +7,7 @@ downstream processors to better identify the source of a given record.
7
7
 
8
8
  # IAM Policy
9
9
 
10
- The following is an example for a IAM policy needed to write to an s3 bucket (matches my-s3bucket/logs, my-s3bucket-test, etc.).
10
+ The following is an example of an IAM policy needed to write to an s3 bucket (matches my-s3bucket/logs, my-s3bucket/test, etc.).
11
11
 
12
12
  {
13
13
  "Version": "2012-10-17",
data/docs/input.md CHANGED
@@ -18,6 +18,7 @@ See also [Configuration: credentials](credentials.md) for common comprehensive p
18
18
  s3_bucket YOUR_S3_BUCKET_NAME
19
19
  s3_region ap-northeast-1
20
20
  add_object_metadata true
21
+ match_regexp production_.*
21
22
 
22
23
  <sqs>
23
24
  queue_name YOUR_SQS_QUEUE_NAME
@@ -28,6 +29,10 @@ See also [Configuration: credentials](credentials.md) for common comprehensive p
28
29
 
29
30
  Whether or not object metadata should be added to the record. Defaults to `false`. See below for details.
30
31
 
32
+ ## match_regexp
33
+
34
+ If provided, process the S3 object only if its key matches the regular expression.
35
+
31
36
  ## s3_bucket (required)
32
37
 
33
38
  S3 bucket name.
@@ -96,3 +101,7 @@ The long polling interval. Default is 20.
96
101
  ### retry_error_interval
97
102
 
98
103
  Interval to retry polling SQS if polling unsuccessful, in seconds. Default is 300.
104
+
105
+ ### event_bridge_mode
106
+ When true, Amazon S3 Event Notification should be configured using the EventBridge integration. Default is false.
107
+ See [Configure S3 event notification using EventBridge](https://docs.aws.amazon.com/AmazonS3/latest/userguide/EventBridge.html) for additional information.
data/docs/output.md CHANGED
@@ -81,6 +81,14 @@ This fixes the following error often seen in Windows:
81
81
 
82
82
  SSL_connect returned=1 errno=0 state=SSLv3 read server certificate B: certificate verify failed (Seahorse::Client::NetworkingError)
83
83
 
84
+ ## ssl_ca_bundle
85
+
86
+ Full path to the SSL certificate authority bundle file that should be used when verifying peer certificates. If you do not pass `ssl_ca_bundle` or `ssl_ca_directory`, the system default will be used if available.
87
+
88
+ ## ssl_ca_directory
89
+
90
+ Full path of the directory that contains the unbundled SSL certificate authority files for verifying peer certificates. If you do not pass `ssl_ca_bundle` or `ssl_ca_directory`, the system default will be used if available.
91
+
84
92
  ## ssl_verify_peer
85
93
 
86
94
  Verify SSL certificate of the endpoint. Default is true. Set false when you want to ignore the endpoint SSL certificate.
@@ -90,6 +90,8 @@ module Fluent::Plugin
90
90
  config_param :check_apikey_on_start, :bool, default: true
91
91
  desc "URI of proxy environment"
92
92
  config_param :proxy_uri, :string, default: nil
93
+ desc "Optional RegEx to match incoming messages"
94
+ config_param :match_regexp, :regexp, default: nil
93
95
 
94
96
  config_section :sqs, required: true, multi: false do
95
97
  desc "SQS queue name"
@@ -108,6 +110,8 @@ module Fluent::Plugin
108
110
  config_param :wait_time_seconds, :integer, default: 20
109
111
  desc "Polling error retry interval."
110
112
  config_param :retry_error_interval, :integer, default: 300
113
+ desc "Event bridge mode"
114
+ config_param :event_bridge_mode, :bool, default: false
111
115
  end
112
116
 
113
117
  desc "Tag string"
@@ -203,8 +207,12 @@ module Fluent::Plugin
203
207
  begin
204
208
  body = Yajl.load(message.body)
205
209
  log.debug(body)
206
- next unless body["Records"] # skip test queue
207
-
210
+ next unless is_valid_queue(body) # skip test queue
211
+ if @match_regexp
212
+ raw_key = get_raw_key(body)
213
+ key = CGI.unescape(raw_key)
214
+ next unless @match_regexp.match?(key)
215
+ end
208
216
  process(body)
209
217
  rescue => e
210
218
  log.warn(error: e)
@@ -219,6 +227,24 @@ module Fluent::Plugin
219
227
  end
220
228
  end
221
229
 
230
+ def is_valid_queue(body)
231
+ if @sqs.event_bridge_mode
232
+ log.debug("checking for eventbridge property")
233
+ !!body["detail"]
234
+ else
235
+ log.debug("checking for Records property")
236
+ !!body["Records"]
237
+ end
238
+ end
239
+
240
+ def get_raw_key(body)
241
+ if @sqs.event_bridge_mode
242
+ body["detail"]["object"]["key"]
243
+ else
244
+ body["Records"].first["s3"]["object"]["key"]
245
+ end
246
+ end
247
+
222
248
  def setup_credentials
223
249
  options = {}
224
250
  credentials_options = {}
@@ -311,8 +337,7 @@ module Fluent::Plugin
311
337
  end
312
338
 
313
339
  def process(body)
314
- s3 = body["Records"].first["s3"]
315
- raw_key = s3["object"]["key"]
340
+ raw_key = get_raw_key(body)
316
341
  key = CGI.unescape(raw_key)
317
342
 
318
343
  io = @bucket.object(key).get.body
@@ -97,6 +97,10 @@ module Fluent::Plugin
97
97
  config_param :enable_dual_stack, :bool, default: false
98
98
  desc "If false, the certificate of endpoint will not be verified"
99
99
  config_param :ssl_verify_peer, :bool, :default => true
100
+ desc "Full path to the SSL certificate authority bundle file that should be used when verifying peer certificates. If unspecified, defaults to the system CA if available."
101
+ config_param :ssl_ca_bundle, :string, :default => nil
102
+ desc "Full path of the directory that contains the unbundled SSL certificate authority files for verifying peer certificates. If you do not pass ssl_ca_bundle or ssl_ca_directory the the system default will be used if available."
103
+ config_param :ssl_ca_directory, :string, :default => nil
100
104
  desc "The format of S3 object keys"
101
105
  config_param :s3_object_key_format, :string, default: "%{path}%{time_slice}_%{index}.%{file_extension}"
102
106
  desc "If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain"
@@ -249,6 +253,8 @@ module Fluent::Plugin
249
253
  options[:compute_checksums] = @compute_checksums unless @compute_checksums.nil?
250
254
  options[:signature_version] = @signature_version unless @signature_version.nil?
251
255
  options[:ssl_verify_peer] = @ssl_verify_peer
256
+ options[:ssl_ca_bundle] = @ssl_ca_bundle if @ssl_ca_bundle
257
+ options[:ssl_ca_directory] = @ssl_ca_directory if @ssl_ca_directory
252
258
  log.on_trace do
253
259
  options[:http_wire_trace] = true
254
260
  options[:logger] = log
@@ -465,8 +471,8 @@ module Fluent::Plugin
465
471
  end
466
472
 
467
473
  is_working_on_parallel = @buffer_config.flush_thread_count > 1 || system_config.workers > 1
468
- if is_working_on_parallel && ['${chunk_id}', '%{uuid_flush}'].none? { |key| @s3_object_key_format.include?(key) }
469
- log.warn "No ${chunk_id} or %{uuid_flush} in s3_object_key_format with multiple flush threads or multiple workers. Recommend to set ${chunk_id} or %{uuid_flush} to avoid data lost by object conflict"
474
+ if is_working_on_parallel && ['${chunk_id}', '%{uuid_flush}', '%{hex_random}'].none? { |key| @s3_object_key_format.include?(key) }
475
+ log.warn "No ${chunk_id}, %{uuid_flush} or %{hex_random} in s3_object_key_format with multiple flush threads or multiple workers. Recommend to set ${chunk_id}, %{uuid_flush} or %{hex_random} to avoid data lost by object conflict"
470
476
  end
471
477
  end
472
478
 
data/test/test_in_s3.rb CHANGED
@@ -166,9 +166,9 @@ buffer_type memory
166
166
  aws_key_id sqs_test_key_id
167
167
  </sqs>
168
168
  EOS
169
- create_driver(conf)
170
- }
171
- end
169
+ create_driver(conf)
170
+ }
171
+ end
172
172
 
173
173
  def test_sqs_with_invalid_aws_keys_missing_key_id
174
174
  assert_raise(Fluent::ConfigError, "sqs/aws_key_id or sqs/aws_sec_key is missing") {
@@ -613,4 +613,114 @@ EOS
613
613
  ]
614
614
  assert_equal(expected_records, events.map {|_tag, _time, record| record })
615
615
  end
616
+
617
+ def test_regexp_matching
618
+ setup_mocks
619
+ d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp .*_key?")
620
+
621
+ s3_object = stub(Object.new)
622
+ s3_response = stub(Object.new)
623
+ s3_response.body { StringIO.new("aaa bbb ccc") }
624
+ s3_object.get { s3_response }
625
+ @s3_bucket.object(anything).at_least(1) { s3_object }
626
+
627
+ body = {
628
+ "Records" => [
629
+ {
630
+ "s3" => {
631
+ "object" => {
632
+ "key" => "test_key"
633
+ }
634
+ }
635
+ }
636
+ ]
637
+ }
638
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
639
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
640
+ config.before_request.call(stats) if config.before_request
641
+ stats.request_count += 1
642
+ if stats.request_count >= 1
643
+ d.instance.instance_variable_set(:@running, false)
644
+ end
645
+ [message]
646
+ end
647
+ d.run(expect_emits: 1)
648
+ events = d.events
649
+ assert_equal({ "message" => "aaa bbb ccc" }, events.first[2])
650
+ end
651
+
652
+ def test_regexp_not_matching
653
+ setup_mocks
654
+ d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp live?_key")
655
+
656
+ body = {
657
+ "Records" => [
658
+ {
659
+ "s3" => {
660
+ "object" => {
661
+ "key" => "test_key"
662
+ }
663
+ }
664
+ }
665
+ ]
666
+ }
667
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
668
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
669
+ config.before_request.call(stats) if config.before_request
670
+ stats.request_count += 1
671
+ if stats.request_count >= 1
672
+ d.instance.instance_variable_set(:@running, false)
673
+ end
674
+ [message]
675
+ end
676
+ assert_nothing_raised do
677
+ d.run {}
678
+ end
679
+ end
680
+
681
+ def test_event_bridge_mode
682
+ setup_mocks
683
+ d = create_driver("
684
+ aws_key_id test_key_id
685
+ aws_sec_key test_sec_key
686
+ s3_bucket test_bucket
687
+ buffer_type memory
688
+ check_apikey_on_start false
689
+ store_as text
690
+ format none
691
+ <sqs>
692
+ event_bridge_mode true
693
+ queue_name test_queue
694
+ queue_owner_aws_account_id 123456789123
695
+ </sqs>
696
+ ")
697
+
698
+ s3_object = stub(Object.new)
699
+ s3_response = stub(Object.new)
700
+ s3_response.body { StringIO.new("aaa") }
701
+ s3_object.get { s3_response }
702
+ @s3_bucket.object(anything).at_least(1) { s3_object }
703
+
704
+ body = {
705
+ "detail" => {
706
+ "object" => {
707
+ "key" => "test_key"
708
+ }
709
+ }
710
+ }
711
+
712
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
713
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
714
+ config.before_request.call(stats) if config.before_request
715
+ stats.request_count += 1
716
+ if stats.request_count >= 1
717
+ d.instance.instance_variable_set(:@running, false)
718
+ end
719
+ [message]
720
+ end
721
+ d.run(expect_emits: 1)
722
+ events = d.events
723
+ assert_equal({ "message" => "aaa" }, events.first[2])
724
+ end
725
+
616
726
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-06-14 00:00:00.000000000 Z
12
+ date: 2022-10-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd