fluent-plugin-s3 1.7.0 → 1.7.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: db01fa7706627c40baf9e272644c4449c47e4adac7c67ae336099c1c65021e04
4
- data.tar.gz: d74f9c06bcce4606b62d06ba416f934824213e42b508cf49dc093392c34692fe
3
+ metadata.gz: 1450176cd690d79412b7522ce8e67163c2f3c9c1f87941e57d6b3f3410bdf330
4
+ data.tar.gz: ee66229b8cd65ef2507feb5229d54322ed2f076e2daefb3f32e3b0e2ba47c634
5
5
  SHA512:
6
- metadata.gz: 920c6ddc28b300bf9b00c2a21005d4adfeefc2e6c59ddb9afffedb2fcd45145baf70e2f7ec01997c12380d7b31def2289ece915a3099d0045b086314c914a555
7
- data.tar.gz: eb84c4fc47ad9840fb375d65f532f7f5df701ce733b8959c6e037ffd45010548555ae5695b9e0839d4af60967ec791b5cb4cd049e103653b7b441a4f2bf54209
6
+ metadata.gz: f0428c1da0ef734a65d86aec878374636cf4bfd10ed2ab06578d3d8a9cc8ee607e62b4a85c05144599dec2167995dbd836b70cb7cfb66f38fc38d5e82dc1073a
7
+ data.tar.gz: bb4d7820ff8fa7fa3e58092b69fc78bb4804d92854206fb61c4efa21b415519eff682cf64763a9ec237f30f8997ab20b0b395bb8f1f17a8c88a9c63b3381b02b
data/ChangeLog CHANGED
@@ -1,3 +1,13 @@
1
+ Release 1.7.2 - 2022/10/19
2
+
3
+ * in_s3: Add `event_bridge_mode` parameter
4
+ * out_s3: Fix `s3_object_key_format` check to allow `%{hex_random}` as well as `%{uuid_flush}` or `${chunk_id}`
5
+
6
+ Release 1.7.1 - 2022/07/15
7
+
8
+ * in_s3: Add `match_regexp` parameter to selectively download S3 files based on the object key
9
+ * out_s3: Support `ssl_ca_bundle` and `ssl_ca_directory` parameter
10
+
1
11
  Release 1.7.0 - 2022/06/14
2
12
 
3
13
  * in_s3: Allow multi workers
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.7.0
1
+ 1.7.2
data/docs/howto.md CHANGED
@@ -7,7 +7,7 @@ downstream processors to better identify the source of a given record.
7
7
 
8
8
  # IAM Policy
9
9
 
10
- The following is an example for a IAM policy needed to write to an s3 bucket (matches my-s3bucket/logs, my-s3bucket-test, etc.).
10
+ The following is an example of an IAM policy needed to write to an S3 bucket (matches my-s3bucket/logs, my-s3bucket/test, etc.).
11
11
 
12
12
  {
13
13
  "Version": "2012-10-17",
data/docs/input.md CHANGED
@@ -18,6 +18,7 @@ See also [Configuration: credentials](credentials.md) for common comprehensive p
18
18
  s3_bucket YOUR_S3_BUCKET_NAME
19
19
  s3_region ap-northeast-1
20
20
  add_object_metadata true
21
+ match_regexp production_.*
21
22
 
22
23
  <sqs>
23
24
  queue_name YOUR_SQS_QUEUE_NAME
@@ -28,6 +29,10 @@ See also [Configuration: credentials](credentials.md) for common comprehensive p
28
29
 
29
30
  Whether or not object metadata should be added to the record. Defaults to `false`. See below for details.
30
31
 
32
+ ## match_regexp
33
+
34
+ If provided, process the S3 object only if its key matches the regular expression.
35
+
31
36
  ## s3_bucket (required)
32
37
 
33
38
  S3 bucket name.
@@ -96,3 +101,7 @@ The long polling interval. Default is 20.
96
101
  ### retry_error_interval
97
102
 
98
103
  Interval to retry polling SQS if polling unsuccessful, in seconds. Default is 300.
104
+
105
+ ### event_bridge_mode
106
+ When true, Amazon S3 Event Notification should be configured using the EventBridge integration. Default is false.
107
+ See [Configure S3 event notification using EventBridge](https://docs.aws.amazon.com/AmazonS3/latest/userguide/EventBridge.html) for additional information.
data/docs/output.md CHANGED
@@ -81,6 +81,14 @@ This fixes the following error often seen in Windows:
81
81
 
82
82
  SSL_connect returned=1 errno=0 state=SSLv3 read server certificate B: certificate verify failed (Seahorse::Client::NetworkingError)
83
83
 
84
+ ## ssl_ca_bundle
85
+
86
+ Full path to the SSL certificate authority bundle file that should be used when verifying peer certificates. If you do not pass `ssl_ca_bundle` or `ssl_ca_directory`, the system default will be used if available.
87
+
88
+ ## ssl_ca_directory
89
+
90
+ Full path of the directory that contains the unbundled SSL certificate authority files for verifying peer certificates. If you do not pass `ssl_ca_bundle` or `ssl_ca_directory`, the system default will be used if available.
91
+
84
92
  ## ssl_verify_peer
85
93
 
86
94
  Verify SSL certificate of the endpoint. Default is true. Set false when you want to ignore the endpoint SSL certificate.
@@ -90,6 +90,8 @@ module Fluent::Plugin
90
90
  config_param :check_apikey_on_start, :bool, default: true
91
91
  desc "URI of proxy environment"
92
92
  config_param :proxy_uri, :string, default: nil
93
+ desc "Optional RegEx to match incoming messages"
94
+ config_param :match_regexp, :regexp, default: nil
93
95
 
94
96
  config_section :sqs, required: true, multi: false do
95
97
  desc "SQS queue name"
@@ -108,6 +110,8 @@ module Fluent::Plugin
108
110
  config_param :wait_time_seconds, :integer, default: 20
109
111
  desc "Polling error retry interval."
110
112
  config_param :retry_error_interval, :integer, default: 300
113
+ desc "Event bridge mode"
114
+ config_param :event_bridge_mode, :bool, default: false
111
115
  end
112
116
 
113
117
  desc "Tag string"
@@ -203,8 +207,12 @@ module Fluent::Plugin
203
207
  begin
204
208
  body = Yajl.load(message.body)
205
209
  log.debug(body)
206
- next unless body["Records"] # skip test queue
207
-
210
+ next unless is_valid_queue(body) # skip test queue
211
+ if @match_regexp
212
+ raw_key = get_raw_key(body)
213
+ key = CGI.unescape(raw_key)
214
+ next unless @match_regexp.match?(key)
215
+ end
208
216
  process(body)
209
217
  rescue => e
210
218
  log.warn(error: e)
@@ -219,6 +227,24 @@ module Fluent::Plugin
219
227
  end
220
228
  end
221
229
 
230
+ def is_valid_queue(body)
231
+ if @sqs.event_bridge_mode
232
+ log.debug("checking for eventbridge property")
233
+ !!body["detail"]
234
+ else
235
+ log.debug("checking for Records property")
236
+ !!body["Records"]
237
+ end
238
+ end
239
+
240
+ def get_raw_key(body)
241
+ if @sqs.event_bridge_mode
242
+ body["detail"]["object"]["key"]
243
+ else
244
+ body["Records"].first["s3"]["object"]["key"]
245
+ end
246
+ end
247
+
222
248
  def setup_credentials
223
249
  options = {}
224
250
  credentials_options = {}
@@ -311,8 +337,7 @@ module Fluent::Plugin
311
337
  end
312
338
 
313
339
  def process(body)
314
- s3 = body["Records"].first["s3"]
315
- raw_key = s3["object"]["key"]
340
+ raw_key = get_raw_key(body)
316
341
  key = CGI.unescape(raw_key)
317
342
 
318
343
  io = @bucket.object(key).get.body
@@ -97,6 +97,10 @@ module Fluent::Plugin
97
97
  config_param :enable_dual_stack, :bool, default: false
98
98
  desc "If false, the certificate of endpoint will not be verified"
99
99
  config_param :ssl_verify_peer, :bool, :default => true
100
+ desc "Full path to the SSL certificate authority bundle file that should be used when verifying peer certificates. If unspecified, defaults to the system CA if available."
101
+ config_param :ssl_ca_bundle, :string, :default => nil
102
+ desc "Full path of the directory that contains the unbundled SSL certificate authority files for verifying peer certificates. If you do not pass ssl_ca_bundle or ssl_ca_directory the the system default will be used if available."
103
+ config_param :ssl_ca_directory, :string, :default => nil
100
104
  desc "The format of S3 object keys"
101
105
  config_param :s3_object_key_format, :string, default: "%{path}%{time_slice}_%{index}.%{file_extension}"
102
106
  desc "If true, the bucket name is always left in the request URI and never moved to the host as a sub-domain"
@@ -249,6 +253,8 @@ module Fluent::Plugin
249
253
  options[:compute_checksums] = @compute_checksums unless @compute_checksums.nil?
250
254
  options[:signature_version] = @signature_version unless @signature_version.nil?
251
255
  options[:ssl_verify_peer] = @ssl_verify_peer
256
+ options[:ssl_ca_bundle] = @ssl_ca_bundle if @ssl_ca_bundle
257
+ options[:ssl_ca_directory] = @ssl_ca_directory if @ssl_ca_directory
252
258
  log.on_trace do
253
259
  options[:http_wire_trace] = true
254
260
  options[:logger] = log
@@ -465,8 +471,8 @@ module Fluent::Plugin
465
471
  end
466
472
 
467
473
  is_working_on_parallel = @buffer_config.flush_thread_count > 1 || system_config.workers > 1
468
- if is_working_on_parallel && ['${chunk_id}', '%{uuid_flush}'].none? { |key| @s3_object_key_format.include?(key) }
469
- log.warn "No ${chunk_id} or %{uuid_flush} in s3_object_key_format with multiple flush threads or multiple workers. Recommend to set ${chunk_id} or %{uuid_flush} to avoid data lost by object conflict"
474
+ if is_working_on_parallel && ['${chunk_id}', '%{uuid_flush}', '%{hex_random}'].none? { |key| @s3_object_key_format.include?(key) }
475
+ log.warn "No ${chunk_id}, %{uuid_flush} or %{hex_random} in s3_object_key_format with multiple flush threads or multiple workers. Recommend to set ${chunk_id}, %{uuid_flush} or %{hex_random} to avoid data lost by object conflict"
470
476
  end
471
477
  end
472
478
 
data/test/test_in_s3.rb CHANGED
@@ -166,9 +166,9 @@ buffer_type memory
166
166
  aws_key_id sqs_test_key_id
167
167
  </sqs>
168
168
  EOS
169
- create_driver(conf)
170
- }
171
- end
169
+ create_driver(conf)
170
+ }
171
+ end
172
172
 
173
173
  def test_sqs_with_invalid_aws_keys_missing_key_id
174
174
  assert_raise(Fluent::ConfigError, "sqs/aws_key_id or sqs/aws_sec_key is missing") {
@@ -613,4 +613,114 @@ EOS
613
613
  ]
614
614
  assert_equal(expected_records, events.map {|_tag, _time, record| record })
615
615
  end
616
+
617
+ def test_regexp_matching
618
+ setup_mocks
619
+ d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp .*_key?")
620
+
621
+ s3_object = stub(Object.new)
622
+ s3_response = stub(Object.new)
623
+ s3_response.body { StringIO.new("aaa bbb ccc") }
624
+ s3_object.get { s3_response }
625
+ @s3_bucket.object(anything).at_least(1) { s3_object }
626
+
627
+ body = {
628
+ "Records" => [
629
+ {
630
+ "s3" => {
631
+ "object" => {
632
+ "key" => "test_key"
633
+ }
634
+ }
635
+ }
636
+ ]
637
+ }
638
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
639
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
640
+ config.before_request.call(stats) if config.before_request
641
+ stats.request_count += 1
642
+ if stats.request_count >= 1
643
+ d.instance.instance_variable_set(:@running, false)
644
+ end
645
+ [message]
646
+ end
647
+ d.run(expect_emits: 1)
648
+ events = d.events
649
+ assert_equal({ "message" => "aaa bbb ccc" }, events.first[2])
650
+ end
651
+
652
+ def test_regexp_not_matching
653
+ setup_mocks
654
+ d = create_driver(CONFIG + "\ncheck_apikey_on_start false\nstore_as text\nformat none\nmatch_regexp live?_key")
655
+
656
+ body = {
657
+ "Records" => [
658
+ {
659
+ "s3" => {
660
+ "object" => {
661
+ "key" => "test_key"
662
+ }
663
+ }
664
+ }
665
+ ]
666
+ }
667
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
668
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
669
+ config.before_request.call(stats) if config.before_request
670
+ stats.request_count += 1
671
+ if stats.request_count >= 1
672
+ d.instance.instance_variable_set(:@running, false)
673
+ end
674
+ [message]
675
+ end
676
+ assert_nothing_raised do
677
+ d.run {}
678
+ end
679
+ end
680
+
681
+ def test_event_bridge_mode
682
+ setup_mocks
683
+ d = create_driver("
684
+ aws_key_id test_key_id
685
+ aws_sec_key test_sec_key
686
+ s3_bucket test_bucket
687
+ buffer_type memory
688
+ check_apikey_on_start false
689
+ store_as text
690
+ format none
691
+ <sqs>
692
+ event_bridge_mode true
693
+ queue_name test_queue
694
+ queue_owner_aws_account_id 123456789123
695
+ </sqs>
696
+ ")
697
+
698
+ s3_object = stub(Object.new)
699
+ s3_response = stub(Object.new)
700
+ s3_response.body { StringIO.new("aaa") }
701
+ s3_object.get { s3_response }
702
+ @s3_bucket.object(anything).at_least(1) { s3_object }
703
+
704
+ body = {
705
+ "detail" => {
706
+ "object" => {
707
+ "key" => "test_key"
708
+ }
709
+ }
710
+ }
711
+
712
+ message = Struct::StubMessage.new(1, 1, Yajl.dump(body))
713
+ @sqs_poller.get_messages(anything, anything) do |config, stats|
714
+ config.before_request.call(stats) if config.before_request
715
+ stats.request_count += 1
716
+ if stats.request_count >= 1
717
+ d.instance.instance_variable_set(:@running, false)
718
+ end
719
+ [message]
720
+ end
721
+ d.run(expect_emits: 1)
722
+ events = d.events
723
+ assert_equal({ "message" => "aaa" }, events.first[2])
724
+ end
725
+
616
726
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: fluent-plugin-s3
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.7.0
4
+ version: 1.7.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Sadayuki Furuhashi
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2022-06-14 00:00:00.000000000 Z
12
+ date: 2022-10-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: fluentd