fluent-plugin-gcloud-pubsub-custom-compress-batches 1.3.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fluent/plugin/output"
4
+ require "fluent/plugin/gcloud_pubsub/client"
5
+ require "fluent/plugin/gcloud_pubsub/metrics"
6
+ require "fluent/plugin_helper/inject"
7
+ require "prometheus/client"
8
+
9
+ module Fluent::Plugin
10
+ class GcloudPubSubOutput < Output
11
+ include Fluent::PluginHelper::Inject
12
+
13
+ Fluent::Plugin.register_output("gcloud_pubsub", self)
14
+
15
+ helpers :compat_parameters, :formatter
16
+
17
+ DEFAULT_BUFFER_TYPE = "memory"
18
+ DEFAULT_FORMATTER_TYPE = "json"
19
+
20
# --- Connection / topic settings -------------------------------------------
desc "Set your GCP project."
config_param :project, :string, default: nil

desc "Set your credential file path."
config_param :key, :string, default: nil

desc "Set topic name to publish."
config_param :topic, :string

desc "If set to `true`, specified topic will be created when it doesn't exist."
config_param :autocreate_topic, :bool, default: false

# --- Per-request and per-message limits ------------------------------------
desc "Publishing messages count per request to Cloud Pub/Sub."
config_param :max_messages, :integer, default: 1_000

desc "Publishing messages bytesize per request to Cloud Pub/Sub."
config_param :max_total_size, :integer, default: 9_800_000 # 9.8MB

desc "Limit bytesize per message."
config_param :max_message_size, :integer, default: 4_000_000 # 4MB

# --- Message shaping --------------------------------------------------------
desc "Extract these fields from the record and send them as attributes on the Pub/Sub message. " \
     "Cannot be set if compress_batches is enabled."
config_param :attribute_keys, :array, default: []

desc "The prefix for Prometheus metric names"
config_param :metric_prefix, :string, default: "fluentd_output_gcloud_pubsub"

desc "If set to `true`, messages will be batched and compressed before publication"
config_param :compress_batches, :bool, default: false

# --- Section defaults -------------------------------------------------------
config_section(:buffer) { config_set_default :@type, DEFAULT_BUFFER_TYPE }

config_section(:format) { config_set_default :@type, DEFAULT_FORMATTER_TYPE }
49
+
50
+ # rubocop:disable Metrics/MethodLength
51
# Applies the configuration, validates it, creates the formatter and
# registers (or reuses) the Prometheus metrics this output reports.
#
# @param conf the plugin configuration; forwarded unchanged to +super+
# @raise [Fluent::ConfigError] if +compress_batches+ is enabled while
#   +attribute_keys+ is non-empty
def configure(conf)
  compat_parameters_convert(conf, :buffer, :formatter)
  super
  placeholder_validate!(:topic, @topic)
  @formatter = formatter_create

  # attribute_keys works by moving fields of a record onto that record's
  # Pub/Sub message. In compressed mode a single Pub/Sub message carries
  # many records, so their attribute keys would clash.
  attributes_with_compression = @compress_batches && !@attribute_keys.empty?
  raise Fluent::ConfigError, ":attribute_keys cannot be used when compression is enabled" if attributes_with_compression

  metrics = Fluent::GcloudPubSub::Metrics

  per_batch_name = :"#{@metric_prefix}_messages_published_per_batch"
  @messages_published = metrics.register_or_existing(per_batch_name) do
    ::Prometheus::Client.registry.histogram(
      per_batch_name,
      "Number of records published to Pub/Sub per buffer flush",
      {},
      [1, 10, 50, 100, 250, 500, 1000],
    )
  end

  bytes_name = :"#{@metric_prefix}_messages_published_bytes"
  @bytes_published = metrics.register_or_existing(bytes_name) do
    ::Prometheus::Client.registry.histogram(
      bytes_name,
      "Total size in bytes of the records published to Pub/Sub",
      {},
      [100, 1000, 10_000, 100_000, 1_000_000, 5_000_000, 10_000_000],
    )
  end

  compression_name = :"#{@metric_prefix}_compression_enabled"
  @compression_enabled = metrics.register_or_existing(compression_name) do
    ::Prometheus::Client.registry.gauge(
      compression_name,
      "Whether compression/batching is enabled",
      {},
    )
  end

  # Expose the compression setting as a 0/1 gauge value.
  compression_flag = @compress_batches ? 1 : 0
  @compression_enabled.set(common_labels, compression_flag)
end
96
+ # rubocop:enable Metrics/MethodLength
97
+
98
# Runs the parent start-up, then builds the publisher from the configured
# project, credential key, topic auto-creation flag and metric prefix.
def start
  super
  @publisher = Fluent::GcloudPubSub::Publisher.new(
    @project,
    @key,
    @autocreate_topic,
    @metric_prefix,
  )
end
102
+
103
# Serializes one event as a msgpack-encoded pair: [formatted record, attributes].
#
# Every key listed in +attribute_keys+ is removed from the record and placed
# in the attributes hash instead (the value is nil when the record lacks the key).
#
# @param tag the event tag
# @param time the event time
# @param record the event record
# @return the msgpack encoding of the two-element array
def format(tag, time, record)
  injected = inject_values_to_record(tag, time, record)

  # Hash#delete both reads the value and strips it from the payload.
  extracted = @attribute_keys.each_with_object({}) do |name, attrs|
    attrs[name] = injected.delete(name)
  end

  payload = @formatter.format(tag, time, injected)
  [payload, extracted].to_msgpack
end
111
+
112
# Always true: #format emits msgpack-encoded data (it ends with +to_msgpack+).
# NOTE(review): presumably this is what lets #write iterate the chunk with
# +msgpack_each+ - confirm against the Fluentd output plugin API.
#
# @return [true]
def formatted_to_msgpack_binary?
  true
end
115
+
116
# Always true.
# NOTE(review): presumably declares that this output may run under Fluentd's
# multi-worker mode - confirm against the Fluentd plugin API.
#
# @return [true]
def multi_workers_ready?
  true
end
119
+
120
# Publishes the contents of a buffer chunk to the resolved topic, splitting
# it into batches that respect +max_messages+ and +max_total_size+.
#
# Messages larger than +max_message_size+ are dropped with a warning.
# A batch is flushed before adding a message that would push it over either
# limit; an empty batch is never flushed, so a single message that by itself
# exceeds +max_total_size+ (or a +max_messages+ below 1) no longer triggers
# an empty publish call.
#
# @param chunk the buffer chunk; must respond to +metadata+ and +msgpack_each+
# @raise [Fluent::GcloudPubSub::RetryableError] re-raised after a warning
# @raise [StandardError] any other error, re-raised after being logged
def write(chunk)
  topic = extract_placeholders(@topic, chunk.metadata)

  batch = []
  batch_bytes = 0

  chunk.msgpack_each do |payload, attributes|
    message = Fluent::GcloudPubSub::Message.new(payload, attributes)
    if message.bytesize > @max_message_size
      log.warn "Drop a message because its size exceeds `max_message_size`", size: message.bytesize
      next
    end

    # Flush first when this message would overflow the current batch.
    if batch.length + 1 > @max_messages || batch_bytes + message.bytesize > @max_total_size
      publish(topic, batch) unless batch.empty?
      batch = []
      batch_bytes = 0
    end

    batch << message
    batch_bytes += message.bytesize
  end

  publish(topic, batch) unless batch.empty?
rescue Fluent::GcloudPubSub::RetryableError => e
  log.warn "Retryable error occurs. Fluentd will retry.", error_message: e.to_s, error_class: e.class.to_s
  raise
rescue StandardError => e
  log.error "unexpected error", error_message: e.to_s, error_class: e.class.to_s
  log.error_backtrace
  raise
end
150
+
151
+ private
152
+
153
# Records batch metrics and hands one batch of messages to the publisher.
#
# An empty batch is a no-op: nothing is observed and nothing is published.
# (Previously the size of an empty batch evaluated to nil, which was then
# passed to the histogram and an empty request was sent to the publisher.)
#
# @param topic the resolved topic name to publish to
# @param messages [Array] messages responding to +bytesize+
# @return the publisher's result, or nil when +messages+ is empty
def publish(topic, messages)
  return if messages.empty?

  size = messages.sum(&:bytesize)
  log.debug "send message topic:#{topic} length:#{messages.length} size:#{size}"

  @messages_published.observe(common_labels, messages.length)
  @bytes_published.observe(common_labels, size)

  @publisher.publish(topic, messages, @compress_batches)
end
162
+
163
# Label set attached to every metric observation made by this plugin.
# The label value is the configured +@topic+ as-is, not the per-chunk topic
# that #write resolves through +extract_placeholders+.
#
# @return [Hash] a single-entry hash keyed by +:topic+
def common_labels
  configured_topic = @topic
  { topic: configured_topic }
end
166
+ end
167
+ end
@@ -0,0 +1,455 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+
6
+ require_relative "../test_helper"
7
+ require "fluent/test/driver/input"
8
+
9
+ class GcloudPubSubInputTest < Test::Unit::TestCase
10
+ CONFIG = %(
11
+ tag test
12
+ project project-test
13
+ topic topic-test
14
+ subscription subscription-test
15
+ key key-test
16
+ )
17
+
18
+ DEFAULT_HOST = "127.0.0.1"
19
+ DEFAULT_PORT = 24_680
20
+
21
# Stand-in for a message body whose payload is not a JSON document.
class DummyInvalidMsgData
  PAYLOAD = "foo:bar".freeze

  # @return [String] the fixed payload
  def data
    PAYLOAD
  end
end
26
# Stand-in for a received message wrapping DummyInvalidMsgData.
class DummyInvalidMessage
  # @return [Hash] a fresh attribute hash on every call
  def attributes
    { "attr_1" => "a", "attr_2" => "b" }
  end

  # @return [DummyInvalidMsgData] a new body object on every call
  def message
    DummyInvalidMsgData.new
  end

  # Shortcut to the payload of the wrapped body.
  def data
    message.data
  end
end
39
+
40
# Builds an input test driver for the plugin and configures it.
#
# @param conf [String] configuration text; defaults to CONFIG
# @return the result of the driver's +configure+ call
def create_driver(conf = CONFIG)
  driver = Fluent::Test::Driver::Input.new(Fluent::Plugin::GcloudPubSubInput)
  driver.configure(conf)
end
43
+
44
# Issues a GET request for +path+ against DEFAULT_HOST:DEFAULT_PORT.
#
# @param path [String] request path
# @return the HTTP response
def http_get(path)
  client = Net::HTTP.new(DEFAULT_HOST, DEFAULT_PORT)
  headers = { "Content-Type" => "application/x-www-form-urlencoded" }
  client.request(Net::HTTP::Get.new(path, headers))
end
49
+
50
# Reset the Fluentd test environment before every test.
setup { Fluent::Test.setup }
53
+
54
sub_test_case "configure" do
  # Every parameter given explicitly must be readable back from the plugin.
  test "all params are configured" do
    driver = create_driver(%(
      tag test
      project project-test
      topic topic-test
      subscription subscription-test
      key key-test
      max_messages 1000
      return_immediately true
      pull_interval 2
      pull_threads 3
      attribute_keys attr-test
      enable_rpc true
      rpc_bind 127.0.0.1
      rpc_port 24681
    ))
    plugin = driver.instance

    assert_equal("test", plugin.tag)
    assert_equal("project-test", plugin.project)
    assert_equal("topic-test", plugin.topic)
    assert_equal("subscription-test", plugin.subscription)
    assert_equal("key-test", plugin.key)
    assert_equal(2.0, plugin.pull_interval)
    assert_equal(1000, plugin.max_messages)
    assert_equal(true, plugin.return_immediately)
    assert_equal(3, plugin.pull_threads)
    assert_equal(["attr-test"], plugin.attribute_keys)
    assert_equal(true, plugin.enable_rpc)
    assert_equal("127.0.0.1", plugin.rpc_bind)
    assert_equal(24_681, plugin.rpc_port)
  end

  # With only the required parameters, the optional ones take their defaults.
  test "default values are configured" do
    plugin = create_driver.instance

    assert_equal(5.0, plugin.pull_interval)
    assert_equal(100, plugin.max_messages)
    assert_equal(true, plugin.return_immediately)
    assert_equal(1, plugin.pull_threads)
    assert_equal([], plugin.attribute_keys)
    assert_equal(false, plugin.enable_rpc)
    assert_equal("0.0.0.0", plugin.rpc_bind)
    assert_equal(24_680, plugin.rpc_port)
  end
end
99
+
100
sub_test_case "start" do
  # Google::Cloud::Pubsub.new -> topic("topic-test") -> @topic_mock;
  # each test decides what the subscription lookup on @topic_mock does.
  setup do
    @topic_mock = mock!
    @pubsub_mock = mock!.topic("topic-test").at_least(1) { @topic_mock }
    stub(Google::Cloud::Pubsub).new { @pubsub_mock }
  end

  test "40x error occurred on connecting to Pub/Sub" do
    @topic_mock.subscription("subscription-test").once { raise Google::Cloud::NotFoundError, "TEST" }

    driver = create_driver
    assert_raise(Google::Cloud::NotFoundError) { driver.run {} }
  end

  test "50x error occurred on connecting to Pub/Sub" do
    @topic_mock.subscription("subscription-test").once { raise Google::Cloud::UnavailableError, "TEST" }

    driver = create_driver
    assert_raise(Google::Cloud::UnavailableError) { driver.run {} }
  end

  test "subscription is nil" do
    @topic_mock.subscription("subscription-test").once { nil }

    driver = create_driver
    assert_raise(Fluent::GcloudPubSub::Error) { driver.run {} }
  end
end
138
+
139
+ sub_test_case "emit" do
140
# Stand-in for a message body carrying a small JSON object.
class DummyMsgData
  # @return [String] JSON text with a single "foo" => "bar" pair
  def data
    %({"foo": "bar"})
  end
end
145
+
146
# Stand-in for a received message wrapping DummyMsgData.
class DummyMessage
  # @return [Hash] a fresh attribute hash on every call
  def attributes
    { "attr_1" => "a", "attr_2" => "b" }
  end

  # @return [DummyMsgData] a new body object on every call
  def message
    DummyMsgData.new
  end

  # Shortcut to the payload of the wrapped body.
  def data
    message.data
  end
end
159
+
160
# Stand-in for the body of a compressed batch: the given strings are joined
# with character code 30 and deflated with zlib.
class DummyCompressedMessageData
  attr_reader :data

  # @param messages [Array<String>] serialized records to pack into one payload
  def initialize(messages)
    delimiter = 30.chr
    joined = messages.join(delimiter)
    @data = Zlib::Deflate.deflate(joined)
  end
end
167
+
168
# Stand-in for a received message that carries a zlib-compressed batch.
class DummyCompressedMessage
  attr_reader :message

  # @param messages [Array<String>] serialized records for the batch
  def initialize(messages)
    @message = DummyCompressedMessageData.new(messages)
  end

  # @return [Hash] attributes naming zlib as the compression algorithm
  def attributes
    { "compression_algorithm" => "zlib" }
  end

  # Shortcut to the payload of the wrapped body.
  def data
    message.data
  end
end
183
+
184
# Stand-in for a message body whose JSON also carries a "test_tag_key" field.
class DummyMsgDataWithTagKey
  # @param tag [String] value placed in the "test_tag_key" field
  def initialize(tag)
    @tag = tag
  end

  # @return [String] JSON text embedding the tag given at construction
  def data
    head = '{"foo": "bar", "test_tag_key": "'
    tail = '"}'
    head + @tag + tail
  end
end
193
# Stand-in for a received message wrapping DummyMsgDataWithTagKey.
class DummyMessageWithTagKey
  # @param tag [String] tag forwarded to the wrapped body
  def initialize(tag)
    @tag = tag
  end

  # @return [Hash] a fresh attribute hash on every call
  def attributes
    { "attr_1" => "a", "attr_2" => "b" }
  end

  # @return [DummyMsgDataWithTagKey] a new body object on every call
  def message
    DummyMsgDataWithTagKey.new(@tag)
  end

  # Shortcut to the payload of the wrapped body.
  def data
    message.data
  end
end
210
+
211
# Mock chain used by every emit test:
# Google::Cloud::Pubsub.new -> topic("topic-test") -> subscription("subscription-test") -> @subscriber
setup do
  @subscriber = mock!
  @topic_mock = mock!.subscription("subscription-test") { @subscriber }
  @pubsub_mock = mock!.topic("topic-test") { @topic_mock }

  stub(Google::Cloud::Pubsub).new { @pubsub_mock }
end
217
+
218
# An empty pull result produces no events and no acknowledge call.
test "empty" do
  @subscriber.pull(immediate: true, max: 100).at_least(1) { [] }
  @subscriber.acknowledge.times(0)

  driver = create_driver
  driver.run(expect_emits: 1, timeout: 3)

  assert_true driver.events.empty?
end
227
+
228
# A single pulled message is emitted with the configured tag and acknowledged.
test "simple" do
  pulled = [DummyMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(1) { pulled }
  @subscriber.acknowledge(pulled).at_least(1)

  driver = create_driver
  driver.run(expect_emits: 1, timeout: 3)
  events = driver.events

  assert(events.length >= 1)
  events.each do |tag, _time, record|
    assert_equal("test", tag)
    assert_equal({ "foo" => "bar" }, record)
  end
end
243
+
244
# With two pull threads, at least two pulls/acknowledges and two events occur.
test "multithread" do
  pulled = [DummyMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(2) { pulled }
  @subscriber.acknowledge(pulled).at_least(2)

  driver = create_driver("#{CONFIG}\npull_threads 2")
  driver.run(expect_emits: 2, timeout: 1)
  events = driver.events

  assert(events.length >= 2)
  events.each do |tag, _time, record|
    assert_equal("test", tag)
    assert_equal({ "foo" => "bar" }, record)
  end
end
259
+
260
# With tag_key set, the tag comes from the record's "test_tag_key" field
# (which is absent from the emitted record); records without it keep the
# configured tag.
test "with tag_key" do
  pulled = [
    DummyMessageWithTagKey.new("tag1"),
    DummyMessageWithTagKey.new("tag2"),
    DummyMessage.new,
  ]
  @subscriber.pull(immediate: true, max: 100).at_least(1) { pulled }
  @subscriber.acknowledge(pulled).at_least(1)

  driver = create_driver("#{CONFIG}\ntag_key test_tag_key")
  driver.run(expect_emits: 1, timeout: 3)
  events = driver.events

  assert(events.length >= 3)
  # test tag
  assert_equal("tag1", events[0][0])
  assert_equal("tag2", events[1][0])
  assert_equal("test", events[2][0])
  # test record
  events.each do |_tag, _time, record|
    assert_equal({ "foo" => "bar" }, record)
  end
end
283
+
284
# With the default configuration an unparsable message yields no events and
# is never acknowledged.
test "invalid messages with parse_error_action exception " do
  pulled = [DummyInvalidMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(1) { pulled }
  @subscriber.acknowledge.times(0)

  driver = create_driver
  driver.run(expect_emits: 1, timeout: 3)

  assert_true driver.events.empty?
end
293
+
294
# attribute_keys copies the named message attribute into the emitted record.
test "with attributes" do
  pulled = [DummyMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(1) { pulled }
  @subscriber.acknowledge(pulled).at_least(1)

  driver = create_driver("#{CONFIG}\nattribute_keys attr_1")
  driver.run(expect_emits: 1, timeout: 3)
  events = driver.events

  assert(events.length >= 1)
  events.each do |tag, _time, record|
    assert_equal("test", tag)
    assert_equal({ "foo" => "bar", "attr_1" => "a" }, record)
  end
end
309
+
310
# One compressed Pub/Sub message is expanded back into its individual records.
test "compressed batch of messages" do
  original_messages = [
    { foo: "bar" },
    { baz: "qux" },
  ]
  serialized = original_messages.map(&:to_json)
  pulled = [DummyCompressedMessage.new(serialized)]

  @subscriber.pull(immediate: true, max: 100).once { pulled }
  @subscriber.acknowledge(pulled).at_least(1)

  driver = create_driver
  driver.run(expect_emits: 1, timeout: 3)
  events = driver.events

  # Keep only the record of each event (dropping tag and time) and convert
  # its keys from strings to symbols, to allow for strict comparison.
  output_records = events.map do |_tag, _time, record|
    record.each_with_object({}) { |(key, value), acc| acc[key.to_sym] = value }
  end

  assert_equal(original_messages, output_records)
end
333
+
334
# With parse_error_action warning an unparsable message yields no events but
# is still acknowledged.
test "invalid messages with parse_error_action warning" do
  pulled = [DummyInvalidMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(1) { pulled }
  @subscriber.acknowledge(pulled).at_least(1)

  driver = create_driver("#{CONFIG}\nparse_error_action warning")
  driver.run(expect_emits: 1, timeout: 3)

  assert_true driver.events.empty?
end
343
+
344
# A pull that keeps raising an unknown error is attempted again (at least
# twice within the timeout) and never leads to an acknowledge or an event.
test "retry if raised error" do
  class UnknownError < StandardError
  end
  @subscriber.pull(immediate: true, max: 100).at_least(2) { raise UnknownError, "test" }
  @subscriber.acknowledge.times(0)

  # Put the extra parameter on its own line explicitly, like the sibling
  # tests do, instead of relying on CONFIG ending in a newline.
  d = create_driver("#{CONFIG}\npull_interval 0.5")
  d.run(expect_emits: 1, timeout: 0.8)

  assert_equal(0.5, d.instance.pull_interval)
  assert_true d.events.empty?
end
356
+
357
# A pull that keeps raising UnavailableError is attempted again (at least
# twice within the timeout) and never leads to an acknowledge or an event.
test "retry if raised RetryableError on pull" do
  @subscriber.pull(immediate: true, max: 100).at_least(2) { raise Google::Cloud::UnavailableError, "TEST" }
  @subscriber.acknowledge.times(0)

  driver = create_driver("#{CONFIG}\npull_interval 0.5")
  driver.run(expect_emits: 1, timeout: 0.8)

  assert_equal(0.5, driver.instance.pull_interval)
  assert_true driver.events.empty?
end
367
+
368
# When acknowledge keeps failing, the pulled messages are still emitted.
test "retry if raised RetryableError on acknowledge" do
  pulled = [DummyMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(2) { pulled }
  @subscriber.acknowledge(pulled).at_least(2) { raise Google::Cloud::UnavailableError, "TEST" }

  driver = create_driver("#{CONFIG}\npull_interval 0.5")
  driver.run(expect_emits: 2, timeout: 3)
  events = driver.events

  # not acknowledged, but already emitted to engine.
  assert(events.length >= 2)
  events.each do |tag, _time, record|
    assert_equal("test", tag)
    assert_equal({ "foo" => "bar" }, record)
  end
end
384
+
385
# Calling the stop RPC endpoint sets @stop_pull; exactly one pull, one
# acknowledge and one event are expected.
test "stop by http rpc" do
  pulled = [DummyMessage.new]
  @subscriber.pull(immediate: true, max: 100).once { pulled }
  @subscriber.acknowledge(pulled).once

  driver = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
  plugin = driver.instance
  assert_equal(false, plugin.instance_variable_get(:@stop_pull))

  driver.run do
    http_get("/api/in_gcloud_pubsub/pull/stop")
    sleep 0.75
    # d.run sleeps 0.5 sec
  end
  events = driver.events

  assert_equal(1, events.length)
  assert_true plugin.instance_variable_get(:@stop_pull)

  events.each do |tag, _time, record|
    assert_equal("test", tag)
    assert_equal({ "foo" => "bar" }, record)
  end
end
408
+
409
# Starting from a stopped state, the start RPC endpoint clears @stop_pull
# and events are emitted again.
test "start by http rpc" do
  pulled = [DummyMessage.new]
  @subscriber.pull(immediate: true, max: 100).at_least(1) { pulled }
  @subscriber.acknowledge(pulled).at_least(1)

  driver = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
  plugin = driver.instance
  plugin.stop_pull
  assert_equal(true, plugin.instance_variable_get(:@stop_pull))

  driver.run(expect_emits: 1, timeout: 3) do
    http_get("/api/in_gcloud_pubsub/pull/start")
    sleep 0.75
    # d.run sleeps 0.5 sec
  end
  events = driver.events

  assert_equal(true, !events.empty?)
  assert_false plugin.instance_variable_get(:@stop_pull)

  events.each do |tag, _time, record|
    assert_equal("test", tag)
    assert_equal({ "foo" => "bar" }, record)
  end
end
433
+
434
# The status RPC endpoint reports "started" while pulling is enabled.
test "get status by http rpc when started" do
  driver = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
  assert_false driver.instance.instance_variable_get(:@stop_pull)

  driver.run do
    response = http_get("/api/in_gcloud_pubsub/pull/status")
    status = JSON.parse(response.body)
    assert_equal({ "ok" => true, "status" => "started" }, status)
  end
end
443
+
444
# The status RPC endpoint reports "stopped" after stop_pull was called.
test "get status by http rpc when stopped" do
  driver = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
  plugin = driver.instance
  plugin.stop_pull
  assert_true plugin.instance_variable_get(:@stop_pull)

  driver.run do
    response = http_get("/api/in_gcloud_pubsub/pull/status")
    status = JSON.parse(response.body)
    assert_equal({ "ok" => true, "status" => "stopped" }, status)
  end
end
454
+ end
455
+ end