fluent-plugin-gcloud-pubsub-custom-compress-batches 1.3.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,167 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fluent/plugin/output"
4
+ require "fluent/plugin/gcloud_pubsub/client"
5
+ require "fluent/plugin/gcloud_pubsub/metrics"
6
+ require "fluent/plugin_helper/inject"
7
+ require "prometheus/client"
8
+
9
+ module Fluent::Plugin
10
+ class GcloudPubSubOutput < Output
11
+ include Fluent::PluginHelper::Inject
12
+
13
+ Fluent::Plugin.register_output("gcloud_pubsub", self)
14
+
15
+ helpers :compat_parameters, :formatter
16
+
17
+ DEFAULT_BUFFER_TYPE = "memory"
18
+ DEFAULT_FORMATTER_TYPE = "json"
19
+
20
+ desc "Set your GCP project."
21
+ config_param :project, :string, default: nil
22
+ desc "Set your credential file path."
23
+ config_param :key, :string, default: nil
24
+ desc "Set topic name to publish."
25
+ config_param :topic, :string
26
+ desc "If set to `true`, specified topic will be created when it doesn't exist."
27
+ config_param :autocreate_topic, :bool, default: false
28
+ desc "Publishing messages count per request to Cloud Pub/Sub."
29
+ config_param :max_messages, :integer, default: 1000
30
+ desc "Publishing messages bytesize per request to Cloud Pub/Sub."
31
+ config_param :max_total_size, :integer, default: 9_800_000 # 9.8MB
32
+ desc "Limit bytesize per message."
33
+ config_param :max_message_size, :integer, default: 4_000_000 # 4MB
34
+ desc "Extract these fields from the record and send them as attributes on the Pub/Sub message. " \
35
+ "Cannot be set if compress_batches is enabled."
36
+ config_param :attribute_keys, :array, default: []
37
+ desc "The prefix for Prometheus metric names"
38
+ config_param :metric_prefix, :string, default: "fluentd_output_gcloud_pubsub"
39
+ desc "If set to `true`, messages will be batched and compressed before publication"
40
+ config_param :compress_batches, :bool, default: false
41
+
42
+ config_section :buffer do
43
+ config_set_default :@type, DEFAULT_BUFFER_TYPE
44
+ end
45
+
46
+ config_section :format do
47
+ config_set_default :@type, DEFAULT_FORMATTER_TYPE
48
+ end
49
+
50
+ # rubocop:disable Metrics/MethodLength
51
# Applies the plugin configuration and registers the Prometheus metrics.
#
# Steps, in order: convert legacy buffer/formatter parameters, run the parent
# configuration, validate the placeholders used in the topic, build the
# formatter, reject the attribute_keys + compress_batches combination, then
# register (or reuse) two histograms and one gauge named after metric_prefix.
#
# conf - the configuration element handed to the plugin.
#
# Raises Fluent::ConfigError when attribute_keys is set while compress_batches
# is enabled.
def configure(conf)
  compat_parameters_convert(conf, :buffer, :formatter)
  super
  placeholder_validate!(:topic, @topic)
  @formatter = formatter_create

  # The attribute_keys option works by lifting keys out of each record and
  # setting them on that record's Pub/Sub message. In compressed mode a single
  # Pub/Sub message carries many records, so those attributes would clash.
  raise Fluent::ConfigError, ":attribute_keys cannot be used when compression is enabled" if @compress_batches && !@attribute_keys.empty?

  batch_metric = :"#{@metric_prefix}_messages_published_per_batch"
  @messages_published = Fluent::GcloudPubSub::Metrics.register_or_existing(batch_metric) do
    ::Prometheus::Client.registry.histogram(
      batch_metric,
      "Number of records published to Pub/Sub per buffer flush",
      {},
      [1, 10, 50, 100, 250, 500, 1000],
    )
  end

  bytes_metric = :"#{@metric_prefix}_messages_published_bytes"
  @bytes_published = Fluent::GcloudPubSub::Metrics.register_or_existing(bytes_metric) do
    ::Prometheus::Client.registry.histogram(
      bytes_metric,
      "Total size in bytes of the records published to Pub/Sub",
      {},
      [100, 1000, 10_000, 100_000, 1_000_000, 5_000_000, 10_000_000],
    )
  end

  compression_metric = :"#{@metric_prefix}_compression_enabled"
  @compression_enabled = Fluent::GcloudPubSub::Metrics.register_or_existing(compression_metric) do
    ::Prometheus::Client.registry.gauge(
      compression_metric,
      "Whether compression/batching is enabled",
      {},
    )
  end
  # Expose the mode as a 0/1 gauge.
  @compression_enabled.set(common_labels, @compress_batches ? 1 : 0)
end
96
+ # rubocop:enable Metrics/MethodLength
97
+
98
# Runs the parent start hook, then builds the publisher that #publish sends
# batches through, using the configured project, key, autocreate_topic and
# metric_prefix.
def start
  super
  @publisher = Fluent::GcloudPubSub::Publisher.new(@project, @key, @autocreate_topic, @metric_prefix)
end
102
+
103
# Serialises one event into the buffer as a msgpack-encoded pair
# [formatted payload, attributes].
#
# Every key listed in attribute_keys is removed from the record and moved into
# the attributes hash; a key the record does not contain ends up with a nil
# value. The remaining record is rendered by the configured formatter.
#
# tag    - the event tag.
# time   - the event time.
# record - the event record.
#
# Returns the msgpack-encoded pair.
def format(tag, time, record)
  injected = inject_values_to_record(tag, time, record)
  extracted = @attribute_keys.each_with_object({}) do |name, attrs|
    attrs[name] = injected.delete(name)
  end
  [@formatter.format(tag, time, injected), extracted].to_msgpack
end
111
+
112
# Always answers true: #format emits msgpack-encoded data, and #write reads
# the chunk back with msgpack_each.
def formatted_to_msgpack_binary?
  true
end
115
+
116
# Always answers true.
# NOTE(review): presumably this tells Fluentd the plugin may run under
# multiple workers; confirm against the Fluentd plugin API.
def multi_workers_ready?
  true
end
119
+
120
# Publishes the entries of one buffer chunk to Pub/Sub in bounded batches.
#
# Each chunk entry is wrapped in a Fluent::GcloudPubSub::Message. Entries
# larger than max_message_size are dropped with a warning. The rest are
# grouped so that a batch holds at most max_messages entries and
# max_total_size bytes; each full batch, and the final partial one, is handed
# to #publish.
#
# chunk - the buffer chunk to flush; its metadata resolves the topic
#         placeholders.
#
# Raises Fluent::GcloudPubSub::RetryableError (re-raised after a warning) and
# any other StandardError (re-raised after being logged with its backtrace).
def write(chunk)
  topic = extract_placeholders(@topic, chunk.metadata)

  batch = []
  batch_bytes = 0

  chunk.msgpack_each do |payload, attributes|
    message = Fluent::GcloudPubSub::Message.new(payload, attributes)
    if message.bytesize > @max_message_size
      log.warn "Drop a message because its size exceeds `max_message_size`", size: message.bytesize
      next
    end

    over_limit = batch.length + 1 > @max_messages || batch_bytes + message.bytesize > @max_total_size
    # Flush only when there is something to flush. If the very first message
    # already exceeds a limit (e.g. max_total_size is configured below the
    # message size), flushing here would publish an empty batch.
    if over_limit && !batch.empty?
      publish(topic, batch)
      batch = []
      batch_bytes = 0
    end

    batch << message
    batch_bytes += message.bytesize
  end

  publish(topic, batch) unless batch.empty?
rescue Fluent::GcloudPubSub::RetryableError => e
  log.warn "Retryable error occurs. Fluentd will retry.", error_message: e.to_s, error_class: e.class.to_s
  raise
rescue StandardError => e
  log.error "unexpected error", error_message: e.to_s, error_class: e.class.to_s
  log.error_backtrace
  raise
end
150
+
151
+ private
152
+
153
# Records batch metrics and sends one batch of messages to the publisher.
#
# topic    - the resolved topic name to publish to.
# messages - the messages of the batch; each must respond to bytesize.
#
# An empty batch is a no-op: there is nothing to send, and summing the sizes
# of no messages with a seedless inject would yield nil for the bytes
# histogram.
def publish(topic, messages)
  return if messages.empty?

  size = messages.inject(0) { |total, message| total + message.bytesize }
  log.debug "send message topic:#{topic} length:#{messages.length} size:#{size}"

  @messages_published.observe(common_labels, messages.length)
  @bytes_published.observe(common_labels, size)

  @publisher.publish(topic, messages, @compress_batches)
end
162
+
163
# Label set attached to every metric observation of this plugin.
# The label carries the configured topic string (@topic), not the
# placeholder-expanded name that #write resolves per chunk.
def common_labels
  { topic: @topic }
end
166
+ end
167
+ end
@@ -0,0 +1,455 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "net/http"
4
+ require "json"
5
+
6
+ require_relative "../test_helper"
7
+ require "fluent/test/driver/input"
8
+
9
+ class GcloudPubSubInputTest < Test::Unit::TestCase
10
+ CONFIG = %(
11
+ tag test
12
+ project project-test
13
+ topic topic-test
14
+ subscription subscription-test
15
+ key key-test
16
+ )
17
+
18
+ DEFAULT_HOST = "127.0.0.1"
19
+ DEFAULT_PORT = 24_680
20
+
21
# Test double for a message payload whose data is not valid JSON.
class DummyInvalidMsgData
  # Returns the fixed, non-JSON payload string.
  def data
    "foo:bar"
  end
end
26
# Test double for a received message that wraps a DummyInvalidMsgData payload.
class DummyInvalidMessage
  # Returns a fresh hash of the two fixed attributes on every call.
  def attributes
    { "attr_1" => "a", "attr_2" => "b" }
  end

  # Returns a new payload object on every call.
  def message
    DummyInvalidMsgData.new
  end

  # Shortcut to the payload's data string.
  def data
    message.data
  end
end
39
+
40
# Builds an input test driver for GcloudPubSubInput and configures it.
#
# conf - the configuration text; defaults to CONFIG.
#
# Returns whatever the driver's configure call returns.
def create_driver(conf = CONFIG)
  driver = Fluent::Test::Driver::Input.new(Fluent::Plugin::GcloudPubSubInput)
  driver.configure(conf)
end
43
+
44
# Issues a GET request for the given path against DEFAULT_HOST:DEFAULT_PORT.
#
# path - the request path, e.g. an RPC endpoint of the plugin.
#
# Returns the HTTP response object.
def http_get(path)
  headers = { "Content-Type" => "application/x-www-form-urlencoded" }
  request = Net::HTTP::Get.new(path, headers)
  Net::HTTP.new(DEFAULT_HOST, DEFAULT_PORT).request(request)
end
49
+
50
+ setup do
51
+ Fluent::Test.setup
52
+ end
53
+
54
+ sub_test_case "configure" do
55
+ test "all params are configured" do
56
+ d = create_driver(%(
57
+ tag test
58
+ project project-test
59
+ topic topic-test
60
+ subscription subscription-test
61
+ key key-test
62
+ max_messages 1000
63
+ return_immediately true
64
+ pull_interval 2
65
+ pull_threads 3
66
+ attribute_keys attr-test
67
+ enable_rpc true
68
+ rpc_bind 127.0.0.1
69
+ rpc_port 24681
70
+ ))
71
+
72
+ assert_equal("test", d.instance.tag)
73
+ assert_equal("project-test", d.instance.project)
74
+ assert_equal("topic-test", d.instance.topic)
75
+ assert_equal("subscription-test", d.instance.subscription)
76
+ assert_equal("key-test", d.instance.key)
77
+ assert_equal(2.0, d.instance.pull_interval)
78
+ assert_equal(1000, d.instance.max_messages)
79
+ assert_equal(true, d.instance.return_immediately)
80
+ assert_equal(3, d.instance.pull_threads)
81
+ assert_equal(["attr-test"], d.instance.attribute_keys)
82
+ assert_equal(true, d.instance.enable_rpc)
83
+ assert_equal("127.0.0.1", d.instance.rpc_bind)
84
+ assert_equal(24_681, d.instance.rpc_port)
85
+ end
86
+
87
+ test "default values are configured" do
88
+ d = create_driver
89
+ assert_equal(5.0, d.instance.pull_interval)
90
+ assert_equal(100, d.instance.max_messages)
91
+ assert_equal(true, d.instance.return_immediately)
92
+ assert_equal(1, d.instance.pull_threads)
93
+ assert_equal([], d.instance.attribute_keys)
94
+ assert_equal(false, d.instance.enable_rpc)
95
+ assert_equal("0.0.0.0", d.instance.rpc_bind)
96
+ assert_equal(24_680, d.instance.rpc_port)
97
+ end
98
+ end
99
+
100
+ sub_test_case "start" do
101
+ setup do
102
+ @topic_mock = mock!
103
+ @pubsub_mock = mock!.topic("topic-test").at_least(1) { @topic_mock }
104
+ stub(Google::Cloud::Pubsub).new { @pubsub_mock }
105
+ end
106
+
107
+ test "40x error occurred on connecting to Pub/Sub" do
108
+ @topic_mock.subscription("subscription-test").once do
109
+ raise Google::Cloud::NotFoundError, "TEST"
110
+ end
111
+
112
+ d = create_driver
113
+ assert_raise Google::Cloud::NotFoundError do
114
+ d.run {}
115
+ end
116
+ end
117
+
118
+ test "50x error occurred on connecting to Pub/Sub" do
119
+ @topic_mock.subscription("subscription-test").once do
120
+ raise Google::Cloud::UnavailableError, "TEST"
121
+ end
122
+
123
+ d = create_driver
124
+ assert_raise Google::Cloud::UnavailableError do
125
+ d.run {}
126
+ end
127
+ end
128
+
129
+ test "subscription is nil" do
130
+ @topic_mock.subscription("subscription-test").once { nil }
131
+
132
+ d = create_driver
133
+ assert_raise Fluent::GcloudPubSub::Error do
134
+ d.run {}
135
+ end
136
+ end
137
+ end
138
+
139
+ sub_test_case "emit" do
140
# Test double for a message payload holding one small JSON object.
class DummyMsgData
  # Returns the fixed JSON document as a string.
  def data
    '{"foo": "bar"}'
  end
end
145
+
146
# Test double for a received message that wraps a DummyMsgData payload.
class DummyMessage
  # Returns a fresh hash of the two fixed attributes on every call.
  def attributes
    { "attr_1" => "a", "attr_2" => "b" }
  end

  # Returns a new payload object on every call.
  def message
    DummyMsgData.new
  end

  # Shortcut to the payload's data string.
  def data
    message.data
  end
end
159
+
160
# Test double for the payload of a compressed batch: the given messages joined
# with byte 30 (the ASCII record separator) and deflated with zlib.
class DummyCompressedMessageData
  attr_reader :data

  # messages - the already-serialised messages to pack into one payload.
  def initialize(messages)
    joined = messages.join(30.chr)
    @data = Zlib::Deflate.deflate(joined)
  end
end
167
+
168
# Test double for a received message carrying a zlib-compressed batch.
class DummyCompressedMessage
  attr_reader :message

  # messages - the serialised messages to pack; the payload is built once here.
  def initialize(messages)
    @message = DummyCompressedMessageData.new(messages)
  end

  # Marks the payload as zlib-compressed.
  def attributes
    { "compression_algorithm" => "zlib" }
  end

  # Shortcut to the compressed payload bytes.
  def data
    message.data
  end
end
183
+
184
# Test double for a message payload whose JSON document carries a tag under
# the "test_tag_key" field.
class DummyMsgDataWithTagKey
  # tag - the value to embed under "test_tag_key".
  def initialize(tag)
    @tag = tag
  end

  # Returns the JSON document as a string. The tag is interpolated verbatim
  # (no JSON escaping), so it must not contain characters that need escaping.
  def data
    %({"foo": "bar", "test_tag_key": "#{@tag}"})
  end
end
193
# Test double for a received message whose payload embeds a tag.
class DummyMessageWithTagKey
  # tag - the tag passed on to the payload object.
  def initialize(tag)
    @tag = tag
  end

  # Returns a fresh hash of the two fixed attributes on every call.
  def attributes
    { "attr_1" => "a", "attr_2" => "b" }
  end

  # Returns a new payload object for the stored tag on every call.
  def message
    DummyMsgDataWithTagKey.new(@tag)
  end

  # Shortcut to the payload's data string.
  def data
    message.data
  end
end
210
+
211
+ setup do
212
+ @subscriber = mock!
213
+ @topic_mock = mock!.subscription("subscription-test") { @subscriber }
214
+ @pubsub_mock = mock!.topic("topic-test") { @topic_mock }
215
+ stub(Google::Cloud::Pubsub).new { @pubsub_mock }
216
+ end
217
+
218
+ test "empty" do
219
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { [] }
220
+ @subscriber.acknowledge.times(0)
221
+
222
+ d = create_driver
223
+ d.run(expect_emits: 1, timeout: 3)
224
+
225
+ assert_true d.events.empty?
226
+ end
227
+
228
+ test "simple" do
229
+ messages = Array.new(1, DummyMessage.new)
230
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { messages }
231
+ @subscriber.acknowledge(messages).at_least(1)
232
+
233
+ d = create_driver
234
+ d.run(expect_emits: 1, timeout: 3)
235
+ emits = d.events
236
+
237
+ assert(emits.length >= 1)
238
+ emits.each do |tag, _time, record|
239
+ assert_equal("test", tag)
240
+ assert_equal({ "foo" => "bar" }, record)
241
+ end
242
+ end
243
+
244
+ test "multithread" do
245
+ messages = Array.new(1, DummyMessage.new)
246
+ @subscriber.pull(immediate: true, max: 100).at_least(2) { messages }
247
+ @subscriber.acknowledge(messages).at_least(2)
248
+
249
+ d = create_driver("#{CONFIG}\npull_threads 2")
250
+ d.run(expect_emits: 2, timeout: 1)
251
+ emits = d.events
252
+
253
+ assert(emits.length >= 2)
254
+ emits.each do |tag, _time, record|
255
+ assert_equal("test", tag)
256
+ assert_equal({ "foo" => "bar" }, record)
257
+ end
258
+ end
259
+
260
+ test "with tag_key" do
261
+ messages = [
262
+ DummyMessageWithTagKey.new("tag1"),
263
+ DummyMessageWithTagKey.new("tag2"),
264
+ DummyMessage.new,
265
+ ]
266
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { messages }
267
+ @subscriber.acknowledge(messages).at_least(1)
268
+
269
+ d = create_driver("#{CONFIG}\ntag_key test_tag_key")
270
+ d.run(expect_emits: 1, timeout: 3)
271
+ emits = d.events
272
+
273
+ assert(emits.length >= 3)
274
+ # test tag
275
+ assert_equal("tag1", emits[0][0])
276
+ assert_equal("tag2", emits[1][0])
277
+ assert_equal("test", emits[2][0])
278
+ # test record
279
+ emits.each do |_tag, _time, record|
280
+ assert_equal({ "foo" => "bar" }, record)
281
+ end
282
+ end
283
+
284
+ test "invalid messages with parse_error_action exception " do
285
+ messages = Array.new(1, DummyInvalidMessage.new)
286
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { messages }
287
+ @subscriber.acknowledge.times(0)
288
+
289
+ d = create_driver
290
+ d.run(expect_emits: 1, timeout: 3)
291
+ assert_true d.events.empty?
292
+ end
293
+
294
+ test "with attributes" do
295
+ messages = Array.new(1, DummyMessage.new)
296
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { messages }
297
+ @subscriber.acknowledge(messages).at_least(1)
298
+
299
+ d = create_driver("#{CONFIG}\nattribute_keys attr_1")
300
+ d.run(expect_emits: 1, timeout: 3)
301
+ emits = d.events
302
+
303
+ assert(emits.length >= 1)
304
+ emits.each do |tag, _time, record|
305
+ assert_equal("test", tag)
306
+ assert_equal({ "foo" => "bar", "attr_1" => "a" }, record)
307
+ end
308
+ end
309
+
310
+ test "compressed batch of messages" do
311
+ original_messages = [
312
+ { foo: "bar" },
313
+ { baz: "qux" },
314
+ ]
315
+ messages = Array.new(1, DummyCompressedMessage.new(original_messages.map(&:to_json)))
316
+
317
+ @subscriber.pull(immediate: true, max: 100).once { messages }
318
+ @subscriber.acknowledge(messages).at_least(1)
319
+
320
+ d = create_driver
321
+ d.run(expect_emits: 1, timeout: 3)
322
+ emits = d.events
323
+
324
+ output_records = emits.map do |e|
325
+ # Pick out only the record element, i.e. ignore the time and tag
326
+ record = e[2]
327
+ # Convert the keys from strings to symbols, to allow for strict comparison
328
+ record.map { |k, v| [k.to_sym, v] }.to_h
329
+ end
330
+
331
+ assert_equal(original_messages, output_records)
332
+ end
333
+
334
+ test "invalid messages with parse_error_action warning" do
335
+ messages = Array.new(1, DummyInvalidMessage.new)
336
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { messages }
337
+ @subscriber.acknowledge(messages).at_least(1)
338
+
339
+ d = create_driver("#{CONFIG}\nparse_error_action warning")
340
+ d.run(expect_emits: 1, timeout: 3)
341
+ assert_true d.events.empty?
342
+ end
343
+
344
# Pulling keeps failing with an error class the plugin does not know about:
# pull must be attempted at least twice, nothing may be acknowledged, and no
# event may be emitted.
test "retry if raised error" do
  # Anonymous class, so the test does not define a constant inside a block.
  unknown_error = Class.new(StandardError)
  @subscriber.pull(immediate: true, max: 100).at_least(2) { raise unknown_error, "test" }
  @subscriber.acknowledge.times(0)

  # Explicit newline separator, as in the sibling tests, instead of relying on
  # CONFIG ending with a line break.
  d = create_driver("#{CONFIG}\npull_interval 0.5")
  d.run(expect_emits: 1, timeout: 0.8)

  assert_equal(0.5, d.instance.pull_interval)
  assert_true d.events.empty?
end
356
+
357
+ test "retry if raised RetryableError on pull" do
358
+ @subscriber.pull(immediate: true, max: 100).at_least(2) { raise Google::Cloud::UnavailableError, "TEST" }
359
+ @subscriber.acknowledge.times(0)
360
+
361
+ d = create_driver("#{CONFIG}\npull_interval 0.5")
362
+ d.run(expect_emits: 1, timeout: 0.8)
363
+
364
+ assert_equal(0.5, d.instance.pull_interval)
365
+ assert_true d.events.empty?
366
+ end
367
+
368
+ test "retry if raised RetryableError on acknowledge" do
369
+ messages = Array.new(1, DummyMessage.new)
370
+ @subscriber.pull(immediate: true, max: 100).at_least(2) { messages }
371
+ @subscriber.acknowledge(messages).at_least(2) { raise Google::Cloud::UnavailableError, "TEST" }
372
+
373
+ d = create_driver("#{CONFIG}\npull_interval 0.5")
374
+ d.run(expect_emits: 2, timeout: 3)
375
+ emits = d.events
376
+
377
+ # not acknowledged, but already emitted to engine.
378
+ assert(emits.length >= 2)
379
+ emits.each do |tag, _time, record|
380
+ assert_equal("test", tag)
381
+ assert_equal({ "foo" => "bar" }, record)
382
+ end
383
+ end
384
+
385
+ test "stop by http rpc" do
386
+ messages = Array.new(1, DummyMessage.new)
387
+ @subscriber.pull(immediate: true, max: 100).once { messages }
388
+ @subscriber.acknowledge(messages).once
389
+
390
+ d = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
391
+ assert_equal(false, d.instance.instance_variable_get(:@stop_pull))
392
+
393
+ d.run do
394
+ http_get("/api/in_gcloud_pubsub/pull/stop")
395
+ sleep 0.75
396
+ # d.run sleeps 0.5 sec
397
+ end
398
+ emits = d.events
399
+
400
+ assert_equal(1, emits.length)
401
+ assert_true d.instance.instance_variable_get(:@stop_pull)
402
+
403
+ emits.each do |tag, _time, record|
404
+ assert_equal("test", tag)
405
+ assert_equal({ "foo" => "bar" }, record)
406
+ end
407
+ end
408
+
409
+ test "start by http rpc" do
410
+ messages = Array.new(1, DummyMessage.new)
411
+ @subscriber.pull(immediate: true, max: 100).at_least(1) { messages }
412
+ @subscriber.acknowledge(messages).at_least(1)
413
+
414
+ d = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
415
+ d.instance.stop_pull
416
+ assert_equal(true, d.instance.instance_variable_get(:@stop_pull))
417
+
418
+ d.run(expect_emits: 1, timeout: 3) do
419
+ http_get("/api/in_gcloud_pubsub/pull/start")
420
+ sleep 0.75
421
+ # d.run sleeps 0.5 sec
422
+ end
423
+ emits = d.events
424
+
425
+ assert_equal(true, !emits.empty?)
426
+ assert_false d.instance.instance_variable_get(:@stop_pull)
427
+
428
+ emits.each do |tag, _time, record|
429
+ assert_equal("test", tag)
430
+ assert_equal({ "foo" => "bar" }, record)
431
+ end
432
+ end
433
+
434
+ test "get status by http rpc when started" do
435
+ d = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
436
+ assert_false d.instance.instance_variable_get(:@stop_pull)
437
+
438
+ d.run do
439
+ res = http_get("/api/in_gcloud_pubsub/pull/status")
440
+ assert_equal({ "ok" => true, "status" => "started" }, JSON.parse(res.body))
441
+ end
442
+ end
443
+
444
+ test "get status by http rpc when stopped" do
445
+ d = create_driver("#{CONFIG}\npull_interval 1.0\nenable_rpc true")
446
+ d.instance.stop_pull
447
+ assert_true d.instance.instance_variable_get(:@stop_pull)
448
+
449
+ d.run do
450
+ res = http_get("/api/in_gcloud_pubsub/pull/status")
451
+ assert_equal({ "ok" => true, "status" => "stopped" }, JSON.parse(res.body))
452
+ end
453
+ end
454
+ end
455
+ end