fluent-plugin-gcloud-pubsub-custom-compress-batches 1.3.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +46 -0
- data/.rubocop_todo.yml +98 -0
- data/.travis.yml +26 -0
- data/CHANGELOG.md +143 -0
- data/Gemfile +5 -0
- data/LICENSE +22 -0
- data/README.md +250 -0
- data/Rakefile +14 -0
- data/fluent-plugin-gcloud-pubsub-custom.gemspec +32 -0
- data/lib/fluent/plugin/gcloud_pubsub/client.rb +173 -0
- data/lib/fluent/plugin/gcloud_pubsub/metrics.rb +24 -0
- data/lib/fluent/plugin/in_gcloud_pubsub.rb +303 -0
- data/lib/fluent/plugin/out_gcloud_pubsub.rb +167 -0
- data/test/plugin/test_in_gcloud_pubsub.rb +455 -0
- data/test/plugin/test_out_gcloud_pubsub.rb +308 -0
- data/test/test_helper.rb +33 -0
- metadata +208 -0
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "bundler"
|
4
|
+
Bundler::GemHelper.install_tasks
|
5
|
+
|
6
|
+
require "rake/testtask"
|
7
|
+
|
8
|
+
Rake::TestTask.new(:test) do |test|
|
9
|
+
test.libs << "lib" << "test"
|
10
|
+
test.test_files = FileList["test/plugin/test_*.rb"]
|
11
|
+
test.verbose = true
|
12
|
+
end
|
13
|
+
|
14
|
+
task default: [:build]
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
$LOAD_PATH.push File.expand_path("lib", __dir__)
|
4
|
+
|
5
|
+
Gem::Specification.new do |gem|
|
6
|
+
gem.name = "fluent-plugin-gcloud-pubsub-custom-compress-batches"
|
7
|
+
gem.description = "Google Cloud Pub/Sub input/output plugin for Fluentd event collector - with payload compression. Forked from https://github.com/gocardless/fluent-plugin-gcloud-pubsub-custom"
|
8
|
+
gem.license = "MIT"
|
9
|
+
gem.homepage = "https://github.com/calvinaditya95/fluent-plugin-gcloud-pubsub-custom"
|
10
|
+
gem.summary = "Google Cloud Pub/Sub input/output plugin for Fluentd event collector - with payload compression"
|
11
|
+
gem.version = "1.3.4"
|
12
|
+
gem.authors = ["Calvin Aditya"]
|
13
|
+
gem.email = "calvin.aditya95@gmail.com"
|
14
|
+
gem.files = `git ls-files`.split("\n")
|
15
|
+
gem.test_files = `git ls-files -- {test,spec,features}/*`.split("\n")
|
16
|
+
gem.executables = `git ls-files -- bin/*`.split("\n").map { |f| File.basename(f) }
|
17
|
+
gem.require_paths = ["lib"]
|
18
|
+
|
19
|
+
gem.add_runtime_dependency "fluentd", [">= 0.14.15", "< 2"]
|
20
|
+
gem.add_runtime_dependency "google-cloud-pubsub", "~> 0.30.0"
|
21
|
+
|
22
|
+
# Use the same version constraint as fluent-plugin-prometheus currently specifies
|
23
|
+
gem.add_runtime_dependency "prometheus-client", "< 0.10"
|
24
|
+
|
25
|
+
gem.add_development_dependency "bundler"
|
26
|
+
gem.add_development_dependency "pry"
|
27
|
+
gem.add_development_dependency "pry-byebug"
|
28
|
+
gem.add_development_dependency "rake"
|
29
|
+
gem.add_development_dependency "rubocop", "~>0.83"
|
30
|
+
gem.add_development_dependency "test-unit"
|
31
|
+
gem.add_development_dependency "test-unit-rr"
|
32
|
+
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "google/cloud/pubsub"
|
4
|
+
require "zlib"
|
5
|
+
|
6
|
+
module Fluent
|
7
|
+
module GcloudPubSub
|
8
|
+
# Base class for the errors raised by the Pub/Sub client wrappers in this
# module (for example when a topic or subscription does not exist).
class Error < StandardError; end

# Wraps the Google API errors (unavailable, deadline exceeded, internal)
# that the publisher and subscriber rescue and re-raise.
class RetryableError < Error; end
|
12
|
+
|
13
|
+
COMPRESSION_ALGORITHM_ZLIB = "zlib"
|
14
|
+
# 30 is the ASCII record separator character
|
15
|
+
BATCHED_RECORD_SEPARATOR = 30.chr
|
16
|
+
|
17
|
+
# A payload plus its attribute hash, as handed to the publisher.
class Message
  attr_reader :message, :attributes

  # @param message [String] the message payload
  # @param attributes [Hash] attribute names mapped to values
  def initialize(message, attributes = {})
    @message = message
    @attributes = attributes
  end

  # Size in bytes of the payload plus every attribute key and value.
  #
  # Keys and values are stringified before being measured so that
  # non-String entries (symbols, integers, nil, ...) contribute the size of
  # their string form instead of raising NoMethodError.
  #
  # @return [Integer]
  def bytesize
    attr_size = @attributes.sum { |key, val| key.to_s.bytesize + val.to_s.bytesize }
    @message.bytesize + attr_size
  end
end
|
33
|
+
|
34
|
+
# Publishes batches of messages to Pub/Sub topics, either one Pub/Sub
# message per record or as a single zlib-compressed message per batch.
class Publisher
  # @param project [String] passed to the Pub/Sub client as project_id
  # @param key passed to the Pub/Sub client as credentials
  # @param autocreate_topic [Boolean] create topics that cannot be found
  # @param metric_prefix [String] prefix for the Prometheus metric names
  def initialize(project, key, autocreate_topic, metric_prefix)
    @pubsub = Google::Cloud::Pubsub.new project_id: project, credentials: key
    @autocreate_topic = autocreate_topic
    @topics = {}

    # rubocop:disable Layout/LineLength
    @compression_ratio = register_histogram(
      :"#{metric_prefix}_messages_compressed_size_per_original_size_ratio",
      "Compression ratio achieved on a batch of messages",
      # We expect compression for even a single message to be typically
      # above 2x (0.5/50%), so bias the buckets towards the higher end
      # of the range.
      [0, 0.25, 0.5, 0.75, 0.85, 0.9, 0.95, 0.975, 1],
    )

    @compression_duration = register_histogram(
      :"#{metric_prefix}_messages_compression_duration_seconds",
      "Time taken to compress a batch of messages",
      [0, 0.0001, 0.0005, 0.001, 0.01, 0.05, 0.1, 0.25, 0.5, 1],
    )
    # rubocop:enable Layout/LineLength
  end

  # Looks up a topic client, caching it per topic name. A missing topic is
  # created when autocreate is enabled.
  #
  # @raise [Error] when the topic is missing and was not created
  def topic(topic_name)
    @topics.fetch(topic_name) do
      found = @pubsub.topic(topic_name)
      found = @pubsub.create_topic(topic_name) if found.nil? && @autocreate_topic
      raise Error, "topic:#{topic_name} does not exist." if found.nil?

      @topics[topic_name] = found
    end
  end

  # Publishes +messages+ to +topic_name+. With +compress_batches+ the whole
  # batch is sent as one compressed message; otherwise each message is
  # published with its own attributes inside a single batch call.
  #
  # @raise [RetryableError] on unavailable / deadline exceeded / internal
  #   errors from the Google client
  def publish(topic_name, messages, compress_batches = false)
    target = topic(topic_name)

    if compress_batches
      payload, attributes = compress_messages_with_zlib(messages, topic_name)
      target.publish(payload, attributes)
    else
      target.publish do |batch|
        messages.each { |m| batch.publish m.message, m.attributes }
      end
    end
  rescue Google::Cloud::UnavailableError, Google::Cloud::DeadlineExceededError, Google::Cloud::InternalError => e
    raise RetryableError, "Google api returns error:#{e.class} message:#{e}"
  end

  private

  # Returns the histogram already registered under +name+, or registers a
  # new one with no base labels and the given buckets.
  def register_histogram(name, docstring, buckets)
    Fluent::GcloudPubSub::Metrics.register_or_existing(name) do
      ::Prometheus::Client.registry.histogram(name, docstring, {}, buckets)
    end
  end

  # Builds a fresh label hash for every metric observation.
  def compression_labels(topic_name)
    { topic: topic_name, algorithm: COMPRESSION_ALGORITHM_ZLIB }
  end

  # Joins the payloads with the record separator, deflates the result and
  # records the compression duration and ratio.
  #
  # @return [Array] the compressed payload and the attributes marking the
  #   compression algorithm
  def compress_messages_with_zlib(messages, topic_name)
    original_size = messages.sum(&:bytesize)
    # This should never happen: only a programming error or a major
    # misconfiguration leads here. Rejecting it up front also avoids a
    # division by zero in the ratio below.
    raise ArgumentError, "not compressing empty inputs" if original_size.zero?

    # The 'attributes' of the individual messages are dropped here. That
    # is fine because the :attribute_keys config param is not supported in
    # compressed mode, so the field is always empty.
    packed = messages.map(&:message).join(BATCHED_RECORD_SEPARATOR)

    duration, compressed = Fluent::GcloudPubSub::Metrics.measure_duration do
      Zlib::Deflate.deflate(packed)
    end
    @compression_duration.observe(compression_labels(topic_name), duration)

    # If original = 1MiB and compressed = 256KiB; then metric value = 0.75 = 75% when plotted
    saved_fraction = 1 - compressed.bytesize.to_f / original_size
    @compression_ratio.observe(compression_labels(topic_name), saved_fraction)

    [compressed, { "compression_algorithm": COMPRESSION_ALGORITHM_ZLIB }]
  end
end
|
126
|
+
|
127
|
+
# Pulls and acknowledges messages on a single Pub/Sub subscription.
class Subscriber
  # Resolves the subscription, either directly on the project or through
  # the named topic when +topic_name+ is given.
  #
  # @param project [String] passed to the Pub/Sub client as project_id
  # @param key passed to the Pub/Sub client as credentials
  # @param topic_name [String, nil] optional topic owning the subscription
  # @param subscription_name [String]
  # @raise [Error] when the topic or the subscription does not exist
  def initialize(project, key, topic_name, subscription_name)
    pubsub = Google::Cloud::Pubsub.new project_id: project, credentials: key
    if topic_name.nil?
      @client = pubsub.subscription subscription_name
    else
      topic = pubsub.topic topic_name
      # A missing topic comes back as nil; report it explicitly instead of
      # failing with NoMethodError on the subscription lookup below.
      raise Error, "topic:#{topic_name} does not exist." if topic.nil?

      @client = topic.subscription subscription_name
    end
    raise Error, "subscription:#{subscription_name} does not exist." if @client.nil?
  end

  # Pulls up to +max+ messages from the subscription.
  #
  # @raise [RetryableError] on unavailable / deadline exceeded / internal
  #   errors from the Google client
  def pull(immediate, max)
    @client.pull immediate: immediate, max: max
  rescue Google::Cloud::UnavailableError, Google::Cloud::DeadlineExceededError, Google::Cloud::InternalError => e
    raise RetryableError, "Google pull api returns error:#{e.class} message:#{e}"
  end

  # Acknowledges the given messages on the subscription.
  #
  # @raise [RetryableError] on unavailable / deadline exceeded / internal
  #   errors from the Google client
  def acknowledge(messages)
    @client.acknowledge messages
  rescue Google::Cloud::UnavailableError, Google::Cloud::DeadlineExceededError, Google::Cloud::InternalError => e
    raise RetryableError, "Google acknowledge api returns error:#{e.class} message:#{e}"
  end
end
|
151
|
+
|
152
|
+
# Turns one received Pub/Sub message into a list of [line, attributes]
# pairs, undoing batch compression when the message is marked with it.
class MessageUnpacker
  # @param message a received message exposing #attributes and
  #   #message.data
  # @return [Array<Array>] [line, attributes] pairs
  # @raise [ArgumentError] for an unrecognised compression algorithm
  def self.unpack(message)
    attributes = message.attributes
    algorithm = attributes["compression_algorithm"]

    if algorithm.nil?
      # Uncompressed: a single line, keeping the message's own attributes.
      [[message.message.data.chomp, attributes]]
    elsif algorithm == COMPRESSION_ALGORITHM_ZLIB
      # Compressed batch: every packed line, each with empty attributes.
      lines = Zlib::Inflate.inflate(message.message.data).split(BATCHED_RECORD_SEPARATOR)
      lines.map { |line| [line, {}] }
    else
      raise ArgumentError, "unknown compression algorithm: '#{algorithm}'"
    end
  end
end
|
172
|
+
end
|
173
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Fluent
|
4
|
+
module GcloudPubSub
|
5
|
+
# Helpers for working with Prometheus metrics.
module Metrics
  class << self
    # Returns the metric already registered under +metric_name+; when none
    # is registered, yields and returns the block's value instead.
    def register_or_existing(metric_name)
      registry = ::Prometheus::Client.registry
      if registry.exist?(metric_name)
        registry.get(metric_name)
      else
        yield
      end
    end

    # Times the block on the monotonic clock and returns an array with the
    # elapsed seconds first, followed by the block's result. The result is
    # splatted, so an Array result is spread over the following elements
    # and a nil result adds nothing.
    def measure_duration
      started_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      outcome = yield
      elapsed = Process.clock_gettime(Process::CLOCK_MONOTONIC) - started_at

      [elapsed, *outcome]
    end
  end
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,303 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "webrick"
|
5
|
+
|
6
|
+
require "fluent/plugin/input"
|
7
|
+
require "fluent/plugin/parser"
|
8
|
+
|
9
|
+
require "fluent/plugin/gcloud_pubsub/client"
|
10
|
+
require "fluent/plugin/gcloud_pubsub/metrics"
|
11
|
+
|
12
|
+
require "prometheus/client"
|
13
|
+
|
14
|
+
module Fluent::Plugin
|
15
|
+
class GcloudPubSubInput < Input
|
16
|
+
Fluent::Plugin.register_input("gcloud_pubsub", self)
|
17
|
+
|
18
|
+
helpers :compat_parameters, :parser, :thread
|
19
|
+
|
20
|
+
DEFAULT_PARSER_TYPE = "json"
|
21
|
+
|
22
|
+
# Raised when a pulled line cannot be parsed and parse_error_action is
# :exception.
class FailedParseError < StandardError; end
|
24
|
+
|
25
|
+
desc "Set tag of messages."
|
26
|
+
config_param :tag, :string
|
27
|
+
desc "Set key to be used as tag."
|
28
|
+
config_param :tag_key, :string, default: nil
|
29
|
+
desc "Set your GCP project."
|
30
|
+
config_param :project, :string, default: nil
|
31
|
+
desc "Set your credential file path."
|
32
|
+
config_param :key, :string, default: nil
|
33
|
+
desc "Set topic name to pull."
|
34
|
+
config_param :topic, :string, default: nil
|
35
|
+
desc "Set subscription name to pull."
|
36
|
+
config_param :subscription, :string
|
37
|
+
desc "Pulling messages by intervals of specified seconds."
|
38
|
+
config_param :pull_interval, :float, default: 5.0
|
39
|
+
desc "Max messages pulling at once."
|
40
|
+
config_param :max_messages, :integer, default: 100
|
41
|
+
desc "Setting `true`, keepalive connection to wait for new messages."
|
42
|
+
config_param :return_immediately, :bool, default: true
|
43
|
+
desc "Set number of threads to pull messages."
|
44
|
+
config_param :pull_threads, :integer, default: 1
|
45
|
+
desc "Acquire these fields from attributes on the Pub/Sub message and merge them into the record"
|
46
|
+
config_param :attribute_keys, :array, default: []
|
47
|
+
desc "Set error type when parsing messages fails."
|
48
|
+
config_param :parse_error_action, :enum, default: :exception, list: %i[exception warning]
|
49
|
+
desc "The prefix for Prometheus metric names"
|
50
|
+
config_param :metric_prefix, :string, default: "fluentd_input_gcloud_pubsub"
|
51
|
+
# for HTTP RPC
|
52
|
+
desc "If `true` is specified, HTTP RPC to stop or start pulling message is enabled."
|
53
|
+
config_param :enable_rpc, :bool, default: false
|
54
|
+
desc "Bind IP address for HTTP RPC."
|
55
|
+
config_param :rpc_bind, :string, default: "0.0.0.0"
|
56
|
+
desc "Port for HTTP RPC."
|
57
|
+
config_param :rpc_port, :integer, default: 24_680
|
58
|
+
|
59
|
+
config_section :parse do
|
60
|
+
config_set_default :@type, DEFAULT_PARSER_TYPE
|
61
|
+
end
|
62
|
+
|
63
|
+
class RPCServlet < WEBrick::HTTPServlet::AbstractServlet
|
64
|
+
class Error < StandardError; end
|
65
|
+
|
66
|
+
# Forwards both arguments to the parent servlet and keeps the input plugin
# so request handlers can call back into it.
def initialize(server, plugin)
  super(server, plugin)
  @plugin = plugin
end
|
70
|
+
|
71
|
+
# rubocop:disable Naming/MethodName
# Handles a GET request: runs #process and copies the resulting status,
# headers and body onto +res+. Any StandardError raised while processing
# is turned into a 500 JSON response carrying the error message and
# backtrace.
#
# The exception is bound with `=> e` rather than read from `$ERROR_INFO`,
# because that alias only exists after `require "English"`, which this
# file does not do.
def do_GET(req, res)
  begin
    code, header, body = process(req, res)
  rescue StandardError => e
    code, header, body = render_json(500, {
                                       "ok" => false,
                                       "message" => "Internal Server Error",
                                       "error" => e.to_s,
                                       "backtrace" => e.backtrace,
                                     })
  end

  res.status = code
  header.each_pair { |k, v| res[k] = v }
  res.body = body
end
# rubocop:enable Naming/MethodName
|
91
|
+
|
92
|
+
# Builds a [status, headers, body] triple whose body is +obj+ as JSON.
def render_json(code, obj)
  headers = { "Content-Type" => "application/json" }
  [code, headers, obj.to_json]
end
|
95
|
+
|
96
|
+
# Dispatches on the request path: "/stop" and "/start" toggle pulling on
# the plugin, "/status" adds the current pull status to the response.
# Any other path raises Error.
#
# @return [Array] the [status, headers, body] triple from #render_json
def process(req, _res)
  ret = { "ok" => true }
  path = req.path_info

  if path == "/stop"
    @plugin.stop_pull
  elsif path == "/start"
    @plugin.start_pull
  elsif path == "/status"
    ret["status"] = @plugin.status_of_pull
  else
    raise Error, "Invalid path_info: #{req.path_info}"
  end

  render_json(200, ret)
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# rubocop:disable Metrics/MethodLength
# Applies the configuration, resets the RPC and pull state, picks the tag
# extraction strategy (static when no tag_key is set, dynamic otherwise),
# creates the parser and registers the Prometheus metrics, reusing any that
# are already registered under the same name.
def configure(conf)
  compat_parameters_convert(conf, :parser)
  super
  @rpc_srv = nil
  @rpc_thread = nil
  @stop_pull = false

  @extract_tag = method(@tag_key.nil? ? :static_tag : :dynamic_tag)
  @parser = parser_create

  pulled_name = :"#{@metric_prefix}_messages_pulled"
  @messages_pulled =
    Fluent::GcloudPubSub::Metrics.register_or_existing(pulled_name) do
      ::Prometheus::Client.registry.histogram(
        pulled_name,
        "Number of Pub/Sub messages pulled by the subscriber on each invocation",
        {},
        [0, 1, 10, 50, 100, 250, 500, 1000],
      )
    end

  pulled_bytes_name = :"#{@metric_prefix}_messages_pulled_bytes"
  @messages_pulled_bytes =
    Fluent::GcloudPubSub::Metrics.register_or_existing(pulled_bytes_name) do
      ::Prometheus::Client.registry.histogram(
        pulled_bytes_name,
        "Total size in bytes of the Pub/Sub messages pulled by the subscriber on each invocation",
        {},
        [100, 1000, 10_000, 100_000, 1_000_000, 5_000_000, 10_000_000],
      )
    end

  pull_errors_name = :"#{@metric_prefix}_pull_errors_total"
  @pull_errors =
    Fluent::GcloudPubSub::Metrics.register_or_existing(pull_errors_name) do
      ::Prometheus::Client.registry.counter(
        pull_errors_name,
        "Errors encountered while pulling or processing messages",
        {},
      )
    end
end
# rubocop:enable Metrics/MethodLength
|
158
|
+
|
159
|
+
# Starts the optional HTTP RPC server, connects the subscriber and spawns
# one subscribe thread per configured pull thread.
def start
  super
  start_rpc if @enable_rpc

  @subscriber = Fluent::GcloudPubSub::Subscriber.new @project, @key, @topic, @subscription
  log.debug "connected subscription:#{@subscription} in project #{@project}"

  @emit_guard = Mutex.new
  @stop_subscribing = false
  @subscribe_threads = []
  @pull_threads.times do |idx|
    thread_name = :"in_gcloud_pubsub_subscribe_#{idx}"
    @subscribe_threads << thread_create(thread_name, &method(:subscribe))
  end
end
|
173
|
+
|
174
|
+
# Stops the RPC server if one is running, signals the subscribe loops to
# stop and waits for the subscribe threads to finish before delegating to
# the parent implementation.
def shutdown
  if @rpc_srv
    @rpc_srv.shutdown
    @rpc_srv = nil
  end
  @rpc_thread = nil if @rpc_thread
  @stop_subscribing = true
  # @subscribe_threads is still nil when #start did not get as far as
  # creating the threads; skip the join rather than fail on nil.
  @subscribe_threads&.each(&:join)
  super
end
|
184
|
+
|
185
|
+
# Pauses pulling: sets the stop flag checked by the subscribe loop and
# logs the change.
def stop_pull
  @stop_pull = true
  log.info("stop pull from subscription:#{@subscription}")
end
|
189
|
+
|
190
|
+
# Resumes pulling: clears the stop flag checked by the subscribe loop and
# logs the change.
def start_pull
  @stop_pull = false
  log.info("start pull from subscription:#{@subscription}")
end
|
194
|
+
|
195
|
+
# Reports whether pulling is currently paused.
#
# @return [String] "stopped" when the stop flag is set, "started" otherwise
def status_of_pull
  return "stopped" if @stop_pull

  "started"
end
|
198
|
+
|
199
|
+
private
|
200
|
+
|
201
|
+
# Tag strategy used when no tag_key is configured: the record is ignored
# and the configured tag is always returned.
def static_tag(_record)
  @tag
end
|
204
|
+
|
205
|
+
# Tag strategy used when tag_key is configured: removes that key from the
# record and returns its value, falling back to the configured tag when
# the value is missing.
def dynamic_tag(record)
  extracted = record.delete(@tag_key)
  extracted || @tag
end
|
208
|
+
|
209
|
+
# Creates the WEBrick server on the configured bind address and port,
# mounts the RPC servlet under /api/in_gcloud_pubsub/pull/ and runs the
# server in its own plugin thread.
def start_rpc
  log.info "listening http rpc server on http://#{@rpc_bind}:#{@rpc_port}/"

  server_options = {
    BindAddress: @rpc_bind,
    Port: @rpc_port,
    Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
    AccessLog: [],
  }
  @rpc_srv = WEBrick::HTTPServer.new(server_options)
  @rpc_srv.mount("/api/in_gcloud_pubsub/pull/", RPCServlet, self)

  @rpc_thread = thread_create(:in_gcloud_pubsub_rpc_thread) { @rpc_srv.start }
end
|
224
|
+
|
225
|
+
# Body of each subscribe thread: keeps pulling until shutdown is requested.
# Pulling is skipped while paused, and the loop sleeps for pull_interval
# when return_immediately is set or pulling is paused. Any StandardError
# that reaches this level is logged and ends the loop.
def subscribe
  loop do
    break if @stop_subscribing

    _subscribe unless @stop_pull

    sleep @pull_interval if @return_immediately || @stop_pull
  end
rescue StandardError => e
  log.error "unexpected error", error_message: e.to_s, error_class: e.class.to_s
  log.error_backtrace e.backtrace
end
|
235
|
+
|
236
|
+
# Performs one pull cycle: pulls a batch, records the count and byte-size
# metrics, processes the messages and then acknowledges them. Errors are
# counted and logged here instead of being propagated; retryable ones are
# logged as warnings, everything else as errors with a backtrace.
def _subscribe
  messages = @subscriber.pull @return_immediately, @max_messages
  @messages_pulled.observe(common_labels, messages.size)

  if messages.empty?
    log.debug "no messages are pulled"
    return
  end

  # Payload bytes plus the bytes of every attribute key and value.
  total_bytes = messages.sum do |pulled|
    attribute_bytes = pulled.attributes.sum { |k, v| k.bytesize + v.bytesize }
    pulled.data.bytesize + attribute_bytes
  end
  @messages_pulled_bytes.observe(common_labels, total_bytes)

  process messages
  @subscriber.acknowledge messages

  log.debug "#{messages.length} message(s) processed"
rescue Fluent::GcloudPubSub::RetryableError => e
  @pull_errors.increment(common_labels.merge({ retryable: true }))
  log.warn "Retryable error occurs. Fluentd will retry.", error_message: e.to_s, error_class: e.class.to_s
rescue StandardError => e
  @pull_errors.increment(common_labels.merge({ retryable: false }))
  log.error "unexpected error", error_message: e.to_s, error_class: e.class.to_s
  log.error_backtrace e.backtrace
end
|
261
|
+
|
262
|
+
# Unpacks every pulled message into lines, parses each line and groups the
# resulting events by tag before emitting one stream per tag. The
# configured attribute keys are copied from the message attributes onto
# each parsed record. A line that fails to parse either raises
# FailedParseError or is logged as a warning, depending on
# parse_error_action.
def process(messages)
  event_streams = Hash.new { |streams, tag| streams[tag] = Fluent::MultiEventStream.new }

  messages.each do |m|
    Fluent::GcloudPubSub::MessageUnpacker.unpack(m).each do |line, attributes|
      @parser.parse(line) do |time, record|
        if time && record
          @attribute_keys.each { |key| record[key] = attributes[key] }

          event_streams[@extract_tag.call(record)].add(time, record)
        elsif @parse_error_action == :exception
          raise FailedParseError, "pattern not match: #{line}"
        else
          log.warn "pattern not match", record: line
        end
      end
    end
  end

  event_streams.each do |tag, es|
    # There are some output plugins not to supposed to be called with multi-threading.
    # Maybe remove in the future.
    @emit_guard.synchronize { router.emit_stream(tag, es) }
  end
end
|
298
|
+
|
299
|
+
# Labels attached to every metric observation made by this input: the
# configured subscription name.
def common_labels
  { subscription: @subscription }
end
|
302
|
+
end
|
303
|
+
end
|