fluent-plugin-gcloud-pubsub-custom-compress-batches 1.3.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.rubocop.yml +46 -0
- data/.rubocop_todo.yml +98 -0
- data/.travis.yml +26 -0
- data/CHANGELOG.md +143 -0
- data/Gemfile +5 -0
- data/LICENSE +22 -0
- data/README.md +250 -0
- data/Rakefile +14 -0
- data/fluent-plugin-gcloud-pubsub-custom.gemspec +32 -0
- data/lib/fluent/plugin/gcloud_pubsub/client.rb +173 -0
- data/lib/fluent/plugin/gcloud_pubsub/metrics.rb +24 -0
- data/lib/fluent/plugin/in_gcloud_pubsub.rb +303 -0
- data/lib/fluent/plugin/out_gcloud_pubsub.rb +167 -0
- data/test/plugin/test_in_gcloud_pubsub.rb +455 -0
- data/test/plugin/test_out_gcloud_pubsub.rb +308 -0
- data/test/test_helper.rb +33 -0
- metadata +208 -0
data/Rakefile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true

# Rake entry point: loads Bundler's gem tasks and defines the unit-test task.

require "bundler"
# Installs Bundler's gem helper tasks. The default task at the bottom depends
# on a :build task, presumably one of the tasks defined here.
Bundler::GemHelper.install_tasks

require "rake/testtask"

# `rake test` runs every plugin test file with lib/ and test/ on the load path.
Rake::TestTask.new(:test) do |t|
  t.libs.push("lib", "test")
  t.test_files = FileList["test/plugin/test_*.rb"]
  t.verbose = true
end

# A bare `rake` invokes :build, not the test task.
task default: :build
|
@@ -0,0 +1,32 @@
|
|
1
|
+
# frozen_string_literal: true

# Gem specification for the compress-batches fork of the Pub/Sub plugin.

$LOAD_PATH << File.expand_path("lib", __dir__)

Gem::Specification.new do |spec|
  # Identity and descriptive metadata.
  spec.name        = "fluent-plugin-gcloud-pubsub-custom-compress-batches"
  spec.description = "Google Cloud Pub/Sub input/output plugin for Fluentd event collector - with payload compression. Forked from https://github.com/gocardless/fluent-plugin-gcloud-pubsub-custom"
  spec.license     = "MIT"
  spec.homepage    = "https://github.com/calvinaditya95/fluent-plugin-gcloud-pubsub-custom"
  spec.summary     = "Google Cloud Pub/Sub input/output plugin for Fluentd event collector - with payload compression"
  spec.version     = "1.3.4"
  spec.authors     = ["Calvin Aditya"]
  spec.email       = "calvin.aditya95@gmail.com"

  # The packaged file lists come from git, so the gem has to be built from
  # inside a git checkout.
  spec.files         = `git ls-files`.split("\n")
  spec.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  spec.executables   = `git ls-files -- bin/*`.split("\n").map { |path| File.basename(path) }
  spec.require_paths = ["lib"]

  # Runtime dependencies.
  spec.add_runtime_dependency("fluentd", [">= 0.14.15", "< 2"])
  spec.add_runtime_dependency("google-cloud-pubsub", "~> 0.30.0")
  # Use the same version constraint as fluent-plugin-prometheus currently specifies
  spec.add_runtime_dependency("prometheus-client", "< 0.10")

  # Development-only dependencies.
  spec.add_development_dependency("bundler")
  spec.add_development_dependency("pry")
  spec.add_development_dependency("pry-byebug")
  spec.add_development_dependency("rake")
  spec.add_development_dependency("rubocop", "~>0.83")
  spec.add_development_dependency("test-unit")
  spec.add_development_dependency("test-unit-rr")
end
|
@@ -0,0 +1,173 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "google/cloud/pubsub"
|
4
|
+
require "zlib"
|
5
|
+
|
6
|
+
module Fluent
|
7
|
+
module GcloudPubSub
|
8
|
+
# Base class for the errors raised by the Pub/Sub wrappers in this module
# (for example when a topic or subscription does not exist).
class Error < StandardError; end

# Raised in place of the Google API errors that Publisher and Subscriber
# rescue (unavailable, deadline exceeded, internal error).
class RetryableError < Error; end
|
12
|
+
|
13
|
+
# Value of the "compression_algorithm" message attribute that marks a
# zlib-deflated batch.
COMPRESSION_ALGORITHM_ZLIB = "zlib"
# Delimiter placed between the records packed into one compressed message:
# 30 is the ASCII record separator character. Frozen explicitly because
# Integer#chr builds a new mutable String, which the frozen_string_literal
# magic comment does not cover.
BATCHED_RECORD_SEPARATOR = 30.chr.freeze
|
16
|
+
|
17
|
+
# A single outgoing payload together with its attributes.
class Message
  attr_reader :message, :attributes

  # message    - the payload; must respond to #bytesize
  # attributes - hash whose keys and values respond to #bytesize
  #              (defaults to an empty hash)
  def initialize(message, attributes = {})
    @message = message
    @attributes = attributes
  end

  # Returns the payload size plus the size of every attribute key and value,
  # in bytes.
  def bytesize
    attribute_bytes = @attributes.sum { |name, value| name.bytesize + value.bytesize }
    @message.bytesize + attribute_bytes
  end
end
|
33
|
+
|
34
|
+
# Publishes batches of messages to Pub/Sub topics, either one Pub/Sub message
# per record or as a single zlib-compressed message holding the whole batch.
class Publisher
  # project          - passed to the Pub/Sub client as project_id
  # key              - passed to the Pub/Sub client as credentials
  # autocreate_topic - when truthy, #topic creates topics that do not exist
  # metric_prefix    - prefix for the names of the two compression histograms
  def initialize(project, key, autocreate_topic, metric_prefix)
    @pubsub = Google::Cloud::Pubsub.new(project_id: project, credentials: key)
    @autocreate_topic = autocreate_topic
    @topics = {}

    ratio_metric = :"#{metric_prefix}_messages_compressed_size_per_original_size_ratio"
    @compression_ratio = Fluent::GcloudPubSub::Metrics.register_or_existing(ratio_metric) do
      ::Prometheus::Client.registry.histogram(
        ratio_metric,
        "Compression ratio achieved on a batch of messages",
        {},
        # We expect compression for even a single message to be typically
        # above 2x (0.5/50%), so bias the buckets towards the higher end
        # of the range.
        [0, 0.25, 0.5, 0.75, 0.85, 0.9, 0.95, 0.975, 1],
      )
    end

    duration_metric = :"#{metric_prefix}_messages_compression_duration_seconds"
    @compression_duration = Fluent::GcloudPubSub::Metrics.register_or_existing(duration_metric) do
      ::Prometheus::Client.registry.histogram(
        duration_metric,
        "Time taken to compress a batch of messages",
        {},
        [0, 0.0001, 0.0005, 0.001, 0.01, 0.05, 0.1, 0.25, 0.5, 1],
      )
    end
  end

  # Returns the client for topic_name, caching it after the first lookup.
  # A missing topic is created when autocreate_topic was set; otherwise
  # Error is raised.
  def topic(topic_name)
    @topics.fetch(topic_name) do
      found = @pubsub.topic(topic_name)
      found = @pubsub.create_topic(topic_name) if found.nil? && @autocreate_topic
      raise Error, "topic:#{topic_name} does not exist." if found.nil?

      @topics[topic_name] = found
    end
  end

  # Publishes messages to topic_name. With compress_batches the whole batch
  # is sent as one compressed message; otherwise every message is published
  # with its own attributes. The Google unavailable / deadline-exceeded /
  # internal errors are re-raised as RetryableError.
  def publish(topic_name, messages, compress_batches = false)
    destination = topic(topic_name)

    if compress_batches
      destination.publish(*compress_messages_with_zlib(messages, topic_name))
    else
      destination.publish do |batch|
        messages.each { |m| batch.publish m.message, m.attributes }
      end
    end
  rescue Google::Cloud::UnavailableError, Google::Cloud::DeadlineExceededError, Google::Cloud::InternalError => e
    raise RetryableError, "Google api returns error:#{e.class} message:#{e}"
  end

  private

  # Joins the message payloads with the record separator, deflates the
  # result and records the duration and ratio histograms. Returns
  # [compressed_payload, attributes] ready to be splatted into a publish call.
  def compress_messages_with_zlib(messages, topic_name)
    original_size = messages.sum(&:bytesize)
    # This should never happen, only a programming error or major
    # misconfiguration should lead to this situation. But checking against
    # it here avoids a potential division by zero later on.
    raise ArgumentError, "not compressing empty inputs" if original_size.zero?

    # Here we're implicitly dropping the 'attributes' field of the messages
    # that we're iterating over.
    # This is fine, because the :attribute_keys config param is not
    # supported when in compressed mode, so this field will always be
    # empty.
    payload = messages.map(&:message).join(BATCHED_RECORD_SEPARATOR)

    elapsed, deflated = Fluent::GcloudPubSub::Metrics.measure_duration do
      Zlib::Deflate.deflate(payload)
    end
    @compression_duration.observe(compression_labels(topic_name), elapsed)

    # If original = 1MiB and compressed = 256KiB; then metric value = 0.75 = 75% when plotted
    saved_fraction = 1 - deflated.bytesize.to_f / original_size
    @compression_ratio.observe(compression_labels(topic_name), saved_fraction)

    [deflated, { "compression_algorithm": COMPRESSION_ALGORITHM_ZLIB }]
  end

  # Builds a fresh label hash for one histogram observation.
  def compression_labels(topic_name)
    { topic: topic_name, algorithm: COMPRESSION_ALGORITHM_ZLIB }
  end
end
|
126
|
+
|
127
|
+
# Pulls and acknowledges messages on a single Pub/Sub subscription.
class Subscriber
  # project           - passed to the Pub/Sub client as project_id
  # key               - passed to the Pub/Sub client as credentials
  # topic_name        - when nil the subscription is looked up directly on
  #                     the client, otherwise through this topic
  # subscription_name - name of the subscription to attach to
  #
  # Raises Error when the topic or the subscription does not exist.
  def initialize(project, key, topic_name, subscription_name)
    pubsub = Google::Cloud::Pubsub.new project_id: project, credentials: key
    @client =
      if topic_name.nil?
        pubsub.subscription subscription_name
      else
        topic = pubsub.topic topic_name
        # A missing topic comes back as nil (Publisher#topic relies on the
        # same contract); fail with the module's own error instead of a
        # NoMethodError on nil.
        raise Error, "topic:#{topic_name} does not exist." if topic.nil?

        topic.subscription subscription_name
      end
    raise Error, "subscription:#{subscription_name} does not exist." if @client.nil?
  end

  # Pulls up to max messages; immediate is forwarded to the client's
  # `immediate:` option. The Google unavailable / deadline-exceeded /
  # internal errors are re-raised as RetryableError.
  def pull(immediate, max)
    @client.pull immediate: immediate, max: max
  rescue Google::Cloud::UnavailableError, Google::Cloud::DeadlineExceededError, Google::Cloud::InternalError => e
    raise RetryableError, "Google pull api returns error:#{e.class} message:#{e}"
  end

  # Acknowledges the given messages, mapping the same Google errors to
  # RetryableError.
  def acknowledge(messages)
    @client.acknowledge messages
  rescue Google::Cloud::UnavailableError, Google::Cloud::DeadlineExceededError, Google::Cloud::InternalError => e
    raise RetryableError, "Google acknowledge api returns error:#{e.class} message:#{e}"
  end
end
|
151
|
+
|
152
|
+
# Turns one received Pub/Sub message into a list of [line, attributes] pairs.
class MessageUnpacker
  # message - object exposing #attributes (a hash) and #message.data (the
  #           payload string)
  #
  # Returns a single pair for an uncompressed message, or one pair per packed
  # record (with empty attributes) for a zlib-compressed batch.
  # Raises ArgumentError for any other "compression_algorithm" value.
  def self.unpack(message)
    algorithm = message.attributes["compression_algorithm"]

    # For an uncompressed message return the single line and attributes
    return [[message.message.data.chomp, message.attributes]] if algorithm.nil?

    unless COMPRESSION_ALGORITHM_ZLIB == algorithm
      raise ArgumentError, "unknown compression algorithm: '#{algorithm}'"
    end

    # Return all of the lines in the message, with empty attributes
    inflated = Zlib::Inflate.inflate(message.message.data)
    inflated.split(BATCHED_RECORD_SEPARATOR).map { |line| [line, {}] }
  end
end
|
172
|
+
end
|
173
|
+
end
|
@@ -0,0 +1,24 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Fluent
|
4
|
+
module GcloudPubSub
|
5
|
+
# Helpers for working with the process-wide Prometheus registry.
module Metrics
  class << self
    # Returns the metric already registered under metric_name; when there is
    # none, returns whatever the block returns (callers register the metric
    # inside the block).
    def register_or_existing(metric_name)
      registry = ::Prometheus::Client.registry
      if registry.exist?(metric_name)
        registry.get(metric_name)
      else
        yield
      end
    end

    # Times the block on the monotonic clock and returns an array whose first
    # element is the elapsed seconds, followed by the block's result. The
    # result is splatted: an Array result is spread over the following
    # elements and a nil result adds nothing.
    def measure_duration
      began_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)
      outcome = yield
      ended_at = Process.clock_gettime(Process::CLOCK_MONOTONIC)

      [ended_at - began_at, *outcome]
    end
  end
end
|
23
|
+
end
|
24
|
+
end
|
@@ -0,0 +1,303 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
require "webrick"
|
5
|
+
|
6
|
+
require "fluent/plugin/input"
|
7
|
+
require "fluent/plugin/parser"
|
8
|
+
|
9
|
+
require "fluent/plugin/gcloud_pubsub/client"
|
10
|
+
require "fluent/plugin/gcloud_pubsub/metrics"
|
11
|
+
|
12
|
+
require "prometheus/client"
|
13
|
+
|
14
|
+
module Fluent::Plugin
|
15
|
+
class GcloudPubSubInput < Input
|
16
|
+
# Registers this class as the "gcloud_pubsub" input plugin.
Fluent::Plugin.register_input("gcloud_pubsub", self)

helpers :compat_parameters, :parser, :thread

# Parser type used when the configuration has no explicit <parse> @type.
DEFAULT_PARSER_TYPE = "json"

# Raised while processing pulled messages when a line cannot be parsed and
# parse_error_action is :exception.
class FailedParseError < StandardError
end

desc "Set tag of messages."
config_param :tag, :string
desc "Set key to be used as tag."
config_param :tag_key, :string, default: nil
desc "Set your GCP project."
config_param :project, :string, default: nil
desc "Set your credential file path."
config_param :key, :string, default: nil
desc "Set topic name to pull."
config_param :topic, :string, default: nil
desc "Set subscription name to pull."
config_param :subscription, :string
desc "Pulling messages by intervals of specified seconds."
config_param :pull_interval, :float, default: 5.0
desc "Max messages pulling at once."
config_param :max_messages, :integer, default: 100
# The previous description was inverted: with `true` the subscribe loop polls
# and sleeps pull_interval between pulls; with `false` the pull call itself
# waits for messages.
desc "Setting `true`, each pull returns immediately even when no messages are available; " \
     "setting `false`, the connection is kept open to wait for new messages."
config_param :return_immediately, :bool, default: true
desc "Set number of threads to pull messages."
config_param :pull_threads, :integer, default: 1
desc "Acquire these fields from attributes on the Pub/Sub message and merge them into the record"
config_param :attribute_keys, :array, default: []
desc "Set error type when parsing messages fails."
config_param :parse_error_action, :enum, default: :exception, list: %i[exception warning]
desc "The prefix for Prometheus metric names"
config_param :metric_prefix, :string, default: "fluentd_input_gcloud_pubsub"
# for HTTP RPC
desc "If `true` is specified, HTTP RPC to stop or start pulling message is enabled."
config_param :enable_rpc, :bool, default: false
# NOTE(review): the default binds every interface and the RPC servlet shows
# no authentication; confirm this is acceptable where enable_rpc is used.
desc "Bind IP address for HTTP RPC."
config_param :rpc_bind, :string, default: "0.0.0.0"
desc "Port for HTTP RPC."
config_param :rpc_port, :integer, default: 24_680

config_section :parse do
  config_set_default :@type, DEFAULT_PARSER_TYPE
end
|
62
|
+
|
63
|
+
class RPCServlet < WEBrick::HTTPServlet::AbstractServlet
|
64
|
+
# Raised by #process for a request path that is not a known RPC endpoint.
class Error < StandardError; end
|
65
|
+
|
66
|
+
# server - forwarded to the parent servlet constructor
# plugin - forwarded as well, and kept as the object that receives the
#          stop_pull / start_pull / status_of_pull calls made by #process
def initialize(server, plugin)
  super(server, plugin)
  @plugin = plugin
end
|
70
|
+
|
71
|
+
# rubocop:disable Naming/MethodName
|
72
|
+
# Handles a GET request: delegates to #process and copies the resulting
# status, headers and body onto the response. Any StandardError raised while
# processing is turned into a 500 JSON document carrying the error text and
# backtrace.
def do_GET(req, res)
  code, header, body =
    begin
      process(req, res)
    rescue StandardError => e
      # Bind the exception explicitly: $ERROR_INFO only exists after
      # `require "English"` and is nil otherwise, which made this handler
      # itself raise NoMethodError on `nil.backtrace`.
      render_json(500, {
        "ok" => false,
        "message" => "Internal Server Error",
        "error" => e.to_s,
        "backtrace" => e.backtrace,
      })
    end

  res.status = code
  header.each_pair do |k, v|
    res[k] = v
  end
  res.body = body
end
|
90
|
+
# rubocop:enable Naming/MethodName
|
91
|
+
|
92
|
+
# Builds the [status, headers, body] triple consumed by #do_GET, with obj
# serialised through #to_json and a JSON content type.
def render_json(code, obj)
  headers = { "Content-Type" => "application/json" }
  [code, headers, obj.to_json]
end
|
95
|
+
|
96
|
+
# Dispatches on the request path: "/stop" and "/start" toggle pulling on the
# plugin, "/status" adds the current pull status to the reply. Returns the
# 200 JSON triple from #render_json; raises Error for any other path.
def process(req, _res)
  path = req.path_info
  reply = { "ok" => true }

  if path == "/stop"
    @plugin.stop_pull
  elsif path == "/start"
    @plugin.start_pull
  elsif path == "/status"
    reply["status"] = @plugin.status_of_pull
  else
    raise Error, "Invalid path_info: #{path}"
  end

  render_json(200, reply)
end
|
110
|
+
end
|
111
|
+
|
112
|
+
# rubocop:disable Metrics/MethodLength
|
113
|
+
# Applies the configuration, resets the RPC and pull state, picks the tag
# extractor, creates the parser and registers (or reuses) the three
# Prometheus metrics of this plugin.
def configure(conf)
  compat_parameters_convert(conf, :parser)
  super

  @rpc_srv = nil
  @rpc_thread = nil
  @stop_pull = false

  # Without a tag_key every record gets the static tag; otherwise the tag is
  # taken from the record itself.
  @extract_tag = @tag_key.nil? ? method(:static_tag) : method(:dynamic_tag)

  @parser = parser_create

  pulled_metric = :"#{@metric_prefix}_messages_pulled"
  @messages_pulled = Fluent::GcloudPubSub::Metrics.register_or_existing(pulled_metric) do
    ::Prometheus::Client.registry.histogram(
      pulled_metric,
      "Number of Pub/Sub messages pulled by the subscriber on each invocation",
      {},
      [0, 1, 10, 50, 100, 250, 500, 1000],
    )
  end

  pulled_bytes_metric = :"#{@metric_prefix}_messages_pulled_bytes"
  @messages_pulled_bytes = Fluent::GcloudPubSub::Metrics.register_or_existing(pulled_bytes_metric) do
    ::Prometheus::Client.registry.histogram(
      pulled_bytes_metric,
      "Total size in bytes of the Pub/Sub messages pulled by the subscriber on each invocation",
      {},
      [100, 1000, 10_000, 100_000, 1_000_000, 5_000_000, 10_000_000],
    )
  end

  pull_errors_metric = :"#{@metric_prefix}_pull_errors_total"
  @pull_errors = Fluent::GcloudPubSub::Metrics.register_or_existing(pull_errors_metric) do
    ::Prometheus::Client.registry.counter(
      pull_errors_metric,
      "Errors encountered while pulling or processing messages",
      {},
    )
  end
end
|
157
|
+
# rubocop:enable Metrics/MethodLength
|
158
|
+
|
159
|
+
# Starts the plugin: optionally brings up the HTTP RPC server, connects the
# subscriber and spawns pull_threads threads that each run #subscribe.
def start
  super
  start_rpc if @enable_rpc

  @subscriber = Fluent::GcloudPubSub::Subscriber.new(@project, @key, @topic, @subscription)
  log.debug "connected subscription:#{@subscription} in project #{@project}"

  @emit_guard = Mutex.new
  @stop_subscribing = false
  @subscribe_threads = []
  @pull_threads.times do |idx|
    thread_name = :"in_gcloud_pubsub_subscribe_#{idx}"
    @subscribe_threads << thread_create(thread_name, &method(:subscribe))
  end
end
|
173
|
+
|
174
|
+
# Stops the plugin: shuts the RPC server down when one is running, tells the
# subscribe loops to stop and waits for their threads before calling the
# parent shutdown.
def shutdown
  if @rpc_srv
    @rpc_srv.shutdown
    @rpc_srv = nil
  end
  @rpc_thread = nil
  @stop_subscribing = true
  # @subscribe_threads is only assigned in #start after the subscriber has
  # been created; if that step raised it is still nil here, and joining it
  # would hide the original failure behind a NoMethodError.
  (@subscribe_threads || []).each(&:join)
  super
end
|
184
|
+
|
185
|
+
# Pauses pulling: sets the flag that makes #subscribe skip its pull step,
# then logs the change.
def stop_pull
  @stop_pull = true
  log.info("stop pull from subscription:#{@subscription}")
end
|
189
|
+
|
190
|
+
# Resumes pulling: clears the flag checked by #subscribe, then logs the
# change.
def start_pull
  @stop_pull = false
  log.info("start pull from subscription:#{@subscription}")
end
|
194
|
+
|
195
|
+
# Reports the pull state as a string: "stopped" while pulling is paused,
# "started" otherwise.
def status_of_pull
  return "stopped" if @stop_pull

  "started"
end
|
198
|
+
|
199
|
+
private
|
200
|
+
|
201
|
+
# Tag extractor used when no tag_key is configured: ignores the record and
# returns the configured tag.
def static_tag(_record)
  @tag
end
|
204
|
+
|
205
|
+
# Tag extractor used when tag_key is configured: removes that key from the
# record and returns its value, falling back to the configured tag when the
# value is missing (nil or false).
def dynamic_tag(record)
  extracted = record.delete(@tag_key)
  extracted || @tag
end
|
208
|
+
|
209
|
+
# Creates the WEBrick server for the pull RPC endpoints, mounts the servlet
# under /api/in_gcloud_pubsub/pull/ and runs the server on its own thread.
def start_rpc
  log.info "listening http rpc server on http://#{@rpc_bind}:#{@rpc_port}/"

  server_options = {
    BindAddress: @rpc_bind,
    Port: @rpc_port,
    # WEBrick's own log is limited to FATAL and the access log is disabled.
    Logger: WEBrick::Log.new(STDERR, WEBrick::Log::FATAL),
    AccessLog: [],
  }
  @rpc_srv = WEBrick::HTTPServer.new(server_options)
  @rpc_srv.mount("/api/in_gcloud_pubsub/pull/", RPCServlet, self)

  @rpc_thread = thread_create(:in_gcloud_pubsub_rpc_thread) { @rpc_srv.start }
end
|
224
|
+
|
225
|
+
# Body of each pull thread. Until shutdown is requested it pulls once per
# iteration (unless pulling is paused) and sleeps pull_interval whenever
# return_immediately is set or pulling is paused. An error escaping the loop
# is logged and ends the loop.
def subscribe
  until @stop_subscribing
    _subscribe unless @stop_pull

    # @stop_pull is read again on purpose: it can change while pulling.
    next unless @return_immediately || @stop_pull

    sleep @pull_interval
  end
rescue StandardError => error
  log.error "unexpected error", error_message: error.to_s, error_class: error.class.to_s
  log.error_backtrace error.backtrace
end
|
235
|
+
|
236
|
+
# Performs one pull cycle: pulls messages, records the count and byte-size
# histograms, processes the messages and acknowledges them. Errors are
# counted in the pull_errors metric and logged instead of being raised:
# retryable ones as warnings, anything else as errors with a backtrace.
def _subscribe
  messages = @subscriber.pull(@return_immediately, @max_messages)
  @messages_pulled.observe(common_labels, messages.size)
  if messages.empty?
    log.debug "no messages are pulled"
    return
  end

  pulled_bytes = messages.sum do |message|
    payload_bytes = message.data.bytesize
    payload_bytes + message.attributes.sum { |name, value| name.bytesize + value.bytesize }
  end
  @messages_pulled_bytes.observe(common_labels, pulled_bytes)

  # Acknowledge only after processing, so a failure while processing leaves
  # the messages unacknowledged.
  process(messages)
  @subscriber.acknowledge(messages)

  log.debug "#{messages.length} message(s) processed"
rescue Fluent::GcloudPubSub::RetryableError => error
  @pull_errors.increment(common_labels.merge({ retryable: true }))
  log.warn "Retryable error occurs. Fluentd will retry.", error_message: error.to_s, error_class: error.class.to_s
rescue StandardError => error
  @pull_errors.increment(common_labels.merge({ retryable: false }))
  log.error "unexpected error", error_message: error.to_s, error_class: error.class.to_s
  log.error_backtrace error.backtrace
end
|
261
|
+
|
262
|
+
# Unpacks every pulled message into lines, parses each line and groups the
# resulting events into one event stream per tag, then emits each stream.
# For every parsed record the configured attribute_keys are copied from the
# message attributes into the record. A line that fails to parse raises
# FailedParseError when parse_error_action is :exception and is logged as a
# warning otherwise.
def process(messages)
  streams_by_tag = Hash.new { |streams, tag| streams[tag] = Fluent::MultiEventStream.new }

  messages.each do |pubsub_message|
    Fluent::GcloudPubSub::MessageUnpacker.unpack(pubsub_message).each do |line, attributes|
      @parser.parse(line) do |time, record|
        if time && record
          @attribute_keys.each { |key| record[key] = attributes[key] }
          streams_by_tag[@extract_tag.call(record)].add(time, record)
        elsif @parse_error_action == :exception
          raise FailedParseError, "pattern not match: #{line}"
        else
          log.warn "pattern not match", record: line
        end
      end
    end
  end

  streams_by_tag.each do |tag, stream|
    # Some output plugins are not meant to be called from several threads at
    # once, so emits are serialised. Maybe remove in the future.
    @emit_guard.synchronize { router.emit_stream(tag, stream) }
  end
end
|
298
|
+
|
299
|
+
# Labels attached to every metric observation made by this plugin; a new
# hash is built on each call.
def common_labels
  { subscription: @subscription }
end
|
302
|
+
end
|
303
|
+
end
|