fluentd 0.14.1 → 0.14.2


Potentially problematic release: this version of fluentd has been flagged; review it before upgrading.

Files changed (129)
  1. checksums.yaml +4 -4
  2. data/ChangeLog +110 -1
  3. data/Rakefile +5 -1
  4. data/appveyor.yml +7 -1
  5. data/example/in_forward.conf +4 -0
  6. data/lib/fluent/compat/exec_util.rb +129 -0
  7. data/lib/fluent/compat/file_util.rb +54 -0
  8. data/lib/fluent/compat/filter.rb +21 -3
  9. data/lib/fluent/compat/formatter.rb +4 -2
  10. data/lib/fluent/compat/formatter_utils.rb +85 -0
  11. data/lib/fluent/compat/handle_tag_and_time_mixin.rb +60 -0
  12. data/lib/fluent/compat/input.rb +1 -3
  13. data/lib/fluent/compat/output.rb +95 -39
  14. data/lib/fluent/compat/parser.rb +17 -0
  15. data/lib/fluent/compat/parser_utils.rb +40 -0
  16. data/lib/fluent/compat/socket_util.rb +165 -0
  17. data/lib/fluent/compat/string_util.rb +34 -0
  18. data/lib/fluent/{test/driver/owner.rb → compat/structured_format_mixin.rb} +5 -11
  19. data/lib/fluent/config/element.rb +2 -2
  20. data/lib/fluent/configurable.rb +2 -1
  21. data/lib/fluent/event.rb +61 -7
  22. data/lib/fluent/event_router.rb +1 -1
  23. data/lib/fluent/plugin.rb +7 -7
  24. data/lib/fluent/plugin/buf_file.rb +5 -2
  25. data/lib/fluent/plugin/buffer.rb +194 -64
  26. data/lib/fluent/plugin/buffer/chunk.rb +28 -3
  27. data/lib/fluent/plugin/buffer/file_chunk.rb +5 -21
  28. data/lib/fluent/plugin/buffer/memory_chunk.rb +1 -11
  29. data/lib/fluent/plugin/exec_util.rb +2 -112
  30. data/lib/fluent/plugin/file_util.rb +3 -38
  31. data/lib/fluent/plugin/file_wrapper.rb +1 -1
  32. data/lib/fluent/plugin/filter_grep.rb +3 -7
  33. data/lib/fluent/plugin/filter_record_transformer.rb +5 -5
  34. data/lib/fluent/plugin/filter_stdout.rb +18 -11
  35. data/lib/fluent/plugin/formatter.rb +0 -48
  36. data/lib/fluent/plugin/formatter_csv.rb +7 -8
  37. data/lib/fluent/plugin/formatter_hash.rb +1 -4
  38. data/lib/fluent/plugin/formatter_json.rb +1 -4
  39. data/lib/fluent/plugin/formatter_ltsv.rb +5 -6
  40. data/lib/fluent/plugin/formatter_msgpack.rb +1 -4
  41. data/lib/fluent/plugin/formatter_out_file.rb +36 -3
  42. data/lib/fluent/plugin/formatter_stdout.rb +36 -1
  43. data/lib/fluent/plugin/in_dummy.rb +9 -2
  44. data/lib/fluent/plugin/in_exec.rb +20 -57
  45. data/lib/fluent/plugin/in_forward.rb +4 -3
  46. data/lib/fluent/plugin/in_object_space.rb +8 -44
  47. data/lib/fluent/plugin/in_syslog.rb +13 -24
  48. data/lib/fluent/plugin/in_tail.rb +3 -0
  49. data/lib/fluent/plugin/out_buffered_stdout.rb +14 -4
  50. data/lib/fluent/plugin/out_exec.rb +7 -5
  51. data/lib/fluent/plugin/out_exec_filter.rb +10 -10
  52. data/lib/fluent/plugin/out_file.rb +1 -3
  53. data/lib/fluent/plugin/out_forward.rb +38 -57
  54. data/lib/fluent/plugin/out_stdout.rb +14 -5
  55. data/lib/fluent/plugin/out_stream.rb +3 -0
  56. data/lib/fluent/plugin/output.rb +31 -14
  57. data/lib/fluent/plugin/parser.rb +0 -69
  58. data/lib/fluent/plugin/parser_apache.rb +10 -6
  59. data/lib/fluent/plugin/parser_apache_error.rb +8 -3
  60. data/lib/fluent/plugin/parser_csv.rb +3 -1
  61. data/lib/fluent/plugin/parser_json.rb +1 -1
  62. data/lib/fluent/plugin/parser_multiline.rb +5 -3
  63. data/lib/fluent/plugin/parser_nginx.rb +10 -6
  64. data/lib/fluent/plugin/parser_regexp.rb +73 -0
  65. data/lib/fluent/plugin/socket_util.rb +2 -148
  66. data/lib/fluent/plugin/storage_local.rb +1 -1
  67. data/lib/fluent/plugin/string_util.rb +3 -18
  68. data/lib/fluent/plugin_helper.rb +1 -0
  69. data/lib/fluent/plugin_helper/compat_parameters.rb +166 -41
  70. data/lib/fluent/plugin_helper/formatter.rb +30 -19
  71. data/lib/fluent/plugin_helper/inject.rb +25 -12
  72. data/lib/fluent/plugin_helper/parser.rb +22 -13
  73. data/lib/fluent/plugin_helper/storage.rb +22 -13
  74. data/lib/fluent/registry.rb +19 -6
  75. data/lib/fluent/supervisor.rb +27 -1
  76. data/lib/fluent/test/driver/base.rb +16 -92
  77. data/lib/fluent/test/driver/base_owned.rb +17 -53
  78. data/lib/fluent/test/driver/base_owner.rb +125 -0
  79. data/lib/fluent/test/driver/filter.rb +24 -2
  80. data/lib/fluent/test/driver/input.rb +2 -2
  81. data/lib/fluent/test/driver/multi_output.rb +2 -2
  82. data/lib/fluent/test/driver/output.rb +3 -5
  83. data/lib/fluent/test/helpers.rb +25 -0
  84. data/lib/fluent/test/input_test.rb +4 -4
  85. data/lib/fluent/test/output_test.rb +3 -3
  86. data/lib/fluent/version.rb +1 -1
  87. data/test/config/test_element.rb +135 -6
  88. data/test/plugin/test_buf_file.rb +71 -3
  89. data/test/plugin/test_buffer.rb +305 -86
  90. data/test/plugin/test_buffer_chunk.rb +60 -2
  91. data/test/plugin/test_buffer_file_chunk.rb +4 -3
  92. data/test/plugin/test_filter_grep.rb +25 -21
  93. data/test/plugin/test_filter_record_transformer.rb +75 -67
  94. data/test/plugin/test_filter_stdout.rb +171 -74
  95. data/test/plugin/test_formatter_csv.rb +94 -0
  96. data/test/plugin/test_formatter_json.rb +30 -0
  97. data/test/plugin/test_formatter_ltsv.rb +52 -0
  98. data/test/plugin/test_formatter_msgpack.rb +28 -0
  99. data/test/plugin/test_formatter_out_file.rb +95 -0
  100. data/test/plugin/test_formatter_single_value.rb +38 -0
  101. data/test/plugin/test_in_dummy.rb +95 -0
  102. data/test/plugin/test_in_exec.rb +27 -31
  103. data/test/plugin/test_in_forward.rb +24 -0
  104. data/test/plugin/test_in_gc_stat.rb +5 -5
  105. data/test/plugin/test_in_object_space.rb +4 -4
  106. data/test/plugin/test_in_syslog.rb +60 -35
  107. data/test/plugin/test_out_buffered_stdout.rb +17 -3
  108. data/test/plugin/test_out_forward.rb +93 -5
  109. data/test/plugin/test_out_stdout.rb +14 -3
  110. data/test/plugin/test_output_as_buffered_retries.rb +20 -0
  111. data/test/plugin/test_output_as_buffered_secondary.rb +16 -0
  112. data/test/plugin/test_output_as_standard.rb +22 -22
  113. data/test/plugin/test_parser_apache.rb +13 -9
  114. data/test/plugin/test_parser_apache_error.rb +11 -6
  115. data/test/plugin/test_parser_csv.rb +35 -25
  116. data/test/plugin/test_parser_nginx.rb +11 -5
  117. data/test/plugin/test_parser_regexp.rb +235 -68
  118. data/test/plugin/test_parser_tsv.rb +54 -58
  119. data/test/plugin_helper/test_compat_parameters.rb +111 -46
  120. data/test/plugin_helper/test_formatter.rb +40 -0
  121. data/test/plugin_helper/test_inject.rb +101 -2
  122. data/test/plugin_helper/test_parser.rb +40 -0
  123. data/test/plugin_helper/test_storage.rb +43 -0
  124. data/test/test_event.rb +93 -0
  125. data/test/test_event_router.rb +13 -4
  126. data/test/test_event_time.rb +0 -3
  127. data/test/test_formatter.rb +7 -164
  128. data/test/test_plugin_classes.rb +28 -1
  129. metadata +24 -3
data/lib/fluent/compat/socket_util.rb (added)
@@ -0,0 +1,165 @@
+ #
+ # Fluentd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ require 'ipaddr'
+
+ require 'cool.io'
+
+ require 'fluent/plugin'
+ require 'fluent/input'
+
+ module Fluent
+   module Compat
+     module SocketUtil
+       def create_udp_socket(host)
+         if IPAddr.new(IPSocket.getaddress(host)).ipv4?
+           UDPSocket.new
+         else
+           UDPSocket.new(Socket::AF_INET6)
+         end
+       end
+       module_function :create_udp_socket
+
+       class UdpHandler < Coolio::IO
+         def initialize(io, log, body_size_limit, callback)
+           super(io)
+           @io = io
+           @log = log
+           @body_size_limit = body_size_limit
+           @callback = callback
+         end
+
+         def on_readable
+           msg, addr = @io.recvfrom_nonblock(@body_size_limit)
+           msg.chomp!
+           @callback.call(msg, addr)
+         rescue => e
+           @log.error "unexpected error", error: e
+         end
+       end
+
+       class TcpHandler < Coolio::Socket
+         PEERADDR_FAILED = ["?", "?", "name resolusion failed", "?"]
+
+         def initialize(io, log, delimiter, callback)
+           super(io)
+           @timeout = 0
+           if io.is_a?(TCPSocket)
+             @addr = (io.peeraddr rescue PEERADDR_FAILED)
+
+             opt = [1, @timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
+             io.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
+           end
+           @delimiter = delimiter
+           @callback = callback
+           @log = log
+           @log.trace { "accepted fluent socket object_id=#{self.object_id}" }
+           @buffer = "".force_encoding('ASCII-8BIT')
+         end
+
+         def on_connect
+         end
+
+         def on_read(data)
+           @buffer << data
+           pos = 0
+
+           while i = @buffer.index(@delimiter, pos)
+             msg = @buffer[pos...i]
+             @callback.call(msg, @addr)
+             pos = i + @delimiter.length
+           end
+           @buffer.slice!(0, pos) if pos > 0
+         rescue => e
+           @log.error "unexpected error", error: e
+           close
+         end
+
+         def on_close
+           @log.trace { "closed fluent socket object_id=#{self.object_id}" }
+         end
+       end
+
+       class BaseInput < Fluent::Input
+         def initialize
+           super
+           require 'fluent/parser'
+         end
+
+         desc 'Tag of output events.'
+         config_param :tag, :string
+         desc 'The format of the payload.'
+         config_param :format, :string
+         desc 'The port to listen to.'
+         config_param :port, :integer, default: 5150
+         desc 'The bind address to listen to.'
+         config_param :bind, :string, default: '0.0.0.0'
+         desc "The field name of the client's hostname."
+         config_param :source_host_key, :string, default: nil
+         config_param :blocking_timeout, :time, default: 0.5
+
+         def configure(conf)
+           super
+
+           @parser = Plugin.new_parser(@format)
+           @parser.configure(conf)
+         end
+
+         def start
+           super
+
+           @loop = Coolio::Loop.new
+           @handler = listen(method(:on_message))
+           @loop.attach(@handler)
+           @thread = Thread.new(&method(:run))
+         end
+
+         def shutdown
+           @loop.watchers.each { |w| w.detach }
+           @loop.stop if @loop.instance_variable_get("@running")
+           @handler.close
+           @thread.join
+
+           super
+         end
+
+         def run
+           @loop.run(@blocking_timeout)
+         rescue => e
+           log.error "unexpected error", error: e
+           log.error_backtrace
+         end
+
+         private
+
+         def on_message(msg, addr)
+           @parser.parse(msg) { |time, record|
+             unless time && record
+               log.warn "pattern not match: #{msg.inspect}"
+               return
+             end
+
+             record[@source_host_key] = addr[3] if @source_host_key
+             router.emit(@tag, time, record)
+           }
+         rescue => e
+           log.error msg.dump, error: e, host: addr[3]
+           log.error_backtrace
+         end
+       end
+     end
+   end
+ end
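For orientation (not part of the diff): a minimal, hedged sketch of the helper's contract; the address and port are made up.

    require 'fluent/compat/socket_util'

    # Returns an IPv4 or an IPv6 UDP socket depending on how the host resolves.
    sock = Fluent::Compat::SocketUtil.create_udp_socket('127.0.0.1')
    sock.bind('127.0.0.1', 5160)   # hypothetical bind address/port for illustration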
data/lib/fluent/compat/string_util.rb (added)
@@ -0,0 +1,34 @@
+ #
+ # Fluentd
+ #
+ # Licensed under the Apache License, Version 2.0 (the "License");
+ # you may not use this file except in compliance with the License.
+ # You may obtain a copy of the License at
+ #
+ #     http://www.apache.org/licenses/LICENSE-2.0
+ #
+ # Unless required by applicable law or agreed to in writing, software
+ # distributed under the License is distributed on an "AS IS" BASIS,
+ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ # See the License for the specific language governing permissions and
+ # limitations under the License.
+ #
+
+ module Fluent
+   module Compat
+     module StringUtil
+       def match_regexp(regexp, string)
+         begin
+           return regexp.match(string)
+         rescue ArgumentError => e
+           raise e unless e.message.index("invalid byte sequence in".freeze).zero?
+           $log.info "invalid byte sequence is replaced in `#{string}`"
+           string = string.scrub('?')
+           retry
+         end
+         return true
+       end
+       module_function :match_regexp
+     end
+   end
+ end
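A hedged illustration of the scrub-and-retry behavior above (assumes fluentd's global $log is initialized; the byte string is made up):

    require 'fluent/compat/string_util'

    # An invalid UTF-8 byte makes Regexp#match raise ArgumentError;
    # match_regexp replaces it with '?' and retries instead of failing.
    s = "foo\xFFbar".force_encoding('UTF-8')
    Fluent::Compat::StringUtil.match_regexp(/foo.bar/, s)   # matches "foo?bar"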
data/lib/fluent/{test/driver/owner.rb → compat/structured_format_mixin.rb} (renamed)
@@ -14,18 +14,12 @@
  # limitations under the License.
  #
 
- require 'fluent/plugin/base'
- require 'fluent/plugin_id'
- require 'fluent/log'
- require 'fluent/plugin_helper'
-
  module Fluent
-   module Test
-     module Driver
-       class Owner < Fluent::Plugin::Base
-         include PluginId
-         include PluginLoggerMixin
-         include PluginHelper::Mixin
+   module Compat
+     module StructuredFormatMixin
+       def format(tag, time, record)
+         filter_record(tag, time, record)
+         format_record(record)
       end
     end
   end
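The mixin only sequences filter_record and format_record; the including formatter supplies format_record (filter_record is expected from the compat tag/time handling, also added in this release). A hedged sketch with a hypothetical formatter, stubbing filter_record to keep the example self-contained:

    require 'json'
    require 'fluent/compat/structured_format_mixin'

    class MyJSONFormatter
      include Fluent::Compat::StructuredFormatMixin   # provides #format(tag, time, record)

      def filter_record(tag, time, record); end       # no-op stub for this sketch

      def format_record(record)
        record.to_json + "\n"
      end
    end

    MyJSONFormatter.new.format("app.log", Time.now.to_i, {"k" => "v"})   # => "{\"k\":\"v\"}\n"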
data/lib/fluent/config/element.rb
@@ -140,7 +140,7 @@ module Fluent
        out << "#{indent}<#{@name} #{@arg}>\n"
      end
      each_pair { |k, v|
-       out << dump_value(k, v, indent, nindent)
+       out << dump_value(k, v, nindent)
      }
      @elements.each { |e|
        out << e.to_s(nest + 1)
@@ -186,7 +186,7 @@ module Fluent
        opts[:type]
      end
 
-    def dump_value(k, v, indent, nindent)
+    def dump_value(k, v, nindent)
      if secret_param?(k)
        "#{nindent}#{k} xxxxxx\n"
      else
data/lib/fluent/configurable.rb
@@ -144,9 +144,10 @@ module Fluent
      end
 
      def config_section(name, **kwargs, &block)
+       section_already_exists = !!merged_configure_proxy.sections[name]
        configure_proxy(self.name).config_section(name, **kwargs, &block)
        variable_name = configure_proxy(self.name).sections[name].variable_name
-       unless self.respond_to?(variable_name)
+       if !section_already_exists && !self.respond_to?(variable_name)
          attr_accessor variable_name
        end
      end
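The new guard matters when a section is declared more than once in a class hierarchy: only the first declaration should define the accessor. A hedged sketch of the DSL shape this protects (the plugin class and parameter names are hypothetical):

    require 'fluent/plugin/output'

    class MyOutput < Fluent::Plugin::Output
      config_section :buffer do
        config_param :flush_interval, :time, default: 60
      end

      # Re-declaring the section (e.g. to add parameters) must not
      # redefine the section accessor a second time.
      config_section :buffer do
        config_param :chunk_keys, :array, default: []
      end
    end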
data/lib/fluent/event.rb
@@ -44,6 +44,10 @@ module Fluent
      false
    end
 
+   def slice(index, num)
+     raise NotImplementedError, "DO NOT USE THIS CLASS directly."
+   end
+
    def each(&block)
      raise NotImplementedError, "DO NOT USE THIS CLASS directly."
    end
@@ -124,6 +128,10 @@ module Fluent
      @entries.empty?
    end
 
+   def slice(index, num)
+     ArrayEventStream.new(@entries.slice(index, num))
+   end
+
    def each(&block)
      @entries.each(&block)
      nil
@@ -167,6 +175,10 @@ module Fluent
      @time_array.empty?
    end
 
+   def slice(index, num)
+     MultiEventStream.new(@time_array.slice(index, num), @record_array.slice(index, num))
+   end
+
    def each(&block)
      time_array = @time_array
      record_array = @record_array
@@ -178,23 +190,32 @@ module Fluent
    end
 
  class MessagePackEventStream < EventStream
-   # Keep cached_unpacker argument for existence plugins
-   def initialize(data, cached_unpacker = nil, size = 0)
+   # https://github.com/msgpack/msgpack-ruby/issues/119
+
+   # Keep cached_unpacker argument for existing plugins
+   def initialize(data, cached_unpacker = nil, size = 0, unpacked_times: nil, unpacked_records: nil)
      @data = data
      @size = size
+     @unpacked_times = unpacked_times
+     @unpacked_records = unpacked_records
    end
 
    def empty?
-     # This is not correct, but actual number of records will be shown after iteration, and
-     # "size" argument is always 0 currently (because forward protocol doesn't tell it to destination)
-     false
+     @data.empty?
    end
 
    def dup
-     MessagePackEventStream.new(@data.dup, @size)
+     if @unpacked_times
+       MessagePackEventStream.new(@data.dup, nil, @size, unpacked_times: @unpacked_times, unpacked_records: @unpacked_records.map(&:dup))
+     else
+       MessagePackEventStream.new(@data.dup, nil, @size)
+     end
    end
 
    def size
+     # @size is unbelievable always when @size == 0
+     # If the number of events is really zero, unpacking events takes very short time.
+     ensure_unpacked! if @size == 0
      @size
    end
 
@@ -202,8 +223,41 @@ module Fluent
      true
    end
 
+   def ensure_unpacked!
+     return if @unpacked_times && @unpacked_records
+     @unpacked_times = []
+     @unpacked_records = []
+     msgpack_unpacker.feed_each(@data) do |time, record|
+       @unpacked_times << time
+       @unpacked_records << record
+     end
+     # @size should be updated always right after unpack.
+     # The real size of unpacked objects are correct, rather than given size.
+     @size = @unpacked_times.size
+   end
+
+   # This method returns MultiEventStream, because there are no reason
+   # to surve binary serialized by msgpack.
+   def slice(index, num)
+     ensure_unpacked!
+     MultiEventStream.new(@unpacked_times.slice(index, num), @unpacked_records.slice(index, num))
+   end
+
    def each(&block)
-     msgpack_unpacker.feed_each(@data, &block)
+     if @unpacked_times
+       @unpacked_times.each_with_index do |time, i|
+         block.call(time, @unpacked_records[i])
+       end
+     else
+       @unpacked_times = []
+       @unpacked_records = []
+       msgpack_unpacker.feed_each(@data) do |time, record|
+         @unpacked_times << time
+         @unpacked_records << record
+         block.call(time, record)
+       end
+       @size = @unpacked_times.size
+     end
      nil
    end
 
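The new slice contract is what lets the buffer split oversized event streams (see the buffer.rb changes below) without re-serializing msgpack payloads more than once. A hedged usage sketch; the records are made up:

    # MultiEventStream#slice returns a new stream over the selected range.
    es = Fluent::MultiEventStream.new(
      [Fluent::EventTime.now, Fluent::EventTime.now],
      [{"seq" => 1}, {"seq" => 2}]
    )
    half = es.slice(0, 1)                   # => MultiEventStream with one event
    half.each { |time, record| p record }   # prints {"seq"=>1}

    # For MessagePackEventStream, slice (and size, when the given size was 0)
    # unpacks the payload once, caches it, and returns a MultiEventStream.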
data/lib/fluent/event_router.rb
@@ -159,7 +159,7 @@ module Fluent
      pipeline = nil
      @match_rules.each_with_index { |rule, i|
        if rule.match?(tag)
-         if rule.collector.is_a?(Filter)
+         if rule.collector.is_a?(Plugin::Filter)
            pipeline ||= Pipeline.new
            pipeline.add_filter(rule.collector)
          else
data/lib/fluent/plugin.rb
@@ -26,15 +26,15 @@ module Fluent
    # ex: storage, buffer chunk, ...
 
    # first class plugins (instantiated by Engine)
-   INPUT_REGISTRY = Registry.new(:input, 'fluent/plugin/in_')
-   OUTPUT_REGISTRY = Registry.new(:output, 'fluent/plugin/out_')
-   FILTER_REGISTRY = Registry.new(:filter, 'fluent/plugin/filter_')
+   INPUT_REGISTRY = Registry.new(:input, 'fluent/plugin/in_', dir_search_prefix: 'in_')
+   OUTPUT_REGISTRY = Registry.new(:output, 'fluent/plugin/out_', dir_search_prefix: 'out_')
+   FILTER_REGISTRY = Registry.new(:filter, 'fluent/plugin/filter_', dir_search_prefix: 'filter_')
 
    # feature plugin: second class plugins (instanciated by Plugins or Helpers)
-   BUFFER_REGISTRY = Registry.new(:buffer, 'fluent/plugin/buf_')
-   PARSER_REGISTRY = Registry.new(:parser, 'fluent/plugin/parser_')
-   FORMATTER_REGISTRY = Registry.new(:formatter, 'fluent/plugin/formatter_')
-   STORAGE_REGISTRY = Registry.new(:storage, 'fluent/plugin/storage_')
+   BUFFER_REGISTRY = Registry.new(:buffer, 'fluent/plugin/buf_', dir_search_prefix: 'buf_')
+   PARSER_REGISTRY = Registry.new(:parser, 'fluent/plugin/parser_', dir_search_prefix: 'parser_')
+   FORMATTER_REGISTRY = Registry.new(:formatter, 'fluent/plugin/formatter_', dir_search_prefix: 'formatter_')
+   STORAGE_REGISTRY = Registry.new(:storage, 'fluent/plugin/storage_', dir_search_prefix: 'storage_')
 
    REGISTRIES = [INPUT_REGISTRY, OUTPUT_REGISTRY, FILTER_REGISTRY, BUFFER_REGISTRY, PARSER_REGISTRY, FORMATTER_REGISTRY, STORAGE_REGISTRY]
 
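A hedged reading of dir_search_prefix (it pairs with the registry.rb changes, +19/-6, in this release): plugin files placed in directories passed via -p/--plugin can now be found by their conventional file-name prefix, not only via require-path lookup. Illustration with hypothetical paths:

    # <source>
    #   @type myproto
    # </source>
    # can be satisfied by /etc/fluent/plugin/in_myproto.rb when started as:
    #   fluentd -c fluent.conf -p /etc/fluent/plugin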
data/lib/fluent/plugin/buf_file.rb
@@ -122,14 +122,17 @@
      m = new_metadata() # this metadata will be overwritten by resuming .meta file content
      # so it should not added into @metadata_list for now
      mode = Fluent::Plugin::Buffer::FileChunk.assume_chunk_state(path)
+     if mode == :unknown
+       log.debug "uknown state chunk found", path: path
+       next
+     end
+
      chunk = Fluent::Plugin::Buffer::FileChunk.new(m, path, mode) # file chunk resumes contents of metadata
      case chunk.state
      when :staged
        stage[chunk.metadata] = chunk
      when :queued
        queue << chunk
-     else
-       raise "BUG: unexpected chunk state '#{chunk.state}' for path '#{path}'"
      end
    end
 
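Previously, resume raised for buffer files whose names did not parse to a known state; now they are skipped with a debug log. A hedged illustration of assume_chunk_state (file names are made up; in the file buffer naming convention, "b" marks staged chunks and "q" queued ones):

    FileChunk = Fluent::Plugin::Buffer::FileChunk
    FileChunk.assume_chunk_state("/var/log/fluent/buffer.b513b9.log")   # => :staged
    FileChunk.assume_chunk_state("/var/log/fluent/buffer.q513b9.log")   # => :queued
    FileChunk.assume_chunk_state("/var/log/fluent/buffer.stray.log")    # => :unknown, now skipped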
data/lib/fluent/plugin/buffer.rb
@@ -104,11 +104,13 @@ module Fluent
        @queued_num[chunk.metadata] += 1
        @queue_size += chunk.bytesize
      end
+     log.debug "buffer started", instance: self.object_id, stage_size: @stage_size, queue_size: @queue_size
    end
 
    def close
      super
      synchronize do
+       log.debug "closing buffer", instance: self.object_id
        @dequeued.each_pair do |chunk_id, chunk|
          chunk.close
        end
@@ -156,6 +158,7 @@ module Fluent
    end
 
    def add_metadata(metadata)
+     log.trace "adding metadata", instance: self.object_id, metadata: metadata
      synchronize do
        if i = @metadata_list.index(metadata)
          @metadata_list[i]
172
175
  end
173
176
 
174
177
  # metadata MUST have consistent object_id for each variation
175
- # data MUST be Array of serialized events
178
+ # data MUST be Array of serialized events, or EventStream
176
179
  # metadata_and_data MUST be a hash of { metadata => data }
177
- def write(metadata_and_data, bulk: false, enqueue: false)
180
+ def write(metadata_and_data, format: nil, size: nil, enqueue: false)
178
181
  return if metadata_and_data.size < 1
179
182
  raise BufferOverflowError, "buffer space has too many data" unless storable?
180
183
 
184
+ log.trace "writing events into buffer", instance: self.object_id, metadata_size: metadata_and_data.size
185
+
181
186
  staged_bytesize = 0
182
187
  operated_chunks = []
188
+ unstaged_chunks = {} # metadata => [chunk, chunk, ...]
189
+ chunks_to_enqueue = []
183
190
 
184
191
  begin
185
192
  metadata_and_data.each do |metadata, data|
186
- write_once(metadata, data, bulk: bulk) do |chunk, adding_bytesize|
193
+ write_once(metadata, data, format: format, size: size) do |chunk, adding_bytesize|
187
194
  chunk.mon_enter # add lock to prevent to be committed/rollbacked from other threads
188
195
  operated_chunks << chunk
189
- staged_bytesize += adding_bytesize
196
+ if chunk.staged?
197
+ staged_bytesize += adding_bytesize
198
+ elsif chunk.unstaged?
199
+ unstaged_chunks[metadata] ||= []
200
+ unstaged_chunks[metadata] << chunk
201
+ end
190
202
  end
191
203
  end
192
204
 
193
205
  return if operated_chunks.empty?
194
206
 
207
+ # Now, this thread acquires many locks of chunks... getting buffer-global lock causes dead lock.
208
+ # Any operations needs buffer-global lock (including enqueueing) should be done after releasing locks.
209
+
195
210
  first_chunk = operated_chunks.shift
196
211
  # Following commits for other chunks also can finish successfully if the first commit operation
197
212
  # finishes without any exceptions.
@@ -199,7 +214,9 @@ module Fluent
199
214
  # permission errors, disk failures and other permanent(fatal) errors.
200
215
  begin
201
216
  first_chunk.commit
202
- enqueue_chunk(first_chunk.metadata) if enqueue || chunk_size_full?(first_chunk)
217
+ if enqueue || first_chunk.unstaged? || chunk_size_full?(first_chunk)
218
+ chunks_to_enqueue << first_chunk
219
+ end
203
220
  first_chunk.mon_exit
204
221
  rescue
205
222
  operated_chunks.unshift(first_chunk)
@@ -211,7 +228,9 @@ module Fluent
211
228
  operated_chunks.each do |chunk|
212
229
  begin
213
230
  chunk.commit
214
- enqueue_chunk(chunk.metadata) if enqueue || chunk_size_full?(chunk)
231
+ if enqueue || chunk.unstaged? || chunk_size_full?(chunk)
232
+ chunks_to_enqueue << chunk
233
+ end
215
234
  chunk.mon_exit
216
235
  rescue => e
217
236
  chunk.rollback
@@ -219,9 +238,34 @@ module Fluent
219
238
  errors << e
220
239
  end
221
240
  end
222
- operated_chunks.clear if errors.empty?
223
241
 
224
- @stage_size += staged_bytesize
242
+ # All locks about chunks are released.
243
+
244
+ synchronize do
245
+ # At here, staged chunks may be enqueued by other threads.
246
+ @stage_size += staged_bytesize
247
+
248
+ chunks_to_enqueue.each do |c|
249
+ if c.staged? && (enqueue || chunk_size_full?(c))
250
+ m = c.metadata
251
+ enqueue_chunk(m)
252
+ if unstaged_chunks[m]
253
+ u = unstaged_chunks[m].pop
254
+ if u.unstaged? && !chunk_size_full?(u)
255
+ @stage[m] = u.staged!
256
+ @stage_size += u.bytesize
257
+ end
258
+ end
259
+ elsif c.unstaged?
260
+ enqueue_unstaged_chunk(c)
261
+ else
262
+ # previously staged chunk is already enqueued, closed or purged.
263
+ # no problem.
264
+ end
265
+ end
266
+ end
267
+
268
+ operated_chunks.clear if errors.empty?
225
269
 
226
270
  if errors.size > 0
227
271
  log.warn "error occurs in committing chunks: only first one raised", errors: errors.map(&:class)
@@ -230,6 +274,9 @@ module Fluent
230
274
  ensure
231
275
  operated_chunks.each do |chunk|
232
276
  chunk.rollback rescue nil # nothing possible to do for #rollback failure
277
+ if chunk.unstaged?
278
+ chunk.purge rescue nil # to prevent leakage of unstaged chunks
279
+ end
233
280
  chunk.mon_exit rescue nil # this may raise ThreadError for chunks already committed
234
281
  end
235
282
  end
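Taken together, write() now commits chunks while holding only per-chunk locks, then takes the buffer-global lock once to enqueue. A hedged sketch of the new call shape (the metadata object and buffer instance are assumed to already exist; the record is made up):

    es = Fluent::ArrayEventStream.new([[Fluent::EventTime.now, {"k" => "v"}]])

    # format serializes a whole (possibly sliced) stream at once;
    # size reports how many events the serialized blob contains.
    buffer.write(
      { metadata => es },
      format: ->(data) { data.to_msgpack_stream },
      size:   ->() { es.size },
      enqueue: false
    )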
data/lib/fluent/plugin/buffer.rb (continued)
@@ -251,6 +298,7 @@
    end
 
    def enqueue_chunk(metadata)
+     log.debug "enqueueing chunk", instance: self.object_id, metadata: metadata
      synchronize do
        chunk = @stage.delete(metadata)
        return nil unless chunk
@@ -271,7 +319,21 @@
      nil
    end
 
+   def enqueue_unstaged_chunk(chunk)
+     log.debug "enqueueing unstaged chunk", instance: self.object_id, metadata: chunk.metadata
+     synchronize do
+       chunk.synchronize do
+         metadata = chunk.metadata
+         @queue << chunk
+         @queued_num[metadata] = @queued_num.fetch(metadata, 0) + 1
+         chunk.enqueued! if chunk.respond_to?(:enqueued!)
+       end
+       @queue_size += chunk.bytesize
+     end
+   end
+
    def enqueue_all
+     log.debug "enqueueing all chunks in buffer", instance: self.object_id
      synchronize do
        if block_given?
          @stage.keys.each do |metadata|
@@ -289,6 +351,7 @@
 
    def dequeue_chunk
      return nil if @queue.empty?
+     log.debug "dequeueing a chunk", instance: self.object_id
      synchronize do
        chunk = @queue.shift
 
@@ -297,15 +360,18 @@
 
        @dequeued[chunk.unique_id] = chunk
        @queued_num[chunk.metadata] -= 1 # BUG if nil, 0 or subzero
+       log.debug "chunk dequeued", instance: self.object_id, metadata: chunk.metadata
        chunk
      end
    end
 
    def takeback_chunk(chunk_id)
+     log.debug "taking back a chunk", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id)
      synchronize do
        chunk = @dequeued.delete(chunk_id)
        return false unless chunk # already purged by other thread
        @queue.unshift(chunk)
+       log.debug "chunk taken back", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: chunk.metadata
        @queued_num[chunk.metadata] += 1 # BUG if nil
      end
      true
@@ -317,22 +383,26 @@
      return nil unless chunk # purged by other threads
 
      metadata = chunk.metadata
+     log.debug "purging a chunk", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata
      begin
        bytesize = chunk.bytesize
        chunk.purge
        @queue_size -= bytesize
      rescue => e
        log.error "failed to purge buffer chunk", chunk_id: dump_unique_id_hex(chunk_id), error_class: e.class, error: e
+       log.error_backtrace
      end
 
      if metadata && !@stage[metadata] && (!@queued_num[metadata] || @queued_num[metadata] < 1)
        @metadata_list.delete(metadata)
      end
+     log.debug "chunk purged", instance: self.object_id, chunk_id: dump_unique_id_hex(chunk_id), metadata: metadata
    end
    nil
  end
 
  def clear_queue!
+   log.debug "clearing queue", instance: self.object_id
    synchronize do
      until @queue.empty?
        begin
@@ -341,6 +411,7 @@
          q.purge
        rescue => e
          log.error "unexpected error while clearing buffer queue", error_class: e.class, error: e
+         log.error_backtrace
        end
      end
      @queue_size = 0
@@ -357,16 +428,20 @@
 
    class ShouldRetry < StandardError; end
 
-   def write_once(metadata, data, bulk: false, &block)
-     return if !bulk && (data.nil? || data.empty?)
-     return if bulk && (data.empty? || data.first.nil? || data.first.empty?)
+   # write once into a chunk
+   # 1. append whole data into existing chunk
+   # 2. commit it & return unless chunk_size_over?
+   # 3. enqueue existing chunk & retry whole method if chunk was not empty
+   # 4. go to step_by_step writing
+
+   def write_once(metadata, data, format: nil, size: nil, &block)
+     return if data.empty?
 
      stored = false
      adding_bytesize = nil
 
-     chunk = synchronize { @stage[metadata] ||= generate_chunk(metadata) }
-     enqueue_list = []
-
+     chunk = synchronize { @stage[metadata] ||= generate_chunk(metadata).staged! }
+     enqueue_chunk_before_retry = false
      chunk.synchronize do
        # retry this method if chunk is already queued (between getting chunk and entering critical section)
        raise ShouldRetry unless chunk.staged?
@@ -375,20 +450,27 @@
 
        original_bytesize = chunk.bytesize
       begin
-        if bulk
-          content, size = data
-          chunk.concat(content, size)
+        if format
+          serialized = format.call(data)
+          chunk.concat(serialized, size ? size.call : data.size)
         else
           chunk.append(data)
         end
         adding_bytesize = chunk.bytesize - original_bytesize
 
         if chunk_size_over?(chunk)
-          if empty_chunk && bulk
-            log.warn "chunk bytes limit exceeds for a bulk event stream: #{bulk.bytesize}bytes"
-            stored = true
-          else
-            chunk.rollback
+          if format && empty_chunk
+            log.warn "chunk bytes limit exceeds for an emitted event stream: #{adding_bytesize}bytes"
+          end
+          chunk.rollback
+
+          if format && !empty_chunk
+            # Event streams should be appended into a chunk at once
+            # as far as possible, to improve performance of formatting.
+            # Event stream may be a MessagePackEventStream. We don't want to split it into
+            # 2 or more chunks (except for a case that the event stream is larger than chunk limit).
+            enqueue_chunk_before_retry = true
+            raise ShouldRetry
          end
        else
          stored = true
@@ -400,74 +482,122 @@
       if stored
         block.call(chunk, adding_bytesize)
-      elsif bulk
-        # this metadata might be enqueued already by other threads
-        # but #enqueue_chunk does nothing in such case
-        enqueue_list << metadata
-        raise ShouldRetry
       end
     end
 
     unless stored
       # try step-by-step appending if data can't be stored into existing a chunk in non-bulk mode
-      write_step_by_step(metadata, data, data.size / 3, &block)
+      #
+      # 1/10 size of original event stream (splits_count == 10) seems enough small
+      # to try emitting events into existing chunk.
+      # it does not matter to split event stream into very small splits, because chunks have less
+      # overhead to write data many times (even about file buffer chunks).
+      write_step_by_step(metadata, data, format, 10, &block)
     end
   rescue ShouldRetry
-    enqueue_list.each do |m|
-      enqueue_chunk(m)
-    end
+    enqueue_chunk(metadata) if enqueue_chunk_before_retry
     retry
   end
 
-  def write_step_by_step(metadata, data, attempt_records, &block)
-    while data.size > 0
-      if attempt_records < MINIMUM_APPEND_ATTEMPT_RECORDS
-        attempt_records = MINIMUM_APPEND_ATTEMPT_RECORDS
-      end
+  # EventStream can be split into many streams
+  # because (es1 + es2).to_msgpack_stream == es1.to_msgpack_stream + es2.to_msgpack_stream
+
+  # 1. split event streams into many (10 -> 100 -> 1000 -> ...) chunks
+  # 2. append splits into the staged chunks as much as possible
+  # 3. create unstaged chunk and append rest splits -> repeat it for all splits
+
+  def write_step_by_step(metadata, data, format, splits_count, &block)
+    splits = []
+    if splits_count > data.size
+      splits_count = data.size
+    end
+    slice_size = if data.size % splits_count == 0
+                   data.size / splits_count
+                 else
+                   data.size / (splits_count - 1)
+                 end
+    slice_origin = 0
+    while slice_origin < data.size
+      splits << data.slice(slice_origin, slice_size)
+      slice_origin += slice_size
+    end
+
+    # This method will append events into the staged chunk at first.
+    # Then, will generate chunks not staged (not queued) to append rest data.
+    staged_chunk_used = false
+    modified_chunks = []
+    get_next_chunk = ->(){
+      c = if staged_chunk_used
+            # Staging new chunk here is bad idea:
+            # Recovering whole state including newly staged chunks is much harder than current implementation.
+            generate_chunk(metadata)
+          else
+            synchronize{ @stage[metadata] ||= generate_chunk(metadata).staged! }
+          end
+      modified_chunks << c
+      c
+    }
+
+    writing_splits_index = 0
+    enqueue_chunk_before_retry = false
+
+    while writing_splits_index < splits.size
+      chunk = get_next_chunk.call
+      chunk.synchronize do
+        raise ShouldRetry unless chunk.writable?
+        staged_chunk_used = true if chunk.staged?
 
-      chunk = synchronize{ @stage[metadata] ||= generate_chunk(metadata) }
-      chunk.synchronize do # critical section for chunk (chunk append/commit/rollback)
-        raise ShouldRetry unless chunk.staged?
+        original_bytesize = chunk.bytesize
        begin
-          empty_chunk = chunk.empty?
-          original_bytesize = chunk.bytesize
+          while writing_splits_index < splits.size
+            split = splits[writing_splits_index]
+            if format
+              chunk.concat(format.call(split), split.size)
+            else
+              chunk.append(split)
+            end
 
-          attempt = data.slice(0, attempt_records)
-          chunk.append(attempt)
-          adding_bytesize = (chunk.bytesize - original_bytesize)
+            if chunk_size_over?(chunk) # split size is larger than difference between size_full? and size_over?
+              chunk.rollback
 
-          if chunk_size_over?(chunk)
-            chunk.rollback
+              if split.size == 1 && original_bytesize == 0
+                big_record_size = format ? format.call(split).bytesize : split.first.bytesize
+                raise BufferChunkOverflowError, "a #{big_record_size}bytes record is larger than buffer chunk limit size"
+              end
 
-            if attempt_records <= MINIMUM_APPEND_ATTEMPT_RECORDS
-              if empty_chunk # record is too large even for empty chunk
-                raise BufferChunkOverflowError, "minimum append butch exceeds chunk bytes limit"
+              if chunk_size_full?(chunk) || split.size == 1
+                enqueue_chunk_before_retry = true
+              else
+                splits_count *= 10
              end
-              # no more records for this chunk -> enqueue -> to be flushed
-              enqueue_chunk(metadata) # `chunk` will be removed from stage
-              attempt_records = data.size # fresh chunk may have enough space
-            else
-              # whole data can be processed by twice operation
-              # ( by using apttempt /= 2, 3 operations required for odd numbers of data)
-              attempt_records = (attempt_records / 2) + 1
+
+              raise ShouldRetry
            end
 
-            next
-          end
+            writing_splits_index += 1
 
-          block.call(chunk, adding_bytesize)
-          data.slice!(0, attempt_records)
-          # same attempt size
-          nil # discard return value of data.slice!() immediately
+            if chunk_size_full?(chunk)
+              break
+            end
+          end
        rescue
-          chunk.rollback
+          chunk.purge if chunk.unstaged? # unstaged chunk will leak unless purge it
          raise
        end
+
+        block.call(chunk, chunk.bytesize - original_bytesize)
      end
    end
  rescue ShouldRetry
+    modified_chunks.each do |mc|
+      mc.rollback rescue nil
+      if mc.unstaged?
+        mc.purge rescue nil
+      end
+    end
+    enqueue_chunk(metadata) if enqueue_chunk_before_retry
    retry
-  end # write_step_by_step
+  end
  end
 end
 end
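A worked example of the split sizing above: for a 25-event stream with the default splits_count of 10, 25 % 10 is nonzero, so slice_size becomes 25 / 9 = 2 and the stream is cut into 13 splits of at most 2 events each. If a split still overflows a non-full chunk, splits_count grows tenfold and the whole method retries.

    # Illustrative arithmetic only; this mirrors the code above.
    data_size = 25
    splits_count = 10
    slice_size = (data_size % splits_count == 0) ? (data_size / splits_count) : (data_size / (splits_count - 1))
    # slice_size == 2; the 25 events yield 13 splits (12 of size 2, 1 of size 1)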