fluentd 0.10.35 → 0.10.36
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of fluentd might be problematic. Click here for more details.
- data/.travis.yml +13 -0
- data/ChangeLog +9 -0
- data/fluentd.gemspec +1 -1
- data/lib/fluent/buffer.rb +210 -214
- data/lib/fluent/command/fluentd.rb +4 -0
- data/lib/fluent/config.rb +1 -0
- data/lib/fluent/engine.rb +10 -10
- data/lib/fluent/output.rb +404 -406
- data/lib/fluent/plugin/buf_file.rb +146 -151
- data/lib/fluent/plugin/buf_memory.rb +62 -67
- data/lib/fluent/plugin/in_debug_agent.rb +27 -31
- data/lib/fluent/plugin/in_exec.rb +86 -90
- data/lib/fluent/plugin/in_forward.rb +171 -171
- data/lib/fluent/plugin/in_gc_stat.rb +43 -47
- data/lib/fluent/plugin/in_http.rb +214 -216
- data/lib/fluent/plugin/in_monitor_agent.rb +212 -214
- data/lib/fluent/plugin/in_object_space.rb +75 -79
- data/lib/fluent/plugin/in_status.rb +44 -50
- data/lib/fluent/plugin/in_stream.rb +159 -160
- data/lib/fluent/plugin/in_syslog.rb +149 -153
- data/lib/fluent/plugin/in_tail.rb +382 -387
- data/lib/fluent/plugin/out_copy.rb +40 -45
- data/lib/fluent/plugin/out_exec.rb +52 -57
- data/lib/fluent/plugin/out_exec_filter.rb +327 -331
- data/lib/fluent/plugin/out_file.rb +78 -74
- data/lib/fluent/plugin/out_forward.rb +410 -414
- data/lib/fluent/plugin/out_null.rb +15 -19
- data/lib/fluent/plugin/out_roundrobin.rb +63 -68
- data/lib/fluent/plugin/out_stdout.rb +9 -14
- data/lib/fluent/plugin/out_stream.rb +83 -90
- data/lib/fluent/plugin/out_test.rb +42 -46
- data/lib/fluent/supervisor.rb +15 -0
- data/lib/fluent/version.rb +1 -1
- data/test/plugin/in_stream.rb +2 -0
- data/test/plugin/out_file.rb +19 -1
- metadata +6 -5
@@ -16,96 +16,100 @@
|
|
16
16
|
# limitations under the License.
|
17
17
|
#
|
18
18
|
module Fluent
|
19
|
+
class FileOutput < TimeSlicedOutput
|
20
|
+
Plugin.register_output('file', self)
|
19
21
|
|
22
|
+
SUPPORTED_COMPRESS = {
|
23
|
+
'gz' => :gz,
|
24
|
+
'gzip' => :gz,
|
25
|
+
}
|
20
26
|
|
21
|
-
|
22
|
-
Plugin.register_output('file', self)
|
27
|
+
config_param :path, :string
|
23
28
|
|
24
|
-
|
25
|
-
'gz' => :gz,
|
26
|
-
'gzip' => :gz,
|
27
|
-
}
|
29
|
+
config_param :time_format, :string, :default => nil
|
28
30
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
unless c
|
36
|
-
raise ConfigError, "Unsupported compression algorithm '#{val}'"
|
31
|
+
config_param :compress, :default => nil do |val|
|
32
|
+
c = SUPPORTED_COMPRESS[val]
|
33
|
+
unless c
|
34
|
+
raise ConfigError, "Unsupported compression algorithm '#{val}'"
|
35
|
+
end
|
36
|
+
c
|
37
37
|
end
|
38
|
-
c
|
39
|
-
end
|
40
38
|
|
41
|
-
|
42
|
-
require 'zlib'
|
43
|
-
require 'time'
|
44
|
-
super
|
45
|
-
end
|
39
|
+
config_param :symlink_path, :string, :default => nil
|
46
40
|
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
unless @path
|
52
|
-
raise ConfigError, "'path' parameter is required on file output"
|
41
|
+
def initialize
|
42
|
+
require 'zlib'
|
43
|
+
require 'time'
|
44
|
+
super
|
53
45
|
end
|
54
46
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
47
|
+
def configure(conf)
|
48
|
+
if path = conf['path']
|
49
|
+
@path = path
|
50
|
+
end
|
51
|
+
unless @path
|
52
|
+
raise ConfigError, "'path' parameter is required on file output"
|
53
|
+
end
|
54
|
+
|
55
|
+
if pos = @path.index('*')
|
56
|
+
@path_prefix = @path[0,pos]
|
57
|
+
@path_suffix = @path[pos+1..-1]
|
58
|
+
conf['buffer_path'] ||= "#{@path}"
|
59
|
+
else
|
60
|
+
@path_prefix = @path+"."
|
61
|
+
@path_suffix = ".log"
|
62
|
+
conf['buffer_path'] ||= "#{@path}.*"
|
63
|
+
end
|
64
|
+
|
65
|
+
super
|
66
|
+
|
67
|
+
@timef = TimeFormatter.new(@time_format, @localtime)
|
63
68
|
end
|
64
69
|
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
def format(tag, time, record)
|
71
|
-
time_str = @timef.format(time)
|
72
|
-
"#{time_str}\t#{tag}\t#{Yajl.dump(record)}\n"
|
73
|
-
end
|
70
|
+
def format(tag, time, record)
|
71
|
+
time_str = @timef.format(time)
|
72
|
+
"#{time_str}\t#{tag}\t#{Yajl.dump(record)}\n"
|
73
|
+
end
|
74
74
|
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
75
|
+
def write(chunk)
|
76
|
+
case @compress
|
77
|
+
when nil
|
78
|
+
suffix = ''
|
79
|
+
when :gz
|
80
|
+
suffix = ".gz"
|
81
|
+
end
|
82
|
+
|
83
|
+
i = 0
|
84
|
+
begin
|
85
|
+
path = "#{@path_prefix}#{chunk.key}_#{i}#{@path_suffix}#{suffix}"
|
86
|
+
i += 1
|
87
|
+
end while File.exist?(path)
|
88
|
+
FileUtils.mkdir_p File.dirname(path)
|
89
|
+
|
90
|
+
case @compress
|
91
|
+
when nil
|
92
|
+
File.open(path, "a", DEFAULT_FILE_PERMISSION) {|f|
|
93
|
+
chunk.write_to(f)
|
94
|
+
}
|
95
|
+
when :gz
|
96
|
+
Zlib::GzipWriter.open(path) {|f|
|
97
|
+
chunk.write_to(f)
|
98
|
+
}
|
99
|
+
end
|
100
|
+
create_symlink(path, suffix) if @symlink_path
|
101
|
+
|
102
|
+
return path # for test
|
81
103
|
end
|
82
104
|
|
83
|
-
|
84
|
-
|
85
|
-
path = "#{@path_prefix}#{chunk.key}_#{i}#{@path_suffix}#{suffix}"
|
86
|
-
i += 1
|
87
|
-
end while File.exist?(path)
|
88
|
-
FileUtils.mkdir_p File.dirname(path)
|
89
|
-
|
90
|
-
case @compress
|
91
|
-
when nil
|
92
|
-
File.open(path, "a", DEFAULT_FILE_PERMISSION) {|f|
|
93
|
-
chunk.write_to(f)
|
94
|
-
}
|
95
|
-
when :gz
|
96
|
-
Zlib::GzipWriter.open(path) {|f|
|
97
|
-
chunk.write_to(f)
|
98
|
-
}
|
105
|
+
def secondary_init(primary)
|
106
|
+
# don't warn even if primary.class is not FileOutput
|
99
107
|
end
|
100
108
|
|
101
|
-
|
102
|
-
end
|
109
|
+
private
|
103
110
|
|
104
|
-
|
105
|
-
|
111
|
+
def create_symlink(path, suffix)
|
112
|
+
FileUtils.ln_sf(path, "#{@symlink_path}#{suffix}")
|
113
|
+
end
|
106
114
|
end
|
107
115
|
end
|
108
|
-
|
109
|
-
|
110
|
-
end
|
111
|
-
|
@@ -16,514 +16,510 @@
|
|
16
16
|
# limitations under the License.
|
17
17
|
#
|
18
18
|
module Fluent
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
require 'fluent/plugin/socket_util'
|
29
|
-
@nodes = [] #=> [Node]
|
30
|
-
end
|
31
|
-
|
32
|
-
config_param :send_timeout, :time, :default => 60
|
33
|
-
config_param :heartbeat_type, :default => :udp do |val|
|
34
|
-
case val.downcase
|
35
|
-
when 'tcp'
|
36
|
-
:tcp
|
37
|
-
when 'udp'
|
38
|
-
:udp
|
39
|
-
else
|
40
|
-
raise ConfigError, "forward output heartbeat type should be 'tcp' or 'udp'"
|
19
|
+
class ForwardOutput < ObjectBufferedOutput
|
20
|
+
Plugin.register_output('forward', self)
|
21
|
+
|
22
|
+
def initialize
|
23
|
+
super
|
24
|
+
require 'socket'
|
25
|
+
require 'fileutils'
|
26
|
+
require 'fluent/plugin/socket_util'
|
27
|
+
@nodes = [] #=> [Node]
|
41
28
|
end
|
42
|
-
end
|
43
|
-
config_param :heartbeat_interval, :time, :default => 1
|
44
|
-
config_param :recover_wait, :time, :default => 10
|
45
|
-
config_param :hard_timeout, :time, :default => 60
|
46
|
-
config_param :expire_dns_cache, :time, :default => nil # 0 means disable cache
|
47
|
-
config_param :phi_threshold, :integer, :default => 16
|
48
|
-
attr_reader :nodes
|
49
29
|
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
30
|
+
config_param :send_timeout, :time, :default => 60
|
31
|
+
config_param :heartbeat_type, :default => :udp do |val|
|
32
|
+
case val.downcase
|
33
|
+
when 'tcp'
|
34
|
+
:tcp
|
35
|
+
when 'udp'
|
36
|
+
:udp
|
37
|
+
else
|
38
|
+
raise ConfigError, "forward output heartbeat type should be 'tcp' or 'udp'"
|
39
|
+
end
|
40
|
+
end
|
41
|
+
config_param :heartbeat_interval, :time, :default => 1
|
42
|
+
config_param :recover_wait, :time, :default => 10
|
43
|
+
config_param :hard_timeout, :time, :default => 60
|
44
|
+
config_param :expire_dns_cache, :time, :default => nil # 0 means disable cache
|
45
|
+
config_param :phi_threshold, :integer, :default => 16
|
46
|
+
attr_reader :nodes
|
56
47
|
|
57
48
|
# backward compatibility
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
49
|
+
config_param :port, :integer, :default => DEFAULT_LISTEN_PORT
|
50
|
+
config_param :host, :string, :default => nil
|
51
|
+
|
52
|
+
def configure(conf)
|
53
|
+
super
|
54
|
+
|
55
|
+
# backward compatibility
|
56
|
+
if host = conf['host']
|
57
|
+
$log.warn "'host' option in forward output is obsoleted. Use '<server> host xxx </server>' instead."
|
58
|
+
port = conf['port']
|
59
|
+
port = port ? port.to_i : DEFAULT_LISTEN_PORT
|
60
|
+
e = conf.add_element('server')
|
61
|
+
e['host'] = host
|
62
|
+
e['port'] = port.to_s
|
63
|
+
end
|
66
64
|
|
67
|
-
|
65
|
+
recover_sample_size = @recover_wait / @heartbeat_interval
|
68
66
|
|
69
|
-
|
70
|
-
|
67
|
+
conf.elements.each {|e|
|
68
|
+
next if e.name != "server"
|
71
69
|
|
72
|
-
|
73
|
-
|
74
|
-
|
70
|
+
host = e['host']
|
71
|
+
port = e['port']
|
72
|
+
port = port ? port.to_i : DEFAULT_LISTEN_PORT
|
75
73
|
|
76
|
-
|
77
|
-
|
74
|
+
weight = e['weight']
|
75
|
+
weight = weight ? weight.to_i : 60
|
78
76
|
|
79
|
-
|
77
|
+
standby = !!e['standby']
|
80
78
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
79
|
+
name = e['name']
|
80
|
+
unless name
|
81
|
+
name = "#{host}:#{port}"
|
82
|
+
end
|
85
83
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
84
|
+
failure = FailureDetector.new(@heartbeat_interval, @hard_timeout, Time.now.to_i.to_f)
|
85
|
+
@nodes << Node.new(name, host, port, weight, standby, failure,
|
86
|
+
@phi_threshold, recover_sample_size, @expire_dns_cache)
|
87
|
+
$log.info "adding forwarding server '#{name}'", :host=>host, :port=>port, :weight=>weight
|
88
|
+
}
|
89
|
+
end
|
92
90
|
|
93
|
-
|
94
|
-
|
91
|
+
def start
|
92
|
+
super
|
95
93
|
|
96
|
-
|
97
|
-
|
98
|
-
|
94
|
+
@rand_seed = Random.new.seed
|
95
|
+
rebuild_weight_array
|
96
|
+
@rr = 0
|
99
97
|
|
100
|
-
|
98
|
+
@loop = Coolio::Loop.new
|
101
99
|
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
100
|
+
if @heartbeat_type == :udp
|
101
|
+
# assuming all hosts use udp
|
102
|
+
@usock = SocketUtil.create_udp_socket(@nodes.first.host)
|
103
|
+
@usock.fcntl(Fcntl::F_SETFL, Fcntl::O_NONBLOCK)
|
104
|
+
@hb = HeartbeatHandler.new(@usock, method(:on_heartbeat))
|
105
|
+
@loop.attach(@hb)
|
106
|
+
end
|
109
107
|
|
110
|
-
|
111
|
-
|
108
|
+
@timer = HeartbeatRequestTimer.new(@heartbeat_interval, method(:on_timer))
|
109
|
+
@loop.attach(@timer)
|
112
110
|
|
113
|
-
|
114
|
-
|
111
|
+
@thread = Thread.new(&method(:run))
|
112
|
+
end
|
115
113
|
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
114
|
+
def shutdown
|
115
|
+
@finished = true
|
116
|
+
@loop.watchers.each {|w| w.detach }
|
117
|
+
@loop.stop
|
118
|
+
@thread.join
|
119
|
+
@usock.close if @usock
|
120
|
+
end
|
123
121
|
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
122
|
+
def run
|
123
|
+
@loop.run
|
124
|
+
rescue
|
125
|
+
$log.error "unexpected error", :error=>$!.to_s
|
126
|
+
$log.error_backtrace
|
127
|
+
end
|
130
128
|
|
131
|
-
|
132
|
-
|
129
|
+
def write_objects(tag, chunk)
|
130
|
+
return if chunk.empty?
|
133
131
|
|
134
|
-
|
132
|
+
error = nil
|
135
133
|
|
136
|
-
|
137
|
-
|
138
|
-
|
139
|
-
|
134
|
+
wlen = @weight_array.length
|
135
|
+
wlen.times do
|
136
|
+
@rr = (@rr + 1) % wlen
|
137
|
+
node = @weight_array[@rr]
|
140
138
|
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
139
|
+
if node.available?
|
140
|
+
begin
|
141
|
+
send_data(node, tag, chunk)
|
142
|
+
return
|
143
|
+
rescue
|
144
|
+
# for load balancing during detecting crashed servers
|
145
|
+
error = $! # use the latest error
|
146
|
+
end
|
148
147
|
end
|
149
148
|
end
|
150
|
-
end
|
151
149
|
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
150
|
+
if error
|
151
|
+
raise error
|
152
|
+
else
|
153
|
+
raise "no nodes are available" # TODO message
|
154
|
+
end
|
156
155
|
end
|
157
|
-
end
|
158
156
|
|
159
|
-
|
157
|
+
private
|
160
158
|
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
159
|
+
def rebuild_weight_array
|
160
|
+
standby_nodes, regular_nodes = @nodes.partition {|n|
|
161
|
+
n.standby?
|
162
|
+
}
|
165
163
|
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
end
|
171
|
-
}
|
172
|
-
$log.debug "rebuilding weight array", :lost_weight=>lost_weight
|
173
|
-
|
174
|
-
if lost_weight > 0
|
175
|
-
standby_nodes.each {|n|
|
176
|
-
if n.available?
|
177
|
-
regular_nodes << n
|
178
|
-
$log.info "using standby node #{n.host}:#{n.port}", :weight=>n.weight
|
179
|
-
lost_weight -= n.weight
|
180
|
-
break if lost_weight <= 0
|
164
|
+
lost_weight = 0
|
165
|
+
regular_nodes.each {|n|
|
166
|
+
unless n.available?
|
167
|
+
lost_weight += n.weight
|
181
168
|
end
|
182
169
|
}
|
183
|
-
|
170
|
+
$log.debug "rebuilding weight array", :lost_weight=>lost_weight
|
171
|
+
|
172
|
+
if lost_weight > 0
|
173
|
+
standby_nodes.each {|n|
|
174
|
+
if n.available?
|
175
|
+
regular_nodes << n
|
176
|
+
$log.warn "using standby node #{n.host}:#{n.port}", :weight=>n.weight
|
177
|
+
lost_weight -= n.weight
|
178
|
+
break if lost_weight <= 0
|
179
|
+
end
|
180
|
+
}
|
181
|
+
end
|
184
182
|
|
185
|
-
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
183
|
+
weight_array = []
|
184
|
+
gcd = regular_nodes.map {|n| n.weight }.inject(0) {|r,w| r.gcd(w) }
|
185
|
+
regular_nodes.each {|n|
|
186
|
+
(n.weight / gcd).times {
|
187
|
+
weight_array << n
|
188
|
+
}
|
190
189
|
}
|
191
|
-
}
|
192
190
|
|
193
|
-
|
194
|
-
|
195
|
-
|
191
|
+
# for load balancing during detecting crashed servers
|
192
|
+
coe = (regular_nodes.size * 6) / weight_array.size
|
193
|
+
weight_array *= coe if coe > 1
|
196
194
|
|
197
|
-
|
198
|
-
|
195
|
+
r = Random.new(@rand_seed)
|
196
|
+
weight_array.sort_by! { r.rand }
|
199
197
|
|
200
|
-
|
201
|
-
|
198
|
+
@weight_array = weight_array
|
199
|
+
end
|
200
|
+
|
201
|
+
# MessagePack FixArray length = 2
|
202
|
+
FORWARD_HEADER = [0x92].pack('C')
|
202
203
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
|
214
|
-
|
215
|
-
|
216
|
-
|
217
|
-
ensure
|
218
|
-
sock.close
|
204
|
+
#FORWARD_TCP_HEARTBEAT_DATA = FORWARD_HEADER + ''.to_msgpack + [].to_msgpack
|
205
|
+
def send_heartbeat_tcp(node)
|
206
|
+
sock = connect(node)
|
207
|
+
begin
|
208
|
+
opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
209
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
210
|
+
opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
211
|
+
# don't send any data to not cause a compatibility problem
|
212
|
+
#sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
213
|
+
#sock.write FORWARD_TCP_HEARTBEAT_DATA
|
214
|
+
node.heartbeat(true)
|
215
|
+
ensure
|
216
|
+
sock.close
|
217
|
+
end
|
219
218
|
end
|
220
|
-
end
|
221
219
|
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
|
239
|
-
|
240
|
-
|
241
|
-
|
242
|
-
|
243
|
-
|
244
|
-
|
245
|
-
|
220
|
+
def send_data(node, tag, chunk)
|
221
|
+
sock = connect(node)
|
222
|
+
begin
|
223
|
+
opt = [1, @send_timeout.to_i].pack('I!I!') # { int l_onoff; int l_linger; }
|
224
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_LINGER, opt)
|
225
|
+
|
226
|
+
opt = [@send_timeout.to_i, 0].pack('L!L!') # struct timeval
|
227
|
+
sock.setsockopt(Socket::SOL_SOCKET, Socket::SO_SNDTIMEO, opt)
|
228
|
+
|
229
|
+
# beginArray(2)
|
230
|
+
sock.write FORWARD_HEADER
|
231
|
+
|
232
|
+
# writeRaw(tag)
|
233
|
+
sock.write tag.to_msgpack # tag
|
234
|
+
|
235
|
+
# beginRaw(size)
|
236
|
+
sz = chunk.size
|
237
|
+
#if sz < 32
|
238
|
+
# # FixRaw
|
239
|
+
# sock.write [0xa0 | sz].pack('C')
|
240
|
+
#elsif sz < 65536
|
241
|
+
# # raw 16
|
242
|
+
# sock.write [0xda, sz].pack('Cn')
|
243
|
+
#else
|
246
244
|
# raw 32
|
247
245
|
sock.write [0xdb, sz].pack('CN')
|
248
|
-
|
246
|
+
#end
|
249
247
|
|
250
|
-
|
251
|
-
|
248
|
+
# writeRawBody(packed_es)
|
249
|
+
chunk.write_to(sock)
|
252
250
|
|
253
|
-
|
254
|
-
|
255
|
-
|
251
|
+
node.heartbeat(false)
|
252
|
+
ensure
|
253
|
+
sock.close
|
254
|
+
end
|
256
255
|
end
|
257
|
-
end
|
258
256
|
|
259
|
-
|
260
|
-
|
261
|
-
|
262
|
-
|
257
|
+
def connect(node)
|
258
|
+
# TODO unix socket?
|
259
|
+
TCPSocket.new(node.resolved_host, node.port)
|
260
|
+
end
|
263
261
|
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
262
|
+
class HeartbeatRequestTimer < Coolio::TimerWatcher
|
263
|
+
def initialize(interval, callback)
|
264
|
+
super(interval, true)
|
265
|
+
@callback = callback
|
266
|
+
end
|
267
|
+
|
268
|
+
def on_timer
|
269
|
+
@callback.call
|
270
|
+
rescue
|
271
|
+
# TODO log?
|
272
|
+
end
|
268
273
|
end
|
269
274
|
|
270
275
|
def on_timer
|
271
|
-
@callback.call
|
272
|
-
|
273
|
-
|
276
|
+
return if @finished
|
277
|
+
@nodes.each {|n|
|
278
|
+
if n.tick
|
279
|
+
rebuild_weight_array
|
280
|
+
end
|
281
|
+
begin
|
282
|
+
#$log.trace "sending heartbeat #{n.host}:#{n.port} on #{@heartbeat_type}"
|
283
|
+
if @heartbeat_type == :tcp
|
284
|
+
send_heartbeat_tcp(n)
|
285
|
+
else
|
286
|
+
@usock.send "\0", 0, Socket.pack_sockaddr_in(n.port, n.resolved_host)
|
287
|
+
end
|
288
|
+
rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR
|
289
|
+
# TODO log
|
290
|
+
$log.debug "failed to send heartbeat packet to #{n.host}:#{n.port}", :error=>$!.to_s
|
291
|
+
end
|
292
|
+
}
|
274
293
|
end
|
275
|
-
end
|
276
294
|
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
295
|
+
class HeartbeatHandler < Coolio::IO
|
296
|
+
def initialize(io, callback)
|
297
|
+
super(io)
|
298
|
+
@io = io
|
299
|
+
@callback = callback
|
282
300
|
end
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
287
|
-
|
288
|
-
|
301
|
+
|
302
|
+
def on_readable
|
303
|
+
begin
|
304
|
+
msg, addr = @io.recvfrom(1024)
|
305
|
+
rescue Errno::EAGAIN, Errno::EWOULDBLOCK, Errno::EINTR
|
306
|
+
return
|
289
307
|
end
|
290
|
-
|
291
|
-
|
292
|
-
|
308
|
+
host = addr[3]
|
309
|
+
port = addr[1]
|
310
|
+
sockaddr = Socket.pack_sockaddr_in(port, host)
|
311
|
+
@callback.call(sockaddr, msg)
|
312
|
+
rescue
|
313
|
+
# TODO log?
|
293
314
|
end
|
294
|
-
}
|
295
|
-
end
|
296
|
-
|
297
|
-
class HeartbeatHandler < Coolio::IO
|
298
|
-
def initialize(io, callback)
|
299
|
-
super(io)
|
300
|
-
@io = io
|
301
|
-
@callback = callback
|
302
315
|
end
|
303
316
|
|
304
|
-
def on_readable
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
317
|
+
def on_heartbeat(sockaddr, msg)
|
318
|
+
port, host = Socket.unpack_sockaddr_in(sockaddr)
|
319
|
+
if node = @nodes.find {|n| n.sockaddr == sockaddr }
|
320
|
+
#$log.trace "heartbeat from '#{node.name}'", :host=>node.host, :port=>node.port
|
321
|
+
if node.heartbeat
|
322
|
+
rebuild_weight_array
|
323
|
+
end
|
309
324
|
end
|
310
|
-
host = addr[3]
|
311
|
-
port = addr[1]
|
312
|
-
sockaddr = Socket.pack_sockaddr_in(port, host)
|
313
|
-
@callback.call(sockaddr, msg)
|
314
|
-
rescue
|
315
|
-
# TODO log?
|
316
325
|
end
|
317
|
-
end
|
318
326
|
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
|
325
|
-
|
326
|
-
|
327
|
-
|
327
|
+
class Node
|
328
|
+
def initialize(name, host, port, weight, standby, failure,
|
329
|
+
phi_threshold, recover_sample_size, expire_dns_cache)
|
330
|
+
@name = name
|
331
|
+
@host = host
|
332
|
+
@port = port
|
333
|
+
@weight = weight
|
334
|
+
@standby = standby
|
335
|
+
@failure = failure
|
336
|
+
@phi_threshold = phi_threshold
|
337
|
+
@recover_sample_size = recover_sample_size
|
338
|
+
@expire_dns_cache = expire_dns_cache
|
339
|
+
@available = true
|
328
340
|
|
329
|
-
|
330
|
-
|
331
|
-
|
332
|
-
|
333
|
-
@host = host
|
334
|
-
@port = port
|
335
|
-
@weight = weight
|
336
|
-
@standby = standby
|
337
|
-
@failure = failure
|
338
|
-
@phi_threshold = phi_threshold
|
339
|
-
@recover_sample_size = recover_sample_size
|
340
|
-
@expire_dns_cache = expire_dns_cache
|
341
|
-
@available = true
|
342
|
-
|
343
|
-
@resolved_host = nil
|
344
|
-
@resolved_time = 0
|
345
|
-
resolved_host # check dns
|
346
|
-
end
|
341
|
+
@resolved_host = nil
|
342
|
+
@resolved_time = 0
|
343
|
+
resolved_host # check dns
|
344
|
+
end
|
347
345
|
|
348
|
-
|
349
|
-
|
350
|
-
|
346
|
+
attr_reader :name, :host, :port, :weight
|
347
|
+
attr_writer :weight, :standby, :available
|
348
|
+
attr_reader :sockaddr # used by on_heartbeat
|
351
349
|
|
352
|
-
|
353
|
-
|
354
|
-
|
350
|
+
def available?
|
351
|
+
@available
|
352
|
+
end
|
355
353
|
|
356
|
-
|
357
|
-
|
358
|
-
|
354
|
+
def standby?
|
355
|
+
@standby
|
356
|
+
end
|
359
357
|
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
358
|
+
def resolved_host
|
359
|
+
case @expire_dns_cache
|
360
|
+
when 0
|
361
|
+
# cache is disabled
|
362
|
+
return resolve_dns!
|
365
363
|
|
366
|
-
|
367
|
-
|
368
|
-
|
364
|
+
when nil
|
365
|
+
# persistent cache
|
366
|
+
return @resolved_host ||= resolve_dns!
|
369
367
|
|
370
|
-
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
|
368
|
+
else
|
369
|
+
now = Engine.now
|
370
|
+
rh = @resolved_host
|
371
|
+
if !rh || now - @resolved_time >= @expire_dns_cache
|
372
|
+
rh = @resolved_host = resolve_dns!
|
373
|
+
@resolved_time = now
|
374
|
+
end
|
375
|
+
return rh
|
376
376
|
end
|
377
|
-
return rh
|
378
377
|
end
|
379
|
-
end
|
380
378
|
|
381
|
-
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
|
386
|
-
|
379
|
+
def resolve_dns!
|
380
|
+
@sockaddr = Socket.pack_sockaddr_in(@port, @host)
|
381
|
+
port, resolved_host = Socket.unpack_sockaddr_in(@sockaddr)
|
382
|
+
return resolved_host
|
383
|
+
end
|
384
|
+
private :resolve_dns!
|
385
|
+
|
386
|
+
def tick
|
387
|
+
now = Time.now.to_f
|
388
|
+
if !@available
|
389
|
+
if @failure.hard_timeout?(now)
|
390
|
+
@failure.clear
|
391
|
+
end
|
392
|
+
return nil
|
393
|
+
end
|
387
394
|
|
388
|
-
def tick
|
389
|
-
now = Time.now.to_f
|
390
|
-
if !@available
|
391
395
|
if @failure.hard_timeout?(now)
|
396
|
+
$log.warn "detached forwarding server '#{@name}'", :host=>@host, :port=>@port, :hard_timeout=>true
|
397
|
+
@available = false
|
398
|
+
@resolved_host = nil # expire cached host
|
392
399
|
@failure.clear
|
400
|
+
return true
|
393
401
|
end
|
394
|
-
return nil
|
395
|
-
end
|
396
402
|
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
401
|
-
|
402
|
-
|
403
|
+
phi = @failure.phi(now)
|
404
|
+
#$log.trace "phi '#{@name}'", :host=>@host, :port=>@port, :phi=>phi
|
405
|
+
if phi > @phi_threshold
|
406
|
+
$log.warn "detached forwarding server '#{@name}'", :host=>@host, :port=>@port, :phi=>phi
|
407
|
+
@available = false
|
408
|
+
@resolved_host = nil # expire cached host
|
409
|
+
@failure.clear
|
410
|
+
return true
|
411
|
+
else
|
412
|
+
return false
|
413
|
+
end
|
403
414
|
end
|
404
415
|
|
405
|
-
|
406
|
-
|
407
|
-
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
414
|
-
|
416
|
+
def heartbeat(detect=true)
|
417
|
+
now = Time.now.to_f
|
418
|
+
@failure.add(now)
|
419
|
+
#$log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size
|
420
|
+
if detect && !@available && @failure.sample_size > @recover_sample_size
|
421
|
+
@available = true
|
422
|
+
$log.warn "recovered forwarding server '#{@name}'", :host=>@host, :port=>@port
|
423
|
+
return true
|
424
|
+
else
|
425
|
+
return nil
|
426
|
+
end
|
415
427
|
end
|
416
|
-
end
|
417
428
|
|
418
|
-
|
419
|
-
|
420
|
-
@failure.add(now)
|
421
|
-
#$log.trace "heartbeat from '#{@name}'", :host=>@host, :port=>@port, :available=>@available, :sample_size=>@failure.sample_size
|
422
|
-
if detect && !@available && @failure.sample_size > @recover_sample_size
|
423
|
-
@available = true
|
424
|
-
$log.info "recovered forwarding server '#{@name}'", :host=>@host, :port=>@port
|
425
|
-
return true
|
426
|
-
else
|
427
|
-
return nil
|
429
|
+
def to_msgpack(out = '')
|
430
|
+
[@host, @port, @weight, @available].to_msgpack(out)
|
428
431
|
end
|
429
432
|
end
|
430
433
|
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
end
|
435
|
-
|
436
|
-
class FailureDetector
|
437
|
-
PHI_FACTOR = 1.0 / Math.log(10.0)
|
438
|
-
SAMPLE_SIZE = 1000
|
434
|
+
class FailureDetector
|
435
|
+
PHI_FACTOR = 1.0 / Math.log(10.0)
|
436
|
+
SAMPLE_SIZE = 1000
|
439
437
|
|
440
|
-
|
441
|
-
|
442
|
-
|
443
|
-
|
438
|
+
def initialize(heartbeat_interval, hard_timeout, init_last)
|
439
|
+
@heartbeat_interval = heartbeat_interval
|
440
|
+
@last = init_last
|
441
|
+
@hard_timeout = hard_timeout
|
444
442
|
|
445
|
-
|
446
|
-
|
447
|
-
|
448
|
-
|
443
|
+
# microsec
|
444
|
+
@init_gap = (heartbeat_interval * 1e6).to_i
|
445
|
+
@window = [@init_gap]
|
446
|
+
end
|
449
447
|
|
450
|
-
|
451
|
-
|
452
|
-
|
448
|
+
def hard_timeout?(now)
|
449
|
+
now - @last > @hard_timeout
|
450
|
+
end
|
453
451
|
|
454
|
-
|
455
|
-
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
452
|
+
def add(now)
|
453
|
+
if @window.empty?
|
454
|
+
@window << @init_gap
|
455
|
+
@last = now
|
456
|
+
else
|
457
|
+
gap = now - @last
|
458
|
+
@window << (gap * 1e6).to_i
|
459
|
+
@window.shift if @window.length > SAMPLE_SIZE
|
460
|
+
@last = now
|
461
|
+
end
|
463
462
|
end
|
464
|
-
end
|
465
463
|
|
466
|
-
|
467
|
-
|
468
|
-
|
464
|
+
def phi(now)
|
465
|
+
size = @window.size
|
466
|
+
return 0.0 if size == 0
|
469
467
|
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
468
|
+
# Calculate weighted moving average
|
469
|
+
mean_usec = 0
|
470
|
+
fact = 0
|
471
|
+
@window.each_with_index {|gap,i|
|
472
|
+
mean_usec += gap * (1+i)
|
473
|
+
fact += (1+i)
|
474
|
+
}
|
475
|
+
mean_usec = mean_usec / fact
|
478
476
|
|
479
|
-
|
480
|
-
|
477
|
+
# Normalize arrive intervals into 1sec
|
478
|
+
mean = (mean_usec.to_f / 1e6) - @heartbeat_interval + 1
|
481
479
|
|
482
|
-
|
483
|
-
|
484
|
-
|
480
|
+
# Calculate phi of the phi accrual failure detector
|
481
|
+
t = now - @last - @heartbeat_interval + 1
|
482
|
+
phi = PHI_FACTOR * t / mean
|
485
483
|
|
486
|
-
|
487
|
-
|
484
|
+
return phi
|
485
|
+
end
|
488
486
|
|
489
|
-
|
490
|
-
|
491
|
-
|
487
|
+
def sample_size
|
488
|
+
@window.size
|
489
|
+
end
|
492
490
|
|
493
|
-
|
494
|
-
|
495
|
-
|
491
|
+
def clear
|
492
|
+
@window.clear
|
493
|
+
@last = 0
|
494
|
+
end
|
496
495
|
end
|
497
|
-
end
|
498
|
-
|
499
|
-
## TODO
|
500
|
-
#class RPC
|
501
|
-
# def initialize(this)
|
502
|
-
# @this = this
|
503
|
-
# end
|
504
|
-
#
|
505
|
-
# def list_nodes
|
506
|
-
# @this.nodes
|
507
|
-
# end
|
508
|
-
#
|
509
|
-
# def list_fault_nodes
|
510
|
-
# list_nodes.select {|n| !n.available? }
|
511
|
-
# end
|
512
|
-
#
|
513
|
-
# def list_available_nodes
|
514
|
-
# list_nodes.select {|n| n.available? }
|
515
|
-
# end
|
516
|
-
#
|
517
|
-
# def add_node(name, host, port, weight)
|
518
|
-
# end
|
519
|
-
#
|
520
|
-
# def recover_node(host, port)
|
521
|
-
# end
|
522
|
-
#
|
523
|
-
# def remove_node(host, port)
|
524
|
-
# end
|
525
|
-
#end
|
526
|
-
end
|
527
|
-
|
528
496
|
|
497
|
+
## TODO
|
498
|
+
#class RPC
|
499
|
+
# def initialize(this)
|
500
|
+
# @this = this
|
501
|
+
# end
|
502
|
+
#
|
503
|
+
# def list_nodes
|
504
|
+
# @this.nodes
|
505
|
+
# end
|
506
|
+
#
|
507
|
+
# def list_fault_nodes
|
508
|
+
# list_nodes.select {|n| !n.available? }
|
509
|
+
# end
|
510
|
+
#
|
511
|
+
# def list_available_nodes
|
512
|
+
# list_nodes.select {|n| n.available? }
|
513
|
+
# end
|
514
|
+
#
|
515
|
+
# def add_node(name, host, port, weight)
|
516
|
+
# end
|
517
|
+
#
|
518
|
+
# def recover_node(host, port)
|
519
|
+
# end
|
520
|
+
#
|
521
|
+
# def remove_node(host, port)
|
522
|
+
# end
|
523
|
+
#end
|
524
|
+
end
|
529
525
|
end
|