nats 0.4.10 → 0.4.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -6,17 +6,19 @@ module NATSD #:nodoc: all
6
6
  class Server
7
7
 
8
8
  class << self
9
- attr_reader :id, :info, :log_time, :auth_required, :debug_flag, :trace_flag, :options
10
- attr_reader :max_payload, :max_pending, :max_control_line, :auth_timeout
9
+ attr_reader :id, :info, :log_time, :auth_required, :ssl_required, :debug_flag, :trace_flag, :options
10
+ attr_reader :max_payload, :max_pending, :max_control_line, :auth_timeout, :ssl_timeout, :ping_interval, :ping_max
11
+ attr_accessor :varz, :healthz, :max_connections, :num_connections, :in_msgs, :out_msgs, :in_bytes, :out_bytes
11
12
 
12
13
  alias auth_required? :auth_required
14
+ alias ssl_required? :ssl_required
13
15
  alias debug_flag? :debug_flag
14
16
  alias trace_flag? :trace_flag
15
17
 
16
18
  def version; "nats-server version #{NATSD::VERSION}" end
17
19
 
18
- def host; @options[:addr] end
19
- def port; @options[:port] end
20
+ def host; @options[:addr] end
21
+ def port; @options[:port] end
20
22
  def pid_file; @options[:pid_file] end
21
23
 
22
24
  def process_options(argv=[])
@@ -36,10 +38,18 @@ module NATSD #:nodoc: all
36
38
 
37
39
  @id, @cid = fast_uuid, 1
38
40
  @sublist = Sublist.new
41
+
42
+ @num_connections = 0
43
+ @in_msgs = @out_msgs = 0
44
+ @in_bytes = @out_bytes = 0
45
+
39
46
  @info = {
40
47
  :server_id => Server.id,
48
+ :host => host,
49
+ :port => port,
41
50
  :version => VERSION,
42
51
  :auth_required => auth_required?,
52
+ :ssl_required => ssl_required?,
43
53
  :max_payload => @max_payload
44
54
  }
45
55
 
@@ -47,12 +57,21 @@ module NATSD #:nodoc: all
47
57
  if @options[:daemonize]
48
58
  require 'rubygems'
49
59
  require 'daemons'
60
+ require 'tmpdir'
50
61
  unless @options[:log_file]
51
62
  # These log messages visible to controlling TTY
52
63
  log "Starting #{NATSD::APP_NAME} version #{NATSD::VERSION} on port #{NATSD::Server.port}"
64
+ log "Starting http monitor on port #{@options[:http_port]}" if @options[:http_port]
53
65
  log "Switching to daemon mode"
54
66
  end
55
- Daemons.daemonize(:app_name => APP_NAME, :mode => :exec)
67
+ opts = {
68
+ :app_name => APP_NAME,
69
+ :mode => :exec,
70
+ :dir_mode => :normal,
71
+ :dir => Dir.tmpdir
72
+ }
73
+ Daemons.daemonize(opts)
74
+ FileUtils.rm_f("#{Dir.tmpdir}/#{APP_NAME}.pid")
56
75
  end
57
76
 
58
77
  setup_logs
@@ -74,30 +93,41 @@ module NATSD #:nodoc: all
74
93
  end
75
94
 
76
95
  def deliver_to_subscriber(sub, subject, reply, msg)
77
-
78
- # Allows nil reply to not have extra space
79
- reply = reply + ' ' if reply
80
-
81
96
  conn = sub.conn
82
97
 
83
- conn.send_data("MSG #{subject} #{sub.sid} #{reply}#{msg.bytesize}#{CR_LF}")
84
- conn.send_data(msg)
85
- conn.send_data(CR_LF)
98
+ # Accounting
99
+ @out_msgs += 1
100
+ conn.out_msgs += 1
101
+ unless msg.nil?
102
+ mbs = msg.bytesize
103
+ @out_bytes += mbs
104
+ conn.out_bytes += mbs
105
+ end
106
+
107
+ conn.queue_data("MSG #{subject} #{sub.sid} #{reply}#{msg.bytesize}#{CR_LF}#{msg}#{CR_LF}")
86
108
 
87
109
  # Account for these response and check for auto-unsubscribe (pruning interest graph)
88
110
  sub.num_responses += 1
89
111
  conn.delete_subscriber(sub) if (sub.max_responses && sub.num_responses >= sub.max_responses)
90
112
 
91
113
  # Check the outbound queue here and react if need be..
92
- if conn.get_outbound_data_size > NATSD::Server.max_pending
114
+ if (conn.get_outbound_data_size + conn.writev_size) > NATSD::Server.max_pending
93
115
  conn.error_close SLOW_CONSUMER
94
- log "Slow consumer dropped, exceeded #{NATSD::Server.max_pending} bytes pending", conn.client_info
116
+ maxp = pretty_size(NATSD::Server.max_pending)
117
+ log "Slow consumer dropped, exceeded #{maxp} pending", conn.client_info
95
118
  end
96
119
  end
97
120
 
98
121
  def route_to_subscribers(subject, reply, msg)
99
122
  qsubs = nil
100
123
 
124
+ # Allows nil reply to not have extra space
125
+ reply = reply + ' ' if reply
126
+
127
+ # Accounting
128
+ @in_msgs += 1
129
+ @in_bytes += msg.bytesize unless msg.nil?
130
+
101
131
  @sublist.match(subject).each do |sub|
102
132
  # Skip anyone in the closing state
103
133
  next if sub.conn.closing
@@ -128,7 +158,8 @@ module NATSD #:nodoc: all
128
158
  end
129
159
 
130
160
  def auth_ok?(user, pass)
131
- user == @options[:user] && pass == @options[:pass]
161
+ @options[:users].each { |u| return true if (user == u[:user] && pass == u[:pass]) }
162
+ false
132
163
  end
133
164
 
134
165
  def cid
@@ -139,173 +170,43 @@ module NATSD #:nodoc: all
139
170
  @info.to_json
140
171
  end
141
172
 
142
- end
143
- end
144
-
145
- module Connection #:nodoc:
173
+ # Monitoring
174
+ def start_http_server
175
+ return unless port = @options[:http_port]
146
176
 
147
- attr_reader :cid, :closing
177
+ require 'thin'
148
178
 
149
- def client_info
150
- @client_info ||= Socket.unpack_sockaddr_in(get_peername)
151
- end
179
+ log "Starting http monitor on port #{port}"
152
180
 
153
- def post_init
154
- @cid = Server.cid
155
- @subscriptions = {}
156
- @verbose = @pedantic = true # suppressed by most clients, but allows friendly telnet
157
- @receive_data_calls = 0
158
- @parse_state = AWAITING_CONTROL_LINE
159
- send_info
160
- @auth_pending = EM.add_timer(NATSD::Server.auth_timeout) { connect_auth_timeout } if Server.auth_required?
161
- debug "Client connection created", client_info, cid
162
- end
181
+ @healthz = "ok\n"
163
182
 
164
- def connect_auth_timeout
165
- error_close AUTH_REQUIRED
166
- debug "Connection timeout due to lack of auth credentials", cid
167
- end
183
+ @varz = {
184
+ :start => Time.now,
185
+ :options => @options,
186
+ :cores => num_cpu_cores
187
+ }
168
188
 
169
- def receive_data(data)
170
- @receive_data_calls += 1
171
- @buf = @buf ? @buf << data : data
172
- return close_connection if @buf =~ /(\006|\004)/ # ctrl+c or ctrl+d for telnet friendly
173
-
174
- # while (@buf && !@buf.empty? && !@closing)
175
- while (@buf && !@closing)
176
- case @parse_state
177
- when AWAITING_CONTROL_LINE
178
- case @buf
179
- when PUB_OP
180
- ctrace('PUB OP', strip_op($&)) if NATSD::Server.trace_flag?
181
- return connect_auth_timeout if @auth_pending
182
- @buf = $'
183
- @parse_state = AWAITING_MSG_PAYLOAD
184
- @msg_sub, @msg_reply, @msg_size = $1, $3, $4.to_i
185
- if (@msg_size > NATSD::Server.max_payload)
186
- debug "Message payload size exceeded (#{@msg_size}/#{NATSD::Server.max_payload}), closing connection"
187
- error_close PAYLOAD_TOO_BIG
188
- end
189
- send_data(INVALID_SUBJECT) if (@pedantic && !(@msg_sub =~ SUB_NO_WC))
190
- when SUB_OP
191
- ctrace('SUB OP', strip_op($&)) if NATSD::Server.trace_flag?
192
- return connect_auth_timeout if @auth_pending
193
- @buf = $'
194
- sub, qgroup, sid = $1, $3, $4
195
- return send_data(INVALID_SUBJECT) if !($1 =~ SUB)
196
- return send_data(INVALID_SID_TAKEN) if @subscriptions[sid]
197
- sub = Subscriber.new(self, sub, sid, qgroup, 0)
198
- @subscriptions[sid] = sub
199
- Server.subscribe(sub)
200
- send_data(OK) if @verbose
201
- when UNSUB_OP
202
- ctrace('UNSUB OP', strip_op($&)) if NATSD::Server.trace_flag?
203
- return connect_auth_timeout if @auth_pending
204
- @buf = $'
205
- sid, sub = $1, @subscriptions[$1]
206
- if sub
207
- # If we have set max_responses, we will unsubscribe once we have received
208
- # the appropriate amount of responses.
209
- sub.max_responses = ($2 && $3) ? $3.to_i : nil
210
- delete_subscriber(sub) unless (sub.max_responses && (sub.num_responses < sub.max_responses))
211
- send_data(OK) if @verbose
212
- else
213
- send_data(INVALID_SID_NOEXIST) if @pedantic
189
+ http_server = Thin::Server.new(@options[:http_net], port, :signals => false) do
190
+ Thin::Logging.silent = true
191
+ if NATSD::Server.options[:http_user]
192
+ auth = [NATSD::Server.options[:http_user], NATSD::Server.options[:http_password]]
193
+ use Rack::Auth::Basic do |username, password|
194
+ [username, password] == auth
214
195
  end
215
- when PING
216
- ctrace('PING OP', strip_op($&)) if NATSD::Server.trace_flag?
217
- @buf = $'
218
- send_data(PONG_RESPONSE)
219
- when CONNECT
220
- ctrace('CONNECT OP', strip_op($&)) if NATSD::Server.trace_flag?
221
- @buf = $'
222
- begin
223
- config = JSON.parse($1)
224
- process_connect_config(config)
225
- rescue => e
226
- send_data(INVALID_CONFIG)
227
- log_error
228
- end
229
- when INFO
230
- ctrace('INFO OP', strip_op($&)) if NATSD::Server.trace_flag?
231
- return connect_auth_timeout if @auth_pending
232
- @buf = $'
233
- send_info
234
- when UNKNOWN
235
- ctrace('Unknown Op', strip_op($&)) if NATSD::Server.trace_flag?
236
- return connect_auth_timeout if @auth_pending
237
- @buf = $'
238
- send_data(UNKNOWN_OP)
239
- else
240
- # If we are here we do not have a complete line yet that we understand.
241
- # If too big, cut the connection off.
242
- if @buf.bytesize > NATSD::Server.max_control_line
243
- debug "Control line size exceeded (#{@buf.bytesize}/#{NATSD::Server.max_control_line}), closing connection.."
244
- error_close PROTOCOL_OP_TOO_BIG
245
- end
246
- return
247
196
  end
248
- @buf = nil if (@buf && @buf.empty?)
249
-
250
- when AWAITING_MSG_PAYLOAD
251
- return unless (@buf.bytesize >= (@msg_size + CR_LF_SIZE))
252
- msg = @buf.slice(0, @msg_size)
253
- ctrace('Processing msg', @msg_sub, @msg_reply, msg) if NATSD::Server.trace_flag?
254
- send_data(OK) if @verbose
255
- Server.route_to_subscribers(@msg_sub, @msg_reply, msg)
256
- @buf = @buf.slice((@msg_size + CR_LF_SIZE), @buf.bytesize)
257
- @msg_sub = @msg_size = @reply = nil
258
- @parse_state = AWAITING_CONTROL_LINE
259
- @buf = nil if (@buf && @buf.empty?)
197
+ map '/healthz' do
198
+ run lambda { |env| [200, RACK_TEXT_HDR, NATSD::Server.healthz] }
199
+ end
200
+ map '/varz' do
201
+ run Varz.new
202
+ end
203
+ map '/connz' do
204
+ run Connz.new
205
+ end
260
206
  end
207
+ http_server.start!
261
208
  end
262
- end
263
-
264
- def send_info
265
- send_data("INFO #{Server.info_string}#{CR_LF}")
266
- end
267
-
268
- def process_connect_config(config)
269
- @verbose = config['verbose'] unless config['verbose'].nil?
270
- @pedantic = config['pedantic'] unless config['pedantic'].nil?
271
- return send_data(OK) unless Server.auth_required?
272
-
273
- EM.cancel_timer(@auth_pending)
274
- if Server.auth_ok?(config['user'], config['pass'])
275
- send_data(OK) if @verbose
276
- @auth_pending = nil
277
- else
278
- error_close AUTH_FAILED
279
- debug "Authorization failed for connection", cid
280
- end
281
- end
282
-
283
- def delete_subscriber(sub)
284
- ctrace('DELSUB OP', sub.subject, sub.qgroup, sub.sid) if NATSD::Server.trace_flag?
285
- Server.unsubscribe(sub)
286
- @subscriptions.delete(sub.sid)
287
- end
288
-
289
- def error_close(msg)
290
- send_data(msg)
291
- close_connection_after_writing
292
- @closing = true
293
- end
294
-
295
- def unbind
296
- debug "Client connection closed", client_info, cid
297
- # ctrace "Receive_Data called #{@receive_data_calls} times." if @receive_data_calls > 0
298
- @subscriptions.each_value { |sub| Server.unsubscribe(sub) }
299
- EM.cancel_timer(@auth_pending) if @auth_pending
300
- @auth_pending = nil
301
- end
302
-
303
- def ctrace(*args)
304
- trace(args, "c: #{cid}")
305
- end
306
209
 
307
- def strip_op(op='')
308
- op.dup.sub(CR_LF, EMPTY)
309
210
  end
310
211
  end
311
212
 
@@ -20,6 +20,8 @@ class Sublist #:nodoc:
20
20
  SublistNode = Struct.new(:leaf_nodes, :next_level)
21
21
  SublistLevel = Struct.new(:nodes, :pwc, :fwc)
22
22
 
23
+ EMPTY_LEVEL = SublistLevel.new({})
24
+
23
25
  def initialize(options = {})
24
26
  @count = 0
25
27
  @results = []
@@ -41,6 +43,13 @@ class Sublist #:nodoc:
41
43
  def enable_cache; @cache ||= {}; end
42
44
  def clear_cache; @cache = {} if @cache; end
43
45
 
46
+ # Random removal
47
+ def prune_cache
48
+ return unless @cache
49
+ keys = @cache.keys
50
+ @cache.delete(keys[rand(keys.size)])
51
+ end
52
+
44
53
  # Insert a subscriber into the sublist for the given subject.
45
54
  def insert(subject, subscriber)
46
55
  # TODO - validate subject as correct.
@@ -57,25 +66,13 @@ class Sublist #:nodoc:
57
66
  node.leaf_nodes.push(subscriber)
58
67
  @count += 1
59
68
  clear_cache # Clear the cache
69
+ node.next_level = nil if node.next_level == EMPTY_LEVEL
60
70
  end
61
71
 
62
72
  # Remove a given subscriber from the sublist for the given subject.
63
73
  def remove(subject, subscriber)
64
- # TODO: implement (remember cache and count cleanup if applicable)
65
- # Reference counts and GC for long empty tree.
66
- level, tokens = @root, subject.split('.')
67
- for token in tokens
68
- next unless level
69
- case token
70
- when FWC then node = level.fwc
71
- when PWC then node = level.pwc
72
- else node = level.nodes[token]
73
- end
74
- level = node.next_level
75
- end
76
- # This could be expensize if a large number of subscribers exist.
77
- node.leaf_nodes.delete(subscriber) if (node && node.leaf_nodes)
78
- clear_cache # Clear the cache
74
+ return unless subject && subscriber
75
+ remove_level(@root, subject.split('.'), subscriber)
79
76
  end
80
77
 
81
78
  # Match a subject to all subscribers, return the array of matches.
@@ -86,7 +83,7 @@ class Sublist #:nodoc:
86
83
  matchAll(@root, tokens)
87
84
  # FIXME: This is too low tech, will revisit when needed.
88
85
  if @cache
89
- clear_cache if @cache.size > CACHE_SIZE
86
+ prune_cache if @cache.size > CACHE_SIZE
90
87
  @cache[subject] = Array.new(@results).freeze # Avoid tampering of copy
91
88
  end
92
89
  @results
@@ -113,4 +110,58 @@ class Sublist #:nodoc:
113
110
  @results.concat(node.leaf_nodes) if node
114
111
  end
115
112
 
113
+ def prune_level(level, node, token)
114
+ # Prune here if needed.
115
+ return unless level && node
116
+ return unless node.leaf_nodes.empty? && (!node.next_level || node.next_level == EMPTY_LEVEL)
117
+ if node == level.fwc
118
+ level.fwc = nil
119
+ elsif node == level.pwc
120
+ level.pwc = nil
121
+ else
122
+ level.nodes.delete(token)
123
+ end
124
+ end
125
+
126
+ def remove_level(level, tokens, subscriber)
127
+ return unless level
128
+ token = tokens.shift
129
+ case token
130
+ when FWC then node = level.fwc
131
+ when PWC then node = level.pwc
132
+ else node = level.nodes[token]
133
+ end
134
+ return unless node
135
+
136
+ # This could be expensive if a large number of subscribers exist.
137
+ if tokens.empty?
138
+ if (node.leaf_nodes && node.leaf_nodes.delete(subscriber))
139
+ @count -= 1
140
+ prune_level(level, node, token)
141
+ clear_cache # Clear the cache
142
+ end
143
+ else
144
+ remove_level(node.next_level, tokens, subscriber)
145
+ prune_level(level, node, token)
146
+ end
147
+ end
148
+
149
+ ################################################
150
+ # Used for tests on pruning subscription nodes.
151
+ ################################################
152
+
153
+ def node_count_level(level, nc)
154
+ return 0 unless level
155
+ nc += 1 if level.fwc
156
+ nc += node_count_level(level.pwc.next_level, nc+1) if level.pwc
157
+ level.nodes.each_value do |node|
158
+ nc += node_count_level(node.next_level, nc)
159
+ end
160
+ nc += level.nodes.length
161
+ end
162
+
163
+ def node_count
164
+ node_count_level(@root, 0)
165
+ end
166
+
116
167
  end
@@ -7,7 +7,7 @@ end
7
7
 
8
8
  def log(*args) #:nodoc:
9
9
  args.unshift(Time.now) if NATSD::Server.log_time
10
- pp args.compact
10
+ PP::pp(args.compact, $stdout, 120)
11
11
  end
12
12
 
13
13
  def debug(*args) #:nodoc:
@@ -22,13 +22,56 @@ def log_error(e=$!) #:nodoc:
22
22
  debug e, e.backtrace
23
23
  end
24
24
 
25
+ def uptime_string(delta)
26
+ num_seconds = delta.to_i
27
+ days = num_seconds / (60 * 60 * 24);
28
+ num_seconds -= days * (60 * 60 * 24);
29
+ hours = num_seconds / (60 * 60);
30
+ num_seconds -= hours * (60 * 60);
31
+ minutes = num_seconds / 60;
32
+ num_seconds -= minutes * 60;
33
+ "#{days}d:#{hours}h:#{minutes}m:#{num_seconds}s"
34
+ end
35
+
36
+ def pretty_size(size, prec=1)
37
+ return 'NA' unless size
38
+ return "#{size}B" if size < 1024
39
+ return sprintf("%.#{prec}fK", size/1024.0) if size < (1024*1024)
40
+ return sprintf("%.#{prec}fM", size/(1024.0*1024.0)) if size < (1024*1024*1024)
41
+ return sprintf("%.#{prec}fG", size/(1024.0*1024.0*1024.0))
42
+ end
43
+
44
+ def num_cpu_cores
45
+ if RUBY_PLATFORM =~ /linux/
46
+ return `cat /proc/cpuinfo | grep processor | wc -l`.to_i
47
+ elsif RUBY_PLATFORM =~ /darwin/
48
+ `sysctl -n hw.ncpu`.strip.to_i
49
+ elsif RUBY_PLATFORM =~ /freebsd|netbsd/
50
+ `sysctl hw.ncpu`.strip.to_i
51
+ else
52
+ return 1
53
+ end
54
+ end
55
+
25
56
  def shutdown #:nodoc:
26
57
  puts
27
58
  log 'Server exiting..'
28
59
  EM.stop
29
- FileUtils.rm(NATSD::Server.pid_file) if NATSD::Server.pid_file
60
+ if NATSD::Server.pid_file
61
+ FileUtils.rm(NATSD::Server.pid_file) if File.exists? NATSD::Server.pid_file
62
+ end
30
63
  exit
31
64
  end
32
65
 
33
66
  ['TERM','INT'].each { |s| trap(s) { shutdown } }
34
67
 
68
+ # FIXME - Should probably be smarter when lots of connections
69
+ def dump_connection_state
70
+ log "Dumping connection state on SIG_USR2"
71
+ ObjectSpace.each_object(NATSD::Connection) do |c|
72
+ log c.info unless c.closing?
73
+ end
74
+ log 'Connection Dump Complete'
75
+ end
76
+
77
+ trap('USR2') { dump_connection_state }