ustate-client 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +21 -0
- data/README.markdown +111 -0
- data/lib/ustate/client/query.rb +17 -0
- data/lib/ustate/client.rb +108 -0
- data/lib/ustate/dash/config.rb +0 -0
- data/lib/ustate/dash/controller/css.rb +5 -0
- data/lib/ustate/dash/controller/index.rb +5 -0
- data/lib/ustate/dash/helper/renderer.rb +209 -0
- data/lib/ustate/dash/state.rb +75 -0
- data/lib/ustate/dash/views/css.scss +39 -0
- data/lib/ustate/dash/views/index.erubis +3 -0
- data/lib/ustate/dash/views/layout.erubis +16 -0
- data/lib/ustate/dash.rb +111 -0
- data/lib/ustate/message.rb +16 -0
- data/lib/ustate/query.rb +7 -0
- data/lib/ustate/query_string.rb +1066 -0
- data/lib/ustate/query_string.treetop +116 -0
- data/lib/ustate/server/backends/base.rb +103 -0
- data/lib/ustate/server/backends/tcp.rb +33 -0
- data/lib/ustate/server/backends.rb +4 -0
- data/lib/ustate/server/connection.rb +82 -0
- data/lib/ustate/server/graphite.rb +11 -0
- data/lib/ustate/server/index.rb +134 -0
- data/lib/ustate/server.rb +66 -0
- data/lib/ustate/state.rb +18 -0
- data/lib/ustate/version.rb +3 -0
- data/lib/ustate.rb +9 -0
- metadata +71 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2011 Kyle Kingsbury
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Overview
|
2
|
+
======
|
3
|
+
|
4
|
+
UState ("United States", "microstate", etc.) is a state aggregation daemon. It
|
5
|
+
accepts a stream of state transitions and maintains an index of service states,
|
6
|
+
which can be queried or forwarded to various handlers. A state is simply:
|
7
|
+
|
8
|
+
state {
|
9
|
+
host: A hostname, e.g. "api1", "foo.com",
|
10
|
+
service: e.g. "API port 8000 reqs/sec",
|
11
|
+
state: Any string less than 255 bytes, e.g. "ok", "warning", "critical",
|
12
|
+
time: The time that the service entered this state, in unix time,
|
13
|
+
description: Freeform text,
|
14
|
+
metric_f: A floating-point number associated with this state, e.g. the number of reqs/sec
|
15
|
+
}
|
16
|
+
|
17
|
+
At http://showyou.com, we use UState to monitor the health and performance of hundreds of services across our infrastructure, including CPU, queries/second, latency bounds, disk usage, queues, and others.
|
18
|
+
|
19
|
+
UState also includes a simple dashboard Sinatra app.
|
20
|
+
|
21
|
+
Installing
|
22
|
+
==========
|
23
|
+
|
24
|
+
git clone git://github.com/aphyr/ustate.git
|
25
|
+
|
26
|
+
or
|
27
|
+
|
28
|
+
gem install ustate-client
|
29
|
+
|
30
|
+
For the client:
|
31
|
+
|
32
|
+
gem install beefcake trollop
|
33
|
+
|
34
|
+
For the server:
|
35
|
+
|
36
|
+
gem install treetop eventmachine sequel sqlite3 trollop beefcake
|
37
|
+
|
38
|
+
For the dashboard:
|
39
|
+
|
40
|
+
gem install sinatra thin erubis sass
|
41
|
+
|
42
|
+
Getting started
|
43
|
+
===============
|
44
|
+
|
45
|
+
To try it out, install all the gems above, and clone the repository. Start the server with
|
46
|
+
|
47
|
+
bin/server [--host host] [--port port]
|
48
|
+
|
49
|
+
UState listens on TCP socket host:port, and accepts connections from clients. Start a basic testing client with
|
50
|
+
|
51
|
+
bin/test
|
52
|
+
|
53
|
+
The tester spews randomly generated statistics at a server on the default local host and port. To see it in action, run the dashboard:
|
54
|
+
|
55
|
+
cd lib/ustate/dash
|
56
|
+
../../../bin/dash
|
57
|
+
|
58
|
+
|
59
|
+
The Dashboard
|
60
|
+
=============
|
61
|
+
|
62
|
+
The dashboard runs a file in the local directory: config.rb. That file can
|
63
|
+
override any configuration options on the Dash class (hence all Sinatra
|
64
|
+
configuration) as well as the UState client, etc.
|
65
|
+
|
66
|
+
set :port, 6000 # HTTP server on port 6000
|
67
|
+
config[:client][:host] = 'my.ustate.server'
|
68
|
+
|
69
|
+
It also loads views from the local directory. Sinatra makes it awkward to
|
70
|
+
compose multiple view directories, so you'll probably want to create your own
|
71
|
+
view/ and config.rb. I've provided an example stylesheet, layout, and dashboard
|
72
|
+
in lib/ustate/dash/views--as well as an extensive set of functions for laying
|
73
|
+
out states corresponding to any query: see lib/ustate/dash/helper/renderer.rb.
|
74
|
+
The way I figure, you're almost certainly going to want to write your own, so
|
75
|
+
I'm going to give you the tools you need, and get out of your way.
|
76
|
+
|
77
|
+
Protocol
|
78
|
+
========
|
79
|
+
|
80
|
+
A connection to UState is a stream of messages. Each message is a 4 byte
|
81
|
+
network-endian integer *length*, followed by a Protocol Buffers Message of
|
82
|
+
*length* bytes. See lib/ustate/message.rb for the protobuf particulars.
|
83
|
+
|
84
|
+
The server will accept a repeated list of States, and respond with a
|
85
|
+
confirmation message with either an acknowledgement or an error. Check the
|
86
|
+
success boolean in the Message.
|
87
|
+
|
88
|
+
You can also query states using a very basic expression language. The grammar is specified as a Parsing Expression Grammar in query_string.treetop. Examples include:
|
89
|
+
|
90
|
+
state = "ok"
|
91
|
+
(service =~ "disk%") or (state == "critical" and host =~ "%.trioptimum.com")
|
92
|
+
|
93
|
+
Search queries will return a message with repeated States matching that expression. An empty expression matches all states.
|
94
|
+
|
95
|
+
Performance
|
96
|
+
===========
|
97
|
+
|
98
|
+
It's Ruby. It ain't gonna be fast. However, on my 4-year-old core 2 duo, I see >600 inserts/sec or queries/sec. The client is fully threadsafe, and performs well concurrently. I will continue to tune UState for latency and throughput, and welcome patches.
|
99
|
+
|
100
|
+
For large installations, I plan to implement a selective forwarder. Local ustate servers can accept high volumes of states from a small set of nodes, and forward updates at a larger granularity to supervisors, and so forth, in a tree. The query language should be able to support proxying requests to the most recent source of a state, so very large sets of services can be maintained at high granularity.
|
101
|
+
|
102
|
+
Goals
|
103
|
+
=====
|
104
|
+
|
105
|
+
Immediately, I'll be porting our internal email alerter to UState. Users register for interest in certain types of states or transitions, and receive emails when those events occur.
|
106
|
+
|
107
|
+
In the medium term, I'll be connecting UState to Graphite (or perhaps another
|
108
|
+
graphing tool) for metrics archival and soft-realtime graphs. I have an
|
109
|
+
internal gnuplot system which is clunky and deserves retirement.
|
110
|
+
|
111
|
+
When the protocol and architecture are finalized, I plan to reimplement the server in a faster language.
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module UState
  # Little query builder.
  #
  # Calling a method named after a field declared on UState::Query returns
  # that field type's Proxy constant; any other name raises ArgumentError.
  class Client::Query
    def initialize
      @predicate = nil
    end

    # Resolves +field+ against the fields declared on UState::Query.
    #
    # Extra arguments are accepted and ignored so that a call made with
    # arguments reports "no such field" for unknown names rather than failing
    # with an arity error inside method_missing itself.
    #
    # Returns the Proxy constant of the matching field's type.
    # Raises ArgumentError when no field has that name.
    def method_missing(field, *_args)
      beefcake_field = lookup_field(field)
      raise ArgumentError, "no such field #{field.inspect}" unless beefcake_field
      beefcake_field.type::Proxy
    end

    # Keeps respond_to? consistent with method_missing. Without this, Ruby's
    # implicit conversion probes (to_ary, to_str, ...) fall through to
    # method_missing and surface as ArgumentError.
    def respond_to_missing?(name, include_private = false)
      !lookup_field(name).nil? || super
    end

    private

    # Finds the declared field called +name+, or nil.
    #
    # NOTE(review): assumes UState::Query.fields enumerates objects that
    # respond to #name -- confirm against the beefcake version in use.
    def lookup_field(name)
      name = name.to_sym
      UState::Query.fields.find { |f| f.name == name }
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# TCP client for a UState server.
#
# Messages are written as length-prefixed protobufs
# (Message#encode_with_length) and the reply is read back with read_message.
# All socket use goes through with_connection, which holds a mutex for the
# duration of the request/response exchange.
class UState::Client
  class Error < RuntimeError; end
  class InvalidResponse < Error; end
  class ServerError < Error; end

  require 'thread'
  require 'socket'
  require 'time'

  HOST = '127.0.0.1'
  PORT = 55956

  TYPE_STATE = 1

  # A request is re-attempted until more than this many attempts have failed.
  MAX_RETRIES = 3

  # Errors that cause with_connection to reconnect and try again.
  RECOVERABLE_ERRORS = [
    IOError,
    Errno::EPIPE,
    Errno::ECONNREFUSED,
    Errno::ECONNRESET
  ].freeze

  attr_accessor :host, :port, :socket

  # opts:
  #   :host - server host (default HOST)
  #   :port - server port (default PORT)
  #
  # No connection is opened until the first request (or an explicit connect).
  def initialize(opts = {})
    @host = opts[:host] || HOST
    @port = opts[:port] || PORT
    @locket = Mutex.new
  end

  # Send a state.
  #
  # state_opts is passed to UState::State.new; time defaults to the current
  # unix time and host to the local hostname. Returns the server's reply
  # message (see read_message for the errors raised).
  def <<(state_opts)
    # Create state
    state = UState::State.new(state_opts)
    state.time ||= Time.now.utc.to_i
    state.host ||= Socket.gethostname

    message = UState::Message.new :states => [state]

    # Transmit
    with_connection do |s|
      s << message.encode_with_length
      read_message s
    end
  end

  # Opens a new TCP connection to host:port and stores it in @socket.
  def connect
    @socket = TCPSocket.new(@host, @port)
  end

  # Closes the current socket, if there is an open one.
  def close
    @locket.synchronize do
      @socket.close if @socket && !@socket.closed?
    end
  end

  # True when a socket has been opened and not yet closed.
  def connected?
    !@socket.nil? && !@socket.closed?
  end

  # Ask for states matching the query string. Returns the reply message.
  def query(string = nil)
    message = UState::Message.new query: UState::Query.new(string: string)
    with_connection do |s|
      s << message.encode_with_length
      read_message s
    end
  end

  # Read a message from a stream: a 4-byte network-order length followed by
  # that many bytes of encoded UState::Message.
  #
  # Raises InvalidResponse if the stream ends before a whole message arrives,
  # and ServerError (carrying message.error) if the decoded message is not ok.
  def read_message(s)
    header = s.read(4)
    unless header and header.size == 4
      raise InvalidResponse, "unexpected EOF"
    end

    length = header.unpack('N').first

    # IO#read returns nil or a short string when the peer hangs up mid-body.
    str = s.read length
    if str.nil? or str.bytesize < length
      raise InvalidResponse, "unexpected EOF"
    end

    begin
      message = UState::Message.decode str
    rescue
      # Diagnostics go to stderr so a library user's stdout stays clean.
      warn "Message was #{str.inspect}"
      raise
    end

    unless message.ok
      warn "Failed"
      raise ServerError, message.error
    end

    message
  end

  # Yields a connection in the block, connecting first if necessary.
  #
  # On a recoverable error the current socket is discarded and the block is
  # retried on a fresh connection; the error is re-raised once more than
  # MAX_RETRIES attempts have failed. The reconnect happens inside the
  # protected region, so a failing connect is counted and retried like any
  # other attempt instead of escaping from the rescue clause.
  def with_connection
    tries = 0

    @locket.synchronize do
      begin
        tries += 1
        yield(@socket || connect)
      rescue *RECOVERABLE_ERRORS
        raise if tries > MAX_RETRIES
        discard_socket
        retry
      end
    end
  end

  private

  # Closes and forgets the current socket. Called with @locket already held,
  # so it must not go through #close (Mutex is not reentrant).
  def discard_socket
    @socket.close if @socket && !@socket.closed?
  rescue IOError, SystemCallError
    # Best effort: the socket is being thrown away regardless.
  ensure
    @socket = nil
  end
end
|
File without changes
|
@@ -0,0 +1,209 @@
|
|
1
|
+
module UState
|
2
|
+
class Dash
|
3
|
+
helpers do
|
4
|
+
include Rack::Utils
|
5
|
+
|
6
|
+
alias_method :h, :escape_html
|
7
|
+
|
8
|
+
# Returns a scalar factor from 0.2 to 1, where 0.2 is "on the order of
|
9
|
+
# age_scale ago", and 1 is "very recent"
|
10
|
+
# Freshness factor for a unix timestamp, used as a CSS opacity.
#
# time - unix time (seconds), or nil.
#
# Returns 1 for nil, otherwise 1 - age / Dash.config[:age_scale] limited to
# the range 0.2..1.
def age_fraction(time)
  return 1 if time.nil?

  age = Time.now.to_f - time
  fraction = 1 - (age / Dash.config[:age_scale])

  return 0.2 if fraction < 0.2
  return 1 if fraction > 1
  fraction
end
|
22
|
+
|
23
|
+
# Finds the longest common prefix of a list of strings.
|
24
|
+
# e.g. 'abc', 'ab', 'abdf' => 'ab'
|
25
|
+
# Longest prefix shared by every string in the list.
#
# strings - array of strings.
# prefix  - string prepended to the result (default '').
#
# Returns strings.first unchanged when the list has zero or one element
# (so nil for an empty list); otherwise prefix + the shared leading
# characters, which may be empty.
#
# Compares character by character without recursion, so lists whose strings
# are all identical (or where one string is exhausted) terminate normally.
def longest_common_prefix(strings, prefix = '')
  return strings.first if strings.size <= 1

  head = strings.first
  others = strings[1..-1]

  shared = 0
  # s[shared, 1] is "" or nil once s is exhausted, which never equals a
  # real character of head, so the scan stops at the shortest string.
  while shared < head.length &&
        others.all? { |s| s[shared, 1] == head[shared, 1] }
    shared += 1
  end

  prefix + head[0, shared]
end
|
39
|
+
|
40
|
+
# An overview of states
|
41
|
+
# An overview of states: one short rendering (state_short) per state,
# wrapped in a ul.
def state_list(states)
  items = states.map { |state| state_short(state) }
  ul(items)
end
|
44
|
+
|
45
|
+
# Renders a "States by Host" heading followed by a table with one row per
# host: a header cell naming the host, then a short rendering of each of
# that host's states ordered by service.
#
# states - the states to show (default: Dash.client.query).
def state_grid(states = Dash.client.query)
  rows = State.partition(states, :host).map do |host, host_states|
    cells = State.sort(host_states, :service).map { |state| state_short(state) }
    tr(th(host, class: 'host'), *cells)
  end

  h2('States by Host') + table(*rows)
end
|
58
|
+
|
59
|
+
# Renders a state as the given HTML tag with a % width corresponding to
|
60
|
+
# metric / max.
|
61
|
+
# Renders a state as an HTML tag whose width is metric / max, as a percent.
#
# s    - a state (responds to metric, state, time, description), or nil.
# opts - :tag  element name (default 'div')
#        :max  metric value that corresponds to 100% width (default 1)
#
# Returns '' when s is nil, otherwise the markup produced by tag. The text
# is the metric ('%.2f' for Floats, to_s for Integers, '?' otherwise).
#
# The width falls back to 0 whenever the ratio is undefined: max is nil or
# zero, or the result is NaN/Infinity. Float division by zero does not raise
# ZeroDivisionError, so it has to be checked for explicitly.
def state_bar(s, opts = {})
  opts = {tag: 'div', max: 1}.merge opts

  return '' unless s
  x = s.metric

  # Text
  text = case x
         when Float
           '%.2f' % x
         when Integer
           x.to_s
         else
           '?'
         end

  # Size
  max = opts[:max]
  size = if max.nil? || max.zero?
           0
         else
           (x || 0) * 100 / max
         end
  size = 0 if size.respond_to?(:finite?) && !size.finite?

  tag opts[:tag], h(text),
    :class => "state #{s.state}",
    style: "opacity: #{age_fraction s.time}; width: #{size}%",
    title: s.description
end
|
89
|
+
|
90
|
+
# Renders a set of states in a chart. Each row is a given host, each
|
91
|
+
# service is a column. Each state is shown as a bar with an inferred
|
92
|
+
# maximum for the entire service, so you can readily compare multiple
|
93
|
+
# hosts.
|
94
|
+
#
|
95
|
+
# Takes a set of states and options:
|
96
|
+
# title: the title of the chart. Inferred to be the longest common
|
97
|
+
# prefix of all services.
|
98
|
+
# maxima: maps each service to the maximum value used to display its
|
99
|
+
# bar.
|
100
|
+
# service_names: maps each service to a friendly name. Default service
|
101
|
+
# names have common prefixes removed.
|
102
|
+
# hosts: an array of hosts for rows. Default is every host present in
|
103
|
+
# states, sorted.
|
104
|
+
# Renders states as a chart: an h2 title followed by a table with one row
# per host and one column per service, each cell holding a state_bar scaled
# to that service's maximum.
#
# states - the states to chart.
# opts   - :title, :maxima, :service_names, :hosts; values given in :maxima
#          and :service_names override the inferred ones per service.
#
# States without a metric count as 0 when inferring a service's maximum, so
# they can be charted (state_bar renders them as '?').
def state_chart(states, opts = {})
  o = {
    :maxima => {},
    :service_names => {}
  }.merge opts

  # Get all services
  services = states.map { |s| s.service }.compact.uniq.sort

  # Figure out what name to use for each service: the service with the
  # prefix common to all services stripped off.
  prefix = longest_common_prefix services
  service_names = services.inject({}) do |names, service|
    names[service] = service[prefix.length..-1]
    names
  end.merge o[:service_names]

  # Compute maximum for each service
  maxima = states.inject(Hash.new(0)) do |m, s|
    m[s.service] = [s.metric || 0, m[s.service]].max
    m
  end.merge o[:maxima]

  # Compute union of all hosts for these states, if no
  # list of hosts explicitly given.
  hosts = o[:hosts] || states.map { |state| state.host }.compact
  hosts = hosts.uniq.sort

  # Construct index
  by = states.inject({}) do |index, s|
    index[[s.host, s.service]] = s
    index
  end

  # Title. prefix is nil when there are no services at all.
  title = o[:title] || (prefix ? prefix.capitalize : 'Unknown')

  header = tr(th(), *services.map { |service| th service_names[service] })

  rows = hosts.map do |host|
    cells = services.map do |service|
      s = by[[host, service]]
      td(s ? state_bar(s, max: maxima[service]) : nil)
    end
    tr(th(host), *cells)
  end

  h2(title) + table(header, *rows, :class => 'chart')
end
|
164
|
+
|
165
|
+
# Renders a state as a short tag.
|
166
|
+
# Renders a state as a short tag showing "host service".
#
# s    - a state (responds to state, time, description, host, service),
#        or nil for an empty placeholder tag.
# opts - :tag element name; 'li' when omitted, including when an options
#        hash is passed without a :tag key.
#
# Every state field placed in the markup is HTML-escaped, including the
# state name used in the class attribute.
def state_short(s, opts={tag: 'li'})
  name = opts[:tag] || 'li'

  if s
    "<#{name} class=\"state #{h s.state}\" style=\"opacity: #{age_fraction s.time}\" title=\"#{h s.description}\">#{h s.host} #{h s.service}</#{name}>"
  else
    "<#{name} class=\"service\"></#{name}>"
  end
end
|
173
|
+
|
174
|
+
# Renders a time to an HTML tag.
|
175
|
+
# Renders a unix timestamp as an HTML time element: the datetime attribute
# is the ISO 8601 form, the text is formatted with Dash.config[:strftime].
def time(unix)
  moment = Time.at(unix)
  stamp = moment.iso8601
  label = moment.strftime(Dash.config[:strftime])

  "<time datetime=\"#{stamp}\">#{label}</time>"
end
|
179
|
+
|
180
|
+
# Renders an HTML tag
|
181
|
+
# Renders an HTML tag.
#
# tag - element name.
# a   - content items (nested arrays are flattened, items joined with
#       newlines). A trailing Hash is taken as the element's attributes;
#       attribute values are HTML-escaped, content is not.
#
# If a block is given, its return value is appended to the content.
#
# Returns the markup as a String. With no attributes the opening tag is
# written without a trailing space ("<td>", not "<td >").
def tag(tag, *a)
  opts = Hash === a.last ? a.pop : {}

  # Each attribute carries its own leading space, so an empty attribute
  # list contributes nothing to the opening tag.
  attrs = opts.map { |k, v| " #{k}=\"#{h v}\"" }.join

  a << yield if block_given?
  content = a.flatten.join("\n")

  "<#{tag}#{attrs}>#{content}</#{tag}>"
end
|
200
|
+
|
201
|
+
# Specific tag aliases
|
202
|
+
# Specific tag aliases: defines div(...), span(...), table(...), etc., each
# forwarding its arguments and block to tag with the element name filled in.
# define_method is used instead of evaluating generated source text.
%w(div span h1 h2 h3 h4 h5 h6 ul ol li table th tr td u i b).each do |name|
  define_method(name) do |*a, &block|
    tag(name, *a, &block)
  end
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|