ustate-client 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +21 -0
- data/README.markdown +111 -0
- data/lib/ustate/client/query.rb +17 -0
- data/lib/ustate/client.rb +108 -0
- data/lib/ustate/dash/config.rb +0 -0
- data/lib/ustate/dash/controller/css.rb +5 -0
- data/lib/ustate/dash/controller/index.rb +5 -0
- data/lib/ustate/dash/helper/renderer.rb +209 -0
- data/lib/ustate/dash/state.rb +75 -0
- data/lib/ustate/dash/views/css.scss +39 -0
- data/lib/ustate/dash/views/index.erubis +3 -0
- data/lib/ustate/dash/views/layout.erubis +16 -0
- data/lib/ustate/dash.rb +111 -0
- data/lib/ustate/message.rb +16 -0
- data/lib/ustate/query.rb +7 -0
- data/lib/ustate/query_string.rb +1066 -0
- data/lib/ustate/query_string.treetop +116 -0
- data/lib/ustate/server/backends/base.rb +103 -0
- data/lib/ustate/server/backends/tcp.rb +33 -0
- data/lib/ustate/server/backends.rb +4 -0
- data/lib/ustate/server/connection.rb +82 -0
- data/lib/ustate/server/graphite.rb +11 -0
- data/lib/ustate/server/index.rb +134 -0
- data/lib/ustate/server.rb +66 -0
- data/lib/ustate/state.rb +18 -0
- data/lib/ustate/version.rb +3 -0
- data/lib/ustate.rb +9 -0
- metadata +71 -0
data/LICENSE
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License
|
2
|
+
|
3
|
+
Copyright (c) 2011 Kyle Kingsbury
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.markdown
ADDED
@@ -0,0 +1,111 @@
|
|
1
|
+
Overview
|
2
|
+
======
|
3
|
+
|
4
|
+
UState ("United States", "microstate", etc.) is a state aggregation daemon. It
|
5
|
+
accepts a stream of state transitions and maintains an index of service states,
|
6
|
+
which can be queried or forwarded to various handlers. A state is simply:
|
7
|
+
|
8
|
+
state {
|
9
|
+
host: A hostname, e.g. "api1", "foo.com",
|
10
|
+
service: e.g. "API port 8000 reqs/sec",
|
11
|
+
state: Any string less than 255 bytes, e.g. "ok", "warning", "critical",
|
12
|
+
time: The time that the service entered this state, in unix time,
|
13
|
+
description: Freeform text,
|
14
|
+
metric_f: A floating-point number associated with this state, e.g. the number of reqs/sec
|
15
|
+
}
|
16
|
+
|
17
|
+
At http://showyou.com, we use UState to monitor the health and performance of hundreds of services across our infrastructure, including CPU, queries/second, latency bounds, disk usage, queues, and others.
|
18
|
+
|
19
|
+
UState also includes a simple dashboard Sinatra app.
|
20
|
+
|
21
|
+
Installing
|
22
|
+
==========
|
23
|
+
|
24
|
+
git clone git://github.com/aphyr/ustate.git
|
25
|
+
|
26
|
+
or
|
27
|
+
|
28
|
+
gem install ustate-client
|
29
|
+
|
30
|
+
For the client:
|
31
|
+
|
32
|
+
gem install beefcake trollop
|
33
|
+
|
34
|
+
For the server:
|
35
|
+
|
36
|
+
gem install treetop eventmachine sequel sqlite3 trollop beefcake
|
37
|
+
|
38
|
+
For the dashboard:
|
39
|
+
|
40
|
+
gem install sinatra thin erubis sass
|
41
|
+
|
42
|
+
Getting started
|
43
|
+
===============
|
44
|
+
|
45
|
+
To try it out, install all the gems above, and clone the repository. Start the server with
|
46
|
+
|
47
|
+
bin/server [--host host] [--port port]
|
48
|
+
|
49
|
+
UState listens on TCP socket host:port, and accepts connections from clients. Start a basic testing client with
|
50
|
+
|
51
|
+
bin/test
|
52
|
+
|
53
|
+
The tester spews randomly generated statistics at a server on the default local host and port. To see it in action, run the dashboard:
|
54
|
+
|
55
|
+
cd lib/ustate/dash
|
56
|
+
../../../bin/dash
|
57
|
+
|
58
|
+
|
59
|
+
The Dashboard
|
60
|
+
=============
|
61
|
+
|
62
|
+
The dashboard runs a file in the local directory: config.rb. That file can
|
63
|
+
override any configuration options on the Dash class (hence all Sinatra
|
64
|
+
configuration) as well as the Ustate client, etc.
|
65
|
+
|
66
|
+
set :port, 6000 # HTTP server on port 6000
|
67
|
+
config[:client][:host] = 'my.ustate.server'
|
68
|
+
|
69
|
+
It also loads views from the local directory. Sinatra makes it awkward to
|
70
|
+
compose multiple view directories, so you'll probably want to create your own
|
71
|
+
view/ and config.rb. I've provided an example stylesheet, layout, and dashboard
|
72
|
+
in lib/ustate/dash/views--as well as an extensive set of functions for laying
|
73
|
+
out states corresponding to any query: see lib/ustate/dash/helper/renderer.rb.
|
74
|
+
The way I figure, you're almost certainly going to want to write your own, so
|
75
|
+
I'm going to give you the tools you need, and get out of your way.
|
76
|
+
|
77
|
+
Protocol
|
78
|
+
========
|
79
|
+
|
80
|
+
A connection to UState is a stream of messages. Each message is a 4 byte
|
81
|
+
network-endian integer *length*, followed by a Protocol Buffers Message of
|
82
|
+
*length* bytes. See lib/ustate/message.rb for the protobuf particulars.
|
83
|
+
|
84
|
+
The server will accept a repeated list of States, and respond with a
|
85
|
+
confirmation message with either an acknowledgement or an error. Check the
|
86
|
+
success boolean in the Message.
|
87
|
+
|
88
|
+
You can also query states using a very basic expression language. The grammar is specified as a Parsing Expression Grammar (PEG) in query_string.treetop. Examples include:
|
89
|
+
|
90
|
+
state = "ok"
|
91
|
+
(service =~ "disk%") or (state == "critical" and host =~ "%.trioptimum.com")
|
92
|
+
|
93
|
+
Search queries will return a message with repeated States matching that expression. An empty expression matches all states.
|
94
|
+
|
95
|
+
Performance
|
96
|
+
===========
|
97
|
+
|
98
|
+
It's Ruby. It ain't gonna be fast. However, on my 4-year-old core 2 duo, I see >600 inserts/sec or queries/sec. The client is fully threadsafe, and performs well concurrently. I will continue to tune UState for latency and throughput, and welcome patches.
|
99
|
+
|
100
|
+
For large installations, I plan to implement a selective forwarder. Local ustate servers can accept high volumes of states from a small set of nodes, and forward updates at a larger granularity to supervisors, and so forth, in a tree. The query language should be able to support proxying requests to the most recent source of a state, so very large sets of services can be maintained at high granularity.
|
101
|
+
|
102
|
+
Goals
|
103
|
+
=====
|
104
|
+
|
105
|
+
Immediately, I'll be porting our internal email alerter to UState. Users register for interest in certain types of states or transitions, and receive emails when those events occur.
|
106
|
+
|
107
|
+
In the medium term, I'll be connecting UState to Graphite (or perhaps another
|
108
|
+
graphing tool) for metrics archival and soft-realtime graphs. I have an
|
109
|
+
internal gnuplot system which is clunky and deserves retirement.
|
110
|
+
|
111
|
+
When the protocol and architecture are finalized, I plan to reimplement the server in a faster language.
|
@@ -0,0 +1,17 @@
|
|
1
|
+
module UState
  # Little query builder: any unknown method name is looked up as a field of
  # UState::Query.
  class Client::Query
    def initialize
      @predicate = nil
    end

    # Looks +field+ up among UState::Query.fields by name and returns the
    # Proxy constant nested under the matching field's type.
    #
    # Raises ArgumentError when no field carries that name.
    #
    # NOTE(review): assumes each element yielded by UState::Query.fields
    # responds to #name and #type -- confirm against the beefcake version
    # in use.
    def method_missing(field)
      wanted = field.to_sym
      match = UState::Query.fields.find do |candidate|
        candidate.name == wanted
      end

      return match.type::Proxy if match

      raise ArgumentError, "no such field #{wanted.inspect}"
    end
  end
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
class UState::Client
|
2
|
+
class Error < RuntimeError; end
|
3
|
+
class InvalidResponse < Error; end
|
4
|
+
class ServerError < Error; end
|
5
|
+
|
6
|
+
require 'thread'
|
7
|
+
require 'socket'
|
8
|
+
require 'time'
|
9
|
+
|
10
|
+
HOST = '127.0.0.1'
|
11
|
+
PORT = 55956
|
12
|
+
|
13
|
+
TYPE_STATE = 1
|
14
|
+
|
15
|
+
attr_accessor :host, :port, :socket
|
16
|
+
|
17
|
+
# Stores connection settings and creates the mutex that guards the socket.
# No socket is opened here.
#
# opts - Hash; :host and :port fall back to the HOST and PORT constants
#        when missing or nil.
def initialize(opts = {})
  @locket = Mutex.new
  @port = opts[:port] || PORT
  @host = opts[:host] || HOST
end
|
22
|
+
|
23
|
+
# Send a state
|
24
|
+
# Sends a single state and returns the reply read back from the server.
#
# state_opts - passed straight to UState::State.new. When the resulting
#              state has no time it is stamped with the current UTC unix
#              time; when it has no host the local hostname is used.
def <<(state_opts)
  state = UState::State.new(state_opts)
  state.time ||= Time.now.utc.to_i
  state.host ||= Socket.gethostname

  envelope = UState::Message.new(:states => [state])

  # Write the framed message, then block for the server's answer.
  with_connection do |socket|
    socket << envelope.encode_with_length
    read_message(socket)
  end
end
|
38
|
+
|
39
|
+
# Opens a fresh TCP connection to @host:@port and stores it in @socket.
#
# Any socket previously held in @socket is closed once the new one is
# established, so repeated reconnects do not leak file descriptors. If
# opening the new connection fails, the error propagates and @socket is left
# untouched.
#
# Returns the new socket.
def connect
  previous = @socket
  @socket = TCPSocket.new(@host, @port)

  if previous && !previous.closed?
    begin
      previous.close
    rescue IOError, SystemCallError
      # The old connection is being discarded anyway; a failure while
      # closing it must not mask the successful reconnect.
    end
  end

  @socket
end
|
42
|
+
|
43
|
+
# Closes the current socket while holding the client lock.
#
# Safe to call when no connection was ever opened (@socket is nil) or when
# the socket is already closed; both cases are no-ops.
#
# Returns nil.
def close
  @locket.synchronize do
    @socket.close if @socket && !@socket.closed?
  end
end
|
48
|
+
|
49
|
+
# True when a socket exists and has not been closed.
#
# Returns false (instead of raising NoMethodError) when the client has never
# connected and @socket is still nil.
def connected?
  !@socket.nil? && !@socket.closed?
end
|
52
|
+
|
53
|
+
# Ask for states
|
54
|
+
# Asks the server for states and returns the reply read back from it.
#
# string - query expression forwarded as the :string of a UState::Query;
#          defaults to nil.
def query(string = nil)
  request = UState::Message.new(:query => UState::Query.new(:string => string))

  with_connection do |socket|
    socket << request.encode_with_length
    read_message(socket)
  end
end
|
61
|
+
|
62
|
+
# Read a message from a stream
|
63
|
+
# Reads one length-prefixed message from a stream.
#
# The frame is a 4-byte big-endian unsigned length (unpacked with 'N')
# followed by that many bytes, which are handed to UState::Message.decode.
#
# s - an IO-like object responding to read(n).
#
# Returns the decoded message when its ok flag is truthy.
# Raises InvalidResponse when the stream ends before a complete header or
# body has been read.
# Raises ServerError (carrying message.error) when the decoded message is
# not ok.
# Re-raises whatever UState::Message.decode raises, after logging the raw
# payload to stderr.
def read_message(s)
  header = s.read(4)
  raise InvalidResponse, "unexpected EOF" unless header && header.size == 4

  length = header.unpack('N').first
  str = s.read(length)

  # A short read means the peer went away mid-message; decoding a partial
  # payload would only produce a confusing parse error.
  unless str && str.bytesize == length
    got = str ? str.bytesize : 0
    raise InvalidResponse,
          "unexpected EOF (expected #{length} body bytes, got #{got})"
  end

  begin
    message = UState::Message.decode str
  rescue
    # Diagnostics go to stderr so library users' stdout stays clean.
    warn "Message was #{str.inspect}"
    raise
  end

  unless message.ok
    warn "Failed"
    raise ServerError, message.error
  end

  message
end
|
84
|
+
|
85
|
+
# Yields a connection in the block.
|
86
|
+
# Yields a connected socket to the block while holding the client lock and
# returns the block's value.
#
# The existing @socket is reused when present; otherwise connect is called.
# When the attempt fails with IOError, Errno::EPIPE, Errno::ECONNREFUSED or
# Errno::ECONNRESET, a new connection is opened and the block is retried.
# Up to four attempts are made in total; the error from the fourth failure
# is re-raised.
#
# Reconnection happens inside the guarded section, so a failing connect
# counts as an attempt instead of escaping from the rescue handler.
def with_connection
  tries = 0
  reconnect = false

  @locket.synchronize do
    begin
      tries += 1
      socket = reconnect ? connect : (@socket || connect)
      yield socket
    rescue IOError, Errno::EPIPE, Errno::ECONNREFUSED, Errno::ECONNRESET
      raise if tries > 3
      reconnect = true
      retry
    end
  end
end
|
108
|
+
end
|
File without changes
|
@@ -0,0 +1,209 @@
|
|
1
|
+
module UState
|
2
|
+
class Dash
|
3
|
+
helpers do
|
4
|
+
include Rack::Utils
|
5
|
+
|
6
|
+
alias_method :h, :escape_html
|
7
|
+
|
8
|
+
# Returns a scalar factor from 0.2 to 1, where 0.2 is "on the order of
|
9
|
+
# age_scale ago", and 1 is "very recent"
|
10
|
+
# Maps a unix timestamp onto a freshness factor between 0.2 and 1.
#
# time - unix time (numeric) or nil.
#
# Returns 1 for nil. Otherwise computes 1 - age / Dash.config[:age_scale],
# where age is the number of seconds between +time+ and now, and limits the
# result to the range 0.2..1.
def age_fraction(time)
  return 1 if time.nil?

  elapsed = Time.now.to_f - time
  fraction = 1 - (elapsed / Dash.config[:age_scale])

  return 0.2 if fraction < 0.2
  return 1 if fraction > 1

  fraction
end
|
22
|
+
|
23
|
+
# Finds the longest common prefix of a list of strings.
|
24
|
+
# e.g. 'abc', 'ab', 'abdf' => 'ab'
|
25
|
+
# Finds the longest common prefix of a list of strings,
# e.g. ['abc', 'ab', 'abdf'] => 'ab'.
#
# strings - Array of Strings.
# prefix  - String prepended to the result when two or more strings are
#           given (default '').
#
# Returns strings.first when the list has zero or one element (so nil for
# an empty list); otherwise prefix + the shared leading characters.
#
# Works iteratively: every string sorts between the smallest and largest
# one, so whatever those two have in common is common to all. This also
# handles duplicate and empty strings, which the former recursive version
# crashed on.
def longest_common_prefix(strings, prefix = '')
  return strings.first if strings.size <= 1

  low, high = strings.minmax

  shared = 0
  shared += 1 while shared < low.length && low[shared] == high[shared]

  prefix + low[0, shared]
end
|
39
|
+
|
40
|
+
# An overview of states
|
41
|
+
# Renders every state with state_short and wraps the results in a <ul>.
def state_list(states)
  items = states.map do |state|
    state_short(state)
  end

  ul(items)
end
|
44
|
+
|
45
|
+
# Renders a "States by Host" heading followed by a table with one row per
# host. Each row starts with the host in a <th class="host"> and continues
# with that host's states, ordered via State.sort(..., :service) and
# rendered with state_short.
#
# states - collection handed to State.partition(states, :host); defaults
#          to Dash.client.query.
def state_grid(states = Dash.client.query)
  heading = h2('States by Host')

  rows = State.partition(states, :host).map do |host, host_states|
    cells = State.sort(host_states, :service).map { |state| state_short(state) }
    tr(th(host, :class => 'host'), *cells)
  end

  heading + table(*rows)
end
|
58
|
+
|
59
|
+
# Renders a state as the given HTML tag with a % width corresponding to
|
60
|
+
# metric / max.
|
61
|
+
# Renders a state as an HTML tag whose width is metric / max as a
# percentage.
#
# s    - object responding to metric, state, time and description, or nil.
# opts - :tag (default 'div') names the element; :max (default 1) is the
#        metric value that corresponds to 100% width.
#
# Returns '' when +s+ is nil. The tag's text is the metric ('%.2f' for
# Floats, to_s for Integers, '?' otherwise); its opacity comes from
# age_fraction(s.time) and its title from s.description.
def state_bar(s, opts = {})
  opts = {tag: 'div', max: 1}.merge opts

  return '' unless s
  x = s.metric

  # Text
  text = case x
         when Float
           '%.2f' % x
         when Integer
           x.to_s
         else
           '?'
         end

  # Size. Integer division by zero raises, but Float division yields
  # Infinity or NaN instead; treat both the same way so the style never
  # contains "width: Infinity%" or "width: NaN%".
  size = begin
    raw = (x || 0) * 100 / opts[:max]
    raw.is_a?(Float) && !raw.finite? ? 0 : raw
  rescue ZeroDivisionError
    0
  end

  tag opts[:tag], h(text),
    :class => "state #{s.state}",
    style: "opacity: #{age_fraction s.time}; width: #{size}%",
    title: s.description
end
|
89
|
+
|
90
|
+
# Renders a set of states in a chart. Each row is a given host, each
|
91
|
+
# service is a column. Each state is shown as a bar with an inferred
|
92
|
+
# maximum for the entire service, so you can readily compare multiple
|
93
|
+
# hosts.
|
94
|
+
#
|
95
|
+
# Takes a set of states and options:
|
96
|
+
# title: the title of the chart. Inferred to be the longest common
|
97
|
+
# prefix of all services.
|
98
|
+
# maxima: maps each service to the maximum value used to display its
|
99
|
+
# bar.
|
100
|
+
# service_names: maps each service to a friendly name. Default service
|
101
|
+
# names have common prefixes removed.
|
102
|
+
# hosts: an array of hosts for rows. Default is every host present in
|
103
|
+
# states, sorted.
|
104
|
+
# Renders a set of states as a chart table: one row per host, one column
# per service, each cell a state_bar scaled to that service's maximum.
#
# states - collection of objects responding to host, service and metric.
# opts   - :title         heading text; defaults to the capitalized common
#                         prefix of all service names, or 'Unknown' when
#                         there is none.
#          :maxima        Hash of service => maximum, overriding the
#                         maximum inferred from the states.
#          :service_names Hash of service => column label, overriding the
#                         default (service name minus the common prefix).
#          :hosts         Array of hosts to use as rows; defaults to every
#                         host present in +states+.
#
# Returns the <h2> heading followed by the <table class="chart"> markup.
def state_chart(states, opts = {})
  o = {
    :maxima => {},
    :service_names => {}
  }.merge opts

  # Get all services
  services = states.map { |s| s.service }.compact.uniq.sort

  # Figure out what name to use for each service.
  prefix = longest_common_prefix services
  service_names = services.inject({}) do |names, service|
    names[service] = service[prefix.length..-1]
    names
  end.merge o[:service_names]

  # Compute maximum for each service. A state without a metric counts as 0
  # here, matching how state_bar sizes it, instead of breaking the
  # comparison.
  maxima = states.inject(Hash.new(0)) do |m, s|
    m[s.service] = [s.metric || 0, m[s.service]].max
    m
  end.merge o[:maxima]

  # Compute union of all hosts for these states, if no
  # list of hosts explicitly given.
  hosts = o[:hosts] || states.map { |state| state.host }.compact
  hosts = hosts.uniq.sort

  # Construct index of states by [host, service]
  by = states.inject({}) do |index, s|
    index[[s.host, s.service]] = s
    index
  end

  # Title. prefix is nil when there are no services at all.
  title = o[:title] || (prefix ? prefix.capitalize : 'Unknown')

  header = tr(
    th(),
    *services.map { |service| th(service_names[service]) }
  )

  rows = hosts.map do |host|
    cells = services.map do |service|
      s = by[[host, service]]
      td(s ? state_bar(s, max: maxima[service]) : nil)
    end
    tr(th(host), *cells)
  end

  h2(title) + table(header, *rows, :class => 'chart')
end
|
164
|
+
|
165
|
+
# Renders a state as a short tag.
|
166
|
+
# Renders a state as a short tag.
#
# s    - object responding to state, time, description, host and service,
#        or nil.
# opts - :tag names the element; 'li' is used when it is absent, including
#        when an options hash without :tag is passed.
#
# For a state, returns the element with class "state <state>", an opacity
# from age_fraction(s.time), the description as title and "host service"
# as text. All state-supplied strings, including the state name placed in
# the class attribute, are HTML-escaped. For nil, returns an empty element
# with class "service".
def state_short(s, opts={tag: 'li'})
  name = opts[:tag] || 'li'

  if s
    "<#{name} class=\"state #{h s.state}\" style=\"opacity: #{age_fraction s.time}\" title=\"#{h s.description}\">#{h s.host} #{h s.service}</#{name}>"
  else
    "<#{name} class=\"service\"></#{name}>"
  end
end
|
173
|
+
|
174
|
+
# Renders a time to an HTML tag.
|
175
|
+
# Renders a unix timestamp as a <time> element: the datetime attribute
# holds the ISO 8601 form and the text is formatted with
# Dash.config[:strftime].
def time(unix)
  moment = Time.at(unix)
  stamp = moment.iso8601
  label = moment.strftime(Dash.config[:strftime])

  "<time datetime=\"#{stamp}\">#{label}</time>"
end
|
179
|
+
|
180
|
+
# Renders an HTML tag
|
181
|
+
# Renders an HTML tag.
#
# tag - element name.
# a   - content pieces (nested arrays are flattened); a trailing Hash is
#       taken as the attribute map, with each value passed through h.
#
# When a block is given its value is appended to the content. Content
# pieces are joined with newlines and are not escaped here.
def tag(tag, *a)
  opts = Hash === a.last ? a.pop : {}

  attrs = opts.map { |key, value| "#{key}=\"#{h value}\"" }.join(' ')

  a << yield if block_given?
  content = a.flatten.join("\n")

  "<#{tag} #{attrs}>#{content}</#{tag}>"
end
|
200
|
+
|
201
|
+
# Specific tag aliases
|
202
|
+
# Specific tag aliases: defines one helper per element name, each of which
# forwards its arguments and block to tag with that name.
%w(div span h1 h2 h3 h4 h5 h6 ul ol li table th tr td u i b).each do |name|
  define_method(name) do |*a, &block|
    tag(name, *a, &block)
  end
end
|
207
|
+
end
|
208
|
+
end
|
209
|
+
end
|