rec 1.0.1 → 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rec/correlator.rb +16 -8
- data/lib/rec/{mock-alert.rb → mock-notify.rb} +9 -5
- data/lib/rec/{alert.rb → notify.rb} +19 -18
- data/lib/rec/rule.rb +26 -9
- data/lib/rec/state.rb +77 -31
- data/lib/rec.rb +8 -128
- metadata +12 -12
data/lib/rec/correlator.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'rec/rule'
|
2
2
|
|
3
|
-
module REC
|
3
|
+
module REC # :nodoc:
|
4
4
|
|
5
5
|
# The Correlator reads in log entries, matching them against the ruleset,
|
6
6
|
# creates states as necessary, generates new (correlated) events, and sends alerts.
|
7
7
|
class Correlator
|
8
8
|
|
9
|
+
# The current time from the perspective of the latest event processed,
|
10
|
+
# not necessarily actual clock time.
|
11
|
+
@@now = Time.now()
|
9
12
|
@@eventsIn = 0
|
10
13
|
@@eventsMissed = 0
|
11
14
|
|
@@ -16,9 +19,14 @@ class Correlator
|
|
16
19
|
self.new().start()
|
17
20
|
end
|
18
21
|
|
22
|
+
# Makes the current processing time effectively global
|
23
|
+
def self.now()
|
24
|
+
@@now
|
25
|
+
end
|
26
|
+
|
19
27
|
# Create a new Correlator
|
20
28
|
def initialize()
|
21
|
-
@
|
29
|
+
@startupTime = Time.now()
|
22
30
|
@year = @startupTime.year
|
23
31
|
@running = false
|
24
32
|
end
|
@@ -26,7 +34,7 @@ class Correlator
|
|
26
34
|
# Start a Correlator. +INT+ and +TERM+ signals will stop the Correlator.
|
27
35
|
# +USR1+ signal will cause Correlator to display statistics and continue running.
|
28
36
|
#
|
29
|
-
# Missed events are written to
|
37
|
+
# Missed events are written to File Descriptor 3, in case they are of interest
|
30
38
|
# (typically while testing rulesets)
|
31
39
|
def start()
|
32
40
|
Signal.trap("INT") { finish() }
|
@@ -55,17 +63,17 @@ class Correlator
|
|
55
63
|
logLine.strip!()
|
56
64
|
next if logLine.empty?
|
57
65
|
@@eventsIn += 1
|
58
|
-
|
66
|
+
@@time, message = parse(logLine)
|
59
67
|
$stderr.puts("< "+message) if $debug
|
60
|
-
State.expire_states(
|
68
|
+
State.expire_states() # remove expired states before we check the rules
|
61
69
|
eventMatched = false
|
62
70
|
Rule.each { |rule|
|
63
71
|
title = rule.check(message)
|
64
72
|
eventMatched = true unless title.nil? # empty match is still a match
|
65
73
|
next if title.nil?
|
66
74
|
break if title.empty? # match without a message means 'swallow this event'
|
67
|
-
state = State[title] || rule.create_state(title
|
68
|
-
rule.react(state,
|
75
|
+
state = State[title] || rule.create_state(title)
|
76
|
+
rule.react(state, logLine)
|
69
77
|
$stderr.puts("breaking after rule #{rule.rid}") unless (!$debug or rule.continue())
|
70
78
|
break unless rule.continue()
|
71
79
|
}
|
@@ -110,7 +118,7 @@ class Correlator
|
|
110
118
|
end
|
111
119
|
message = $'
|
112
120
|
else
|
113
|
-
time =
|
121
|
+
time = Correlator.now().freeze() # time stands still
|
114
122
|
message = logLine
|
115
123
|
end
|
116
124
|
[time, message]
|
@@ -1,23 +1,27 @@
|
|
1
|
-
module REC
|
2
|
-
|
3
|
-
# mock the
|
4
|
-
|
1
|
+
module REC # :nodoc:
|
2
|
+
|
3
|
+
# mock the Notify class for testing purposes
|
4
|
+
# :nodoc:
|
5
|
+
class Notify
|
5
6
|
|
6
7
|
@@emailsSent = []
|
7
8
|
@@jabbersSent = []
|
8
|
-
|
9
|
+
# :nodoc:
|
9
10
|
def self.email(alert, recipient=@@emailTo, subject=@@defaultSubject)
|
10
11
|
@@emailsSent << [alert, recipient, subject]
|
11
12
|
end
|
12
13
|
|
14
|
+
# :nodoc:
|
13
15
|
def self.jabber(alert, recipient=@@jabberTo, subject=@@defaultSubject)
|
14
16
|
@@jabbersSent << [alert, recipient, subject]
|
15
17
|
end
|
16
18
|
|
19
|
+
# :nodoc:
|
17
20
|
def emailsSent()
|
18
21
|
@@emailsSent
|
19
22
|
end
|
20
23
|
|
24
|
+
# :nodoc:
|
21
25
|
def jabbersSent()
|
22
26
|
@@jabberSent
|
23
27
|
end
|
@@ -2,33 +2,34 @@ require 'rubygems'
|
|
2
2
|
require 'net/smtp'
|
3
3
|
require 'xmpp4r'
|
4
4
|
|
5
|
-
module REC
|
5
|
+
module REC # :nodoc:
|
6
|
+
|
6
7
|
# Provides the capability to send alerts
|
7
8
|
# == mail
|
8
9
|
# The simplest approach is to use the native +mail+ program (no credentials required)
|
9
|
-
#
|
10
|
+
# Notify.mail(alert)
|
10
11
|
#
|
11
12
|
# == email and jabber
|
12
13
|
# You can also send emails and instant messages via servers, but you'll need to provide
|
13
14
|
# credentials to do that.
|
14
|
-
#
|
15
|
-
#
|
15
|
+
# Notify.smtp_credentials(user, password, domain, server, port)
|
16
|
+
# Notify.jabber_credentials(user, password, server)
|
16
17
|
#
|
17
18
|
# Then you can send messages:
|
18
|
-
#
|
19
|
-
#
|
19
|
+
# Notify.email(alert)
|
20
|
+
# Notify.jabber(alert)
|
20
21
|
# or send messages to another recipient, with another subject
|
21
|
-
#
|
22
|
-
#
|
22
|
+
# Notify.email(alert, "you@example.com", "Serious problem")
|
23
|
+
# Notify.jabber(alert, "boss@example.com")
|
23
24
|
#
|
24
25
|
# == Sleeping
|
25
26
|
# If you want to avoid being sent instant messages during sleeping hours, you can
|
26
27
|
# specify a range of working hours during which urgent alerts may be sent by jabber
|
27
28
|
# and outside those working hours the alert will be sent by email instead
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
29
|
+
# Notify.work_hours(9,18) # IMs only between 9am and 6pm
|
30
|
+
# Notify.urgent(alert) # sent as instant message if during work hours, else by email
|
31
|
+
# Notify.jabber(alert) # sent as instant message regardless of the time
|
32
|
+
# Notify.normal(alert) # sent by email
|
32
33
|
#
|
33
34
|
# == Securing credentials
|
34
35
|
# In order to keep email/messaging credentials secure, they can be provided in
|
@@ -36,13 +37,13 @@ module REC
|
|
36
37
|
# so the rules script need not contain passwords.
|
37
38
|
# load("/home/rec/alert.conf")
|
38
39
|
# Loads the credentials into the rules. The file can contain something like:
|
39
|
-
#
|
40
|
-
#
|
40
|
+
# Notify.smtp_credentials("rec@gmail.com", "tricky", "mydomain.com")
|
41
|
+
# Notify.jabber_credentials("rec@gmail.com", "tricky")
|
41
42
|
# <code>/home/rec/alert.conf</code> should be readable only by the otherwise
|
42
43
|
# unprivileged user (rec) running the script.
|
43
44
|
#
|
44
|
-
# While we're on the topic of security, just a reminder that
|
45
|
-
module
|
45
|
+
# While we're on the topic of security, just a reminder that Notify::mail requires no credentials.
|
46
|
+
module Notify
|
46
47
|
|
47
48
|
# Sets the default subject for alerts, overriding the system default of "Alert"
|
48
49
|
def self.default_subject(subject)
|
@@ -100,7 +101,7 @@ module Alert
|
|
100
101
|
end
|
101
102
|
|
102
103
|
# define the working hours during which instant messages are allowed
|
103
|
-
# Note that
|
104
|
+
# Note that Notify.work_hours(7,21) means "7am-9pm" as you would assume, so from 07:00 to 20:59
|
104
105
|
def self.work_hours(start, finish)
|
105
106
|
@@workHours = start..finish
|
106
107
|
end
|
@@ -115,7 +116,7 @@ module Alert
|
|
115
116
|
end
|
116
117
|
end
|
117
118
|
|
118
|
-
# Alias for
|
119
|
+
# Alias for Notify::email
|
119
120
|
def self.normal(alert)
|
120
121
|
self.email(alert)
|
121
122
|
end
|
data/lib/rec/rule.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rec/state'
|
2
2
|
|
3
|
-
module REC
|
3
|
+
module REC # :nodoc:
|
4
4
|
|
5
5
|
# A Rule specifies which log entries to match, what to remember, and what to do about them.
|
6
6
|
class Rule
|
@@ -71,21 +71,38 @@ class Rule
|
|
71
71
|
# }) { |state|
|
72
72
|
# state.alert_first_only()
|
73
73
|
# }
|
74
|
-
|
74
|
+
def action()
|
75
|
+
@params[:action]
|
76
|
+
end
|
75
77
|
|
76
78
|
# Creates a new rule. +rid+ must be unique.
|
79
|
+
# +action+ may be supplied as a block argument:
|
80
|
+
# :lifespan => 479
|
81
|
+
# }) { |state|
|
82
|
+
# state.alert_first_only()
|
83
|
+
# }
|
84
|
+
# or as a Proc.new value to the :action key:
|
85
|
+
# :lifespan => 479,
|
86
|
+
# :action => Proc.new { |state|
|
87
|
+
# state.alert_first_only()
|
88
|
+
# }
|
89
|
+
# })
|
90
|
+
# In most cases, an action is required when a state reacts with an event, but
|
91
|
+
# only in certain cases do we need an :onexpiry block. That is why the action
|
92
|
+
# may be supplied as a block if desired.
|
93
|
+
#
|
77
94
|
def initialize(rid, params={}, &action)
|
78
95
|
@rid = rid
|
79
96
|
@pattern = params[:pattern] || raise("No pattern specified for rule #{@ruleId}")
|
80
97
|
@message = params[:message] || "" # no message means no state created - ie. ignore event
|
81
98
|
@lifespan = params[:lifespan] || 0
|
82
|
-
@alert = params[:alert] || @message
|
83
99
|
@allstates = params[:allstates] || []
|
84
100
|
@anystates = params[:anystates] || []
|
85
101
|
@notstates = params[:notstates] || []
|
86
102
|
@details = params[:details] || []
|
87
103
|
@params = params
|
88
|
-
@action = action
|
104
|
+
@params[:action] = action unless action.nil? # store the action into the params
|
105
|
+
@params[:alert] = @params[:message] unless @params.has_key?(:alert) # default alert if absent
|
89
106
|
@matches = nil
|
90
107
|
Rule << self
|
91
108
|
end
|
@@ -107,18 +124,18 @@ class Rule
|
|
107
124
|
end
|
108
125
|
|
109
126
|
# Creates a state with the given title (timestamped with the Correlator's current event time)
|
110
|
-
def create_state(title
|
127
|
+
def create_state(title)
|
111
128
|
@@created[@rid] += 1
|
112
129
|
$stderr.puts("+ Creating new state #{title}") if $debug
|
113
|
-
State.new(title,
|
130
|
+
State.new(title, @lifespan, @params)
|
114
131
|
end
|
115
132
|
|
116
133
|
# Executes any action specified by the rule
|
117
|
-
def react(state,
|
134
|
+
def react(state, logLine)
|
118
135
|
@@reacted[@rid] += 1
|
119
|
-
state.update(
|
136
|
+
state.update(@rid, @matches, logLine)
|
120
137
|
$stderr.puts("~ Rule #{@rid}, state = #{state.inspect()}") if $debug
|
121
|
-
|
138
|
+
action().call(state) if action()
|
122
139
|
end
|
123
140
|
|
124
141
|
# Returns the +continue+ parameter. If false, stop processing rules for this event.
|
data/lib/rec/state.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'time'
|
2
2
|
|
3
|
-
module REC
|
3
|
+
module REC # :nodoc:
|
4
4
|
|
5
5
|
# A State is an object that represents the memory of something having happened.
|
6
6
|
# For example, "server terra is down".
|
@@ -17,6 +17,22 @@ module REC
|
|
17
17
|
# of what has gone before. You cannot *correlate* events without keeping State.
|
18
18
|
class State
|
19
19
|
|
20
|
+
# shortcut action to generate a message on each event
|
21
|
+
Generate = Proc.new { |state|
|
22
|
+
state.generate()
|
23
|
+
}
|
24
|
+
|
25
|
+
# shortcut action to generate a message and release the state immediately
|
26
|
+
Generate_and_release = Proc.new { |state|
|
27
|
+
state.generate()
|
28
|
+
state.release()
|
29
|
+
}
|
30
|
+
|
31
|
+
# shortcut action to generate a message on first event only
|
32
|
+
Generate_first_only = Proc.new { |state|
|
33
|
+
state.generate_first_only()
|
34
|
+
}
|
35
|
+
|
20
36
|
# An array of Timeouts. A Timeout struct has two elements:
|
21
37
|
# - timestamp at which to expire
|
22
38
|
# - key of the state to be expired
|
@@ -49,10 +65,10 @@ class State
|
|
49
65
|
end
|
50
66
|
end
|
51
67
|
|
52
|
-
# Deletes all expired states
|
53
|
-
def self.expire_states(
|
68
|
+
# Deletes all expired states, executing the rule's :final block (if any) before deletion
|
69
|
+
def self.expire_states()
|
54
70
|
timeout = @@timeouts.first()
|
55
|
-
while @@timeouts.length > 0 and timeout.expiry <
|
71
|
+
while @@timeouts.length > 0 and timeout.expiry < Correlator.now() do
|
56
72
|
state = State[timeout.key]
|
57
73
|
if state.nil?
|
58
74
|
@@timeouts.shift
|
@@ -60,6 +76,8 @@ class State
|
|
60
76
|
next
|
61
77
|
end
|
62
78
|
#$stderr.puts("Releasing state #{state.title} with count of #{state.count}")
|
79
|
+
final = Rule[state.rid].params[:final]
|
80
|
+
final.call(state) if final
|
63
81
|
@@states.delete(@@timeouts.shift().key)
|
64
82
|
timeout = @@timeouts.first()
|
65
83
|
end
|
@@ -73,6 +91,18 @@ class State
|
|
73
91
|
[statesCount, @@eventsOut]
|
74
92
|
end
|
75
93
|
|
94
|
+
# Returns a matching state or nil.
|
95
|
+
#
|
96
|
+
# This is used to locate a state, typically the other state created by a pair of
|
97
|
+
# rules. For example, in a rule handling "Server earth is up" we want to find the state
|
98
|
+
# corresponding to "Server earth is down"
|
99
|
+
# - template is a string like "Server %host$s is down"
|
100
|
+
# - state is the current state
|
101
|
+
def self.find(template, state)
|
102
|
+
title = template.sprinth(state.params)
|
103
|
+
@@states[title]
|
104
|
+
end
|
105
|
+
|
76
106
|
# unique ID of the rule which gave rise to this state
|
77
107
|
attr_reader :rid
|
78
108
|
# The unique title for this state (eg. "server earth is down")
|
@@ -81,6 +111,8 @@ class State
|
|
81
111
|
attr_reader :lifespan
|
82
112
|
# An alert message to be sent if subsequent events warrant it
|
83
113
|
attr_reader :alert
|
114
|
+
# An alert message to be sent when the state expires
|
115
|
+
attr_reader :final
|
84
116
|
# Hash of parameters of the rule that created this state
|
85
117
|
attr_reader :params
|
86
118
|
# Number of times this state has been matched
|
@@ -89,31 +121,28 @@ class State
|
|
89
121
|
attr_reader :created
|
90
122
|
# last time this state was updated
|
91
123
|
attr_reader :updated
|
92
|
-
# age of this state
|
93
|
-
attr_reader :dur
|
94
124
|
# Hash of custom fields to be remembered
|
95
125
|
attr_reader :details
|
126
|
+
# Array of original log entries matching this state
|
127
|
+
attr_reader :logs
|
96
128
|
|
97
|
-
# Creates a new state with the given (unique) +title+,
|
129
|
+
# Creates a new state with the given (unique) +title+,
|
98
130
|
# the +lifespan+ before the state is forgotten, and a hash of parameters.
|
99
131
|
#
|
100
132
|
# Note that the time is not necessarily 'now' because REC can be executed against
|
101
133
|
# historical log files. It uses the timestamp of the original log entry, not the
|
102
134
|
# current clock time.
|
103
|
-
def initialize(title,
|
135
|
+
def initialize(title, lifespan, params={})
|
136
|
+
@created = @updated = Correlator.now()
|
104
137
|
@title = title
|
105
138
|
@lifespan = lifespan.to_f
|
106
139
|
@params = params
|
107
140
|
@count = 0
|
108
|
-
@created = time
|
109
|
-
@updated = time
|
110
|
-
@dur = 0
|
111
141
|
@rid = 0
|
112
|
-
@alert = ""
|
113
142
|
@logs = [] # array of remembered logLines
|
114
143
|
@details = {} # hash of remembered details
|
115
144
|
@@states[title] = self
|
116
|
-
State.timeout_at(
|
145
|
+
State.timeout_at(@created + @lifespan, @title)
|
117
146
|
end
|
118
147
|
|
119
148
|
# Updates the statistics of this state (following a match).
|
@@ -123,18 +152,21 @@ class State
|
|
123
152
|
# - age of this state
|
124
153
|
# - the ID of the rule which last matched
|
125
154
|
# - more details (custom fields) may be added to memory
|
126
|
-
# -
|
155
|
+
# - message templates (for values to be interpolated into)
|
127
156
|
# - a list of the original log entries pertaining to this state
|
128
|
-
def update(
|
157
|
+
def update(rid, matches, logLine=nil)
|
129
158
|
@count = @count.succ
|
130
|
-
@updated =
|
131
|
-
@dur = @updated - @created
|
159
|
+
@updated = Correlator.now()
|
132
160
|
@rid = rid
|
133
161
|
@details.merge!(matches)
|
134
|
-
@alert = alert
|
135
162
|
@logs << logLine if @params[:capture]
|
136
163
|
end
|
137
164
|
|
165
|
+
# Returns the age of this state
|
166
|
+
def age
|
167
|
+
Correlator.now() - @created
|
168
|
+
end
|
169
|
+
|
138
170
|
# Forget a state.
|
139
171
|
# - if no pattern is provided, forget this state.
|
140
172
|
# - if a pattern is provided, use the stats for this state to determine the title
|
@@ -145,35 +177,49 @@ class State
|
|
145
177
|
if pattern.nil?
|
146
178
|
@@states.delete(@title)
|
147
179
|
else
|
148
|
-
@@states.delete(pattern.sprinth(stats))
|
180
|
+
@@states.delete(pattern.sprinth(stats()))
|
149
181
|
end
|
150
182
|
end
|
151
183
|
|
184
|
+
# Resets the expiry time to be +dur+ seconds after current time (this may
|
185
|
+
# be shorter than the original lifetime)
|
186
|
+
def extend_for(dur)
|
187
|
+
n = @@timeouts.find_index { |to|
|
188
|
+
to.title == @title
|
189
|
+
}
|
190
|
+
@@timeouts[n..n] = [] unless n.nil?
|
191
|
+
expiry = Correlator.now() + dur
|
192
|
+
self.timeout_at(expiry, @title)
|
193
|
+
end
|
194
|
+
|
152
195
|
# Returns the details of the state (ie. whatever custom fields were defined, eg. userid)
|
153
196
|
# merged with the standard statistics:
|
154
197
|
# - count: number of matches so far
|
155
|
-
# -
|
198
|
+
# - age: age of the state
|
156
199
|
# - created: time when this state was created
|
157
200
|
# - updated: time last updated (eg. when the latest event matched this state)
|
158
201
|
def stats()
|
159
|
-
@details.merge({"count"=>@count, "
|
202
|
+
@details.merge({"count"=>@count, "age"=>age(), "created"=>@created, "updated"=>@updated})
|
160
203
|
end
|
161
204
|
|
162
205
|
# Creates a new event, writes it to the output log, and returns the event.
|
163
|
-
#
|
164
206
|
# An *event* (or 'log entry') is a timestamp followed by a *message*
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
207
|
+
#
|
208
|
+
# Returns the message only (without the timestamp).
|
209
|
+
def generate(sym = :alert)
|
210
|
+
message = @params[sym].sprinth(stats())
|
211
|
+
if message.length > 0
|
212
|
+
event = "%s %s" % [@created.iso8601, message] + @logs.join("\n")
|
213
|
+
print("> ") if $debug
|
214
|
+
puts(event)
|
215
|
+
@@eventsOut = @@eventsOut + 1
|
216
|
+
end
|
217
|
+
message
|
172
218
|
end
|
173
219
|
|
174
220
|
# Creates a new event when this state is created, but ignores later occurrences
|
175
|
-
def
|
176
|
-
|
221
|
+
def generate_first_only(sym = :alert)
|
222
|
+
generate(sym) if @count == 1
|
177
223
|
end
|
178
224
|
|
179
225
|
# Allow access to any parameter by a convenience method
|
data/lib/rec.rb
CHANGED
@@ -1,132 +1,12 @@
|
|
1
|
-
# Ruby Event Correlation
|
2
1
|
require 'string'
|
3
2
|
require 'rec/correlator'
|
4
|
-
require 'rec/
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
$ sudo gem install rec
|
13
|
-
|
14
|
-
2. Select a ruleset or create your own
|
15
|
-
#!/usr/bin/ruby
|
16
|
-
require 'rec'
|
17
|
-
include REC
|
18
|
-
require 'rulesets/postfix-rules'
|
19
|
-
Correlator::start()
|
20
|
-
|
21
|
-
3. Start it up
|
22
|
-
$ rulesets/rules.rb < /var/log/mail.log 3>missed.log 2>control.log > newevents.log
|
23
|
-
|
24
|
-
== Why correlate events?
|
25
|
-
We all know that we should read our log files. But reading log files is *really* boring,
|
26
|
-
and frankly its easy to miss important things in all the superfluous detail.
|
27
|
-
|
28
|
-
[Save time]
|
29
|
-
If you are lazy enough to not want to review all of your log files manually forever, and
|
30
|
-
smart enough to work out what needs monitoring and when you might want to pay attention,
|
31
|
-
then wouldn't it be good if you could define those rules and let the computer do what it
|
32
|
-
does best?
|
33
|
-
|
34
|
-
[Generate meaning]
|
35
|
-
The logs of many applications are filled with entries that are quite low level - perhaps
|
36
|
-
wonderful for debugging, but typically not terribly meaningful in terms of business.
|
37
|
-
Wouldn't it be good if we could summarise a bunch of low level events into a single
|
38
|
-
business event - and then just read the <em>business log</em>.
|
39
|
-
|
40
|
-
== Alternatives
|
41
|
-
There are several alternatives to REC which may suit your needs better:
|
42
|
-
* splunk[www.splunk.com]
|
43
|
-
* nagios[www.nagios.com]
|
44
|
-
* scalextreme.com[www.scalextreme.com]
|
45
|
-
While I like these options, I find they take a lot of configuring.
|
46
|
-
They also has some dependencies that make them a bit heavier than you may want.
|
47
|
-
If you just want to keep track of a few kinds of events, want a lot of flexibility
|
48
|
-
and control without too much effort, then REC may be of some value.
|
49
|
-
|
50
|
-
== How does REC work?
|
51
|
-
Each entry in a log file is an *event*.
|
52
|
-
The Correlator reads the events, and attempts to match an event against each Rule.
|
53
|
-
If an event matches a rule, the rule creates a State which just means we're remembering
|
54
|
-
that the event matched a rule. The pattern to match is a regexp, and the captured values
|
55
|
-
are named. For example
|
56
|
-
# log entry => "nfs: server earth not responding"
|
57
|
-
pattern => /nfs\: server (\w+) not responding/
|
58
|
-
details => ['host']
|
59
|
-
# values of interest are captured into a hash => {'host' => 'earth' }
|
60
|
-
:message => "Server %host$s is down"
|
61
|
-
# interpolation with named parameters => "Server earth is down"
|
62
|
-
|
63
|
-
A state has a fixed lifetime, set when it is created. At the end of its life, it may simply
|
64
|
-
expire quietly, or a pre-defined action may be executed. For example, if we find a server is down,
|
65
|
-
we may want to wait for 3 minutes and if it is not up again, then alert the administrator.
|
66
|
-
The server being down is a state, and two states are distinguished by their *titles*. For example,
|
67
|
-
"host earth is down" and "host terra is down".
|
68
|
-
|
69
|
-
Now that we're remembering a set of states, we can match events against not only the event's
|
70
|
-
message, but also other states. For example, we can match "host terra is up" against a previously
|
71
|
-
created state "host terra is down", and generate a new event "host terra is back up after 14 minutes".
|
72
|
-
We can also 'swallow' all of the rest of the "host terra is down" events because they add nothing new.
|
73
|
-
This <em>event compression</em> means the administrator gets one important message, and not 27
|
74
|
-
distracting alerts.
|
75
|
-
|
76
|
-
An Alert can be sent by email or IM, depending on your preferences and working hours.
|
77
|
-
The destinations and credentials are supplied to your ruleset:
|
78
|
-
# For better security, move the next few lines into a file readable only by
|
79
|
-
# the user running this script eg. /home/rec/alert.conf
|
80
|
-
# and then require that file
|
81
|
-
Alert.smtp_credentials("rec@gmail.com", "recret", "myfirm.com")
|
82
|
-
Alert.emailTo = "me@myfirm.com"
|
83
|
-
Alert.jabber_credentials("rec@gmail.com", "recret")
|
84
|
-
Alert.jabberTo = "me@myfirm.com"
|
85
|
-
|
86
|
-
Rules can then send an alert when desired. Two common cases involve alerting immediately
|
87
|
-
on the first event (eg. "host terra is down"), and alerting on expiry or at a subsequent event
|
88
|
-
(eg. "host terra is back up").
|
89
|
-
state.alert_first_only() # => generate a new event on first original event
|
90
|
-
# or
|
91
|
-
Alert.normal(state.alert_first_only()) # => log and also send the new event via email
|
92
|
-
|
93
|
-
In most cases, however, it is not necessary to alert the administrator at all. It is enough to
|
94
|
-
log the new event in the output logfile for later review.
|
95
|
-
|
96
|
-
== Examples
|
97
|
-
The best way to understand REC is to see how rules are written. The following examples were
|
98
|
-
inspired by SEC (simple-evcorr.sourceforge.net), so they employ the same names.
|
99
|
-
|
100
|
-
1. Warn if an user is having trouble executing sudo commands
|
101
|
-
The log entry (/var/log/secure) looks like this:
|
102
|
-
|
103
|
-
Sep 16 07:09:22 earth sudo: richard : 3 incorrect password attempts ;...
|
104
|
-
|
105
|
-
and the rule might look like this:
|
106
|
-
|
107
|
-
# single threshold rule
|
108
|
-
Rule.new(10034, {
|
109
|
-
:pattern => /\w+ sudo\: (\w+) \: 3 incorrect password attempts/,
|
110
|
-
:details => ["userid"],
|
111
|
-
:message => "Failed sudo password for user %userid$s",
|
112
|
-
:lifespan => 60,
|
113
|
-
:alert => "'Too much sudo activity' userid=%userid$s attempts=%count$d dur=%dur$0.3fs ",
|
114
|
-
:threshold => 3,
|
115
|
-
:capture => true
|
116
|
-
}) { |state|
|
117
|
-
if state.count == state.threshold
|
118
|
-
Alert.urgent(state.generate_alert())
|
119
|
-
state.release()
|
120
|
-
end
|
121
|
-
}
|
122
|
-
|
123
|
-
This rule matches the log entry against the *pattern*. Note that the captured value
|
124
|
-
<code>(\w+)</code> is stored as the +userid+. The +details+ parameter specifies the names
|
125
|
-
of the captured values in the sequence they appear in the pattern.
|
126
|
-
|
127
|
-
The *message* specifies the title of the state that is created. If there is no title
|
128
|
-
|
129
|
-
|
130
|
-
=end
|
3
|
+
require 'rec/notify'
|
4
|
+
|
5
|
+
# The REC module is a namespace containing:
|
6
|
+
# - REC::Correlator
|
7
|
+
# - REC::Rule
|
8
|
+
# - REC::State
|
9
|
+
# - REC::Notify
|
10
|
+
#
|
131
11
|
module REC
|
132
12
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 1.0.
|
8
|
+
- 2
|
9
|
+
version: 1.0.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Richard Kernahan
|
@@ -14,17 +14,17 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-09-
|
17
|
+
date: 2012-09-17 00:00:00 +10:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
21
|
-
description: "\tSifts through your log files in real time, using stateful intelligence to determine\n\
|
22
|
-
\twhat is really important. REC can alert you (by email or IM) or it can simply condense\n\
|
23
|
-
\ta large log file into a much shorter and more meaningful log.\n\
|
24
|
-
\tREC is inspired by Risto Vaarandi's brilliant *sec* (simple-evcorr.sourceforge.net)\n\
|
25
|
-
\tbut is original code and any defects are entirely mine.\n\
|
26
|
-
\tWhile event correlation is inherently complex, REC attempts to make common tasks easy\n\
|
27
|
-
\twhile preserving plenty of power and flexibility for ambitious tasks.\n"
|
21
|
+
description: "\t\tSifts through your log files in real time, using stateful intelligence to determine\n\
|
22
|
+
\t\twhat is really important. REC can alert you (by email or IM) or it can simply condense\n\
|
23
|
+
\t\ta large log file into a much shorter and more meaningful log.\n\
|
24
|
+
\t\tREC is inspired by Risto Vaarandi's brilliant *sec* (simple-evcorr.sourceforge.net)\n\
|
25
|
+
\t\tbut is original code and any defects are entirely mine.\n\
|
26
|
+
\t\tWhile event correlation is inherently complex, REC attempts to make common tasks easy\n\
|
27
|
+
\t\twhile preserving plenty of power and flexibility for ambitious tasks.\n"
|
28
28
|
email: dev.rec@finalstep.com.au
|
29
29
|
executables: []
|
30
30
|
|
@@ -37,8 +37,8 @@ files:
|
|
37
37
|
- lib/rec/rule.rb
|
38
38
|
- lib/rec/state.rb
|
39
39
|
- lib/rec/correlator.rb
|
40
|
-
- lib/rec/
|
41
|
-
- lib/rec/mock-
|
40
|
+
- lib/rec/notify.rb
|
41
|
+
- lib/rec/mock-notify.rb
|
42
42
|
- lib/string.rb
|
43
43
|
has_rdoc: true
|
44
44
|
homepage: http://rubygems.org/gems/rec
|