rec 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rec/correlator.rb +16 -8
- data/lib/rec/{mock-alert.rb → mock-notify.rb} +9 -5
- data/lib/rec/{alert.rb → notify.rb} +19 -18
- data/lib/rec/rule.rb +26 -9
- data/lib/rec/state.rb +77 -31
- data/lib/rec.rb +8 -128
- metadata +12 -12
data/lib/rec/correlator.rb
CHANGED
@@ -1,11 +1,14 @@
|
|
1
1
|
require 'rec/rule'
|
2
2
|
|
3
|
-
module REC
|
3
|
+
module REC # :nodoc:
|
4
4
|
|
5
5
|
# The Correlator reads in log entries, matching them against the ruleset,
|
6
6
|
# creates states as necessary, generates new (correlated) events, and sends alerts.
|
7
7
|
class Correlator
|
8
8
|
|
9
|
+
# The current time from the perspective of the latest event processed,
|
10
|
+
# not necessarily actual clock time.
|
11
|
+
@@now = Time.now()
|
9
12
|
@@eventsIn = 0
|
10
13
|
@@eventsMissed = 0
|
11
14
|
|
@@ -16,9 +19,14 @@ class Correlator
|
|
16
19
|
self.new().start()
|
17
20
|
end
|
18
21
|
|
22
|
+
# Makes the current processing time effectively global
|
23
|
+
def self.now()
|
24
|
+
@@now
|
25
|
+
end
|
26
|
+
|
19
27
|
# Create a new Correlator
|
20
28
|
def initialize()
|
21
|
-
@
|
29
|
+
@startupTime = Time.now()
|
22
30
|
@year = @startupTime.year
|
23
31
|
@running = false
|
24
32
|
end
|
@@ -26,7 +34,7 @@ class Correlator
|
|
26
34
|
# Start a Correlator. +INT+ and +TERM+ signals will stop the Correlator.
|
27
35
|
# +USR1+ signal will cause Correlator to display statistics and continue running.
|
28
36
|
#
|
29
|
-
# Missed events are written to
|
37
|
+
# Missed events are written to File Descriptor 3, in case they are of interest
|
30
38
|
# (typically while testing rulesets)
|
31
39
|
def start()
|
32
40
|
Signal.trap("INT") { finish() }
|
@@ -55,17 +63,17 @@ class Correlator
|
|
55
63
|
logLine.strip!()
|
56
64
|
next if logLine.empty?
|
57
65
|
@@eventsIn += 1
|
58
|
-
|
66
|
+
@@time, message = parse(logLine)
|
59
67
|
$stderr.puts("< "+message) if $debug
|
60
|
-
State.expire_states(
|
68
|
+
State.expire_states() # remove expired states before we check the rules
|
61
69
|
eventMatched = false
|
62
70
|
Rule.each { |rule|
|
63
71
|
title = rule.check(message)
|
64
72
|
eventMatched = true unless title.nil? # empty match is still a match
|
65
73
|
next if title.nil?
|
66
74
|
break if title.empty? # match without a message means 'swallow this event'
|
67
|
-
state = State[title] || rule.create_state(title
|
68
|
-
rule.react(state,
|
75
|
+
state = State[title] || rule.create_state(title)
|
76
|
+
rule.react(state, logLine)
|
69
77
|
$stderr.puts("breaking after rule #{rule.rid}") unless (!$debug or rule.continue())
|
70
78
|
break unless rule.continue()
|
71
79
|
}
|
@@ -110,7 +118,7 @@ class Correlator
|
|
110
118
|
end
|
111
119
|
message = $'
|
112
120
|
else
|
113
|
-
time =
|
121
|
+
time = Correlator.now().freeze() # time stands still
|
114
122
|
message = logLine
|
115
123
|
end
|
116
124
|
[time, message]
|
@@ -1,23 +1,27 @@
|
|
1
|
-
module REC
|
2
|
-
|
3
|
-
# mock the
|
4
|
-
|
1
|
+
module REC # :nodoc:
|
2
|
+
|
3
|
+
# mock the Notify class for testing purposes
|
4
|
+
# :nodoc:
|
5
|
+
class Notify
|
5
6
|
|
6
7
|
@@emailsSent = []
|
7
8
|
@@jabbersSent = []
|
8
|
-
|
9
|
+
# :nodoc:
|
9
10
|
def self.email(alert, recipient=@@emailTo, subject=@@defaultSubject)
|
10
11
|
@@emailsSent << [alert, recipient, subject]
|
11
12
|
end
|
12
13
|
|
14
|
+
# :nodoc:
|
13
15
|
def self.jabber(alert, recipient=@@jabberTo, subject=@@defaultSubject)
|
14
16
|
@@jabbersSent << [alert, recipient, subject]
|
15
17
|
end
|
16
18
|
|
19
|
+
# :nodoc:
|
17
20
|
def emailsSent()
|
18
21
|
@@emailsSent
|
19
22
|
end
|
20
23
|
|
24
|
+
# :nodoc:
|
21
25
|
def jabbersSent()
|
22
26
|
@@jabberSent
|
23
27
|
end
|
@@ -2,33 +2,34 @@ require 'rubygems'
|
|
2
2
|
require 'net/smtp'
|
3
3
|
require 'xmpp4r'
|
4
4
|
|
5
|
-
module REC
|
5
|
+
module REC # :nodoc:
|
6
|
+
|
6
7
|
# Provides the capability to send alerts
|
7
8
|
# == mail
|
8
9
|
# The simplest approach is to use the native +mail+ program (no credentials required)
|
9
|
-
#
|
10
|
+
# Notify.mail(alert)
|
10
11
|
#
|
11
12
|
# == email and jabber
|
12
13
|
# You can also send emails and instant messages via servers, but you'll need to provide
|
13
14
|
# credentials to do that.
|
14
|
-
#
|
15
|
-
#
|
15
|
+
# Notify.smtp_credentials(user, password, domain, server, port)
|
16
|
+
# Notify.jabber_credentials(user, password, server)
|
16
17
|
#
|
17
18
|
# Then you can send messages:
|
18
|
-
#
|
19
|
-
#
|
19
|
+
# Notify.email(alert)
|
20
|
+
# Notify.jabber(alert)
|
20
21
|
# or send messages to another recipient, with another subject
|
21
|
-
#
|
22
|
-
#
|
22
|
+
# Notify.email(alert, "you@example.com", "Serious problem")
|
23
|
+
# Notify.jabber(alert, "boss@example.com")
|
23
24
|
#
|
24
25
|
# == Sleeping
|
25
26
|
# If you want to avoid being sent instant messages during sleeping hours, you can
|
26
27
|
# specify a range of working hours during which urgent alerts may be sent by jabber
|
27
28
|
# and outside those working hours the alert will be sent by email instead
|
28
|
-
#
|
29
|
-
#
|
30
|
-
#
|
31
|
-
#
|
29
|
+
# Notify.work_hours(9,18) # IMs only between 9am and 6pm
|
30
|
+
# Notify.urgent(alert) # sent as instant message if during work hours, else by email
|
31
|
+
# Notify.jabber(alert) # sent as instant message regardless of the time
|
32
|
+
# Notify.normal(alert) # sent by email
|
32
33
|
#
|
33
34
|
# == Securing credentials
|
34
35
|
# In order to keep email/messaging credentials secure, they can be provided in
|
@@ -36,13 +37,13 @@ module REC
|
|
36
37
|
# so the rules script need not contain passwords.
|
37
38
|
# load("/home/rec/alert.conf")
|
38
39
|
# Loads the credentials into the rules. The file can contain something like:
|
39
|
-
#
|
40
|
-
#
|
40
|
+
# Notify.smtp_credentials("rec@gmail.com", "tricky", "mydomain.com")
|
41
|
+
# Notify.jabber_credentials("rec@gmail.com", "tricky")
|
41
42
|
# <code>/home/rec/alert.conf</code> should be readable only by the otherwise
|
42
43
|
# unprivileged user (rec) running the script.
|
43
44
|
#
|
44
|
-
# While we're on the topic of security, just a reminder that
|
45
|
-
module
|
45
|
+
# While we're on the topic of security, just a reminder that Notify::mail requires no credentials.
|
46
|
+
module Notify
|
46
47
|
|
47
48
|
# Sets the default subject for alerts, overriding the system default of "Alert"
|
48
49
|
def self.default_subject(subject)
|
@@ -100,7 +101,7 @@ module Alert
|
|
100
101
|
end
|
101
102
|
|
102
103
|
# define the working hours during which instant messages are allowed
|
103
|
-
# Note that
|
104
|
+
# Note that Notify.work_hours(7,21) means "7am-9pm" as you would assume, so from 07:00 to 20:59
|
104
105
|
def self.work_hours(start, finish)
|
105
106
|
@@workHours = start..finish
|
106
107
|
end
|
@@ -115,7 +116,7 @@ module Alert
|
|
115
116
|
end
|
116
117
|
end
|
117
118
|
|
118
|
-
# Alias for
|
119
|
+
# Alias for Notify::email
|
119
120
|
def self.normal(alert)
|
120
121
|
self.email(alert)
|
121
122
|
end
|
data/lib/rec/rule.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'rec/state'
|
2
2
|
|
3
|
-
module REC
|
3
|
+
module REC # :nodoc:
|
4
4
|
|
5
5
|
# A Rule specifies which log entries to match, what to remember, and what to do about them.
|
6
6
|
class Rule
|
@@ -71,21 +71,38 @@ class Rule
|
|
71
71
|
# }) { |state|
|
72
72
|
# state.alert_first_only()
|
73
73
|
# }
|
74
|
-
|
74
|
+
def action()
|
75
|
+
@params[:action]
|
76
|
+
end
|
75
77
|
|
76
78
|
# Creates a new rule. +rid+ must be unique.
|
79
|
+
# +action+ may be supplied as a block argument:
|
80
|
+
# :lifespan => 479
|
81
|
+
# }) { |state|
|
82
|
+
# state.alert_first_only()
|
83
|
+
# }
|
84
|
+
# or as a Proc.new value to the :action key:
|
85
|
+
# :lifespan => 479,
|
86
|
+
# :action => Proc.new { |state|
|
87
|
+
# state.alert_first_only()
|
88
|
+
# }
|
89
|
+
# })
|
90
|
+
# In most cases, an action is required when a state reacts with an event, but
|
91
|
+
# only in certain cases do we need an :onexpiry block. That is why the action
|
92
|
+
# may be supplied as a block if desired.
|
93
|
+
#
|
77
94
|
def initialize(rid, params={}, &action)
|
78
95
|
@rid = rid
|
79
96
|
@pattern = params[:pattern] || raise("No pattern specified for rule #{@ruleId}")
|
80
97
|
@message = params[:message] || "" # no message means no state created - ie. ignore event
|
81
98
|
@lifespan = params[:lifespan] || 0
|
82
|
-
@alert = params[:alert] || @message
|
83
99
|
@allstates = params[:allstates] || []
|
84
100
|
@anystates = params[:anystates] || []
|
85
101
|
@notstates = params[:notstates] || []
|
86
102
|
@details = params[:details] || []
|
87
103
|
@params = params
|
88
|
-
@action = action
|
104
|
+
@params[:action] = action unless action.nil? # store the action into the params
|
105
|
+
@params[:alert] = @params[:message] unless @params.has_key?(:alert) # default alert if absent
|
89
106
|
@matches = nil
|
90
107
|
Rule << self
|
91
108
|
end
|
@@ -107,18 +124,18 @@ class Rule
|
|
107
124
|
end
|
108
125
|
|
109
126
|
# Creates a state with the given title; its timestamps come from Correlator.now()
|
110
|
-
def create_state(title
|
127
|
+
def create_state(title)
|
111
128
|
@@created[@rid] += 1
|
112
129
|
$stderr.puts("+ Creating new state #{title}") if $debug
|
113
|
-
State.new(title,
|
130
|
+
State.new(title, @lifespan, @params)
|
114
131
|
end
|
115
132
|
|
116
133
|
# Executes any action specified by the rule
|
117
|
-
def react(state,
|
134
|
+
def react(state, logLine)
|
118
135
|
@@reacted[@rid] += 1
|
119
|
-
state.update(
|
136
|
+
state.update(@rid, @matches, logLine)
|
120
137
|
$stderr.puts("~ Rule #{@rid}, state = #{state.inspect()}") if $debug
|
121
|
-
|
138
|
+
action().call(state) if action()
|
122
139
|
end
|
123
140
|
|
124
141
|
# Returns the +continue+ parameter. If false, stop processing rules for this event.
|
data/lib/rec/state.rb
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'time'
|
2
2
|
|
3
|
-
module REC
|
3
|
+
module REC # :nodoc:
|
4
4
|
|
5
5
|
# A State is an object that represents the memory of something having happened.
|
6
6
|
# For example, "server terra is down".
|
@@ -17,6 +17,22 @@ module REC
|
|
17
17
|
# of what has gone before. You cannot *correlate* events without keeping State.
|
18
18
|
class State
|
19
19
|
|
20
|
+
# shortcut action to generate a message on each event
|
21
|
+
Generate = Proc.new { |state|
|
22
|
+
state.generate()
|
23
|
+
}
|
24
|
+
|
25
|
+
# shortcut action to generate a message and release the state immediately
|
26
|
+
Generate_and_release = Proc.new { |state|
|
27
|
+
state.generate()
|
28
|
+
state.release()
|
29
|
+
}
|
30
|
+
|
31
|
+
# shortcut action to generate a message on first event only
|
32
|
+
Generate_first_only = Proc.new { |state|
|
33
|
+
state.generate_first_only()
|
34
|
+
}
|
35
|
+
|
20
36
|
# An array of Timeouts. A Timeout struct has two elements:
|
21
37
|
# - timestamp at which to expire
|
22
38
|
# - key of the state to be expired
|
@@ -49,10 +65,10 @@ class State
|
|
49
65
|
end
|
50
66
|
end
|
51
67
|
|
52
|
-
# Deletes all expired states
|
53
|
-
def self.expire_states(
|
68
|
+
# Deletes all expired states, calling the rule's :final block (if any) before deletion
|
69
|
+
def self.expire_states()
|
54
70
|
timeout = @@timeouts.first()
|
55
|
-
while @@timeouts.length > 0 and timeout.expiry <
|
71
|
+
while @@timeouts.length > 0 and timeout.expiry < Correlator.now() do
|
56
72
|
state = State[timeout.key]
|
57
73
|
if state.nil?
|
58
74
|
@@timeouts.shift
|
@@ -60,6 +76,8 @@ class State
|
|
60
76
|
next
|
61
77
|
end
|
62
78
|
#$stderr.puts("Releasing state #{state.title} with count of #{state.count}")
|
79
|
+
final = Rule[state.rid].params[:final]
|
80
|
+
final.call(state) if final
|
63
81
|
@@states.delete(@@timeouts.shift().key)
|
64
82
|
timeout = @@timeouts.first()
|
65
83
|
end
|
@@ -73,6 +91,18 @@ class State
|
|
73
91
|
[statesCount, @@eventsOut]
|
74
92
|
end
|
75
93
|
|
94
|
+
# Returns a matching state or nil.
|
95
|
+
#
|
96
|
+
# This is used to locate a state, typically the other state created by a pair of
|
97
|
+
# rules. For example, in a rule handling "Server earth is up" we want to find the state
|
98
|
+
# corresponding to "Server earth is down"
|
99
|
+
# - template is a string like "Server %host$s is down"
|
100
|
+
# - state is the current state
|
101
|
+
def self.find(template, state)
|
102
|
+
title = template.sprinth(state.params)
|
103
|
+
@@states[title]
|
104
|
+
end
|
105
|
+
|
76
106
|
# unique ID of the rule which gave rise to this state
|
77
107
|
attr_reader :rid
|
78
108
|
# The unique title for this state (eg. "server earth is down")
|
@@ -81,6 +111,8 @@ class State
|
|
81
111
|
attr_reader :lifespan
|
82
112
|
# An alert message to be sent if subsequent events warrant it
|
83
113
|
attr_reader :alert
|
114
|
+
# An alert message to be sent when the state expires
|
115
|
+
attr_reader :final
|
84
116
|
# Hash of parameters of the rule that created this state
|
85
117
|
attr_reader :params
|
86
118
|
# Number of times this state has been matched
|
@@ -89,31 +121,28 @@ class State
|
|
89
121
|
attr_reader :created
|
90
122
|
# last time this state was updated
|
91
123
|
attr_reader :updated
|
92
|
-
# age of this state
|
93
|
-
attr_reader :dur
|
94
124
|
# Hash of custom fields to be remembered
|
95
125
|
attr_reader :details
|
126
|
+
# Array of original log entries matching this state
|
127
|
+
attr_reader :logs
|
96
128
|
|
97
|
-
# Creates a new state with the given (unique) +title+,
|
129
|
+
# Creates a new state with the given (unique) +title+,
|
98
130
|
# the +lifespan+ before the state is forgotten, and a hash of parameters.
|
99
131
|
#
|
100
132
|
# Note that the time is not necessarily 'now' because REC can be executed against
|
101
133
|
# historical log files. It uses the timestamp of the original log entry, not the
|
102
134
|
# current clock time.
|
103
|
-
def initialize(title,
|
135
|
+
def initialize(title, lifespan, params={})
|
136
|
+
@created = @updated = Correlator.now()
|
104
137
|
@title = title
|
105
138
|
@lifespan = lifespan.to_f
|
106
139
|
@params = params
|
107
140
|
@count = 0
|
108
|
-
@created = time
|
109
|
-
@updated = time
|
110
|
-
@dur = 0
|
111
141
|
@rid = 0
|
112
|
-
@alert = ""
|
113
142
|
@logs = [] # array of remembered logLines
|
114
143
|
@details = {} # hash of remembered details
|
115
144
|
@@states[title] = self
|
116
|
-
State.timeout_at(
|
145
|
+
State.timeout_at(@created + @lifespan, @title)
|
117
146
|
end
|
118
147
|
|
119
148
|
# Updates the statistics of this state (following a match).
|
@@ -123,18 +152,21 @@ class State
|
|
123
152
|
# - age of this state
|
124
153
|
# - the ID of the rule which last matched
|
125
154
|
# - more details (custom fields) may be added to memory
|
126
|
-
# -
|
155
|
+
# - message templates (for values to be interpolated into)
|
127
156
|
# - a list of the original log entries pertaining to this state
|
128
|
-
def update(
|
157
|
+
def update(rid, matches, logLine=nil)
|
129
158
|
@count = @count.succ
|
130
|
-
@updated =
|
131
|
-
@dur = @updated - @created
|
159
|
+
@updated = Correlator.now()
|
132
160
|
@rid = rid
|
133
161
|
@details.merge!(matches)
|
134
|
-
@alert = alert
|
135
162
|
@logs << logLine if @params[:capture]
|
136
163
|
end
|
137
164
|
|
165
|
+
# Returns the age of this state
|
166
|
+
def age
|
167
|
+
Correlator.now() - @created
|
168
|
+
end
|
169
|
+
|
138
170
|
# Forget a state.
|
139
171
|
# - if no pattern is provided, forget this state.
|
140
172
|
# - if a pattern is provided, use the stats for this state to determine the title
|
@@ -145,35 +177,49 @@ class State
|
|
145
177
|
if pattern.nil?
|
146
178
|
@@states.delete(@title)
|
147
179
|
else
|
148
|
-
@@states.delete(pattern.sprinth(stats))
|
180
|
+
@@states.delete(pattern.sprinth(stats()))
|
149
181
|
end
|
150
182
|
end
|
151
183
|
|
184
|
+
# Resets the expiry time to be +dur+ seconds after current time (this may
|
185
|
+
# be shorter than the original lifetime)
|
186
|
+
def extend_for(dur)
|
187
|
+
n = @@timeouts.find_index { |to|
|
188
|
+
to.title == @title
|
189
|
+
}
|
190
|
+
@@timeouts[n..n] = [] unless n.nil?
|
191
|
+
expiry = Correlator.now() + dur
|
192
|
+
self.timeout_at(expiry, @title)
|
193
|
+
end
|
194
|
+
|
152
195
|
# Returns the details of the state (ie. whatever custom fields were defined, eg. userid)
|
153
196
|
# merged with the standard statistics:
|
154
197
|
# - count: number of matches so far
|
155
|
-
# -
|
198
|
+
# - age: age of the state
|
156
199
|
# - created: time when this state was created
|
157
200
|
# - updated: time last updated (eg. when the latest event matched this state)
|
158
201
|
def stats()
|
159
|
-
@details.merge({"count"=>@count, "
|
202
|
+
@details.merge({"count"=>@count, "age"=>age(), "created"=>@created, "updated"=>@updated})
|
160
203
|
end
|
161
204
|
|
162
205
|
# Creates a new event, writes it to the output log, and returns the event.
|
163
|
-
#
|
164
206
|
# An *event* (or 'log entry') is a timestamp followed by a *message*
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
207
|
+
#
|
208
|
+
# Returns the message only (without the timestamp).
|
209
|
+
def generate(sym = :alert)
|
210
|
+
message = @params[sym].sprinth(stats())
|
211
|
+
if message.length > 0
|
212
|
+
event = "%s %s" % [@created.iso8601, message] + @logs.join("\n")
|
213
|
+
print("> ") if $debug
|
214
|
+
puts(event)
|
215
|
+
@@eventsOut = @@eventsOut + 1
|
216
|
+
end
|
217
|
+
message
|
172
218
|
end
|
173
219
|
|
174
220
|
# Creates a new event when this state is created, but ignores later occurrences
|
175
|
-
def
|
176
|
-
|
221
|
+
def generate_first_only(sym = :alert)
|
222
|
+
generate(sym) if @count == 1
|
177
223
|
end
|
178
224
|
|
179
225
|
# Allow access to any parameter by a convenience method
|
data/lib/rec.rb
CHANGED
@@ -1,132 +1,12 @@
|
|
1
|
-
# Ruby Event Correlation
|
2
1
|
require 'string'
|
3
2
|
require 'rec/correlator'
|
4
|
-
require 'rec/
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
$ sudo gem install rec
|
13
|
-
|
14
|
-
2. Select a ruleset or create your own
|
15
|
-
#!/usr/bin/ruby
|
16
|
-
require 'rec'
|
17
|
-
include REC
|
18
|
-
require 'rulesets/postfix-rules'
|
19
|
-
Correlator::start()
|
20
|
-
|
21
|
-
3. Start it up
|
22
|
-
$ rulesets/rules.rb < /var/log/mail.log 3>missed.log 2>control.log > newevents.log
|
23
|
-
|
24
|
-
== Why correlate events?
|
25
|
-
We all know that we should read our log files. But reading log files is *really* boring,
|
26
|
-
and frankly it's easy to miss important things in all the superfluous detail.
|
27
|
-
|
28
|
-
[Save time]
|
29
|
-
If you are lazy enough to not want to review all of your log files manually forever, and
|
30
|
-
smart enough to work out what needs monitoring and when you might want to pay attention,
|
31
|
-
then wouldn't it be good if you could define those rules and let the computer do what it
|
32
|
-
does best?
|
33
|
-
|
34
|
-
[Generate meaning]
|
35
|
-
The logs of many applications are filled with entries that are quite low level - perhaps
|
36
|
-
wonderful for debugging, but typically not terribly meaningful in terms of business.
|
37
|
-
Wouldn't it be good if we could summarise a bunch of low level events into a single
|
38
|
-
business event - and then just read the <em>business log</em>.
|
39
|
-
|
40
|
-
== Alternatives
|
41
|
-
There are several alternatives to REC which may suit your needs better:
|
42
|
-
* splunk[www.splunk.com]
|
43
|
-
* nagios[www.nagios.com]
|
44
|
-
* scalextreme.com[www.scalextreme.com]
|
45
|
-
While I like these options, I find they take a lot of configuring.
|
46
|
-
They also have some dependencies that make them a bit heavier than you may want.
|
47
|
-
If you just want to keep track of a few kinds of events, want a lot of flexibility
|
48
|
-
and control without too much effort, then REC may be of some value.
|
49
|
-
|
50
|
-
== How does REC work?
|
51
|
-
Each entry in a log file is an *event*.
|
52
|
-
The Correlator reads the events, and attempts to match an event against each Rule.
|
53
|
-
If an event matches a rule, the rule creates a State which just means we're remembering
|
54
|
-
that the event matched a rule. The pattern to match is a regexp, and the captured values
|
55
|
-
are named. For example
|
56
|
-
# log entry => "nfs: server earth not responding"
|
57
|
-
pattern => /nfs\: server (\w+) not responding/
|
58
|
-
details => ['host']
|
59
|
-
# values of interest are captured into a hash => {'host' => 'earth' }
|
60
|
-
:message => "Server %host$s is down"
|
61
|
-
# interpolation with named parameters => "Server earth is down"
|
62
|
-
|
63
|
-
A state has a fixed lifetime, set when it is created. At the end of its life, it may simply
|
64
|
-
expire quietly, or a pre-defined action may be executed. For example, if we find a server is down,
|
65
|
-
we may want to wait for 3 minutes and if it is not up again, then alert the administrator.
|
66
|
-
The server being down is a state, and two states are distinguished by their *titles*. For example,
|
67
|
-
"host earth is down" and "host terra is down".
|
68
|
-
|
69
|
-
Now that we're remembering a set of states, we can match events against not only the event's
|
70
|
-
message, but also other states. For example, we can match "host terra is up" against a previously
|
71
|
-
created state "host terra is down", and generate a new event "host terra is back up after 14 minutes".
|
72
|
-
We can also 'swallow' all of the rest of the "host terra is down" events because they add nothing new.
|
73
|
-
This <em>event compression</em> means the administrator gets one important message, and not 27
|
74
|
-
distracting alerts.
|
75
|
-
|
76
|
-
An Alert can be sent by email or IM, depending on your preferences and working hours.
|
77
|
-
The destinations and credentials are supplied to your ruleset:
|
78
|
-
# For better security, move the next few lines into a file readable only by
|
79
|
-
# the user running this script eg. /home/rec/alert.conf
|
80
|
-
# and then require that file
|
81
|
-
Alert.smtp_credentials("rec@gmail.com", "recret", "myfirm.com")
|
82
|
-
Alert.emailTo = "me@myfirm.com"
|
83
|
-
Alert.jabber_credentials("rec@gmail.com", "recret")
|
84
|
-
Alert.jabberTo = "me@myfirm.com"
|
85
|
-
|
86
|
-
Rules can then send an alert when desired. Two common cases involve alerting immediately
|
87
|
-
on the first event (eg. "host terra is down"), and alerting on expiry or at a subsequent event
|
88
|
-
(eg. "host terra is back up").
|
89
|
-
state.alert_first_only() # => generate a new event on first original event
|
90
|
-
# or
|
91
|
-
Alert.normal(state.alert_first_only()) # => log and also send the new event via email
|
92
|
-
|
93
|
-
In most cases, however, it is not necessary to alert the administrator at all. It is enough to
|
94
|
-
log the new event in the output logfile for later review.
|
95
|
-
|
96
|
-
== Examples
|
97
|
-
The best way to understand REC is to see how rules are written. The following examples were
|
98
|
-
inspired by SEC (simple-evcorr.sourceforge.net), so they employ the same names.
|
99
|
-
|
100
|
-
1. Warn if a user is having trouble executing sudo commands
|
101
|
-
The log entry (/var/log/secure) looks like this:
|
102
|
-
|
103
|
-
Sep 16 07:09:22 earth sudo: richard : 3 incorrect password attempts ;...
|
104
|
-
|
105
|
-
and the rule might look like this:
|
106
|
-
|
107
|
-
# single threshold rule
|
108
|
-
Rule.new(10034, {
|
109
|
-
:pattern => /\w+ sudo\: (\w+) \: 3 incorrect password attempts/,
|
110
|
-
:details => ["userid"],
|
111
|
-
:message => "Failed sudo password for user %userid$s",
|
112
|
-
:lifespan => 60,
|
113
|
-
:alert => "'Too much sudo activity' userid=%userid$s attempts=%count$d dur=%dur$0.3fs ",
|
114
|
-
:threshold => 3,
|
115
|
-
:capture => true
|
116
|
-
}) { |state|
|
117
|
-
if state.count == state.threshold
|
118
|
-
Alert.urgent(state.generate_alert())
|
119
|
-
state.release()
|
120
|
-
end
|
121
|
-
}
|
122
|
-
|
123
|
-
This rule matches the log entry against the *pattern*. Note that the captured value
|
124
|
-
<code>(\w+)</code> is stored as the +userid+. The +details+ parameter specifies the names
|
125
|
-
of the captured values in the sequence they appear in the pattern.
|
126
|
-
|
127
|
-
The *message* specifies the title of the state that is created. If there is no title
|
128
|
-
|
129
|
-
|
130
|
-
=end
|
3
|
+
require 'rec/notify'
|
4
|
+
|
5
|
+
# The REC module is a namespace containing:
|
6
|
+
# - REC::Correlator
|
7
|
+
# - REC::Rule
|
8
|
+
# - REC::State
|
9
|
+
# - REC::Notify
|
10
|
+
#
|
131
11
|
module REC
|
132
12
|
end
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 1.0.
|
8
|
+
- 2
|
9
|
+
version: 1.0.2
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Richard Kernahan
|
@@ -14,17 +14,17 @@ autorequire:
|
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
16
|
|
17
|
-
date: 2012-09-
|
17
|
+
date: 2012-09-17 00:00:00 +10:00
|
18
18
|
default_executable:
|
19
19
|
dependencies: []
|
20
20
|
|
21
|
-
description: "\tSifts through your log files in real time, using stateful intelligence to determine\n\
|
22
|
-
\twhat is really important. REC can alert you (by email or IM) or it can simply condense\n\
|
23
|
-
\ta large log file into a much shorter and more meaningful log.\n\
|
24
|
-
\tREC is inspired by Risto Vaarandi's brilliant *sec* (simple-evcorr.sourceforge.net)\n\
|
25
|
-
\tbut is original code and any defects are entirely mine.\n\
|
26
|
-
\tWhile event correlation is inherently complex, REC attempts to make common tasks easy\n\
|
27
|
-
\twhile preserving plenty of power and flexibility for ambitious tasks.\n"
|
21
|
+
description: "\t\tSifts through your log files in real time, using stateful intelligence to determine\n\
|
22
|
+
\t\twhat is really important. REC can alert you (by email or IM) or it can simply condense\n\
|
23
|
+
\t\ta large log file into a much shorter and more meaningful log.\n\
|
24
|
+
\t\tREC is inspired by Risto Vaarandi's brilliant *sec* (simple-evcorr.sourceforge.net)\n\
|
25
|
+
\t\tbut is original code and any defects are entirely mine.\n\
|
26
|
+
\t\tWhile event correlation is inherently complex, REC attempts to make common tasks easy\n\
|
27
|
+
\t\twhile preserving plenty of power and flexibility for ambitious tasks.\n"
|
28
28
|
email: dev.rec@finalstep.com.au
|
29
29
|
executables: []
|
30
30
|
|
@@ -37,8 +37,8 @@ files:
|
|
37
37
|
- lib/rec/rule.rb
|
38
38
|
- lib/rec/state.rb
|
39
39
|
- lib/rec/correlator.rb
|
40
|
-
- lib/rec/
|
41
|
-
- lib/rec/mock-
|
40
|
+
- lib/rec/notify.rb
|
41
|
+
- lib/rec/mock-notify.rb
|
42
42
|
- lib/string.rb
|
43
43
|
has_rdoc: true
|
44
44
|
homepage: http://rubygems.org/gems/rec
|