rec 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/rec/alert.rb +44 -35
- data/lib/rec/correlator.rb +23 -3
- data/lib/rec/mock-alert.rb +1 -0
- data/lib/rec/rule.rb +51 -6
- data/lib/rec/state.rb +90 -4
- data/lib/rec.rb +128 -0
- data/lib/string.rb +7 -5
- metadata +3 -3
data/lib/rec/alert.rb
CHANGED
@@ -3,47 +3,53 @@ require 'net/smtp'
|
|
3
3
|
require 'xmpp4r'
|
4
4
|
|
5
5
|
module REC
|
6
|
+
# Provides the capability to send alerts
|
7
|
+
# == mail
|
8
|
+
# The simplest approach is to use the native +mail+ program (no credentials required)
|
9
|
+
# Alert.mail(alert)
|
10
|
+
#
|
11
|
+
# == email and jabber
|
12
|
+
# You can also send emails and instant messages via servers, but you'll need to provide
|
13
|
+
# credentials to do that.
|
14
|
+
# Alert.smtp_credentials(user, password, domain, server, port)
|
15
|
+
# Alert.jabber_credentials(user, password, server)
|
16
|
+
#
|
17
|
+
# Then you can send messages:
|
18
|
+
# Alert.email(alert)
|
19
|
+
# Alert.jabber(alert)
|
20
|
+
# or send messages to another recipient, with another subject
|
21
|
+
# Alert.email(alert, "you@example.com", "Serious problem")
|
22
|
+
# Alert.jabber(alert, "boss@example.com")
|
23
|
+
#
|
24
|
+
# == Sleeping
|
25
|
+
# If you want to avoid being sent instant messages during sleeping hours, you can
|
26
|
+
# specify a range of working hours during which urgent alerts may be sent by jabber
|
27
|
+
# and outside those working hours the alert will be sent by email instead
|
28
|
+
# Alert.work_hours(9,18) # IMs only between 9am and 6pm
|
29
|
+
# Alert.urgent(alert) # sent as instant message if during work hours, else by email
|
30
|
+
# Alert.jabber(alert) # sent as instant message regardless of the time
|
31
|
+
# Alert.normal(alert) # sent by email
|
32
|
+
#
|
33
|
+
# == Securing credentials
|
34
|
+
# In order to keep email/messaging credentials secure, they can be provided in
|
35
|
+
# a file that is only readable by the user executing the rules
|
36
|
+
# so the rules script need not contain passwords.
|
37
|
+
# load("/home/rec/alert.conf")
|
38
|
+
# Loads the credentials into the rules. The file can contain something like:
|
39
|
+
# Alert.smtp_credentials("rec@gmail.com", "tricky", "mydomain.com")
|
40
|
+
# Alert.jabber_credentials("rec@gmail.com", "tricky")
|
41
|
+
# <code>/home/rec/alert.conf</code> should be readable only by the otherwise
|
42
|
+
# unprivileged user (rec) running the script.
|
43
|
+
#
|
44
|
+
# While we're on the topic of security, just a reminder that Alert::mail requires no credentials.
|
6
45
|
module Alert
|
7
46
|
|
8
|
-
#
|
9
|
-
# --mail--
|
10
|
-
# The simplest approach is to use the native +mail+ program (no credentials required)
|
11
|
-
# Alert.mail(alert)
|
12
|
-
#
|
13
|
-
# --email and jabber--
|
14
|
-
# You can also send emails and instant messages via servers, but you'll need to provide
|
15
|
-
# credentials to do that.
|
16
|
-
# - Alert.smtp_credentials(user, password, domain, server, port)
|
17
|
-
# - Alert.jabber_credentials(user, password, server)
|
18
|
-
#
|
19
|
-
# Then you can send messages:
|
20
|
-
# - Alert.email(alert)
|
21
|
-
# - Alert.jabber(alert)
|
22
|
-
# or send messages to another recipient, with another subject
|
23
|
-
# - Alert.email(alert, you@example.com, "Serious problem")
|
24
|
-
# - Alert.jabber(alert, boss@example.com)
|
25
|
-
#
|
26
|
-
# --Sleeping--
|
27
|
-
# If you want to avoid being sent instant messages during sleeping hours, you can
|
28
|
-
# specify a range of working hours during which urgent alerts may be sent by jabber
|
29
|
-
# and outside those working hours the alert will be sent by email instead
|
30
|
-
# Alert.workHours(9,18) # IMs only between 9am and 6pm
|
31
|
-
# Alert.urgent(alert) # sent as instant message if during work hours, else by email
|
32
|
-
# Alert.jabber(alert) # sent as instant message regardless of the time
|
33
|
-
# Alert.normal(alert) # sent by email
|
34
|
-
|
35
|
-
|
47
|
+
# Sets the default subject for alerts, overriding the system default of "Alert"
|
36
48
|
def self.default_subject(subject)
|
37
49
|
@@defaultSubject = subject
|
38
50
|
end
|
39
51
|
@@defaultSubject = "Alert"
|
40
52
|
|
41
|
-
#load("/home/rec/alert.conf") can contain something like this:
|
42
|
-
# Alert.email_credentials("rec@gmail.com", "tricky", "mydomain.com")
|
43
|
-
# Alert.jabber_credentials("rec@gmail.com", "tricky")
|
44
|
-
#so the rules script need not contain passwords.
|
45
|
-
#/home/rec/alert.conf should be readable only by the otherwise unprivileged user (sec)
|
46
|
-
# running the script
|
47
53
|
|
48
54
|
# provides the credentials needed for sending email
|
49
55
|
def self.smtp_credentials(user, password, domain, server="smtp.gmail.com", port=587)
|
@@ -61,10 +67,12 @@ module Alert
|
|
61
67
|
@@jabberServer = server
|
62
68
|
end
|
63
69
|
|
70
|
+
# sets the email address to receive alerts
|
64
71
|
def self.emailTo=(address)
|
65
72
|
@@emailTo = address
|
66
73
|
end
|
67
74
|
|
75
|
+
# sets the jabber address to receive alerts
|
68
76
|
def self.jabberTo=(address)
|
69
77
|
@@jabberTo = address
|
70
78
|
end
|
@@ -92,7 +100,7 @@ module Alert
|
|
92
100
|
end
|
93
101
|
|
94
102
|
# define the working hours during which instant messages are allowed
|
95
|
-
# Note that Alert.work_hours(7,21) means "7am-9pm" as you would
|
103
|
+
# Note that Alert.work_hours(7,21) means "7am-9pm" as you would assume, so from 07:00 to 20:59
|
96
104
|
def self.work_hours(start, finish)
|
97
105
|
@@workHours = start..finish
|
98
106
|
end
|
@@ -107,6 +115,7 @@ module Alert
|
|
107
115
|
end
|
108
116
|
end
|
109
117
|
|
118
|
+
# Alias for Alert::email
|
110
119
|
def self.normal(alert)
|
111
120
|
self.email(alert)
|
112
121
|
end
|
data/lib/rec/correlator.rb
CHANGED
@@ -1,22 +1,33 @@
|
|
1
1
|
require 'rec/rule'
|
2
2
|
|
3
3
|
module REC
|
4
|
+
|
5
|
+
# The Correlator reads in log entries, matching them against the ruleset,
|
6
|
+
# creates states as necessary, generates new (correlated) events, and sends alerts.
|
4
7
|
class Correlator
|
5
8
|
|
6
9
|
@@eventsIn = 0
|
7
10
|
@@eventsMissed = 0
|
8
11
|
|
12
|
+
# Convenience method to create and start a correlator. Possible options are:
|
13
|
+
# - :debug => true
|
9
14
|
def self.start(opts={})
|
10
15
|
$debug = opts[:debug] || false
|
11
16
|
self.new().start()
|
12
17
|
end
|
13
18
|
|
19
|
+
# Create a new Correlator
|
14
20
|
def initialize()
|
15
21
|
@time = @startupTime = Time.now()
|
16
22
|
@year = @startupTime.year
|
17
23
|
@running = false
|
18
24
|
end
|
19
25
|
|
26
|
+
# Start a Correlator. +INT+ and +TERM+ signals will stop the Correlator.
|
27
|
+
# +USR1+ signal will cause Correlator to display statistics and continue running.
|
28
|
+
#
|
29
|
+
# Missed events are written to IO Stream 3, in case they are of interest
|
30
|
+
# (typically while testing rulesets)
|
20
31
|
def start()
|
21
32
|
Signal.trap("INT") { finish() }
|
22
33
|
Signal.trap("TERM") { finish() }
|
@@ -24,7 +35,7 @@ class Correlator
|
|
24
35
|
stats()
|
25
36
|
run()
|
26
37
|
}
|
27
|
-
$stderr.puts("
|
38
|
+
$stderr.puts("rec is starting...")
|
28
39
|
begin
|
29
40
|
$miss = IO.open(3, "a") # for missed events
|
30
41
|
rescue
|
@@ -34,6 +45,9 @@ class Correlator
|
|
34
45
|
run()
|
35
46
|
end
|
36
47
|
|
48
|
+
# reads the next input log entry, parses it into a timestamp and a message,
|
49
|
+
# and checks the message against the ruleset. Continuously loops until
|
50
|
+
# the log input stream is closed, or interrupted by a signal
|
37
51
|
def run()
|
38
52
|
while @running and !$stdin.eof? do
|
39
53
|
logLine = gets()
|
@@ -63,15 +77,21 @@ class Correlator
|
|
63
77
|
finish() if $stdin.eof?
|
64
78
|
end
|
65
79
|
|
80
|
+
# Stop correlating, close IO streams and exit.
|
66
81
|
def finish()
|
67
82
|
@running = false
|
68
83
|
$miss.close() unless $miss.nil? or $miss.closed?
|
69
84
|
# NOTE: some states may have something useful to say, or maybe we could store them
|
70
85
|
stats()
|
71
|
-
$stderr.puts("
|
86
|
+
$stderr.puts("rec is finished.")
|
72
87
|
exit 0
|
73
88
|
end
|
74
89
|
|
90
|
+
# Parses a log entry into a timestamp and a message. Handles formats like:
|
91
|
+
# Apr 22 16:40:18 aqua Firewall[205]: ...
|
92
|
+
# [err] Fri Dec 30 23:58:56 2011 - scan error: ...
|
93
|
+
# 2012-04-22 08:43:22.099 EST - Module: ...
|
94
|
+
# otherwise time stands still.
|
75
95
|
def parse(logLine)
|
76
96
|
if logLine =~ /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d+)\s(\d\d)\:(\d\d)\:(\d\d)/
|
77
97
|
# Apr 22 16:40:18 aqua Firewall[205]: Skype is listening from 0.0.0.0:51304 proto=6
|
@@ -96,8 +116,8 @@ class Correlator
|
|
96
116
|
[time, message]
|
97
117
|
end
|
98
118
|
|
119
|
+
# Reports statistics to stderr
|
99
120
|
def stats()
|
100
|
-
# report statistics to stderr
|
101
121
|
checked, matched, created, reacted, rules = Rule.stats()
|
102
122
|
statesCount, eventsOut = State.stats()
|
103
123
|
$stderr.puts("-"*40)
|
data/lib/rec/mock-alert.rb
CHANGED
data/lib/rec/rule.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
require 'rec/state'
|
2
2
|
|
3
3
|
module REC
|
4
|
+
|
5
|
+
# A Rule specifies which log entries to match, what to remember, and what to do about them.
|
4
6
|
class Rule
|
5
7
|
|
6
8
|
@@rules = []
|
7
|
-
|
8
|
-
@@
|
9
|
-
@@
|
10
|
-
@@
|
11
|
-
@@
|
9
|
+
# hash index of rules to allow lookup of messages etc.
|
10
|
+
@@index = {}
|
11
|
+
@@checked = {}
|
12
|
+
@@matched = {}
|
13
|
+
@@created = {}
|
14
|
+
@@reacted = {}
|
12
15
|
|
16
|
+
# Convenience method to iterate through the ruleset
|
13
17
|
def self.each(&block)
|
14
18
|
@@rules.each(&block)
|
15
19
|
end
|
16
20
|
|
21
|
+
# Adds a rule to the ruleset
|
17
22
|
def self.<<(rule)
|
18
23
|
@@rules << rule
|
19
24
|
@@index[rule.rid] = rule
|
@@ -23,16 +28,52 @@ class Rule
|
|
23
28
|
@@reacted[rule.rid] = 0
|
24
29
|
end
|
25
30
|
|
31
|
+
# Get a rule belonging to the key of +rid+
|
26
32
|
def self.[](rid)
|
27
33
|
@@index[rid]
|
28
34
|
end
|
29
35
|
|
36
|
+
# Returns some summary statistics in a 5-element array, the first four elements
|
37
|
+
# are hashes keyed on rule ID, and the fifth is an array of rules:
|
38
|
+
# 1. number of times each rule was checked
|
39
|
+
# 2. number of times each rule was matched
|
40
|
+
# 3. number of states created by each rule
|
41
|
+
# 4. number of times #react was called on each rule
|
42
|
+
# 5. list of rules, evaluated in sequence for each event
|
30
43
|
def self.stats()
|
31
44
|
[@@checked, @@matched, @@created, @@reacted, @@rules]
|
32
45
|
end
|
33
46
|
|
34
|
-
|
47
|
+
# the unique ID of the rule
|
48
|
+
attr_reader :rid
|
49
|
+
# the regexp pattern to match an original log entry against
|
50
|
+
# :pattern => /^\s\w+\sFirewall\[\d+\]\:\sSkype is listening from 0.0.0.0:(\d+)/,
|
51
|
+
# :details => ["port"],
|
52
|
+
# Note that regexp captures must correspond to custom field names in +details+
|
53
|
+
attr_reader :pattern
|
54
|
+
# the template for the title of any state created.
|
55
|
+
# :message => "sudo activity for user %userid$s",
|
56
|
+
# will create states with titles like "sudo activity for user richard"
|
57
|
+
attr_reader :message
|
58
|
+
# the time in seconds for a created state to persist
|
59
|
+
attr_reader :lifespan
|
60
|
+
# the template for an alert message to be generated should it be necessary
|
61
|
+
attr_reader :alert
|
62
|
+
# hash of the rules parameters, passed in when creating the rule
|
63
|
+
attr_reader :params
|
64
|
+
# block to be executed when #react is called. For example:
|
65
|
+
# Rule.new(10035, {
|
66
|
+
# :pattern => /^\s\w+\sFirewall\[\d+\]\:\sSkype is listening from 0.0.0.0:(\d+)/,
|
67
|
+
# :details => ["port"],
|
68
|
+
# :message => "Skype conversation started on port %port$d",
|
69
|
+
# :alert => "Skype running on port %port$d",
|
70
|
+
# :lifespan => 479
|
71
|
+
# }) { |state|
|
72
|
+
# state.alert_first_only()
|
73
|
+
# }
|
74
|
+
attr_reader :action
|
35
75
|
|
76
|
+
# Creates a new rule. +rid+ must be unique.
|
36
77
|
def initialize(rid, params={}, &action)
|
37
78
|
@rid = rid
|
38
79
|
@pattern = params[:pattern] || raise("No pattern specified for rule #{@ruleId}")
|
@@ -49,6 +90,7 @@ class Rule
|
|
49
90
|
Rule << self
|
50
91
|
end
|
51
92
|
|
93
|
+
# Checks the original +logMessage+ against the rule, looking for a match.
|
52
94
|
def check(logMessage)
|
53
95
|
@@checked[@rid] += 1
|
54
96
|
matchData = @pattern.match(logMessage) || return
|
@@ -64,12 +106,14 @@ class Rule
|
|
64
106
|
return(title)
|
65
107
|
end
|
66
108
|
|
109
|
+
# Creates a state with the given title at the specified time
|
67
110
|
def create_state(title, time)
|
68
111
|
@@created[@rid] += 1
|
69
112
|
$stderr.puts("+ Creating new state #{title}") if $debug
|
70
113
|
State.new(title, time, @lifespan, @params)
|
71
114
|
end
|
72
115
|
|
116
|
+
# Executes any action specified by the rule
|
73
117
|
def react(state, time, logLine)
|
74
118
|
@@reacted[@rid] += 1
|
75
119
|
state.update(time, @rid, @matches, @alert, logLine)
|
@@ -77,6 +121,7 @@ class Rule
|
|
77
121
|
@action.call(state) if @action
|
78
122
|
end
|
79
123
|
|
124
|
+
# Returns the +continue+ parameter. If false, stop processing rules for this event.
|
80
125
|
def continue()
|
81
126
|
@params[:continue]
|
82
127
|
end
|
data/lib/rec/state.rb
CHANGED
@@ -1,18 +1,42 @@
|
|
1
1
|
require 'time'
|
2
2
|
|
3
3
|
module REC
|
4
|
+
|
5
|
+
# A State is an object that represents the memory of something having happened.
|
6
|
+
# For example, "server terra is down".
|
7
|
+
# It also remembers useful statistics about what caused this state to be (the
|
8
|
+
# original log entries and the rule they matched),
|
9
|
+
# for how long it should remain in memory, what it pertains to (eg. the server
|
10
|
+
# called 'terra').
|
11
|
+
#
|
12
|
+
# A state is also useful for other rules to refer to. For example, a second rule
|
13
|
+
# matching "host terra is up" can check if the server is currently down by reference
|
14
|
+
# to the state with a title of "host terra is down".
|
15
|
+
#
|
16
|
+
# This is much more useful than matching log entries one by one without any memory
|
17
|
+
# of what has gone before. You cannot *correlate* events without keeping State.
|
4
18
|
class State
|
5
19
|
|
20
|
+
# An array of Timeouts. A Timeout struct has two elements:
|
21
|
+
# - timestamp at which to expire
|
22
|
+
# - key of the state to be expired
|
6
23
|
@@timeouts = []
|
24
|
+
|
25
|
+
# A hash of states, keyed on state title
|
7
26
|
@@states = {}
|
27
|
+
|
28
|
+
# A count of new events sent to output
|
8
29
|
@@eventsOut = 0
|
9
30
|
|
10
31
|
Struct.new("Timeout", :expiry, :key)
|
11
32
|
|
33
|
+
# Returns the state matching the given key (title)
|
12
34
|
def self.[](key)
|
13
35
|
@@states[key]
|
14
36
|
end
|
15
37
|
|
38
|
+
# Add a Timeout for the given time and specified state. This timeout is sorted
|
39
|
+
# into the correct sequence of timeouts to make State::expire_states more efficient.
|
16
40
|
def self.timeout_at(time, title)
|
17
41
|
tnew = Struct::Timeout.new(time, title)
|
18
42
|
n = @@timeouts.find_index { |to|
|
@@ -25,6 +49,7 @@ class State
|
|
25
49
|
end
|
26
50
|
end
|
27
51
|
|
52
|
+
# Deletes all expired states
|
28
53
|
def self.expire_states(time)
|
29
54
|
timeout = @@timeouts.first()
|
30
55
|
while @@timeouts.length > 0 and timeout.expiry < time do
|
@@ -40,13 +65,41 @@ class State
|
|
40
65
|
end
|
41
66
|
end
|
42
67
|
|
68
|
+
# Returns a 2-element array containing:
|
69
|
+
# - the number of states
|
70
|
+
# - the number of new events sent to output
|
43
71
|
def self.stats()
|
44
72
|
statesCount = @@states.keys.length
|
45
73
|
[statesCount, @@eventsOut]
|
46
74
|
end
|
47
75
|
|
48
|
-
|
76
|
+
# unique ID of the rule which gave rise to this state
|
77
|
+
attr_reader :rid
|
78
|
+
# The unique title for this state (eg. "server earth is down")
|
79
|
+
attr_reader :title
|
80
|
+
# How long this state should live before being automatically forgotten
|
81
|
+
attr_reader :lifespan
|
82
|
+
# An alert message to be sent if subsequent events warrant it
|
83
|
+
attr_reader :alert
|
84
|
+
# Hash of parameters of the rule that created this state
|
85
|
+
attr_reader :params
|
86
|
+
# Number of times this state has been matched
|
87
|
+
attr_reader :count
|
88
|
+
# time when this state was created
|
89
|
+
attr_reader :created
|
90
|
+
# last time this state was updated
|
91
|
+
attr_reader :updated
|
92
|
+
# age of this state
|
93
|
+
attr_reader :dur
|
94
|
+
# Hash of custom fields to be remembered
|
95
|
+
attr_reader :details
|
49
96
|
|
97
|
+
# Creates a new state with the given (unique) +title+, the +time+ of creation,
|
98
|
+
# the +lifespan+ before the state is forgotten, and a hash of parameters.
|
99
|
+
#
|
100
|
+
# Note that the time is not necessarily 'now' because REC can be executed against
|
101
|
+
# historical log files. It uses the timestamp of the original log entry, not the
|
102
|
+
# current clock time.
|
50
103
|
def initialize(title, time, lifespan, params={})
|
51
104
|
@title = title
|
52
105
|
@lifespan = lifespan.to_f
|
@@ -63,6 +116,15 @@ class State
|
|
63
116
|
State.timeout_at(time + @lifespan, @title)
|
64
117
|
end
|
65
118
|
|
119
|
+
# Updates the statistics of this state (following a match).
|
120
|
+
# Remembers lots of useful things:
|
121
|
+
# - the number of matches so far
|
122
|
+
# - time last updated
|
123
|
+
# - age of this state
|
124
|
+
# - the ID of the rule which last matched
|
125
|
+
# - more details (custom fields) may be added to memory
|
126
|
+
# - the alert message
|
127
|
+
# - a list of the original log entries pertaining to this state
|
66
128
|
def update(time, rid, matches, alert, logLine=nil)
|
67
129
|
@count = @count.succ
|
68
130
|
@updated = time
|
@@ -73,14 +135,33 @@ class State
|
|
73
135
|
@logs << logLine if @params[:capture]
|
74
136
|
end
|
75
137
|
|
76
|
-
|
77
|
-
|
138
|
+
# Forget a state.
|
139
|
+
# - if no pattern is provided, forget this state.
|
140
|
+
# - if a pattern is provided, use the stats for this state to determine the title
|
141
|
+
# of the other state, and remove that from memory.
|
142
|
+
# For example, if the server is back up again, we no longer need to remember that
|
143
|
+
# the server was down.
|
144
|
+
def release(pattern=nil)
|
145
|
+
if pattern.nil?
|
146
|
+
@@states.delete(@title)
|
147
|
+
else
|
148
|
+
@@states.delete(pattern.sprinth(stats))
|
149
|
+
end
|
78
150
|
end
|
79
151
|
|
152
|
+
# Returns the details of the state (ie. whatever custom fields were defined, eg. userid)
|
153
|
+
# merged with the standard statistics:
|
154
|
+
# - count: number of matches so far
|
155
|
+
# - dur: age of the state
|
156
|
+
# - created: time when this state was created
|
157
|
+
# - updated: time last updated (eg. when the latest event matched this state)
|
80
158
|
def stats()
|
81
159
|
@details.merge({"count"=>@count, "dur"=>@dur, "created"=>@created, "updated"=>@updated})
|
82
160
|
end
|
83
161
|
|
162
|
+
# Creates a new event, writes it to the output log, and returns the event.
|
163
|
+
#
|
164
|
+
# An *event* (or 'log entry') is a timestamp followed by a *message*
|
84
165
|
def generate_alert()
|
85
166
|
message = @alert.sprinth(stats())
|
86
167
|
event = "%s %s" % [@created.iso8601, message] + @logs.join("\n")
|
@@ -89,11 +170,16 @@ class State
|
|
89
170
|
@@eventsOut = @@eventsOut + 1
|
90
171
|
event
|
91
172
|
end
|
92
|
-
|
173
|
+
|
174
|
+
# Creates a new event when this state is created, but ignores later occurrences
|
93
175
|
def alert_first_only()
|
94
176
|
generate_alert() if @count == 1
|
95
177
|
end
|
96
178
|
|
179
|
+
# Allow access to any parameter by a convenience method
|
180
|
+
# state.capture
|
181
|
+
# is more succinct than
|
182
|
+
# state.params[:capture]
|
97
183
|
def method_missing(symbol, *args)
|
98
184
|
@params[symbol]
|
99
185
|
end
|
data/lib/rec.rb
CHANGED
@@ -2,3 +2,131 @@
|
|
2
2
|
require 'string'
|
3
3
|
require 'rec/correlator'
|
4
4
|
require 'rec/alert'
|
5
|
+
|
6
|
+
=begin rdoc
|
7
|
+
= Ruby Event Correlation
|
8
|
+
REC correlates events in order to generate a smaller set of more meaningful events.
|
9
|
+
|
10
|
+
== Installation
|
11
|
+
1. Install the gem
|
12
|
+
$ sudo gem install rec
|
13
|
+
|
14
|
+
2. Select a ruleset or create your own
|
15
|
+
#!/usr/bin/ruby
|
16
|
+
require 'rec'
|
17
|
+
include REC
|
18
|
+
require 'rulesets/postfix-rules'
|
19
|
+
Correlator::start()
|
20
|
+
|
21
|
+
3. Start it up
|
22
|
+
$ rulesets/rules.rb < /var/log/mail.log 3>missed.log 2>control.log > newevents.log
|
23
|
+
|
24
|
+
== Why correlate events?
|
25
|
+
We all know that we should read our log files. But reading log files is *really* boring,
|
26
|
+
and frankly it's easy to miss important things in all the superfluous detail.
|
27
|
+
|
28
|
+
[Save time]
|
29
|
+
If you are lazy enough to not want to review all of your log files manually forever, and
|
30
|
+
smart enough to work out what needs monitoring and when you might want to pay attention,
|
31
|
+
then wouldn't it be good if you could define those rules and let the computer do what it
|
32
|
+
does best?
|
33
|
+
|
34
|
+
[Generate meaning]
|
35
|
+
The logs of many applications are filled with entries that are quite low level - perhaps
|
36
|
+
wonderful for debugging, but typically not terribly meaningful in terms of business.
|
37
|
+
Wouldn't it be good if we could summarise a bunch of low level events into a single
|
38
|
+
business event - and then just read the <em>business log</em>.
|
39
|
+
|
40
|
+
== Alternatives
|
41
|
+
There are several alternatives to REC which may suit your needs better:
|
42
|
+
* splunk[www.splunk.com]
|
43
|
+
* nagios[www.nagios.com]
|
44
|
+
* scalextreme.com[www.scalextreme.com]
|
45
|
+
While I like these options, I find they take a lot of configuring.
|
46
|
+
They also have some dependencies that make them a bit heavier than you may want.
|
47
|
+
If you just want to keep track of a few kinds of events, want a lot of flexibility
|
48
|
+
and control without too much effort, then REC may be of some value.
|
49
|
+
|
50
|
+
== How does REC work?
|
51
|
+
Each entry in a log file is an *event*.
|
52
|
+
The Correlator reads the events, and attempts to match an event against each Rule.
|
53
|
+
If an event matches a rule, the rule creates a State which just means we're remembering
|
54
|
+
that the event matched a rule. The pattern to match is a regexp, and the captured values
|
55
|
+
are named. For example
|
56
|
+
# log entry => "nfs: server earth not responding"
|
57
|
+
:pattern => /nfs\: server (\w+) not responding/
|
58
|
+
:details => ['host']
|
59
|
+
# values of interest are captured into a hash => {'host' => 'earth' }
|
60
|
+
:message => "Server %host$s is down"
|
61
|
+
# interpolation with named parameters => "Server earth is down"
|
62
|
+
|
63
|
+
A state has a fixed lifetime, set when it is created. At the end of its life, it may simply
|
64
|
+
expire quietly, or a pre-defined action may be executed. For example, if we find a server is down,
|
65
|
+
we may want to wait for 3 minutes and if it is not up again, then alert the administrator.
|
66
|
+
The server being down is a state, and two states are distinguished by their *titles*. For example,
|
67
|
+
"host earth is down" and "host terra is down".
|
68
|
+
|
69
|
+
Now that we're remembering a set of states, we can match events against not only the event's
|
70
|
+
message, but also other states. For example, we can match "host terra is up" against a previously
|
71
|
+
created state "host terra is down", and generate a new event "host terra is back up after 14 minutes".
|
72
|
+
We can also 'swallow' all of the rest of the "host terra is down" events because they add nothing new.
|
73
|
+
This <em>event compression</em> means the administrator gets one important message, and not 27
|
74
|
+
distracting alerts.
|
75
|
+
|
76
|
+
An Alert can be sent by email or IM, depending on your preferences and working hours.
|
77
|
+
The destinations and credentials are supplied to your ruleset:
|
78
|
+
# For better security, move the next few lines into a file readable only by
|
79
|
+
# the user running this script eg. /home/rec/alert.conf
|
80
|
+
# and then require that file
|
81
|
+
Alert.smtp_credentials("rec@gmail.com", "recret", "myfirm.com")
|
82
|
+
Alert.emailTo = "me@myfirm.com"
|
83
|
+
Alert.jabber_credentials("rec@gmail.com", "recret")
|
84
|
+
Alert.jabberTo = "me@myfirm.com"
|
85
|
+
|
86
|
+
Rules can then send an alert when desired. Two common cases involve alerting immediately
|
87
|
+
on the first event (eg. "host terra is down"), and alerting on expiry or at a subsequent event
|
88
|
+
(eg. "host terra is back up").
|
89
|
+
state.alert_first_only() # => generate a new event on first original event
|
90
|
+
# or
|
91
|
+
Alert.normal(state.alert_first_only()) # => log and also send the new event via email
|
92
|
+
|
93
|
+
In most cases, however, it is not necessary to alert the administrator at all. It is enough to
|
94
|
+
log the new event in the output logfile for later review.
|
95
|
+
|
96
|
+
== Examples
|
97
|
+
The best way to understand REC is to see how rules are written. The following examples were
|
98
|
+
inspired by SEC (simple-evcorr.sourceforge.net), so they employ the same names.
|
99
|
+
|
100
|
+
1. Warn if a user is having trouble executing sudo commands
|
101
|
+
The log entry (/var/log/secure) looks like this:
|
102
|
+
|
103
|
+
Sep 16 07:09:22 earth sudo: richard : 3 incorrect password attempts ;...
|
104
|
+
|
105
|
+
and the rule might look like this:
|
106
|
+
|
107
|
+
# single threshold rule
|
108
|
+
Rule.new(10034, {
|
109
|
+
:pattern => /\w+ sudo\: (\w+) \: 3 incorrect password attempts/,
|
110
|
+
:details => ["userid"],
|
111
|
+
:message => "Failed sudo password for user %userid$s",
|
112
|
+
:lifespan => 60,
|
113
|
+
:alert => "'Too much sudo activity' userid=%userid$s attempts=%count$d dur=%dur$0.3fs ",
|
114
|
+
:threshold => 3,
|
115
|
+
:capture => true
|
116
|
+
}) { |state|
|
117
|
+
if state.count == state.threshold
|
118
|
+
Alert.urgent(state.generate_alert())
|
119
|
+
state.release()
|
120
|
+
end
|
121
|
+
}
|
122
|
+
|
123
|
+
This rule matches the log entry against the *pattern*. Note that the captured value
|
124
|
+
<code>(\w+)</code> is stored as the +userid+. The +details+ parameter specifies the names
|
125
|
+
of the captured values in the sequence they appear in the pattern.
|
126
|
+
|
127
|
+
The *message* specifies the title of the state that is created. If there is no title
|
128
|
+
|
129
|
+
|
130
|
+
=end
|
131
|
+
module REC
|
132
|
+
end
|
data/lib/string.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
# Custom extension to String
|
1
2
|
class String
|
2
|
-
#
|
3
|
-
#
|
4
|
-
# '
|
5
|
-
#
|
6
|
-
#
|
3
|
+
# Interpolates hash values into a formatted string.
|
4
|
+
# s = "Stats uid %uid$-5d belongs to %userid$s"
|
5
|
+
# h = {'uid' => 501, 'userid' => 'richard'}
|
6
|
+
# s.sprinth(h)
|
7
|
+
# # intermediate => "Stats uid %-5d belongs to %s" % [501, 'richard']
|
8
|
+
# # finally => "Stats uid 501   belongs to richard"
|
7
9
|
def sprinth(hash={})
|
8
10
|
raise ArgumentError.new("sprinth argument must be a Hash") unless hash.is_a?(Hash)
|
9
11
|
self.gsub(/\%\w+\$/,"%") % self.scan(/\%\w+\$/).collect { |token| hash[token[1..-2]] }
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 1.0.
|
8
|
+
- 1
|
9
|
+
version: 1.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Richard Kernahan
|
@@ -25,7 +25,7 @@ description: "\tSifts through your log files in real time, using stateful intell
|
|
25
25
|
\tbut is original code and any defects are entirely mine.\n\
|
26
26
|
\tWhile event correlation is inherently complex, REC attempts to make common tasks easy\n\
|
27
27
|
\twhile preserving plenty of power and flexibility for ambitious tasks.\n"
|
28
|
-
email: rec@finalstep.com.au
|
28
|
+
email: dev.rec@finalstep.com.au
|
29
29
|
executables: []
|
30
30
|
|
31
31
|
extensions: []
|