rec 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/lib/rec/alert.rb +44 -35
- data/lib/rec/correlator.rb +23 -3
- data/lib/rec/mock-alert.rb +1 -0
- data/lib/rec/rule.rb +51 -6
- data/lib/rec/state.rb +90 -4
- data/lib/rec.rb +128 -0
- data/lib/string.rb +7 -5
- metadata +3 -3
data/lib/rec/alert.rb
CHANGED
@@ -3,47 +3,53 @@ require 'net/smtp'
|
|
3
3
|
require 'xmpp4r'
|
4
4
|
|
5
5
|
module REC
|
6
|
+
# Provides the capability to send alerts
|
7
|
+
# == mail
|
8
|
+
# The simplest approach is to use the native +mail+ program (no credentials required)
|
9
|
+
# Alert.mail(alert)
|
10
|
+
#
|
11
|
+
# == email and jabber
|
12
|
+
# You can also send emails and instant messages via servers, but you'll need to provide
|
13
|
+
# credentials to do that.
|
14
|
+
# Alert.smtp_credentials(user, password, domain, server, port)
|
15
|
+
# Alert.jabber_credentials(user, password, server)
|
16
|
+
#
|
17
|
+
# Then you can send messages:
|
18
|
+
# Alert.email(alert)
|
19
|
+
# Alert.jabber(alert)
|
20
|
+
# or send messages to another recipient, with another subject
|
21
|
+
# Alert.email(alert, you@example.com, "Serious problem")
|
22
|
+
# Alert.jabber(alert, boss@example.com)
|
23
|
+
#
|
24
|
+
# == Sleeping
|
25
|
+
# If you want to avoid being sent instant messages during sleeping hours, you can
|
26
|
+
# specify a range of working hours during which urgent alerts may be sent by jabber
|
27
|
+
# and outside those working hours the alert will be sent by email instead
|
28
|
+
# Alert.work_hours(9,18) # IMs only between 9am and 6pm
|
29
|
+
# Alert.urgent(alert) # sent as instant message if during work hours, else by email
|
30
|
+
# Alert.jabber(alert) # sent as instant message regardless of the time
|
31
|
+
# Alert.normal(alert) # sent by email
|
32
|
+
#
|
33
|
+
# == Securing credentials
|
34
|
+
# In order to keep email/messaging credentials secure, they can be provided in
|
35
|
+
# a file that is only readable by the user executing the rules
|
36
|
+
# so the rules script need not contain passwords.
|
37
|
+
# load("/home/rec/alert.conf")
|
38
|
+
# Loads the credentials into the rules. The file can contain something like:
|
39
|
+
# Alert.smtp_credentials("rec@gmail.com", "tricky", "mydomain.com")
|
40
|
+
# Alert.jabber_credentials("rec@gmail.com", "tricky")
|
41
|
+
# <code>/home/rec/alert.conf</code> should be readable only by the otherwise
|
42
|
+
# unprivileged user (rec) running the script.
|
43
|
+
#
|
44
|
+
# While we're on the topic of security, just a reminder that Alert::mail requires no credentials.
|
6
45
|
module Alert
|
7
46
|
|
8
|
-
#
|
9
|
-
# --mail--
|
10
|
-
# The simplest approach is to use the native +mail+ program (no credentials required)
|
11
|
-
# Alert.mail(alert)
|
12
|
-
#
|
13
|
-
# --email and jabber--
|
14
|
-
# You can also send emails and instant messages via servers, but you'll need to provide
|
15
|
-
# credentials to do that.
|
16
|
-
# - Alert.smtp_credentials(user, password, domain, server, port)
|
17
|
-
# - Alert.jabber_credentials(user, password, server)
|
18
|
-
#
|
19
|
-
# Then you can send messages:
|
20
|
-
# - Alert.email(alert)
|
21
|
-
# - Alert.jabber(alert)
|
22
|
-
# or send messages to another recipient, with another subject
|
23
|
-
# - Alert.email(alert, you@example.com, "Serious problem")
|
24
|
-
# - Alert.jabber(alert, boss@example.com)
|
25
|
-
#
|
26
|
-
# --Sleeping--
|
27
|
-
# If you want to avoid being sent instant messages during sleeping hours, you can
|
28
|
-
# specify a range of working hours during which urgent alerts may be sent by jabber
|
29
|
-
# and outside those working hours the alert will be sent by email instead
|
30
|
-
# Alert.workHours(9,18) # IMs only between 9am and 6pm
|
31
|
-
# Alert.urgent(alert) # sent as instant message if during work hours, else by email
|
32
|
-
# Alert.jabber(alert) # sent as instant message regardless of the time
|
33
|
-
# Alert.normal(alert) # sent by email
|
34
|
-
|
35
|
-
|
47
|
+
# Sets the default subject for alerts, overriding the system default of "Alert"
|
36
48
|
def self.default_subject(subject)
|
37
49
|
@@defaultSubject = subject
|
38
50
|
end
|
39
51
|
@@defaultSubject = "Alert"
|
40
52
|
|
41
|
-
#load("/home/rec/alert.conf") can contain something like this:
|
42
|
-
# Alert.email_credentials("rec@gmail.com", "tricky", "mydomain.com")
|
43
|
-
# Alert.jabber_credentials("rec@gmail.com", "tricky")
|
44
|
-
#so the rules script need not contain passwords.
|
45
|
-
#/home/rec/alert.conf should be readable only by the otherwise unprivileged user (sec)
|
46
|
-
# running the script
|
47
53
|
|
48
54
|
# provides the credentials needed for sending email
|
49
55
|
def self.smtp_credentials(user, password, domain, server="smtp.gmail.com", port=587)
|
@@ -61,10 +67,12 @@ module Alert
|
|
61
67
|
@@jabberServer = server
|
62
68
|
end
|
63
69
|
|
70
|
+
# sets the email address to receive alerts
|
64
71
|
def self.emailTo=(address)
|
65
72
|
@@emailTo = address
|
66
73
|
end
|
67
74
|
|
75
|
+
# sets the jabber address to receive alerts
|
68
76
|
def self.jabberTo=(address)
|
69
77
|
@@jabberTo = address
|
70
78
|
end
|
@@ -92,7 +100,7 @@ module Alert
|
|
92
100
|
end
|
93
101
|
|
94
102
|
# define the working hours during which instant messages are allowed
|
95
|
-
# Note that Alert.work_hours(7,21) means "7am-9pm" as you would
|
103
|
+
# Note that Alert.work_hours(7,21) means "7am-9pm" as you would assume, so from 07:00 to 20:59
|
96
104
|
def self.work_hours(start, finish)
|
97
105
|
@@workHours = start..finish
|
98
106
|
end
|
@@ -107,6 +115,7 @@ module Alert
|
|
107
115
|
end
|
108
116
|
end
|
109
117
|
|
118
|
+
# Alias for Alert::email
|
110
119
|
def self.normal(alert)
|
111
120
|
self.email(alert)
|
112
121
|
end
|
data/lib/rec/correlator.rb
CHANGED
@@ -1,22 +1,33 @@
|
|
1
1
|
require 'rec/rule'
|
2
2
|
|
3
3
|
module REC
|
4
|
+
|
5
|
+
# The Correlator reads in log entries, matching them against the ruleset,
|
6
|
+
# creates states as necessary, generates new (correlated) events, and sends alerts.
|
4
7
|
class Correlator
|
5
8
|
|
6
9
|
@@eventsIn = 0
|
7
10
|
@@eventsMissed = 0
|
8
11
|
|
12
|
+
# Convenience method to create and start a correlator. Possible options are:
|
13
|
+
# - :debug => true
|
9
14
|
def self.start(opts={})
|
10
15
|
$debug = opts[:debug] || false
|
11
16
|
self.new().start()
|
12
17
|
end
|
13
18
|
|
19
|
+
# Create a new Correlator
|
14
20
|
def initialize()
|
15
21
|
@time = @startupTime = Time.now()
|
16
22
|
@year = @startupTime.year
|
17
23
|
@running = false
|
18
24
|
end
|
19
25
|
|
26
|
+
# Start a Correlator. +INT+ and +TERM+ signals will stop the Correlator.
|
27
|
+
# +USR1+ signal will cause Correlator to display statistics and continue running.
|
28
|
+
#
|
29
|
+
# Missed events are written to IO Stream 3, in case they are of interest
|
30
|
+
# (typically while testing rulesets)
|
20
31
|
def start()
|
21
32
|
Signal.trap("INT") { finish() }
|
22
33
|
Signal.trap("TERM") { finish() }
|
@@ -24,7 +35,7 @@ class Correlator
|
|
24
35
|
stats()
|
25
36
|
run()
|
26
37
|
}
|
27
|
-
$stderr.puts("
|
38
|
+
$stderr.puts("rec is starting...")
|
28
39
|
begin
|
29
40
|
$miss = IO.open(3, "a") # for missed events
|
30
41
|
rescue
|
@@ -34,6 +45,9 @@ class Correlator
|
|
34
45
|
run()
|
35
46
|
end
|
36
47
|
|
48
|
+
# reads the next input log entry, parses it into a timestamp and a message,
|
49
|
+
# and checks the message against the ruleset. Continuously loops until
|
50
|
+
# the log input stream is closed, or interrupted by a signal
|
37
51
|
def run()
|
38
52
|
while @running and !$stdin.eof? do
|
39
53
|
logLine = gets()
|
@@ -63,15 +77,21 @@ class Correlator
|
|
63
77
|
finish() if $stdin.eof?
|
64
78
|
end
|
65
79
|
|
80
|
+
# Stop correlating, close IO streams and exit.
|
66
81
|
def finish()
|
67
82
|
@running = false
|
68
83
|
$miss.close() unless $miss.nil? or $miss.closed?
|
69
84
|
# NOTE: some states may have something useful to say, or maybe we could store them
|
70
85
|
stats()
|
71
|
-
$stderr.puts("
|
86
|
+
$stderr.puts("rec is finished.")
|
72
87
|
exit 0
|
73
88
|
end
|
74
89
|
|
90
|
+
# Parses a log entry into a timestamp and a message. Handles formats like:
|
91
|
+
# Apr 22 16:40:18 aqua Firewall[205]: ...
|
92
|
+
# [err] Fri Dec 30 23:58:56 2011 - scan error: ...
|
93
|
+
# 2012-04-22 08:43:22.099 EST - Module: ...
|
94
|
+
# otherwise time stands still.
|
75
95
|
def parse(logLine)
|
76
96
|
if logLine =~ /^(Jan|Feb|Mar|Apr|May|Jun|Jul|Aug|Sep|Oct|Nov|Dec)\s+(\d+)\s(\d\d)\:(\d\d)\:(\d\d)/
|
77
97
|
# Apr 22 16:40:18 aqua Firewall[205]: Skype is listening from 0.0.0.0:51304 proto=6
|
@@ -96,8 +116,8 @@ class Correlator
|
|
96
116
|
[time, message]
|
97
117
|
end
|
98
118
|
|
119
|
+
# Reports statistics to stderr
|
99
120
|
def stats()
|
100
|
-
# report statistics to stderr
|
101
121
|
checked, matched, created, reacted, rules = Rule.stats()
|
102
122
|
statesCount, eventsOut = State.stats()
|
103
123
|
$stderr.puts("-"*40)
|
data/lib/rec/mock-alert.rb
CHANGED
data/lib/rec/rule.rb
CHANGED
@@ -1,19 +1,24 @@
|
|
1
1
|
require 'rec/state'
|
2
2
|
|
3
3
|
module REC
|
4
|
+
|
5
|
+
# A Rule specifies which log entries to match, what to remember, and what to do about them.
|
4
6
|
class Rule
|
5
7
|
|
6
8
|
@@rules = []
|
7
|
-
|
8
|
-
@@
|
9
|
-
@@
|
10
|
-
@@
|
11
|
-
@@
|
9
|
+
# hash index of rules to allow lookup of messages etc.
|
10
|
+
@@index = {}
|
11
|
+
@@checked = {}
|
12
|
+
@@matched = {}
|
13
|
+
@@created = {}
|
14
|
+
@@reacted = {}
|
12
15
|
|
16
|
+
# Convenience method to iterate through the ruleset
|
13
17
|
def self.each(&block)
|
14
18
|
@@rules.each(&block)
|
15
19
|
end
|
16
20
|
|
21
|
+
# Adds a rule to the ruleset
|
17
22
|
def self.<<(rule)
|
18
23
|
@@rules << rule
|
19
24
|
@@index[rule.rid] = rule
|
@@ -23,16 +28,52 @@ class Rule
|
|
23
28
|
@@reacted[rule.rid] = 0
|
24
29
|
end
|
25
30
|
|
31
|
+
# Get a rule belonging to the key of +rid+
|
26
32
|
def self.[](rid)
|
27
33
|
@@index[rid]
|
28
34
|
end
|
29
35
|
|
36
|
+
# Returns some summary statistics in a 5-element array, the first four elements
|
37
|
+
# are each a hash keyed on rule ID; the fifth is an array of rules:
|
38
|
+
# 1. number of times each rule was checked
|
39
|
+
# 2. number of times each rule was matched
|
40
|
+
# 3. number of states created by each rule
|
41
|
+
# 4. number of times #react was called on each rule
|
42
|
+
# 5. list of rules, evaluated in sequence for each event
|
30
43
|
def self.stats()
|
31
44
|
[@@checked, @@matched, @@created, @@reacted, @@rules]
|
32
45
|
end
|
33
46
|
|
34
|
-
|
47
|
+
# the unique ID of the rule
|
48
|
+
attr_reader :rid
|
49
|
+
# the regexp pattern to match an original log entry against
|
50
|
+
# :pattern => /^\s\w+\sFirewall\[\d+\]\:\sSkype is listening from 0.0.0.0:(\d+)/,
|
51
|
+
# :details => ["port"],
|
52
|
+
# Note that regexp captures must correspond to custom field names in +details+
|
53
|
+
attr_reader :pattern
|
54
|
+
# the template for the title of any state created.
|
55
|
+
# :message => "sudo activity for user %userid$s",
|
56
|
+
# will create states with titles like "sudo activity for user richard"
|
57
|
+
attr_reader :message
|
58
|
+
# the time in seconds for a created state to persist
|
59
|
+
attr_reader :lifespan
|
60
|
+
# the template for an alert message to be generated should it be necessary
|
61
|
+
attr_reader :alert
|
62
|
+
# hash of the rules parameters, passed in when creating the rule
|
63
|
+
attr_reader :params
|
64
|
+
# block to be executed when #react is called. For example:
|
65
|
+
# Rule.new(10035, {
|
66
|
+
# :pattern => /^\s\w+\sFirewall\[\d+\]\:\sSkype is listening from 0.0.0.0:(\d+)/,
|
67
|
+
# :details => ["port"],
|
68
|
+
# :message => "Skype conversation started on port %port$d",
|
69
|
+
# :alert => "Skype running on port %port$d",
|
70
|
+
# :lifespan => 479
|
71
|
+
# }) { |state|
|
72
|
+
# state.alert_first_only()
|
73
|
+
# }
|
74
|
+
attr_reader :action
|
35
75
|
|
76
|
+
# Creates a new rule. +rid+ must be unique.
|
36
77
|
def initialize(rid, params={}, &action)
|
37
78
|
@rid = rid
|
38
79
|
@pattern = params[:pattern] || raise("No pattern specified for rule #{@ruleId}")
|
@@ -49,6 +90,7 @@ class Rule
|
|
49
90
|
Rule << self
|
50
91
|
end
|
51
92
|
|
93
|
+
# Checks the original +logMessage+ against the rule, looking for a match.
|
52
94
|
def check(logMessage)
|
53
95
|
@@checked[@rid] += 1
|
54
96
|
matchData = @pattern.match(logMessage) || return
|
@@ -64,12 +106,14 @@ class Rule
|
|
64
106
|
return(title)
|
65
107
|
end
|
66
108
|
|
109
|
+
# Creates a state with the given title at the specified time
|
67
110
|
def create_state(title, time)
|
68
111
|
@@created[@rid] += 1
|
69
112
|
$stderr.puts("+ Creating new state #{title}") if $debug
|
70
113
|
State.new(title, time, @lifespan, @params)
|
71
114
|
end
|
72
115
|
|
116
|
+
# Executes any action specified by the rule
|
73
117
|
def react(state, time, logLine)
|
74
118
|
@@reacted[@rid] += 1
|
75
119
|
state.update(time, @rid, @matches, @alert, logLine)
|
@@ -77,6 +121,7 @@ class Rule
|
|
77
121
|
@action.call(state) if @action
|
78
122
|
end
|
79
123
|
|
124
|
+
# Returns the +continue+ parameter. If false, stop processing rules for this event.
|
80
125
|
def continue()
|
81
126
|
@params[:continue]
|
82
127
|
end
|
data/lib/rec/state.rb
CHANGED
@@ -1,18 +1,42 @@
|
|
1
1
|
require 'time'
|
2
2
|
|
3
3
|
module REC
|
4
|
+
|
5
|
+
# A State is an object that represents the memory of something having happened.
|
6
|
+
# For example, "server terra is down".
|
7
|
+
# It also remembers useful statistics about what caused this state to be (the
|
8
|
+
# original log entries and the rule they matched),
|
9
|
+
# for how long it should remain in memory, what it pertains to (eg. the server
|
10
|
+
# called 'terra').
|
11
|
+
#
|
12
|
+
# A state is also useful for other rules to refer to. For example, a second rule
|
13
|
+
# matching "host terra is up" can check if the server is currently down by reference
|
14
|
+
# to the state with a title of "host terra is down".
|
15
|
+
#
|
16
|
+
# This is much more useful than matching log entries one by one without any memory
|
17
|
+
# of what has gone before. You cannot *correlate* events without keeping State.
|
4
18
|
class State
|
5
19
|
|
20
|
+
# An array of Timeouts. A Timeout struct has two elements:
|
21
|
+
# - timestamp at which to expire
|
22
|
+
# - key of the state to be expired
|
6
23
|
@@timeouts = []
|
24
|
+
|
25
|
+
# A hash of states, keyed on state title
|
7
26
|
@@states = {}
|
27
|
+
|
28
|
+
# A count of new events sent to output
|
8
29
|
@@eventsOut = 0
|
9
30
|
|
10
31
|
Struct.new("Timeout", :expiry, :key)
|
11
32
|
|
33
|
+
# Returns the state matching the given key (title)
|
12
34
|
def self.[](key)
|
13
35
|
@@states[key]
|
14
36
|
end
|
15
37
|
|
38
|
+
# Add a Timeout for the given time and specified state. This timeout is sorted
|
39
|
+
# into the correct sequence of timeouts to make State::expire_states more efficient.
|
16
40
|
def self.timeout_at(time, title)
|
17
41
|
tnew = Struct::Timeout.new(time, title)
|
18
42
|
n = @@timeouts.find_index { |to|
|
@@ -25,6 +49,7 @@ class State
|
|
25
49
|
end
|
26
50
|
end
|
27
51
|
|
52
|
+
# Deletes all expired states
|
28
53
|
def self.expire_states(time)
|
29
54
|
timeout = @@timeouts.first()
|
30
55
|
while @@timeouts.length > 0 and timeout.expiry < time do
|
@@ -40,13 +65,41 @@ class State
|
|
40
65
|
end
|
41
66
|
end
|
42
67
|
|
68
|
+
# Returns a 2-element array containing:
|
69
|
+
# - the number of states
|
70
|
+
# - the number of new events sent to output
|
43
71
|
def self.stats()
|
44
72
|
statesCount = @@states.keys.length
|
45
73
|
[statesCount, @@eventsOut]
|
46
74
|
end
|
47
75
|
|
48
|
-
|
76
|
+
# unique ID of the rule which gave rise to this state
|
77
|
+
attr_reader :rid
|
78
|
+
# The unique title for this state (eg. "server earth is down")
|
79
|
+
attr_reader :title
|
80
|
+
# How long this state should live before being automatically forgotten
|
81
|
+
attr_reader :lifespan
|
82
|
+
# An alert message to be sent if subsequent events warrant it
|
83
|
+
attr_reader :alert
|
84
|
+
# Hash of parameters of the rule that created this state
|
85
|
+
attr_reader :params
|
86
|
+
# Number of times this state has been matched
|
87
|
+
attr_reader :count
|
88
|
+
# time when this state was created
|
89
|
+
attr_reader :created
|
90
|
+
# last time this state was updated
|
91
|
+
attr_reader :updated
|
92
|
+
# age of this state
|
93
|
+
attr_reader :dur
|
94
|
+
# Hash of custom fields to be remembered
|
95
|
+
attr_reader :details
|
49
96
|
|
97
|
+
# Creates a new state with the given (unique) +title+, the +time+ of creation,
|
98
|
+
# the +lifespan+ before the state is forgotten, and a hash of parameters.
|
99
|
+
#
|
100
|
+
# Note that the time is not necessarily 'now' because REC can be executed against
|
101
|
+
# historical log files. It uses the timestamp of the original log entry, not the
|
102
|
+
# current clock time.
|
50
103
|
def initialize(title, time, lifespan, params={})
|
51
104
|
@title = title
|
52
105
|
@lifespan = lifespan.to_f
|
@@ -63,6 +116,15 @@ class State
|
|
63
116
|
State.timeout_at(time + @lifespan, @title)
|
64
117
|
end
|
65
118
|
|
119
|
+
# Updates the statistics of this state (following a match).
|
120
|
+
# Remembers lots of useful things:
|
121
|
+
# - the number of matches so far
|
122
|
+
# - time last updated
|
123
|
+
# - age of this state
|
124
|
+
# - the ID of the rule which last matched
|
125
|
+
# - more details (custom fields) may be added to memory
|
126
|
+
# - the alert message
|
127
|
+
# - a list of the original log entries pertaining to this state
|
66
128
|
def update(time, rid, matches, alert, logLine=nil)
|
67
129
|
@count = @count.succ
|
68
130
|
@updated = time
|
@@ -73,14 +135,33 @@ class State
|
|
73
135
|
@logs << logLine if @params[:capture]
|
74
136
|
end
|
75
137
|
|
76
|
-
|
77
|
-
|
138
|
+
# Forget a state.
|
139
|
+
# - if no pattern is provided, forget this state.
|
140
|
+
# - if a pattern is provided, use the stats for this state to determine the title
|
141
|
+
# of the other state, and remove that from memory.
|
142
|
+
# For example, if the server is back up again, we no longer need to remember that
|
143
|
+
# the server was down.
|
144
|
+
def release(pattern=nil)
|
145
|
+
if pattern.nil?
|
146
|
+
@@states.delete(@title)
|
147
|
+
else
|
148
|
+
@@states.delete(pattern.sprinth(stats))
|
149
|
+
end
|
78
150
|
end
|
79
151
|
|
152
|
+
# Returns the details of the state (ie. whatever custom fields were defined, eg. userid)
|
153
|
+
# merged with the standard statistics:
|
154
|
+
# - count: number of matches so far
|
155
|
+
# - dur: age of the state
|
156
|
+
# - created: time when this state was created
|
157
|
+
# - updated: time last updated (eg. when the latest event matched this state)
|
80
158
|
def stats()
|
81
159
|
@details.merge({"count"=>@count, "dur"=>@dur, "created"=>@created, "updated"=>@updated})
|
82
160
|
end
|
83
161
|
|
162
|
+
# Creates a new event, writes it to the output log, and returns the event.
|
163
|
+
#
|
164
|
+
# An *event* (or 'log entry') is a timestamp followed by a *message*
|
84
165
|
def generate_alert()
|
85
166
|
message = @alert.sprinth(stats())
|
86
167
|
event = "%s %s" % [@created.iso8601, message] + @logs.join("\n")
|
@@ -89,11 +170,16 @@ class State
|
|
89
170
|
@@eventsOut = @@eventsOut + 1
|
90
171
|
event
|
91
172
|
end
|
92
|
-
|
173
|
+
|
174
|
+
# Creates a new event when this state is created, but ignores later occurrences
|
93
175
|
def alert_first_only()
|
94
176
|
generate_alert() if @count == 1
|
95
177
|
end
|
96
178
|
|
179
|
+
# Allow access to any parameter by a convenience method
|
180
|
+
# state.capture
|
181
|
+
# is more succinct than
|
182
|
+
# state.params[:capture]
|
97
183
|
def method_missing(symbol, *args)
|
98
184
|
@params[symbol]
|
99
185
|
end
|
data/lib/rec.rb
CHANGED
@@ -2,3 +2,131 @@
|
|
2
2
|
require 'string'
|
3
3
|
require 'rec/correlator'
|
4
4
|
require 'rec/alert'
|
5
|
+
|
6
|
+
=begin rdoc
|
7
|
+
= Ruby Event Correlation
|
8
|
+
REC correlates events in order to generate a smaller set of more meaningful events.
|
9
|
+
|
10
|
+
== Installation
|
11
|
+
1. Install the gem
|
12
|
+
$ sudo gem install rec
|
13
|
+
|
14
|
+
2. Select a ruleset or create your own
|
15
|
+
#!/usr/bin/ruby
|
16
|
+
require 'rec'
|
17
|
+
include REC
|
18
|
+
require 'rulesets/postfix-rules'
|
19
|
+
Correlator::start()
|
20
|
+
|
21
|
+
3. Start it up
|
22
|
+
$ rulesets/rules.rb < /var/log/mail.log 3>missed.log 2>control.log > newevents.log
|
23
|
+
|
24
|
+
== Why correlate events?
|
25
|
+
We all know that we should read our log files. But reading log files is *really* boring,
|
26
|
+
and frankly it's easy to miss important things in all the superfluous detail.
|
27
|
+
|
28
|
+
[Save time]
|
29
|
+
If you are lazy enough to not want to review all of your log files manually forever, and
|
30
|
+
smart enough to work out what needs monitoring and when you might want to pay attention,
|
31
|
+
then wouldn't it be good if you could define those rules and let the computer do what it
|
32
|
+
does best?
|
33
|
+
|
34
|
+
[Generate meaning]
|
35
|
+
The logs of many applications are filled with entries that are quite low level - perhaps
|
36
|
+
wonderful for debugging, but typically not terribly meaningful in terms of business.
|
37
|
+
Wouldn't it be good if we could summarise a bunch of low level events into a single
|
38
|
+
business event - and then just read the <em>business log</em>.
|
39
|
+
|
40
|
+
== Alternatives
|
41
|
+
There are several alternatives to REC which may suit your needs better:
|
42
|
+
* splunk[www.splunk.com]
|
43
|
+
* nagios[www.nagios.com]
|
44
|
+
* scalextreme.com[www.scalextreme.com]
|
45
|
+
While I like these options, I find they take a lot of configuring.
|
46
|
+
They also have some dependencies that make them a bit heavier than you may want.
|
47
|
+
If you just want to keep track of a few kinds of events, want a lot of flexibility
|
48
|
+
and control without too much effort, then REC may be of some value.
|
49
|
+
|
50
|
+
== How does REC work?
|
51
|
+
Each entry in a log file is an *event*.
|
52
|
+
The Correlator reads the events, and attempts to match an event against each Rule.
|
53
|
+
If an event matches a rule, the rule creates a State which just means we're remembering
|
54
|
+
that the event matched a rule. The pattern to match is a regexp, and the captured values
|
55
|
+
are named. For example
|
56
|
+
# log entry => "nfs: server earth not responding"
|
57
|
+
pattern => /nfs\: server (\w+) not responding/
|
58
|
+
details => ['host']
|
59
|
+
# values of interest are captured into a hash => {'host' => 'earth' }
|
60
|
+
:message => "Server %host$s is down"
|
61
|
+
# interpolation with named parameters => "Server earth is down"
|
62
|
+
|
63
|
+
A state has a fixed lifetime, set when it is created. At the end of its life, it may simply
|
64
|
+
expire quietly, or a pre-defined action may be executed. For example, if we find a server is down,
|
65
|
+
we may want to wait for 3 minutes and if it is not up again, then alert the administrator.
|
66
|
+
The server being down is a state, and two states are distinguished by their *titles*. For example,
|
67
|
+
"host earth is down" and "host terra is down".
|
68
|
+
|
69
|
+
Now that we're remembering a set of states, we can match events against not only the event's
|
70
|
+
message, but also other states. For example, we can match "host terra is up" against a previously
|
71
|
+
created state "host terra is down", and generate a new event "host terra is back up after 14 minutes".
|
72
|
+
We can also 'swallow' all of the rest of the "host terra is down" events because they add nothing new.
|
73
|
+
This <em>event compression</em> means the administrator gets one important message, and not 27
|
74
|
+
distracting alerts.
|
75
|
+
|
76
|
+
An Alert can be sent by email or IM, depending on your preferences and working hours.
|
77
|
+
The destinations and credentials are supplied to your ruleset:
|
78
|
+
# For better security, move the next few lines into a file readable only by
|
79
|
+
# the user running this script eg. /home/rec/alert.conf
|
80
|
+
# and then require that file
|
81
|
+
Alert.smtp_credentials("rec@gmail.com", "recret", "myfirm.com")
|
82
|
+
Alert.emailTo = "me@myfirm.com"
|
83
|
+
Alert.jabber_credentials("rec@gmail.com", "recret")
|
84
|
+
Alert.jabberTo = "me@myfirm.com"
|
85
|
+
|
86
|
+
Rules can then send an alert when desired. Two common cases involve alerting immediately
|
87
|
+
on the first event (eg. "host terra is down"), and alerting on expiry or at a subsequent event
|
88
|
+
(eg. "host terra is back up").
|
89
|
+
state.alert_first_only() # => generate a new event on first original event
|
90
|
+
# or
|
91
|
+
Alert.normal(state.alert_first_only()) # => log and also send the new event via email
|
92
|
+
|
93
|
+
In most cases, however, it is not necessary to alert the administrator at all. It is enough to
|
94
|
+
log the new event in the output logfile for later review.
|
95
|
+
|
96
|
+
== Examples
|
97
|
+
The best way to understand REC is to see how rules are written. The following examples were
|
98
|
+
inspired by SEC (simple-evcorr.sourceforge.net), so they employ the same names.
|
99
|
+
|
100
|
+
1. Warn if a user is having trouble executing sudo commands
|
101
|
+
The log entry (/var/log/secure) looks like this:
|
102
|
+
|
103
|
+
Sep 16 07:09:22 earth sudo: richard : 3 incorrect password attempts ;...
|
104
|
+
|
105
|
+
and the rule might look like this:
|
106
|
+
|
107
|
+
# single threshold rule
|
108
|
+
Rule.new(10034, {
|
109
|
+
:pattern => /\w+ sudo\: (\w+) \: 3 incorrect password attempts/,
|
110
|
+
:details => ["userid"],
|
111
|
+
:message => "Failed sudo password for user %userid$s",
|
112
|
+
:lifespan => 60,
|
113
|
+
:alert => "'Too much sudo activity' userid=%userid$s attempts=%count$d dur=%dur$0.3fs ",
|
114
|
+
:threshold => 3,
|
115
|
+
:capture => true
|
116
|
+
}) { |state|
|
117
|
+
if state.count == state.threshold
|
118
|
+
Alert.urgent(state.generate_alert())
|
119
|
+
state.release()
|
120
|
+
end
|
121
|
+
}
|
122
|
+
|
123
|
+
This rule matches the log entry against the *pattern*. Note that the captured value
|
124
|
+
<code>(\w+)</code> is stored as the +userid+. The +details+ parameter specifies the names
|
125
|
+
of the captured values in the sequence they appear in the pattern.
|
126
|
+
|
127
|
+
The *message* specifies the title of the state that is created. If there is no title
|
128
|
+
|
129
|
+
|
130
|
+
=end
|
131
|
+
module REC
|
132
|
+
end
|
data/lib/string.rb
CHANGED
@@ -1,9 +1,11 @@
|
|
1
|
+
# Custom extension to String
|
1
2
|
class String
|
2
|
-
#
|
3
|
-
#
|
4
|
-
# '
|
5
|
-
#
|
6
|
-
#
|
3
|
+
# Interpolates hash values into a formatted string.
|
4
|
+
# s = "Stats uid %uid$-5d belongs to %userid$s"
|
5
|
+
# h = {'uid' => 501, 'userid' => 'richard'}
|
6
|
+
# s.sprinth(h)
|
7
|
+
# # intermediate => "Stats uid %-5d belongs to %s" % [501, 'richard']
|
8
|
+
# # finally => "Stats uid 501 belongs to richard"
|
7
9
|
def sprinth(hash={})
|
8
10
|
raise ArgumentError.new("sprinth argument must be a Hash") unless hash.is_a?(Hash)
|
9
11
|
self.gsub(/\%\w+\$/,"%") % self.scan(/\%\w+\$/).collect { |token| hash[token[1..-2]] }
|
metadata
CHANGED
@@ -5,8 +5,8 @@ version: !ruby/object:Gem::Version
|
|
5
5
|
segments:
|
6
6
|
- 1
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
version: 1.0.
|
8
|
+
- 1
|
9
|
+
version: 1.0.1
|
10
10
|
platform: ruby
|
11
11
|
authors:
|
12
12
|
- Richard Kernahan
|
@@ -25,7 +25,7 @@ description: "\tSifts through your log files in real time, using stateful intell
|
|
25
25
|
\tbut is original code and any defects are entirely mine.\n\
|
26
26
|
\tWhile event correlation is inherently complex, REC attempts to make common tasks easy\n\
|
27
27
|
\twhile preserving plenty of power and flexibility for ambitious tasks.\n"
|
28
|
-
email: rec@finalstep.com.au
|
28
|
+
email: dev.rec@finalstep.com.au
|
29
29
|
executables: []
|
30
30
|
|
31
31
|
extensions: []
|