log2json 0.1.5
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +2 -0
- data/Gemfile.lock +24 -0
- data/README +66 -0
- data/bin/lines2redis +73 -0
- data/bin/nginxlog2json +58 -0
- data/bin/redis2es +146 -0
- data/bin/syslog2json +23 -0
- data/bin/tail +0 -0
- data/bin/tail-log +7 -0
- data/bin/tail-log.sh +67 -0
- data/bin/track-tails +54 -0
- data/lib/log2json.rb +217 -0
- data/lib/log2json/filters/base.patterns +93 -0
- data/lib/log2json/filters/nginx_access.rb +46 -0
- data/lib/log2json/filters/syslog.rb +62 -0
- data/lib/log2json/railslogger.rb +96 -0
- data/log2json.gemspec +18 -0
- data/src/coreutils-8.13_tail.patch +9 -0
- data/src/tail.c +2224 -0
- metadata +192 -0
data/bin/track-tails
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
#!/usr/bin/env ruby
#
# Helper for the tail-log.sh script to actually create and maintain the sincedb.
#
# A sincedb is just a directory that contains subdirectories to text files that
# record the stats of the files that we're following. Each file records the
# inode, size, and the number of lines from the start of the file read last time.
#
require 'fileutils'


@fmap = {} # path => [inode_number, file_size, number_of_lines_read]
@sincedb_dir = ARGV.shift()

# Note: We expect each -n+N argument to be followed by a file path
while not ARGV.empty? do
  arg = ARGV.shift()
  if arg =~ /^-n\+(\d+)$/
    fpath = ARGV.shift()
    # Bug fix: File.exists? was deprecated for years and removed in Ruby 3.2;
    # File.exist? is the supported spelling with identical behavior.
    next if not File.exist?(fpath)
    fstat = File.stat(fpath)
    # N in "-n+N" is 1-based tail(1) semantics, so N-1 lines were already read.
    @fmap[fpath] = [fstat.ino, fstat.size, $1.to_i() - 1]
  end
end

# Persist the collected stats: one "<inode> <size> <lines>" line per followed
# file, stored under @sincedb_dir mirroring the file's own path.
def commit
  return if @fmap.nil?
  @fmap.each do |fpath, t|
    sincedb_path = "#{@sincedb_dir}/#{fpath}.since"
    FileUtils.mkdir_p(File.dirname(sincedb_path))
    IO.write(sincedb_path, "#{t.join(' ')}\n")
  end
end
at_exit(&method(:commit))

# Note: You probably want to set the locale env var: LC_CTYPE=en_US.UTF-8
#
# Pass lines through from stdin to stdout while counting how many lines were
# seen per source file. Source boundaries are the "==> path <==" headers that
# the (patched) tail utility emits; an optional "[new_file]"/"[truncated]"
# annotation resets the counter for that file.
while line = gets()
  if line =~ /^==> (.+) <==(?: \[(.+)\])?$/
    fpath = $1
    if @fmap[fpath].nil? or $2 =~ /^new_file$|^truncated$/
      fstat = File.stat(fpath)
      @fmap[fpath] = [fstat.ino, fstat.size, 0]
    end
    STDOUT.write(line); STDOUT.flush()
    next
  end
  STDOUT.write(line); STDOUT.flush()
  # NOTE(review): assumes the input always starts with a "==> path <==" header
  # before any log line; otherwise fpath is nil here -- confirm with tail-log.sh.
  @fmap[fpath][2] += 1
  # Note: In the case of interruption, there's a chance that the line count is
  #       one line behind the number of log lines written to stdout. This is
  #       OK since we'd rather output a duplicate log line rather than miss
  #       one.
end
|
54
|
+
|
data/lib/log2json.rb
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
require 'json'
|
2
|
+
require 'grok-pure'
|
3
|
+
|
4
|
+
module Log2Json

  # Entry point: pump records from a spitter through the filters registered
  # for each record's '@type', and write surviving records to the output as
  # newline-delimited JSON.
  #
  # filters - Hash mapping a '@type' string to an Array of filter objects
  #           (each responding to #filter(record)).
  # opts    - :output  IO to write JSON lines to (default STDOUT).
  #           :spitter the record source; when nil one is built from STDIN,
  #                    with Spitter::CONFIG overrides taken from like-named
  #                    (lowercased) environment variables and the record type
  #                    from ENV['type'].
  #
  # A record is emitted only if its '@timestamp' survives filtering; a filter
  # drops a record by deleting that attribute.
  def self.main(filters, opts={})
    output = opts[:output] || STDOUT
    spitter = opts[:spitter]
    if spitter.nil?
      # configure the spitter to take config overrides from ENV
      config = {}
      Spitter::CONFIG.keys.each do |name|
        key = name.to_s.downcase
        config[name] = ENV[key] if ENV.member?(key)
      end
      spitter = ::Log2Json::Spitter.new(STDIN, ENV['type'], config)
    end
    spitter.each_record do |rec|
      # NOTE(review): assumes `filters` has an entry for every '@type' the
      # spitter can produce; an unknown type raises NoMethodError here --
      # confirm that is the intended fail-fast behavior.
      filters[rec['@type']].each { |f| f.filter(rec) }
      if ! rec['@timestamp'].nil?
        output.write(rec.to_json() << "\n")
        output.flush()
        # NOTE: Ruby's built-in json module, by default, doesn't output any
        #       literal newline characters while serializing. So using
        #       newlines as json record separator is fine here.
      end
    end
  end


  # A generic front-end to filters. It sits between an input and a filter, taking
  # log lines from an input and normalizing them into logstash-compatible JSON log
  # records for filters to consume.
  #
  # An input represents the source of log records. The only requirement of an input
  # is that it outputs to stdout a stream of lines(one line for each log record), with
  # the first line indicating the source(eg, file path, url, ...) of the log lines that
  # follow it. By default, the format of such source-indicating line is the same as
  # those spit out by the tail utility when multiple files are followed.(ie, ==> file-a.txt <==)
  # The format is customizable via a regex.
  #
  # For each type of logs that you'd like to ship, there will be 1 input process, 1 log2json
  # process(with perhaps multiple filters configured), and 1 output process. All connected
  # via unix pipes. The idea is that you can implement your log input and output processes
  # as shell scripts, and filters can be implemented in ruby(likely using Log2Json::Filters::GrokFilter)
  # and installed as ruby gems. Then, you will configure and combine filters and create a
  # Spitter that would use them. See the log2json ruby script for details.
  #
  class Spitter

    CONFIG = {
      LOG_INPUT_ENCODING: "UTF-8",
      UTC_TIMESTAMP_FORMAT: "%FT%T.%6NZ",
      # (sic) "SEPERATOR" misspelling is kept: the key doubles as an ENV
      # override name, so renaming it would break existing deployments.
      SOURCE_SEPERATOR_REGEX: Regexp.new("^==> (.+) <=="),
      # because /.../ screws up syntax highlighting in vim so I use Regexp.new(...)

      TAGS: '',
      FIELDS: '',
    }

    attr_reader :options

    # input_file - an IO yielding log lines (eg, STDIN).
    # type       - either a String used as '@type' for every record, or a
    #              Hash of Regexp => type-string matched against the current
    #              source path (a nil key supplies the fallback type).
    # opts       - overrides for the CONFIG defaults above.
    #
    # Raises RuntimeError if CONFIG[:FIELDS] doesn't parse into key/value pairs.
    def initialize(input_file, type, opts={})
      @input = input_file
      @type = type || ''
      # type can be either a string or a hash whose keys are pathes specified
      # as regex and values are type strings.

      @options = CONFIG.merge(opts)

      @source_host = %x(hostname).chomp()
      @source_path = nil
      @tags = options[:TAGS].strip.split(/\s*,\s*/)

      # FIELDS is a flat "k1 v1 k2 v2 ..." list; commas are treated as spaces.
      fields = options[:FIELDS].strip.gsub(/,/, ' ').split(/ +/)
      raise "Number of keys or values in fields must be even!" if fields.length % 2 != 0

      @fields = {}
      while not fields.empty? do
        k, v = fields.pop(2)
        @fields[k] = v
      end
    end

    # Yield one logstash-style record Hash per non-empty input line.
    # Source-separator lines (==> path <==) update @source_path and are not
    # themselves yielded.
    def each_record(&block)
      @input.each_line do |line|
        line.force_encoding(options[:LOG_INPUT_ENCODING])
        line.chomp!
        next if line.empty?
        if line =~ options[:SOURCE_SEPERATOR_REGEX]
          @source_path = $1
          next
        end
        block.call({
          # Every record has a '@type' this is how we match filters to log records.
          # Note: in Ruby 1.9, Hash are ordered, so here we'll be matching source path
          # against the regex in the order they are defined.
          '@type' => if @type.is_a?(String)
                       @type
                     else # @type is a Hash
                       if type = @type.find { |re, t| re =~ @source_path }
                         type[1]
                       else
                         @type[nil] || ''
                       end
                     end,
          '@source_path' => @source_path,
          '@source_host' => @source_host,
          '@timestamp' => Time.new.utc.strftime(options[:UTC_TIMESTAMP_FORMAT]),
          '@message' => line,
          '@tags' => @tags.clone, # defaults to []
          '@fields' => @fields.clone, # defaults to {}
        })
      end
    end
  end # Spitter


  module Filters #--------------------------------------

    # A filter takes a JSON log record, process it by adding, correcting or
    # even removing attributes from it if necessary.
    class GrokFilter

      DEFAULT_PATTERNS = File.join(File.dirname(__FILE__),
                                   'log2json', 'filters', 'base.patterns')

      CONFIG = {
        NAMED_CAPTURES_ONLY: true,
        # (sic) "EMTPY" misspelling kept: it is a public option key that
        # existing callers may already pass.
        KEEP_EMTPY_CAPTURES: false
      }

      attr_reader :type, :name

      # type         - '@type' string this filter applies to.
      # name         - filter name, appended to each record's 'filtered_by'.
      # regexps      - Array of grok expressions compiled into one pile.
      # opts         - keys starting with '@' are merged into matched records;
      #                the rest (eg, :pattern_file, CONFIG keys) configure the
      #                filter itself.
      # filter_block - optional post-processing block called with the record.
      def initialize(type, name, regexps, opts={}, &filter_block)
        @type = type
        @name = name
        @filter_block = filter_block
        @record_kvs = opts.select { |k,v| k.start_with?('@') }
        # Bug fix: merge caller options OVER the defaults. The original did
        # `opts.select{...}.merge CONFIG`, which made CONFIG always win and
        # silently ignored overrides such as NAMED_CAPTURES_ONLY: false.
        @config = CONFIG.merge(opts.select { |k,v| not k.start_with?('@') })

        @pile = Grok::Pile.new
        @pile.add_patterns_from_file(@config[:pattern_file] || DEFAULT_PATTERNS)
        regexps.each { |re| @pile.compile(re) }
      end

      # Filter the log record.
      #
      # This means checking if the record matches the patterns of this filter and
      # add the captured groups as members of the @fields of the record if
      # there's a match.
      #
      # Any '@' key-values configured for this filter will also
      # be added to the record after merging the captured groups.
      #
      # Return the record at the end if there's a match else return nil.
      # If the '@timestamp' attribute is removed from a record then the record will
      # be dropped.
      def filter(record)
        grok, match = @pile.match(record['@message'])
        if match
          # code stolen and modified from logstash's grok filter.
          fields = record['@fields']
          match.each_capture() do |key, value|
            next if value.nil? and not @config[:KEEP_EMTPY_CAPTURES]
            if key.include?(':')
              pattern_name, key, value_type = key.split(':') # ie, %{pattern_name:key:value_type}
              case value_type
              when 'int'  ; value = value.to_i
              when 'float'; value = value.to_f
              end
            else
              next if @config[:NAMED_CAPTURES_ONLY]
            end
            if fields[key].nil?
              fields[key] = value
            else # if there already exists a field for the captured value
                 # then we aggregate the captured values in an array for the field.
              if not fields[key].is_a?(Array)
                fields[key] = [fields[key]]
              end
              fields[key] << value
            end
          end

          record.merge!(@record_kvs) do |k, oldval, newval|
            if k == '@tags'
              oldval.concat(newval).uniq!
            elsif k == '@fields'
              oldval.merge!(newval)
            end
          end
          (fields['filtered_by'] ||= []) << name
          if @filter_block
            @filter_block.call(record)
          else
            record
          end
        else
          nil
        end
      end
    end # end class GrokFilter


  end # end module Filters


end # Log2Json module
|
217
|
+
|
@@ -0,0 +1,93 @@
|
|
1
|
+
USERNAME [a-zA-Z0-9_-]+
|
2
|
+
USER %{USERNAME}
|
3
|
+
INT (?:[+-]?(?:[0-9]+))
|
4
|
+
BASE10NUM (?<![0-9.+-])(?>[+-]?(?:(?:[0-9]+(?:\.[0-9]+)?)|(?:\.[0-9]+)))
|
5
|
+
NUMBER (?:%{BASE10NUM})
|
6
|
+
BASE16NUM (?<![0-9A-Fa-f])(?:[+-]?(?:0x)?(?:[0-9A-Fa-f]+))
|
7
|
+
BASE16FLOAT \b(?<![0-9A-Fa-f.])(?:[+-]?(?:0x)?(?:(?:[0-9A-Fa-f]+(?:\.[0-9A-Fa-f]*)?)|(?:\.[0-9A-Fa-f]+)))\b
|
8
|
+
|
9
|
+
POSINT \b(?:[0-9]+)\b
|
10
|
+
WORD \b\w+\b
|
11
|
+
NOTSPACE \S+
|
12
|
+
DATA .*?
|
13
|
+
GREEDYDATA .*
|
14
|
+
#QUOTEDSTRING (?:(?<!\\)(?:"(?:\\.|[^\\"])*"|(?:'(?:\\.|[^\\'])*')|(?:`(?:\\.|[^\\`])*`)))
|
15
|
+
QUOTEDSTRING (?:(?<!\\)(?:"(?>[^\\"]+|\\.)*")|(?:'(?>[^\\']+|\\.)*')|(?:`(?>[^\\`]+|\\.)*`))
|
16
|
+
|
17
|
+
# Networking
|
18
|
+
MAC (?:%{CISCOMAC}|%{WINDOWSMAC}|%{COMMONMAC})
|
19
|
+
CISCOMAC (?:(?:[A-Fa-f0-9]{4}\.){2}[A-Fa-f0-9]{4})
|
20
|
+
WINDOWSMAC (?:(?:[A-Fa-f0-9]{2}-){5}[A-Fa-f0-9]{2})
|
21
|
+
COMMONMAC (?:(?:[A-Fa-f0-9]{2}:){5}[A-Fa-f0-9]{2})
|
22
|
+
IP (?<![0-9])(?:(?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2})[.](?:25[0-5]|2[0-4][0-9]|[0-1]?[0-9]{1,2}))(?![0-9])
|
23
|
+
HOSTNAME \b(?:[0-9A-Za-z][0-9A-Za-z-]{0,62})(?:\.(?:[0-9A-Za-z][0-9A-Za-z-]{0,62}))*(\.?|\b)
|
24
|
+
HOST %{HOSTNAME}
|
25
|
+
IPORHOST (?:%{HOSTNAME}|%{IP})
|
26
|
+
HOSTPORT (?:%{IPORHOST=~/\./}:%{POSINT})
|
27
|
+
|
28
|
+
# paths
|
29
|
+
PATH (?:%{UNIXPATH}|%{WINPATH})
|
30
|
+
UNIXPATH (?:/(?:[\w_%!$@:.,-]+|\\.)*)+
|
31
|
+
#UNIXPATH (?<![\w\/])(?:/[^\/\s?*]*)+
|
32
|
+
LINUXTTY (?:/dev/pts/%{POSINT})
|
33
|
+
BSDTTY (?:/dev/tty[pq][a-z0-9])
|
34
|
+
TTY (?:%{BSDTTY}|%{LINUXTTY})
|
35
|
+
WINPATH (?:[A-Za-z]+:|\\)(?:\\[^\\?*]*)+
|
36
|
+
URIPROTO [A-Za-z]+(\+[A-Za-z+]+)?
|
37
|
+
URIHOST %{IPORHOST}(?::%{POSINT:port})?
|
38
|
+
# uripath comes loosely from RFC1738, but mostly from what Firefox
|
39
|
+
# doesn't turn into %XX
|
40
|
+
URIPATH (?:/[A-Za-z0-9$.+!*'(),~:#%_-]*)+
|
41
|
+
#URIPARAM \?(?:[A-Za-z0-9]+(?:=(?:[^&]*))?(?:&(?:[A-Za-z0-9]+(?:=(?:[^&]*))?)?)*)?
|
42
|
+
URIPARAM \?[A-Za-z0-9$.+!*'(),~#%&/=:;_-]*
|
43
|
+
URIPATHPARAM %{URIPATH}(?:%{URIPARAM})?
|
44
|
+
URI %{URIPROTO}://(?:%{USER}(?::[^@]*)?@)?(?:%{URIHOST})?(?:%{URIPATHPARAM})?
|
45
|
+
|
46
|
+
# Months: January, Feb, 3, 03, 12, December
|
47
|
+
MONTH \b(?:[Jj]an(?:uary)?|[Ff]eb(?:ruary)?|[Mm]ar(?:ch)?|[Aa]pr(?:il)?|[Mm]ay|[Jj]un(?:e)?|[Jj]ul(?:y)?|[Aa]ug(?:ust)?|[Ss]ep(?:tember)?|[Oo]ct(?:ober)?|[Nn]ov(?:ember)?|[Dd]ec(?:ember)?)\b
|
48
|
+
MONTHNUM (?:0?[1-9]|1[0-2])
|
49
|
+
MONTHDAY (?:3[01]|[1-2]?[0-9]|0?[1-9])
|
50
|
+
|
51
|
+
# Days: Monday, Tue, Thu, etc...
|
52
|
+
DAY (?:[Mm]on(?:day)?|[Tt]ue(?:sday)?|[Ww]ed(?:nesday)?|[Tt]hu(?:rsday)?|[Ff]ri(?:day)?|[Ss]at(?:urday)?|[Ss]un(?:day)?)
|
53
|
+
|
54
|
+
# Years?
|
55
|
+
YEAR [0-9]+
|
56
|
+
# Time: HH:MM:SS
|
57
|
+
#TIME \d{2}:\d{2}(?::\d{2}(?:\.\d+)?)?
|
58
|
+
# I'm still on the fence about using grok to perform the time match,
|
59
|
+
# since it's probably slower.
|
60
|
+
# TIME %{POSINT<24}:%{POSINT<60}(?::%{POSINT<60}(?:\.%{POSINT})?)?
|
61
|
+
HOUR (?:2[0123]|[01][0-9])
|
62
|
+
MINUTE (?:[0-5][0-9])
|
63
|
+
# '60' is a leap second in most time standards and thus is valid.
|
64
|
+
SECOND (?:(?:[0-5][0-9]|60)(?:[.,][0-9]+)?)
|
65
|
+
TIME (?<![0-9])%{HOUR}:%{MINUTE}(?::%{SECOND})(?![0-9])
|
66
|
+
# datestamp is YYYY/MM/DD-HH:MM:SS.UUUU (or something like it)
|
67
|
+
DATE_US %{MONTHNUM}[/-]%{MONTHDAY}[/-]%{YEAR}
|
68
|
+
DATE_EU %{YEAR}[/-]%{MONTHNUM}[/-]%{MONTHDAY}
|
69
|
+
ISO8601_TIMEZONE (?:Z|[+-]%{HOUR}(?::?%{MINUTE}))
|
70
|
+
ISO8601_SECOND (?:%{SECOND}|60)
|
71
|
+
TIMESTAMP_ISO8601 %{YEAR}-%{MONTHNUM}-%{MONTHDAY}[T ]%{HOUR}:?%{MINUTE}(?::?%{SECOND})?%{ISO8601_TIMEZONE}?
|
72
|
+
DATE %{DATE_US}|%{DATE_EU}
|
73
|
+
DATESTAMP %{DATE}[- ]%{TIME}
|
74
|
+
TZ (?:[PMCE][SD]T)
|
75
|
+
DATESTAMP_RFC822 %{DAY} %{MONTH} %{MONTHDAY} %{YEAR} %{TIME} %{TZ}
|
76
|
+
DATESTAMP_OTHER %{DAY} %{MONTH} %{MONTHDAY} %{TIME} %{TZ} %{YEAR}
|
77
|
+
|
78
|
+
# Syslog Dates: Month Day HH:MM:SS
|
79
|
+
SYSLOGTIMESTAMP %{MONTH} +%{MONTHDAY} %{TIME}
|
80
|
+
PROG (?:[\w._/-]+)
|
81
|
+
SYSLOGPROG %{PROG:program}(?:\[%{POSINT:pid}\])?
|
82
|
+
SYSLOGHOST %{IPORHOST}
|
83
|
+
SYSLOGFACILITY <%{POSINT:facility}.%{POSINT:priority}>
|
84
|
+
|
85
|
+
ZONE %{INT}
|
86
|
+
HTTPDATE %{MONTHDAY}/%{MONTH}/%{YEAR}:%{TIME} %{ZONE}
|
87
|
+
|
88
|
+
# Shortcuts
|
89
|
+
QS %{QUOTEDSTRING}
|
90
|
+
|
91
|
+
# Log formats
|
92
|
+
SYSLOGBASE %{SYSLOGTIMESTAMP:timestamp} (?:%{SYSLOGFACILITY} )?%{SYSLOGHOST:logsource} %{SYSLOGPROG}:
|
93
|
+
COMBINEDAPACHELOG %{IPORHOST:clientip} %{USER:ident} %{USER:auth} \[%{HTTPDATE:timestamp}\] "%{WORD:verb} %{URIPATHPARAM:request} HTTP/%{NUMBER:httpversion}" %{NUMBER:response} (?:%{NUMBER:bytes}|-) "(?:%{URI:referrer}|-)" %{QS:agent}
|
@@ -0,0 +1,46 @@
|
|
1
|
+
require 'log2json'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Log2Json
  module Filters
    #----

    # Grok filter for nginx access logs in a combined-like format.
    # On a match it promotes the request time into '@timestamp' and tags the
    # record with "nginx" and "http".
    class NginxAccessLogFilter < GrokFilter

      # name   - filter name recorded in 'filtered_by'.
      # config - GrokFilter options; :type defaults to 'nginx-access'.
      def initialize(name, config={})
        # Thanks to - http://boojapathy.wordpress.com/2012/04/29/logstash-graylog-cant-ask-more-for-logging/
        #
        # 10.33.158.237 - - [12/Apr/2013:13:27:54 -0000] "GET /UEFA/news.json?blackberry_native_version=1.9.4&locale=es HTTP/1.1" 200 6495 "-" "-" "-" "-" "-" cache_status:BYPASS
        #
        log_type = config.delete(:type) {'nginx-access'}
        pattern = %w[ %{IP:ip}
                      (?:%{HOST:host}|-)
                      (?:%{USER:user}|-)
                      \\\[%{HTTPDATE:datetime}\\\] +"(?:%{WORD:method} %{URIPATHPARAM:path} HTTP/%{NUMBER:version}|%{DATA:request})"
                      %{NUMBER:status}
                      (?:%{NUMBER:size}|-)
                      %{QUOTEDSTRING:referrer}
                      %{QUOTEDSTRING:user_agent}
                      (?:%{GREEDYDATA:extra_info})
                  ].join(' ')
        super(log_type, name, [pattern], config)
      end

      # Run the grok match, then replace the record timestamp with the parsed
      # request time. Returns the record on a match, nil otherwise.
      def filter(record)
        return nil if super(record).nil?
        fields = record['@fields']
        # eg, 23/Nov/2012:19:11:10 +0000
        record['@timestamp'] = DateTime.strptime(fields['datetime'], "%d/%b/%Y:%T %z")
        fields.delete('datetime')
        record['@tags'].push("nginx", "http")
        record
      end

    end # NginxAccessLogFilter

    #----
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'log2json'
|
2
|
+
require 'date'
|
3
|
+
|
4
|
+
module Log2Json
  module Filters
    #----

    # A default syslog filter.
    # This works the rsyslog and its default configuration as distributed with Ubuntu 12.04 LTS.
    #
    # It also assumes your syslog timestamp is in UTC. To make sure, add the following line to
    # /etc/default/rsyslog:
    #
    #   export TZ=UTC
    #
    # and then restart rsyslog.(ie, sudo service restart rsyslog)
    # Other settings for rsyslog you might want to adjust includes:
    #
    #   MaxMessageSize 64k # Increase the message size allowed to 64k (default is like 2k... or something.)
    #
    #   $IMUXSockRateLimitInterval 0 # Disable rate limiting, so we are sure to get every single message logged.
    #                                # Note: Add it after $ModLoad imuxsock
    #
    class SyslogFilter < GrokFilter

      # name   - filter name recorded in 'filtered_by'.
      # config - GrokFilter options; :type defaults to 'syslog'.
      def initialize(name, config={})
        type = config.delete(:type) {'syslog'}
        super(type, name, [
              %w[ %{SYSLOGTIMESTAMP:syslog_timestamp}
                  %{SYSLOGHOST:syslog_hostname}?
                  %{PROG:syslog_program}(?:\\\[%{POSINT:syslog_pid}\\\])?:
                  %{GREEDYDATA:syslog_message}
              ].join(' ')], config
             )
      end

      # Normalize a matched syslog record: preserve the receive-time metadata,
      # replace '@timestamp'/'@source_host'/'@message' with the values parsed
      # from the line, tag the record with the program name, and strip the
      # temporary syslog_* fields. Returns the record on a match, nil otherwise.
      def filter(record)
        return nil if super(record).nil?
        record['@received_at'] = record['@timestamp']
        record['@received_from'] = record['@source_host']

        fields = record['@fields']

        # Syslog timestamps carry no zone; per the class docs we declare UTC.
        fields['syslog_timestamp'] += '+0000'
        # NOTE(review): the format has no %Y, so DateTime.strptime falls back
        # to its default year -- confirm downstream only relies on month/day/time.
        record['@timestamp'] = DateTime.strptime(fields["syslog_timestamp"], "%b %e %T%z") # eg, Apr 12 15:55:28+0000

        record['@source_host'] = fields['syslog_hostname']
        # rsyslog escapes embedded newlines as "#012"; undo that here.
        record['@message'] = fields['syslog_message'].gsub(/#012/, "\n")
        record['@tags'] << fields['syslog_program']
        # Bug fix: the original deleted keys from `fields` while iterating it
        # with each_key, which is unspecified behavior for Ruby Hashes.
        # Hash#delete_if performs the same pruning safely.
        fields.delete_if { |k, v| k.start_with?('syslog_') }
        record
      end

    end # SyslogFilter

    #----
  end
end
|