logfile_interval 2.1.5 → 2.2.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/README.md +5 -0
- data/bin/aggregate_access_log.rb +3 -4
- data/lib/logfile_interval/aggregator_set.rb +2 -0
- data/lib/logfile_interval/interval_builder.rb +2 -3
- data/lib/logfile_interval/logfile.rb +1 -1
- data/lib/logfile_interval/parsed_line/base.rb +10 -3
- data/lib/logfile_interval/parsed_line/parser.rb +103 -66
- data/lib/logfile_interval/version.rb +1 -1
- data/spec/lib/logfile_spec.rb +9 -0
- data/spec/support/lib/access_log.rb +2 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c7f2e7cabde319d9e316d67b25f41786d30a3a65
|
4
|
+
data.tar.gz: 31af6fa687dbd9368e13faa64154ce778d27f3a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bafa4a0527c1c2b33479ce61276830c39ca588648c95a22e39660e3c26ec410dcdeaf9c47453902619347e6f7b15dcdb40d0dfb8ef8a6fb8e2584882a3b77ffc
|
7
|
+
data.tar.gz: 0dcafb906ba7c9ad3e2e88733858e8e54cfa9a54ae8e9f94a3e3acc6d4a14b87c4193541d1af98620bdac73ebd2ad3721815b56fa4ecf282e6bcb93d46be1855
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,8 @@ class AccessLog < LogfileInterval::ParsedLine::Base
|
|
33
33
|
add_column :name => 'code', :pos => 4, :aggregator => :count
|
34
34
|
add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
|
35
35
|
|
36
|
+
skip :pos => 3, :regex => /firefox/
|
37
|
+
|
36
38
|
def time
|
37
39
|
DateTime.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z').to_time
|
38
40
|
end
|
@@ -97,6 +99,8 @@ class AccessLog < LogfileInterval::ParsedLine::Base
|
|
97
99
|
add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
|
98
100
|
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
|
99
101
|
|
102
|
+
skip :pos => 3, :regex => /firefox/
|
103
|
+
|
100
104
|
def time
|
101
105
|
Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
|
102
106
|
end
|
@@ -105,6 +109,7 @@ end
|
|
105
109
|
#### The parser must define:
|
106
110
|
* A regex that extracts the fields out of each line.
|
107
111
|
* A set of columns that will be parsed and aggregated in time intervals.
|
112
|
+
* 0 or more columns that will be skipped if the column value matches the specified regex
|
108
113
|
* A 'time' method that converts the mandatory timestamp field of a line into a Time object.
|
109
114
|
|
110
115
|
#### Attributes of a column:
|
data/bin/aggregate_access_log.rb
CHANGED
@@ -18,9 +18,9 @@ class AccessLogParsedLine < LogfileInterval::ParsedLine::Base
|
|
18
18
|
|
19
19
|
add_column :name => 'ip', :pos => 1, :aggregator => :count
|
20
20
|
add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
|
21
|
-
add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by =>
|
22
|
-
add_column :name => 'length', :pos => 5, :aggregator => :average,
|
23
|
-
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by =>
|
21
|
+
add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => :ip
|
22
|
+
add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
|
23
|
+
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => :ip, :conversion => :integer
|
24
24
|
add_column :name => 'referer', :pos => 6, :aggregator => :count
|
25
25
|
add_column :name => 'referer_by_ip', :pos => 6, :aggregator => :count, :group_by => :ip
|
26
26
|
|
@@ -40,4 +40,3 @@ builder.each_interval do |interval|
|
|
40
40
|
pp interval[:referer_by_ip]
|
41
41
|
STDIN.gets
|
42
42
|
end
|
43
|
-
|
@@ -12,6 +12,8 @@ module LogfileInterval
|
|
12
12
|
def add(record)
|
13
13
|
@parser_columns.each do |name, options|
|
14
14
|
next unless @aggregators[name]
|
15
|
+
next unless options[:noskip] || !record.skip_with_exceptions?
|
16
|
+
|
15
17
|
group_by_value = record[options[:group_by]] if options[:group_by]
|
16
18
|
@aggregators[name].add(record[name], group_by_value)
|
17
19
|
end
|
@@ -20,9 +20,8 @@ module LogfileInterval
|
|
20
20
|
|
21
21
|
case order
|
22
22
|
when :asc then self.extend Ascending
|
23
|
-
|
24
|
-
|
25
|
-
else raise ArgumentError, "Can't determine parsed_lines_enum sort order"
|
23
|
+
else
|
24
|
+
self.extend Descending
|
26
25
|
end
|
27
26
|
end
|
28
27
|
|
@@ -43,7 +43,7 @@ module LogfileInterval
|
|
43
43
|
return enum_for(:each_parsed_line) unless block_given?
|
44
44
|
each_line do |line|
|
45
45
|
record = parser.create_record(line)
|
46
|
-
yield record if record
|
46
|
+
yield record if record && !record.skip?
|
47
47
|
end
|
48
48
|
end
|
49
49
|
alias_method :each, :each_parsed_line
|
@@ -10,12 +10,22 @@ module LogfileInterval
|
|
10
10
|
def initialize(line)
|
11
11
|
@data = self.class.parse(line)
|
12
12
|
@valid = @data ? true : false
|
13
|
+
@skip = @data ? @data[:skip] : false
|
14
|
+
@skip_with_exceptions = @data ? @data[:skip_with_exceptions] : false
|
13
15
|
end
|
14
16
|
|
15
17
|
def valid?
|
16
18
|
@valid
|
17
19
|
end
|
18
20
|
|
21
|
+
def skip?
|
22
|
+
@skip
|
23
|
+
end
|
24
|
+
|
25
|
+
def skip_with_exceptions?
|
26
|
+
@skip_with_exceptions
|
27
|
+
end
|
28
|
+
|
19
29
|
def time
|
20
30
|
raise NotImplemented
|
21
31
|
end
|
@@ -26,6 +36,3 @@ module LogfileInterval
|
|
26
36
|
end
|
27
37
|
end
|
28
38
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
@@ -3,95 +3,132 @@ module LogfileInterval
|
|
3
3
|
class ConfigurationError < StandardError; end
|
4
4
|
|
5
5
|
module Parser
|
6
|
-
|
7
|
-
def columns
|
8
|
-
@columns ||= {}
|
9
|
-
end
|
10
|
-
|
11
|
-
def set_regex(regex)
|
12
|
-
@regex = regex
|
13
|
-
end
|
6
|
+
attr_reader :regex
|
14
7
|
|
15
|
-
|
16
|
-
|
17
|
-
|
8
|
+
def columns
|
9
|
+
@columns ||= {}
|
10
|
+
end
|
18
11
|
|
19
|
-
|
20
|
-
|
12
|
+
def skip_columns
|
13
|
+
@skip_columns ||= []
|
14
|
+
end
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
end
|
16
|
+
def skip_columns_with_exceptions
|
17
|
+
@skip_columns_with_exceptions ||= []
|
18
|
+
end
|
26
19
|
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
def set_regex(regex)
|
21
|
+
@regex = regex
|
22
|
+
end
|
30
23
|
|
31
|
-
|
32
|
-
|
24
|
+
def add_column(options)
|
25
|
+
validate_column_options(options)
|
26
|
+
options = sanitize_column_options(options)
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
val = match_data[options[:pos]]
|
37
|
-
data[name] = convert(val, options[:conversion])
|
38
|
-
end
|
39
|
-
data
|
40
|
-
end
|
28
|
+
name = options[:name]
|
29
|
+
columns[name] = options
|
41
30
|
|
42
|
-
|
43
|
-
|
44
|
-
return record if record.valid?
|
45
|
-
return nil
|
31
|
+
define_method(name) do
|
32
|
+
@data[name]
|
46
33
|
end
|
34
|
+
end
|
47
35
|
|
48
|
-
|
49
|
-
|
50
|
-
|
36
|
+
def skip(options)
|
37
|
+
unless options[:pos] && options[:regex]
|
38
|
+
raise ConfigurationError, "skip option must include pos and regex"
|
51
39
|
end
|
52
40
|
|
41
|
+
skip_columns << { pos: options[:pos], regex: options[:regex] }
|
42
|
+
end
|
53
43
|
|
54
|
-
|
55
|
-
|
44
|
+
def skip_with_exceptions(options)
|
45
|
+
unless options[:pos] && options[:regex]
|
46
|
+
raise ConfigurationError, "skip option must include pos and regex"
|
56
47
|
end
|
57
48
|
|
58
|
-
|
49
|
+
skip_columns_with_exceptions << { pos: options[:pos], regex: options[:regex] }
|
50
|
+
end
|
59
51
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
end
|
67
|
-
end
|
52
|
+
def parse(line)
|
53
|
+
raise ConfigurationError, 'There must be at least 1 configured column' unless columns.any?
|
54
|
+
raise ConfigurationError, 'A regex must be set' unless regex
|
55
|
+
|
56
|
+
match_data = regex.match(line)
|
57
|
+
return nil unless match_data
|
68
58
|
|
69
|
-
|
70
|
-
|
59
|
+
data = { skip: false }
|
60
|
+
columns.each do |name, options|
|
61
|
+
val = match_data[options[:pos]]
|
62
|
+
data[name] = convert(val, options[:conversion])
|
71
63
|
end
|
72
64
|
|
73
|
-
|
74
|
-
|
75
|
-
if options
|
76
|
-
|
77
|
-
|
78
|
-
else
|
79
|
-
options.delete(:group_by)
|
80
|
-
end
|
65
|
+
skip_columns.each do |options|
|
66
|
+
val = match_data[options[:pos]]
|
67
|
+
if val =~ options[:regex]
|
68
|
+
data[:skip] = true
|
69
|
+
break
|
81
70
|
end
|
82
|
-
options[:conversion] = options.fetch(:conversion, :string)
|
83
|
-
options[:aggregator_class] = Aggregator::Base.klass(options[:aggregator])
|
84
|
-
options.delete(:aggregator)
|
85
|
-
options
|
86
71
|
end
|
87
72
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
73
|
+
data
|
74
|
+
end
|
75
|
+
|
76
|
+
def create_record(line)
|
77
|
+
record = new(line)
|
78
|
+
return record if record.valid?
|
79
|
+
return nil
|
80
|
+
end
|
81
|
+
|
82
|
+
def set_column_custom_options(column_name, options)
|
83
|
+
raise ArgumentError, "Invalid column name: #{column_name}" unless columns.has_key?(column_name)
|
84
|
+
columns[column_name][:custom_options] = options
|
85
|
+
end
|
86
|
+
|
87
|
+
|
88
|
+
def each(&block)
|
89
|
+
columns.each(&block)
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def validate_column_options(options)
|
95
|
+
validate_option(options, :name)
|
96
|
+
validate_option(options, :pos)
|
97
|
+
validate_option(options, :aggregator)
|
98
|
+
if options[:name].to_s == 'skip'
|
99
|
+
raise ConfigurationError, "'skip' is a reserved column name"
|
100
|
+
end
|
101
|
+
unless Aggregator::Base.exist?(options[:aggregator]) || options[:aggregator] == :timestamp
|
102
|
+
raise ConfigurationError, "aggregator must be one of #{Aggregator::Base.all.join(', ')}"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def validate_option(options, key, errmsg = nil)
|
107
|
+
raise ConfigurationError, errmsg || "#{key} is a mandatory column option" unless options.has_key?(key)
|
108
|
+
end
|
109
|
+
|
110
|
+
def sanitize_column_options(options)
|
111
|
+
options[:name] = options[:name].to_sym
|
112
|
+
if options.has_key?(:group_by)
|
113
|
+
if options[:group_by].to_sym != options[:name]
|
114
|
+
options[:group_by] = options[:group_by].to_sym
|
115
|
+
else
|
116
|
+
options.delete(:group_by)
|
93
117
|
end
|
94
118
|
end
|
119
|
+
options[:conversion] = options.fetch(:conversion, :string)
|
120
|
+
options[:aggregator_class] = Aggregator::Base.klass(options[:aggregator])
|
121
|
+
options.delete(:aggregator)
|
122
|
+
options
|
123
|
+
end
|
124
|
+
|
125
|
+
def convert(val, conversion)
|
126
|
+
case conversion
|
127
|
+
when :integer then val.to_i
|
128
|
+
when :float then val.to_f
|
129
|
+
else val
|
130
|
+
end
|
131
|
+
end
|
95
132
|
end
|
96
133
|
end
|
97
134
|
end
|
data/spec/lib/logfile_spec.rb
CHANGED
@@ -78,6 +78,15 @@ module LogfileInterval
|
|
78
78
|
records.last.length_by_ip.should == 185
|
79
79
|
end
|
80
80
|
|
81
|
+
it 'skips lines matching skip options' do
|
82
|
+
records = []
|
83
|
+
@alf.each_parsed_line do |record|
|
84
|
+
records << record
|
85
|
+
end
|
86
|
+
|
87
|
+
records.size.should == 6
|
88
|
+
end
|
89
|
+
|
81
90
|
context 'without a block' do
|
82
91
|
it 'should return an enumerator' do
|
83
92
|
e = @alf.each_parsed_line
|
@@ -15,6 +15,8 @@ module LogfileInterval
|
|
15
15
|
add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
|
16
16
|
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :conversion => :integer, :group_by => 'ip'
|
17
17
|
|
18
|
+
skip :pos => 7, :regex => /Spinn3r/
|
19
|
+
|
18
20
|
def time
|
19
21
|
Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
|
20
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logfile_interval
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.5
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philippe Le Rohellec
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -164,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
164
|
version: '0'
|
165
165
|
requirements: []
|
166
166
|
rubyforge_project:
|
167
|
-
rubygems_version: 2.
|
167
|
+
rubygems_version: 2.6.13
|
168
168
|
signing_key:
|
169
169
|
specification_version: 4
|
170
170
|
summary: Aggregate logfile data into intervals
|