logfile_interval 2.1.5 → 2.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.travis.yml +2 -1
- data/README.md +5 -0
- data/bin/aggregate_access_log.rb +3 -4
- data/lib/logfile_interval/aggregator_set.rb +2 -0
- data/lib/logfile_interval/interval_builder.rb +2 -3
- data/lib/logfile_interval/logfile.rb +1 -1
- data/lib/logfile_interval/parsed_line/base.rb +10 -3
- data/lib/logfile_interval/parsed_line/parser.rb +103 -66
- data/lib/logfile_interval/version.rb +1 -1
- data/spec/lib/logfile_spec.rb +9 -0
- data/spec/support/lib/access_log.rb +2 -0
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: c7f2e7cabde319d9e316d67b25f41786d30a3a65
|
4
|
+
data.tar.gz: 31af6fa687dbd9368e13faa64154ce778d27f3a2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: bafa4a0527c1c2b33479ce61276830c39ca588648c95a22e39660e3c26ec410dcdeaf9c47453902619347e6f7b15dcdb40d0dfb8ef8a6fb8e2584882a3b77ffc
|
7
|
+
data.tar.gz: 0dcafb906ba7c9ad3e2e88733858e8e54cfa9a54ae8e9f94a3e3acc6d4a14b87c4193541d1af98620bdac73ebd2ad3721815b56fa4ecf282e6bcb93d46be1855
|
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -33,6 +33,8 @@ class AccessLog < LogfileInterval::ParsedLine::Base
|
|
33
33
|
add_column :name => 'code', :pos => 4, :aggregator => :count
|
34
34
|
add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => 'ip'
|
35
35
|
|
36
|
+
skip :pos => 3, :regex => /firefox/
|
37
|
+
|
36
38
|
def time
|
37
39
|
DateTime.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z').to_time
|
38
40
|
end
|
@@ -97,6 +99,8 @@ class AccessLog < LogfileInterval::ParsedLine::Base
|
|
97
99
|
add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
|
98
100
|
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => 'ip', :conversion => :integer
|
99
101
|
|
102
|
+
skip :pos => 3, :regex => /firefox/
|
103
|
+
|
100
104
|
def time
|
101
105
|
Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
|
102
106
|
end
|
@@ -105,6 +109,7 @@ end
|
|
105
109
|
#### The parser must define:
|
106
110
|
* A regex that extracts the fields out of each line.
|
107
111
|
* A set of columns that will be parsed and aggregated in time intervals.
|
112
|
+
* 0 or more skip rules: a line is skipped if the value of the column at the given position matches the specified regex.
|
108
113
|
* A 'time' method that converts the mandatory timestamp field of a line into a Time object.
|
109
114
|
|
110
115
|
#### Attributes of a column:
|
data/bin/aggregate_access_log.rb
CHANGED
@@ -18,9 +18,9 @@ class AccessLogParsedLine < LogfileInterval::ParsedLine::Base
|
|
18
18
|
|
19
19
|
add_column :name => 'ip', :pos => 1, :aggregator => :count
|
20
20
|
add_column :name => 'timestamp', :pos => 2, :aggregator => :timestamp
|
21
|
-
add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by =>
|
22
|
-
add_column :name => 'length', :pos => 5, :aggregator => :average,
|
23
|
-
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by =>
|
21
|
+
add_column :name => 'code_by_ip', :pos => 4, :aggregator => :count, :group_by => :ip
|
22
|
+
add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
|
23
|
+
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :group_by => :ip, :conversion => :integer
|
24
24
|
add_column :name => 'referer', :pos => 6, :aggregator => :count
|
25
25
|
add_column :name => 'referer_by_ip', :pos => 6, :aggregator => :count, :group_by => :ip
|
26
26
|
|
@@ -40,4 +40,3 @@ builder.each_interval do |interval|
|
|
40
40
|
pp interval[:referer_by_ip]
|
41
41
|
STDIN.gets
|
42
42
|
end
|
43
|
-
|
@@ -12,6 +12,8 @@ module LogfileInterval
|
|
12
12
|
def add(record)
|
13
13
|
@parser_columns.each do |name, options|
|
14
14
|
next unless @aggregators[name]
|
15
|
+
next unless options[:noskip] || !record.skip_with_exceptions?
|
16
|
+
|
15
17
|
group_by_value = record[options[:group_by]] if options[:group_by]
|
16
18
|
@aggregators[name].add(record[name], group_by_value)
|
17
19
|
end
|
@@ -20,9 +20,8 @@ module LogfileInterval
|
|
20
20
|
|
21
21
|
case order
|
22
22
|
when :asc then self.extend Ascending
|
23
|
-
|
24
|
-
|
25
|
-
else raise ArgumentError, "Can't determine parsed_lines_enum sort order"
|
23
|
+
else
|
24
|
+
self.extend Descending
|
26
25
|
end
|
27
26
|
end
|
28
27
|
|
@@ -43,7 +43,7 @@ module LogfileInterval
|
|
43
43
|
return enum_for(:each_parsed_line) unless block_given?
|
44
44
|
each_line do |line|
|
45
45
|
record = parser.create_record(line)
|
46
|
-
yield record if record
|
46
|
+
yield record if record && !record.skip?
|
47
47
|
end
|
48
48
|
end
|
49
49
|
alias_method :each, :each_parsed_line
|
@@ -10,12 +10,22 @@ module LogfileInterval
|
|
10
10
|
def initialize(line)
|
11
11
|
@data = self.class.parse(line)
|
12
12
|
@valid = @data ? true : false
|
13
|
+
@skip = @data ? @data[:skip] : false
|
14
|
+
@skip_with_exceptions = @data ? @data[:skip_with_exceptions] : false
|
13
15
|
end
|
14
16
|
|
15
17
|
def valid?
|
16
18
|
@valid
|
17
19
|
end
|
18
20
|
|
21
|
+
def skip?
|
22
|
+
@skip
|
23
|
+
end
|
24
|
+
|
25
|
+
def skip_with_exceptions?
|
26
|
+
@skip_with_exceptions
|
27
|
+
end
|
28
|
+
|
19
29
|
def time
|
20
30
|
raise NotImplemented
|
21
31
|
end
|
@@ -26,6 +36,3 @@ module LogfileInterval
|
|
26
36
|
end
|
27
37
|
end
|
28
38
|
end
|
29
|
-
|
30
|
-
|
31
|
-
|
@@ -3,95 +3,132 @@ module LogfileInterval
|
|
3
3
|
class ConfigurationError < StandardError; end
|
4
4
|
|
5
5
|
module Parser
|
6
|
-
|
7
|
-
def columns
|
8
|
-
@columns ||= {}
|
9
|
-
end
|
10
|
-
|
11
|
-
def set_regex(regex)
|
12
|
-
@regex = regex
|
13
|
-
end
|
6
|
+
attr_reader :regex
|
14
7
|
|
15
|
-
|
16
|
-
|
17
|
-
|
8
|
+
def columns
|
9
|
+
@columns ||= {}
|
10
|
+
end
|
18
11
|
|
19
|
-
|
20
|
-
|
12
|
+
def skip_columns
|
13
|
+
@skip_columns ||= []
|
14
|
+
end
|
21
15
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
end
|
16
|
+
def skip_columns_with_exceptions
|
17
|
+
@skip_columns_with_exceptions ||= []
|
18
|
+
end
|
26
19
|
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
def set_regex(regex)
|
21
|
+
@regex = regex
|
22
|
+
end
|
30
23
|
|
31
|
-
|
32
|
-
|
24
|
+
def add_column(options)
|
25
|
+
validate_column_options(options)
|
26
|
+
options = sanitize_column_options(options)
|
33
27
|
|
34
|
-
|
35
|
-
|
36
|
-
val = match_data[options[:pos]]
|
37
|
-
data[name] = convert(val, options[:conversion])
|
38
|
-
end
|
39
|
-
data
|
40
|
-
end
|
28
|
+
name = options[:name]
|
29
|
+
columns[name] = options
|
41
30
|
|
42
|
-
|
43
|
-
|
44
|
-
return record if record.valid?
|
45
|
-
return nil
|
31
|
+
define_method(name) do
|
32
|
+
@data[name]
|
46
33
|
end
|
34
|
+
end
|
47
35
|
|
48
|
-
|
49
|
-
|
50
|
-
|
36
|
+
def skip(options)
|
37
|
+
unless options[:pos] && options[:regex]
|
38
|
+
raise ConfigurationError, "skip option must include pos and regex"
|
51
39
|
end
|
52
40
|
|
41
|
+
skip_columns << { pos: options[:pos], regex: options[:regex] }
|
42
|
+
end
|
53
43
|
|
54
|
-
|
55
|
-
|
44
|
+
def skip_with_exceptions(options)
|
45
|
+
unless options[:pos] && options[:regex]
|
46
|
+
raise ConfigurationError, "skip option must include pos and regex"
|
56
47
|
end
|
57
48
|
|
58
|
-
|
49
|
+
skip_columns_with_exceptions << { pos: options[:pos], regex: options[:regex] }
|
50
|
+
end
|
59
51
|
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
end
|
67
|
-
end
|
52
|
+
def parse(line)
|
53
|
+
raise ConfigurationError, 'There must be at least 1 configured column' unless columns.any?
|
54
|
+
raise ConfigurationError, 'A regex must be set' unless regex
|
55
|
+
|
56
|
+
match_data = regex.match(line)
|
57
|
+
return nil unless match_data
|
68
58
|
|
69
|
-
|
70
|
-
|
59
|
+
data = { skip: false }
|
60
|
+
columns.each do |name, options|
|
61
|
+
val = match_data[options[:pos]]
|
62
|
+
data[name] = convert(val, options[:conversion])
|
71
63
|
end
|
72
64
|
|
73
|
-
|
74
|
-
|
75
|
-
if options
|
76
|
-
|
77
|
-
|
78
|
-
else
|
79
|
-
options.delete(:group_by)
|
80
|
-
end
|
65
|
+
skip_columns.each do |options|
|
66
|
+
val = match_data[options[:pos]]
|
67
|
+
if val =~ options[:regex]
|
68
|
+
data[:skip] = true
|
69
|
+
break
|
81
70
|
end
|
82
|
-
options[:conversion] = options.fetch(:conversion, :string)
|
83
|
-
options[:aggregator_class] = Aggregator::Base.klass(options[:aggregator])
|
84
|
-
options.delete(:aggregator)
|
85
|
-
options
|
86
71
|
end
|
87
72
|
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
73
|
+
data
|
74
|
+
end
|
75
|
+
|
76
|
+
def create_record(line)
|
77
|
+
record = new(line)
|
78
|
+
return record if record.valid?
|
79
|
+
return nil
|
80
|
+
end
|
81
|
+
|
82
|
+
def set_column_custom_options(column_name, options)
|
83
|
+
raise ArgumentError, "Invalid column name: #{column_name}" unless columns.has_key?(column_name)
|
84
|
+
columns[column_name][:custom_options] = options
|
85
|
+
end
|
86
|
+
|
87
|
+
|
88
|
+
def each(&block)
|
89
|
+
columns.each(&block)
|
90
|
+
end
|
91
|
+
|
92
|
+
private
|
93
|
+
|
94
|
+
def validate_column_options(options)
|
95
|
+
validate_option(options, :name)
|
96
|
+
validate_option(options, :pos)
|
97
|
+
validate_option(options, :aggregator)
|
98
|
+
if options[:name].to_s == 'skip'
|
99
|
+
raise ConfigurationError, "'skip' is a reserved column name"
|
100
|
+
end
|
101
|
+
unless Aggregator::Base.exist?(options[:aggregator]) || options[:aggregator] == :timestamp
|
102
|
+
raise ConfigurationError, "aggregator must be one of #{Aggregator::Base.all.join(', ')}"
|
103
|
+
end
|
104
|
+
end
|
105
|
+
|
106
|
+
def validate_option(options, key, errmsg = nil)
|
107
|
+
raise ConfigurationError, errmsg || "#{key} is a mandatory column option" unless options.has_key?(key)
|
108
|
+
end
|
109
|
+
|
110
|
+
def sanitize_column_options(options)
|
111
|
+
options[:name] = options[:name].to_sym
|
112
|
+
if options.has_key?(:group_by)
|
113
|
+
if options[:group_by].to_sym != options[:name]
|
114
|
+
options[:group_by] = options[:group_by].to_sym
|
115
|
+
else
|
116
|
+
options.delete(:group_by)
|
93
117
|
end
|
94
118
|
end
|
119
|
+
options[:conversion] = options.fetch(:conversion, :string)
|
120
|
+
options[:aggregator_class] = Aggregator::Base.klass(options[:aggregator])
|
121
|
+
options.delete(:aggregator)
|
122
|
+
options
|
123
|
+
end
|
124
|
+
|
125
|
+
def convert(val, conversion)
|
126
|
+
case conversion
|
127
|
+
when :integer then val.to_i
|
128
|
+
when :float then val.to_f
|
129
|
+
else val
|
130
|
+
end
|
131
|
+
end
|
95
132
|
end
|
96
133
|
end
|
97
134
|
end
|
data/spec/lib/logfile_spec.rb
CHANGED
@@ -78,6 +78,15 @@ module LogfileInterval
|
|
78
78
|
records.last.length_by_ip.should == 185
|
79
79
|
end
|
80
80
|
|
81
|
+
it 'skips lines matching skip options' do
|
82
|
+
records = []
|
83
|
+
@alf.each_parsed_line do |record|
|
84
|
+
records << record
|
85
|
+
end
|
86
|
+
|
87
|
+
records.size.should == 6
|
88
|
+
end
|
89
|
+
|
81
90
|
context 'without a block' do
|
82
91
|
it 'should return an enumerator' do
|
83
92
|
e = @alf.each_parsed_line
|
@@ -15,6 +15,8 @@ module LogfileInterval
|
|
15
15
|
add_column :name => 'length', :pos => 5, :aggregator => :average, :conversion => :integer
|
16
16
|
add_column :name => 'length_by_ip', :pos => 5, :aggregator => :average, :conversion => :integer, :group_by => 'ip'
|
17
17
|
|
18
|
+
skip :pos => 7, :regex => /Spinn3r/
|
19
|
+
|
18
20
|
def time
|
19
21
|
Time.strptime(self.timestamp, '%d/%b/%Y:%H:%M:%S %z')
|
20
22
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: logfile_interval
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.5
|
4
|
+
version: 2.2.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Philippe Le Rohellec
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2018-03-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: bundler
|
@@ -164,7 +164,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
164
164
|
version: '0'
|
165
165
|
requirements: []
|
166
166
|
rubyforge_project:
|
167
|
-
rubygems_version: 2.
|
167
|
+
rubygems_version: 2.6.13
|
168
168
|
signing_key:
|
169
169
|
specification_version: 4
|
170
170
|
summary: Aggregate logfile data into intervals
|